Skip to contents

Introduction

This vignette demonstrates various visualization techniques for scTenifoldKnk results. All plots use base R graphics for maximum compatibility.

Run Analysis

library(scTenifoldKnk)
library(Matrix)

# Load and run analysis
data_path <- system.file("single-cell/example.csv", package = "scTenifoldKnk")
scRNAseq <- as.matrix(read.csv(data_path, row.names = 1))

result <- scTenifoldKnk(
  countMatrix = scRNAseq,
  gKO = "G100",
  qc_minLSize = 0,
  nc_nNet = 5,
  nc_nCells = 100,
  nc_nComp = 3,
  td_K = 3,
  verbose = FALSE
)

1. Volcano Plot

The volcano plot shows fold change vs. statistical significance.

dr <- result$diffRegulation
dr$log10_padj <- -log10(dr$p.adj + 1e-300)
dr$significant <- dr$p.adj < 0.05
dr$is_ko <- dr$gene == "G100"

# Color scheme
colors <- ifelse(dr$is_ko, "#E74C3C",
                 ifelse(dr$significant, "#3498DB", "#BDC3C7"))

# Create plot
par(mar = c(5, 5, 4, 2))
plot(dr$FC, dr$log10_padj,
     pch = 19,
     col = colors,
     cex = ifelse(dr$is_ko, 2.5, 1),
     xlab = "Fold Change (FC)",
     ylab = expression(-log[10](adjusted~p-value)),
     main = "Volcano Plot: Virtual Knockout Effect",
     cex.lab = 1.3,
     cex.axis = 1.1,
     cex.main = 1.4)

# Add threshold lines
abline(h = -log10(0.05), lty = 2, col = "gray40", lwd = 1.5)
abline(v = 2, lty = 2, col = "gray40", lwd = 1.5)

# Label top genes
top_genes <- head(dr[order(dr$p.adj), ], 5)
text(top_genes$FC, -log10(top_genes$p.adj + 1e-300),
     labels = top_genes$gene,
     pos = 4, cex = 0.9, col = "#2C3E50", font = 2)

# Legend
legend("topright",
       legend = c("Knockout gene (G100)", "Significant (FDR<0.05)", "Not significant"),
       col = c("#E74C3C", "#3498DB", "#BDC3C7"),
       pch = 19,
       pt.cex = c(2, 1.2, 1.2),
       bty = "n",
       cex = 1.1)

# Add annotation box
text(max(dr$FC) * 0.7, max(dr$log10_padj) * 0.9,
     paste("Total genes:", nrow(dr), "\n",
           "Significant:", sum(dr$significant)),
     adj = 0, cex = 1, col = "#2C3E50")

2. Distance Distribution Plot

Visualize the distribution of WT-KO distances.

par(mar = c(5, 5, 4, 2))

# Histogram with density
hist(dr$distance, breaks = 30, 
     col = "#3498DB", border = "white",
     probability = TRUE,
     main = "Distribution of WT-KO Distances",
     xlab = "Euclidean Distance",
     ylab = "Density",
     cex.lab = 1.3,
     cex.axis = 1.1,
     cex.main = 1.4)

# Add density curve
dens <- density(dr$distance)
lines(dens, col = "#E74C3C", lwd = 3)

# Mark knockout gene
ko_dist <- dr$distance[dr$gene == "G100"]
abline(v = ko_dist, col = "#E74C3C", lwd = 2, lty = 2)

# Add rug plot
rug(dr$distance, col = "#2C3E50", lwd = 0.5)

# Legend
legend("topright",
       legend = c("Density curve", paste("G100 distance:", round(ko_dist, 3))),
       col = c("#E74C3C", "#E74C3C"),
       lty = c(1, 2),
       lwd = c(3, 2),
       bty = "n",
       cex = 1.1)

3. Ranking Plot

Show gene rankings by fold change or p-value.

# Sort by FC
dr_sorted <- dr[order(dr$FC, decreasing = TRUE), ]
dr_sorted$rank <- 1:nrow(dr_sorted)

par(mar = c(5, 5, 4, 2))

# Create barplot for top genes
top_n <- 20
top_data <- dr_sorted[1:top_n, ]

# Colors based on significance
bar_colors <- ifelse(top_data$gene == "G100", "#E74C3C",
                     ifelse(top_data$p.adj < 0.05, "#3498DB", "#95A5A6"))

bp <- barplot(top_data$FC,
              col = bar_colors,
              border = NA,
              main = paste("Top", top_n, "Genes by Fold Change"),
              ylab = "Fold Change (FC)",
              cex.lab = 1.3,
              cex.axis = 1.1,
              cex.main = 1.4,
              ylim = c(0, max(top_data$FC) * 1.15))

# Add gene labels
text(bp, top_data$FC + max(top_data$FC) * 0.02,
     labels = top_data$gene,
     srt = 45, adj = 0, cex = 0.8, xpd = TRUE)

# Legend
legend("topright",
       legend = c("Knockout gene", "Significant (FDR<0.05)", "Not significant"),
       fill = c("#E74C3C", "#3498DB", "#95A5A6"),
       border = NA,
       bty = "n",
       cex = 1)

4. Network Heatmap

Visualize the gene regulatory network.

# Get networks
WT_net <- as.matrix(result$tensorNetworks$WT)
KO_net <- as.matrix(result$tensorNetworks$KO)

# Select top affected genes for visualization
top_genes <- head(dr[order(dr$FC, decreasing = TRUE), "gene"], 15)
WT_sub <- WT_net[top_genes, top_genes]
KO_sub <- KO_net[top_genes, top_genes]

# Create color palette
n_colors <- 100
color_palette <- colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(n_colors)

# Set up layout
par(mfrow = c(1, 2), mar = c(5, 5, 4, 2))

# WT Network
image(1:nrow(WT_sub), 1:ncol(WT_sub), WT_sub,
      col = color_palette,
      xlab = "", ylab = "",
      main = "Wild-Type Network",
      axes = FALSE,
      cex.main = 1.3)
axis(1, at = 1:nrow(WT_sub), labels = rownames(WT_sub), las = 2, cex.axis = 0.7)
axis(2, at = 1:ncol(WT_sub), labels = colnames(WT_sub), las = 2, cex.axis = 0.7)

# KO Network
image(1:nrow(KO_sub), 1:ncol(KO_sub), KO_sub,
      col = color_palette,
      xlab = "", ylab = "",
      main = "Knockout Network (G100)",
      axes = FALSE,
      cex.main = 1.3)
axis(1, at = 1:nrow(KO_sub), labels = rownames(KO_sub), las = 2, cex.axis = 0.7)
axis(2, at = 1:ncol(KO_sub), labels = colnames(KO_sub), las = 2, cex.axis = 0.7)

5. Manifold Alignment Plot

Visualize the low-dimensional embedding.

# Get manifold alignment
MA <- result$manifoldAlignment
n_genes <- nrow(MA) / 2

# Extract embeddings
WT_embed <- MA[1:n_genes, ]
KO_embed <- MA[(n_genes+1):(2*n_genes), ]

# Get gene names
gene_names <- gsub("^X_", "", rownames(MA)[1:n_genes])

# Compute distances for coloring
distances <- sqrt(rowSums((WT_embed - KO_embed)^2))

# Create color gradient based on distance
color_gradient <- colorRampPalette(c("#2ECC71", "#F1C40F", "#E74C3C"))(100)
dist_normalized <- (distances - min(distances)) / (max(distances) - min(distances))
point_colors <- color_gradient[ceiling(dist_normalized * 99) + 1]

par(mar = c(5, 5, 4, 6))

# Plot WT points
plot(WT_embed[, 1], WT_embed[, 2],
     pch = 19, col = point_colors, cex = 1.5,
     xlab = "NLMA Dimension 1",
     ylab = "NLMA Dimension 2",
     main = "Manifold Alignment: WT vs KO",
     cex.lab = 1.3,
     cex.axis = 1.1,
     cex.main = 1.4)

# Plot KO points
points(KO_embed[, 1], KO_embed[, 2],
       pch = 17, col = point_colors, cex = 1.2)

# Draw lines connecting WT-KO pairs for top genes
top_idx <- order(distances, decreasing = TRUE)[1:10]
for (i in top_idx) {
  lines(c(WT_embed[i, 1], KO_embed[i, 1]),
        c(WT_embed[i, 2], KO_embed[i, 2]),
        col = adjustcolor(point_colors[i], alpha.f = 0.7), lwd = 2)
}

# Label knockout gene
ko_idx <- which(gene_names == "G100")
if (length(ko_idx) > 0) {
  points(WT_embed[ko_idx, 1], WT_embed[ko_idx, 2],
         pch = 19, col = "black", cex = 3)
  points(KO_embed[ko_idx, 1], KO_embed[ko_idx, 2],
         pch = 17, col = "black", cex = 2.5)
  text(WT_embed[ko_idx, 1], WT_embed[ko_idx, 2],
       "G100", pos = 3, cex = 1, font = 2)
}

# Add legend
legend("topright",
       legend = c("WT position", "KO position", "G100"),
       pch = c(19, 17, 19),
       col = c("#3498DB", "#E74C3C", "black"),
       pt.cex = c(1.5, 1.2, 2),
       bty = "n",
       cex = 1,
       inset = c(-0.15, 0),
       xpd = TRUE)

# Add color bar
par(xpd = TRUE)
color_bar_x <- max(WT_embed[, 1]) + diff(range(WT_embed[, 1])) * 0.15
color_bar_y <- seq(min(WT_embed[, 2]), max(WT_embed[, 2]), length.out = 100)
for (i in 1:99) {
  rect(color_bar_x, color_bar_y[i], color_bar_x + 0.3, color_bar_y[i+1],
       col = color_gradient[i], border = NA)
}
text(color_bar_x + 0.4, min(color_bar_y), round(min(distances), 2), cex = 0.8, adj = 0)
text(color_bar_x + 0.4, max(color_bar_y), round(max(distances), 2), cex = 0.8, adj = 0)
text(color_bar_x + 0.15, max(color_bar_y) + diff(range(color_bar_y)) * 0.1, 
     "Distance", cex = 0.9, font = 2)

6. P-value Distribution

Check the distribution of p-values.

par(mfrow = c(1, 2), mar = c(5, 5, 4, 2))

# Raw p-values
hist(dr$p.value, breaks = 50,
     col = "#3498DB", border = "white",
     main = "Raw P-value Distribution",
     xlab = "P-value",
     ylab = "Frequency",
     cex.lab = 1.2,
     cex.main = 1.3)
abline(v = 0.05, col = "#E74C3C", lwd = 2, lty = 2)

# Adjusted p-values
hist(dr$p.adj, breaks = 50,
     col = "#2ECC71", border = "white",
     main = "Adjusted P-value Distribution",
     xlab = "FDR-adjusted P-value",
     ylab = "Frequency",
     cex.lab = 1.2,
     cex.main = 1.3)
abline(v = 0.05, col = "#E74C3C", lwd = 2, lty = 2)

7. Summary Statistics Table

# Create summary
summary_stats <- data.frame(
  Metric = c("Total genes", 
             "Significant genes (FDR<0.05)",
             "Significant genes (FDR<0.01)",
             "Mean FC (all genes)",
             "Max FC",
             "Knockout gene rank",
             "Knockout gene FC",
             "Knockout gene p.adj"),
  Value = c(nrow(dr),
            sum(dr$p.adj < 0.05),
            sum(dr$p.adj < 0.01),
            round(mean(dr$FC), 3),
            round(max(dr$FC), 3),
            which(dr$gene == "G100"),
            round(dr$FC[dr$gene == "G100"], 3),
            formatC(dr$p.adj[dr$gene == "G100"], format = "e", digits = 2))
)

knitr::kable(summary_stats, 
             caption = "Summary Statistics",
             align = c("l", "r"))
Summary Statistics
Metric Value
Total genes 100
Significant genes (FDR<0.05) 1
Significant genes (FDR<0.01) 1
Mean FC (all genes) 87.289
Max FC 8629.867
Knockout gene rank 1
Knockout gene FC 8629.867
Knockout gene p.adj 0.00e+00

Session Info

sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
#> 
#> locale:
#> [1] C
#> 
#> time zone: Asia/Shanghai
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] Matrix_1.7-4        scTenifoldKnk_2.1.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] cli_3.6.5         knitr_1.51        rlang_1.1.7       xfun_0.56        
#>  [5] otel_0.2.0        textshaping_1.0.4 jsonlite_2.0.0    htmltools_0.5.9  
#>  [9] ragg_1.5.0        sass_0.4.10       rmarkdown_2.30    grid_4.4.0       
#> [13] evaluate_1.0.5    jquerylib_0.1.4   MASS_7.3-65       fastmap_1.2.0    
#> [17] yaml_2.3.12       lifecycle_1.0.5   compiler_4.4.0    RSpectra_0.16-2  
#> [21] fs_1.6.6          htmlwidgets_1.6.4 Rcpp_1.1.1        lattice_0.22-7   
#> [25] systemfonts_1.3.1 digest_0.6.39     R6_2.6.1          parallel_4.4.0   
#> [29] bslib_0.9.0       tools_4.4.0       pkgdown_2.1.3     cachem_1.1.0     
#> [33] desc_1.4.3