Visualization and Result Interpretation
Zaoqu Liu
2026-01-26
Source: vignettes/visualization-tutorial.Rmd
visualization-tutorial.Rmd
Introduction
This vignette demonstrates how to visualize and interpret TorchDecon results. Effective visualization is crucial for understanding cell type composition patterns and validating deconvolution accuracy.
Author: Zaoqu Liu (liuzaoqu@163.com)
Generate Example Data and Run Deconvolution
# Fix the RNG seed, then generate example data
set.seed(42)
example_data <- GenerateExampleData(
  n_cells = 1000,
  n_genes = 500,
  n_celltypes = 5,
  n_bulk_samples = 50
)

# Run deconvolution on the example bulk data
result <- RunTorchDecon(
  seurat_object = example_data$seurat,
  bulk_data = example_data$bulk_data,
  n_samples = 1000,
  num_steps = 2000,
  verbose = FALSE
)

# Predicted fractions: the sections below use the row names as sample names
# and the column names as cell types.
predictions <- result$predictions

Visualization Methods
1. Stacked Bar Plot
The stacked bar plot is the most intuitive way to visualize cell type composition across samples.
# Prepare data for plotting: carry the sample names in an explicit column.
# A factor with levels in row order keeps the x-axis in the same order as
# `predictions` instead of an alphabetical sort of the names.
plot_data <- predictions
plot_data$Sample <- factor(rownames(plot_data), levels = rownames(plot_data))

# Convert to long format: one row per (sample, cell type) pair
plot_long <- melt(
  plot_data,
  id.vars = "Sample",
  variable.name = "CellType",
  value.name = "Fraction"
)

# Define color palette: one colour per cell type column
colors <- brewer.pal(n = ncol(predictions), name = "Set2")

# Create stacked bar plot
ggplot(plot_long, aes(x = Sample, y = Fraction, fill = CellType)) +
  geom_col(width = 0.8) +
  scale_fill_manual(values = colors) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.position = "right",
    panel.grid.minor = element_blank()
  ) +
  labs(
    title = "Cell Type Composition Across Samples",
    subtitle = "TorchDecon Deconvolution Results",
    x = "Sample",
    y = "Cell Type Fraction",
    fill = "Cell Type"
  )

2. Heatmap Visualization
Heatmaps provide a comprehensive view of cell type proportions with hierarchical clustering.
# Create annotation for samples (e.g., sample groups).
# The first half of the samples is labelled Group1 and the rest Group2; the
# sizes are derived from `predictions` so the annotation always has exactly
# one row per sample (25 / 25 for the 50 example samples).
n_samples <- nrow(predictions)
annotation_row <- data.frame(
  Group = factor(rep(
    c("Group1", "Group2"),
    times = c(ceiling(n_samples / 2), floor(n_samples / 2))
  ))
)
rownames(annotation_row) <- rownames(predictions)

# Define colors for the annotation levels
ann_colors <- list(
  Group = c(Group1 = "#E41A1C", Group2 = "#377EB8")
)

# Create heatmap: rows (samples) are clustered, columns (cell types) are not
pheatmap(
  as.matrix(predictions),
  color = colorRampPalette(c("white", "steelblue", "darkblue"))(100),
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  fontsize = 10,
  fontsize_row = 8,
  main = "Cell Type Fractions Heatmap"
)

3. Box Plot Comparison
Box plots are useful for comparing cell type proportions between groups.
# Add group information.
# `plot_long` is in long format (rows are stacked cell type by cell type), so
# the group must be looked up per sample name. Splitting the long table in
# half by row position would tie the group to the cell type, not the sample.
n_samples <- nrow(predictions)
sample_group <- setNames(
  rep(
    c("Control", "Treatment"),
    times = c(ceiling(n_samples / 2), floor(n_samples / 2))
  ),
  rownames(predictions)
)
plot_long$Group <- unname(sample_group[as.character(plot_long$Sample)])

# Create box plot
ggplot(plot_long, aes(x = CellType, y = Fraction, fill = Group)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 2) +
  scale_fill_manual(values = c("Control" = "#66C2A5", "Treatment" = "#FC8D62")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  ) +
  labs(
    title = "Cell Type Composition by Group",
    x = "Cell Type",
    y = "Fraction"
  ) +
  # group = Group makes the test compare Control vs. Treatment within each
  # cell type rather than across the x-axis categories.
  stat_compare_means(
    aes(group = Group),
    method = "wilcox.test",
    label = "p.signif"
  )

4. Training History Plot
Monitoring training loss is essential for ensuring model convergence.
# Get training history.
# The plot below reads the columns `step`, `loss` and `model` from it.
history <- GetTrainingHistory(result$ensemble)

# Plot training loss, one line per model, on a log-scaled y-axis.
# `linewidth` replaces the `size` argument, which is deprecated for lines in
# ggplot2 >= 3.4.0.
ggplot(history, aes(x = step, y = loss, color = model)) +
  geom_line(alpha = 0.7, linewidth = 0.8) +
  scale_color_brewer(palette = "Set1") +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "Training Loss Over Time",
    subtitle = "Ensemble Model Training Progress",
    x = "Training Step",
    y = "Loss (log scale)",
    color = "Model"
  )

5. Correlation Plot (with Ground Truth)
When ground truth is available, visualize prediction accuracy.
# Assuming we have ground truth (e.g., from simulation)
# NOTE(review): confirm that result$simulation$cell_fractions holds the true
# fractions for the same samples that `predictions` covers.
true_fractions <- result$simulation$cell_fractions

# Align rows (samples) AND columns (cell types) with `predictions` before
# flattening, so every predicted value is paired with its own true value even
# if the two tables store the cell types in a different order.
true_aligned <- true_fractions[
  rownames(predictions),
  colnames(predictions),
  drop = FALSE
]

# Flatten matrices for correlation
pred_vec <- as.vector(as.matrix(predictions))
true_vec <- as.vector(as.matrix(true_aligned))

# Create data frame
cor_data <- data.frame(
  Predicted = pred_vec,
  True = true_vec
)

# Calculate correlation (Pearson, the default of cor())
r <- cor(pred_vec, true_vec)

# Create scatter plot; the dashed red line marks perfect agreement (y = x)
ggplot(cor_data, aes(x = True, y = Predicted)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, color = "darkblue") +
  annotate(
    "text",
    x = 0.1, y = 0.9,
    label = paste0("r = ", round(r, 3)),
    size = 5, fontface = "bold"
  ) +
  coord_fixed(ratio = 1) +
  xlim(0, 1) + ylim(0, 1) +
  theme_minimal() +
  labs(
    title = "Prediction Accuracy",
    subtitle = "Predicted vs. True Cell Type Fractions",
    x = "True Fraction",
    y = "Predicted Fraction"
  )

6. Per-Cell Type Accuracy
Evaluate accuracy for each cell type separately.
# Calculate metrics per cell type.
# The true values are looked up by the sample names of `predictions`, so both
# vectors have the same length and order (the same alignment used for the
# overall correlation plot) and nothing is silently recycled.
celltypes <- colnames(predictions)
metrics_list <- lapply(celltypes, function(ct) {
  pred <- predictions[[ct]]
  truth <- true_fractions[rownames(predictions), ct]
  data.frame(
    CellType = ct,
    RMSE = sqrt(mean((pred - truth)^2)),
    MAE = mean(abs(pred - truth)),
    Correlation = cor(pred, truth)
  )
})
metrics_df <- do.call(rbind, metrics_list)

# Plot correlation by cell type, bars ordered from highest to lowest
ggplot(
  metrics_df,
  aes(x = reorder(CellType, -Correlation), y = Correlation, fill = CellType)
) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = 0.9, linetype = "dashed", color = "red") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    title = "Prediction Accuracy by Cell Type",
    subtitle = "Pearson Correlation (dashed line = 0.9 threshold)",
    x = "Cell Type",
    y = "Correlation"
  )

7. Pie Chart for Individual Samples
Pie charts can effectively show composition for selected samples.
# Select a sample (here: the first row of the predictions)
sample_name <- rownames(predictions)[1]
sample_data <- predictions[sample_name, ]

# Prepare data: one row per cell type, plus a formatted percentage label
pie_data <- data.frame(
  CellType = names(sample_data),
  Fraction = as.numeric(sample_data)
)
pie_data$Percentage <- paste0(round(pie_data$Fraction * 100, 1), "%")

# Create pie chart: a single stacked bar drawn in polar coordinates
ggplot(pie_data, aes(x = "", y = Fraction, fill = CellType)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  scale_fill_brewer(palette = "Set2") +
  geom_text(
    aes(label = Percentage),
    position = position_stack(vjust = 0.5),
    color = "white", fontface = "bold"
  ) +
  theme_void() +
  labs(
    title = paste("Cell Type Composition:", sample_name),
    fill = "Cell Type"
  )

Advanced Visualization
Ensemble Model Comparison
Compare predictions from individual models in the ensemble.
# Get individual predictions (per-model results plus their average)
all_preds <- PredictFractions(
  result$ensemble,
  example_data$bulk_data,
  return_all = TRUE,
  verbose = FALSE
)

# Prepare comparison data: each prediction table flattened to one column
comparison <- data.frame(
  M256 = as.vector(as.matrix(all_preds$individual$m256)),
  M512 = as.vector(as.matrix(all_preds$individual$m512)),
  M1024 = as.vector(as.matrix(all_preds$individual$m1024)),
  Ensemble = as.vector(as.matrix(all_preds$average))
)

# Create pairs plot: smoothed scatter plots below the diagonal, correlation
# coefficients above it
pairs(
  comparison,
  lower.panel = panel.smooth,
  upper.panel = function(x, y, ...) {
    # Switch to unit coordinates so the label can be centred at (0.5, 0.5),
    # and restore the previous coordinates when the panel function exits so
    # the change does not leak into the remaining panels.
    old_par <- par(usr = c(0, 1, 0, 1))
    on.exit(par(old_par), add = TRUE)
    r <- round(cor(x, y), 3)
    text(0.5, 0.5, paste0("r=", r), cex = 1.5)
  },
  main = "Model Agreement in Ensemble"
)

Summary
This vignette demonstrated various visualization techniques for TorchDecon results:
- Stacked bar plots - Overview of composition across samples
- Heatmaps - Clustered visualization with annotations
- Box plots - Group comparisons
- Training curves - Model convergence monitoring
- Correlation plots - Accuracy assessment
- Per-cell type metrics - Detailed performance evaluation
- Pie charts - Individual sample composition
Effective visualization helps validate deconvolution results and communicate findings clearly.
Package Author: Zaoqu Liu
Contact: liuzaoqu@163.com
GitHub: https://github.com/Zaoqu-Liu/TorchDecon