Introduction
CytoSPACER is designed for high performance through C++ implementations and parallel processing. This vignette provides guidance on optimizing performance for large datasets.
Performance Architecture
CytoSPACER uses several strategies for optimal performance:
- C++ Backend: Core algorithms implemented in C++ via Rcpp
- Parallel Processing: Multi-core support via the `future` framework
- Chunked Processing: Memory-efficient handling of large datasets
- Sparse Matrix Support: Efficient handling of sparse data
Benchmarking
C++ vs R Implementation
The C++ implementations provide significant speedups:
# Create test data: simulated single-cell (genes x cells) and spatial
# (genes x spots) expression matrices sharing one gene universe.
set.seed(42)
n_genes <- 500
n_cells <- 1000
n_spots <- 200
# Helper: "Gene1", "Gene2", ... labels for dimnames.
make_labels <- function(prefix, n) paste0(prefix, seq_len(n))
sc_data <- matrix(
  rnorm(n_genes * n_cells),
  nrow = n_genes, ncol = n_cells,
  dimnames = list(make_labels("Gene", n_genes), make_labels("Cell", n_cells))
)
st_data <- matrix(
  rnorm(n_genes * n_spots),
  nrow = n_genes, ncol = n_spots,
  dimnames = list(make_labels("Gene", n_genes), make_labels("Spot", n_spots))
)
# Benchmark correlation computation
# Time the C++ path: Pearson cost matrix between every cell and every spot.
time_cpp <- system.time({
cost_cpp <- compute_cost_matrix(sc_data, st_data, method = "pearson", use_cpp = TRUE)
})
# Same computation via the pure-R fallback, for comparison.
time_r <- system.time({
cost_r <- compute_cost_matrix(sc_data, st_data, method = "pearson", use_cpp = FALSE)
})
cat("C++ implementation:", round(time_cpp["elapsed"], 3), "seconds\n")
#> C++ implementation: 1.222 seconds
cat("R implementation:", round(time_r["elapsed"], 3), "seconds\n")
#> R implementation: 0.048 seconds
# NOTE(review): in this captured run the R path was faster and the ratio
# rounds to 0x — presumably first-call overhead or a problem too small to
# benefit from C++. Verify the claimed speedup on a larger dataset.
cat("Speedup:", round(time_r["elapsed"] / time_cpp["elapsed"], 1), "x\n")
#> Speedup: 0 x

LAP Solver Performance
# Create cost matrices of different sizes and time the LAP solver on each.
sizes <- c(100, 200, 500)
times <- vapply(sizes, function(n) {
  # Random dense square cost matrix of side n.
  cost <- matrix(runif(n * n), nrow = n, ncol = n)
  # Wall-clock seconds for a single assignment solve.
  system.time(assignment <- solve_lap(cost))[["elapsed"]]
}, numeric(1))
# Display results as a small summary table.
results_df <- data.frame(
  Size = paste0(sizes, "x", sizes),
  Time_sec = round(times, 4)
)
print(results_df)
#> Size Time_sec
#> 1 100x100 0.000
#> 2 200x200 0.003
#> 3 500x500 0.150

Memory Optimization
Sparse Matrix Handling
CytoSPACER efficiently handles sparse matrices:
library(Matrix)
# Create sparse data (typical for scRNA-seq): 10% non-zero entries.
sparse_data <- rsparsematrix(1000, 5000, density = 0.1)
rownames(sparse_data) <- paste0("Gene", 1:1000)
colnames(sparse_data) <- paste0("Cell", 1:5000)
# Check memory usage: densify once only to measure the dense footprint.
dense_size <- object.size(as.matrix(sparse_data))
sparse_size <- object.size(sparse_data)
# NOTE(review): units = "MB" requests SI megabytes, yet the captured output
# shows "Mb" (1024^2-based) — confirm which convention the rendering R
# version applied.
cat("Dense matrix size:", format(dense_size, units = "MB"), "\n")
#> Dense matrix size: 38.5 Mb
cat("Sparse matrix size:", format(sparse_size, units = "MB"), "\n")
#> Sparse matrix size: 6.1 Mb
# Fractional saving of sparse vs dense representation, as a percentage.
cat("Memory savings:", round((1 - as.numeric(sparse_size)/as.numeric(dense_size)) * 100), "%\n")
#> Memory savings: 84 %

Downsampling
For very large scRNA-seq datasets, downsampling reduces memory and computation:
# Create large count matrix: Poisson(10) counts for 500 genes x 10,000 cells.
large_counts <- matrix(rpois(500 * 10000, lambda = 10), nrow = 500, ncol = 10000)
rownames(large_counts) <- paste0("Gene", 1:500)
colnames(large_counts) <- paste0("Cell", 1:10000)
# Downsample to target count: reduce every cell to ~1500 total counts
# (seed fixed for reproducibility).
time_downsample <- system.time({
downsampled <- downsample_expression(large_counts, target_count = 1500, seed = 42)
})
cat("Downsampling time:", round(time_downsample["elapsed"], 3), "seconds\n")
#> Downsampling time: 0.75 seconds
# Compare total counts of the first cell before and after downsampling.
cat("Original total counts:", sum(large_counts[,1]), "\n")
#> Original total counts: 5031
cat("Downsampled total counts:", sum(downsampled[,1]), "\n")
#> Downsampled total counts: 1500

Parallel Processing
Parallel LAP Solving
# Create multiple cost matrices (four 200x200 problems).
cost_list <- lapply(1:4, function(i) {
matrix(runif(200 * 200), nrow = 200, ncol = 200)
})
# Sequential processing: solve each problem in turn.
time_seq <- system.time({
results_seq <- lapply(cost_list, solve_lap)
})
# Parallel processing: same problems across 2 workers.
time_par <- system.time({
results_par <- solve_lap_parallel(cost_list, n_workers = 2, progress = FALSE)
})
cat("Sequential time:", round(time_seq["elapsed"], 3), "seconds\n")
#> Sequential time: 0.012 seconds
# NOTE(review): parallel is far slower in this captured run (1.749 s vs
# 0.012 s) — presumably worker startup overhead dominating these tiny
# problems. Parallelism only pays off for larger workloads; confirm.
cat("Parallel time:", round(time_par["elapsed"], 3), "seconds\n")
#> Parallel time: 1.749 seconds

Using Future Plans
library(future)
# Each plan() call below replaces the previous one; only the plan in effect
# when run_cytospace() is called matters. The three calls are shown for
# illustration.
# Multisession (recommended for most cases)
plan(multisession, workers = 4)
# Multicore (Unix/macOS only, more efficient)
plan(multicore, workers = 4)
# Sequential (for debugging)
plan(sequential)
# Run CytoSPACER with custom plan
# (cell_types and coordinates are assumed to be defined earlier in the
# analysis — not shown in this chunk.)
results <- run_cytospace(
sc_data = sc_data,
cell_types = cell_types,
st_data = st_data,
coordinates = coordinates,
n_workers = NULL # Uses current plan
)
# Reset to default
plan(sequential)

Chunked Processing
For very large datasets, CytoSPACER uses chunked processing:
# Large dataset example
# chunk_size bounds how many cells are held in memory per batch;
# n_workers parallelizes across chunks.
results <- run_cytospace(
sc_data = sc_data,
cell_types = cell_types,
st_data = st_data,
coordinates = coordinates,
chunk_size = 5000, # Process 5000 cells at a time
n_workers = 4
)

Optimization Guidelines
Dataset Size Recommendations
| Dataset Size | Recommended Settings |
|---|---|
| < 10,000 cells | Default settings |
| 10,000 - 50,000 cells | `n_workers = 4, chunk_size = 10000` |
| 50,000 - 200,000 cells | `n_workers = 8, chunk_size = 5000, downsample = TRUE` |
| > 200,000 cells | Subsample reference, `n_workers = max, chunk_size = 5000` |
Memory Guidelines
# Estimate memory requirements
# Rough estimate of peak memory for a CytoSPACER run, assuming dense
# double-precision (8 bytes/value) storage for the cost matrix and both
# expression matrices. Real usage will be lower with sparse inputs.
#
# Arguments:
#   n_cells - number of single cells (scRNA-seq columns)
#   n_spots - number of spatial spots (ST columns)
#   n_genes - number of genes (rows of both expression matrices)
#
# Prints a breakdown and invisibly returns the estimates (in MB) as a
# named list, so callers can also use the numbers programmatically.
estimate_memory <- function(n_cells, n_spots, n_genes) {
  # Cost matrix: n_cells x n_spots (double precision)
  cost_matrix_mb <- (n_cells * n_spots * 8) / (1024^2)
  # Expression matrices
  sc_matrix_mb <- (n_genes * n_cells * 8) / (1024^2)
  st_matrix_mb <- (n_genes * n_spots * 8) / (1024^2)
  total_mb <- cost_matrix_mb + sc_matrix_mb + st_matrix_mb
  cat("Estimated memory requirements:\n")
  cat(" Cost matrix:", round(cost_matrix_mb), "MB\n")
  cat(" scRNA-seq matrix:", round(sc_matrix_mb), "MB\n")
  cat(" ST matrix:", round(st_matrix_mb), "MB\n")
  cat(" Total (approx):", round(total_mb), "MB\n")
  # Invisible so interactive use still only shows the printed breakdown.
  invisible(list(
    cost_matrix_mb = cost_matrix_mb,
    sc_matrix_mb = sc_matrix_mb,
    st_matrix_mb = st_matrix_mb,
    total_mb = total_mb
  ))
}
# Example: 50,000 cells, 5,000 spots, 20,000 genes
estimate_memory(50000, 5000, 20000)
#> Estimated memory requirements:
#> Cost matrix: 1907 MB
#> scRNA-seq matrix: 7629 MB
#> ST matrix: 763 MB
#> Total (approx): 10300 MB

Best Practices
- Use sparse matrices when possible (typical scRNA-seq data is >90% zeros)
- Downsample scRNA-seq to ~1500 counts per cell before analysis
- Subset genes to highly variable genes if memory is limited
- Monitor memory during analysis:
# Check current memory usage
gc_info <- gc() # triggers a collection and returns the usage matrix
# Column 2 is the "(Mb)" used column; summing its rows (Ncells + Vcells)
# gives total MB currently in use.
mem_used <- sum(gc_info[, 2])
cat("Current memory used:", round(mem_used), "MB\n")
#> Current memory used: 217 MB

- Use appropriate chunk size:
- Larger chunks = faster but more memory
- Smaller chunks = slower but less memory
Profiling
Time Profiling
# Profile individual steps
# Small synthetic counts: 100 genes, 50 cells, 100 spots. Note that
# n_cells <= n_spots, satisfying the LAP constraint noted below.
set.seed(42)
n_cells <- 50
n_spots <- 100
sc_test <- matrix(rpois(100 * n_cells, 10), nrow = 100)
st_test <- matrix(rpois(100 * n_spots, 50), nrow = 100)
rownames(sc_test) <- rownames(st_test) <- paste0("Gene", 1:100)
colnames(sc_test) <- paste0("Cell", 1:n_cells)
colnames(st_test) <- paste0("Spot", 1:n_spots)
# Normalization
time_norm <- system.time({
sc_norm <- normalize_expression(sc_test)
st_norm <- normalize_expression(st_test)
})
# Cost matrix
time_cost <- system.time({
cost <- compute_cost_matrix(sc_norm, st_norm, method = "pearson")
})
# LAP solving (rows must be <= cols)
time_lap <- system.time({
assignment <- solve_lap(cost)
})
# Report per-step elapsed times.
cat("Timing breakdown:\n")
#> Timing breakdown:
cat(" Normalization:", round(time_norm["elapsed"], 4), "s\n")
#> Normalization: 0.001 s
cat(" Cost matrix:", round(time_cost["elapsed"], 4), "s\n")
#> Cost matrix: 0.007 s
cat(" LAP solving:", round(time_lap["elapsed"], 4), "s\n")
#> LAP solving: 0.004 s

Troubleshooting Performance Issues
Issue: Out of Memory
# The "..." below is a placeholder for the required run_cytospace()
# arguments (sc_data, cell_types, st_data, coordinates).
# Solution 1: Reduce chunk size
results <- run_cytospace(..., chunk_size = 2000)
# Solution 2: Downsample more aggressively
results <- run_cytospace(..., downsample_target = 1000)
# Solution 3: Subset to highly variable genes
# (VariableFeatures() requires a Seurat object prepared beforehand.)
hvg <- VariableFeatures(seurat_obj)[1:2000]
sc_subset <- sc_data[hvg, ]
st_subset <- st_data[hvg, ]

Issue: Slow Performance
# The "..." below is a placeholder for the required arguments.
# Solution 1: Increase workers
results <- run_cytospace(..., n_workers = 8)
# Solution 2: Use larger chunks (if memory allows)
results <- run_cytospace(..., chunk_size = 20000)
# Solution 3: Use C++ implementations (default)
cost <- compute_cost_matrix(..., use_cpp = TRUE)

Session Info
# Record the R version and package versions used to render this vignette.
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#>
#> locale:
#> [1] C
#>
#> time zone: Asia/Shanghai
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] future_1.69.0 Matrix_1.7-4 CytoSPACER_1.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] gtable_0.3.6 future.apply_1.20.1 jsonlite_2.0.0
#> [4] dplyr_1.1.4 compiler_4.4.0 Rcpp_1.1.1
#> [7] tidyselect_1.2.1 parallel_4.4.0 dichromat_2.0-0.1
#> [10] jquerylib_0.1.4 globals_0.18.0 systemfonts_1.3.1
#> [13] scales_1.4.0 textshaping_1.0.4 yaml_2.3.12
#> [16] fastmap_1.2.0 lattice_0.22-7 ggplot2_4.0.1
#> [19] R6_2.6.1 generics_0.1.4 knitr_1.51
#> [22] htmlwidgets_1.6.4 tibble_3.3.1 desc_1.4.3
#> [25] pillar_1.11.1 bslib_0.9.0 RColorBrewer_1.1-3
#> [28] rlang_1.1.7 cachem_1.1.0 xfun_0.56
#> [31] S7_0.2.1 fs_1.6.6 sass_0.4.10
#> [34] otel_0.2.0 cli_3.6.5 progressr_0.18.0
#> [37] magrittr_2.0.4 pkgdown_2.1.3 digest_0.6.39
#> [40] grid_4.4.0 lifecycle_1.0.5 vctrs_0.7.1
#> [43] evaluate_1.0.5 glue_1.8.0 data.table_1.18.0
#> [46] farver_2.1.2 listenv_0.10.0 codetools_0.2-20
#> [49] ragg_1.5.0 parallelly_1.46.1 rmarkdown_2.30
#> [52] pkgconfig_2.0.3 tools_4.4.0 htmltools_0.5.9