Introduction
This vignette covers advanced NOVA usage including:
- Performance optimization
- Custom LR databases
- Parallel processing
- Integration with other tools
- Programmatic workflows
Performance Optimization
Parallel Processing
NOVA supports parallel computation via the future package:
# Enable parallel processing
library(future)
# Use all cores minus 1, but never fewer than one worker: on a single-core
# machine, or when the core count cannot be determined (detectCores() returns
# NA), "cores - 1" would be 0 or NA, neither of which is a usable worker count.
n_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
plan(multisession, workers = n_workers)
# Run analysis (automatically parallelized)
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human"
)
# Reset to sequential
plan(sequential)
NOVA Options
Configure global behavior:
# Inspect the current settings. The `default` passed to getOption() is what
# gets reported when the option has not been set.
verbose_setting <- getOption("nova.verbose", default = TRUE)
cat("Verbose:", verbose_setting, "\n")
#> Verbose: TRUE
parallel_setting <- getOption("nova.parallel", default = TRUE)
cat("Parallel:", parallel_setting, "\n")
#> Parallel: TRUE
workers_setting <- getOption(
  "nova.workers",
  default = parallel::detectCores() - 1
)
cat("Workers:", workers_setting, "\n")
#> Workers: 11
# Customize options
options(
nova.verbose = TRUE, # Print progress messages
nova.parallel = TRUE, # Enable parallelization
nova.workers = 4 # Number of parallel workers
)
Memory Efficiency
For large datasets, use sparse matrices:
# Compare the memory footprint of the same all-zero 1000 x 1000 matrix stored
# densely and sparsely
expr_example <- Matrix::Matrix(
  matrix(0, nrow = 1000, ncol = 1000),
  sparse = TRUE
)
dense_bytes <- object.size(as.matrix(expr_example))
cat("Dense size:", dense_bytes, "bytes\n")
#> Dense size: 8000216 bytes
sparse_bytes <- object.size(expr_example)
cat("Sparse size:", sparse_bytes, "bytes\n")
#> Sparse size: 9240 bytes
# NOVA automatically handles sparse matrices efficiently
Custom Ligand-Receptor Database
Creating Custom Database
# Build a small ligand-receptor table by hand; row i pairs ligand[i] with
# receptor[i]
custom_lr <- data.table::data.table(
  ligand = c("CXCL12", "CCL2", "IL6", "TGFB1", "VEGFA"),
  receptor = c("CXCR4", "CCR2", "IL6R", "TGFBR1", "KDR"),
  category = rep(
    c("chemokine", "cytokine", "growth_factor"),
    times = c(2, 1, 2)
  ),
  # A single value is recycled to every row
  source = "custom"
)
print(custom_lr)
#> ligand receptor category source
#> <char> <char> <char> <char>
#> 1: CXCL12 CXCR4 chemokine custom
#> 2: CCL2 CCR2 chemokine custom
#> 3: IL6 IL6R cytokine custom
#> 4: TGFB1 TGFBR1 growth_factor custom
#> 5: VEGFA KDR growth_factor custom
Using Custom Database
# Use custom database in analysis
result <- ExtractEdges(
expression = expr,
annotation = annotation,
species = "human",
lr_database = custom_lr # Custom database
)
Extending Built-in Database
# Start from the built-in database
builtin_lr <- GetLRDatabase("lrc2p")
# Custom pairs to append
custom_pairs <- data.table::data.table(
  ligand = c("CUSTOM_LIG1", "CUSTOM_LIG2"),
  receptor = c("CUSTOM_REC1", "CUSTOM_REC2")
)
# Reduce the built-in table to the two columns the custom table has, then
# stack the two tables
builtin_pairs <- builtin_lr[, .(ligand, receptor)]
extended_lr <- rbind(builtin_pairs, custom_pairs, fill = TRUE)
cat("Extended database size:", nrow(extended_lr), "pairs\n")
#> Extended database size: 2295 pairs
Filtering and Subsetting
Advanced Filtering
# Simulate a small example: a 100 x 200 matrix of non-negative values
set.seed(42)
n_genes <- 100
n_cells <- 200
expr <- matrix(
  abs(rnorm(n_genes * n_cells)),
  nrow = n_genes,
  ncol = n_cells
)
# Rows are named after the first 50 unique ligands followed by the first 50
# unique receptors of the built-in database; columns are Cell1 ... Cell200
lr_db <- GetLRDatabase("lrc2p")
dimnames(expr) <- list(
  c(unique(lr_db$ligand)[1:50], unique(lr_db$receptor)[1:50]),
  paste0("Cell", 1:n_cells)
)
expr <- Matrix::Matrix(expr, sparse = TRUE)
# Randomly assign each cell to cluster A, B or C
clusters <- sample(c("A", "B", "C"), n_cells, replace = TRUE)
annotation <- data.frame(cell = colnames(expr), cluster = clusters)
result <- ExtractEdges(expr, annotation, species = "human")
# Filter by multiple criteria
filtered <- FilterEdges(
  result,
  min_pct = 0.1,
  min_mean = 0.5,
  min_specificity = 0.2
)
cat("Original edges:", nrow(result$edges), "\n")
cat("Filtered edges:", nrow(filtered$edges), "\n")
Subsetting by Cluster
# Get edges for specific cluster pairs. The three calls differ only in which
# of `sending` / `target` is supplied: both, only the sender, only the target.
edges_A_to_B <- GetEdges(result, sending = "A", target = "B")
edges_from_A <- GetEdges(result, sending = "A")
edges_to_C <- GetEdges(result, target = "C")
# Report the number of rows each query returned
cat("A -> B:", nrow(edges_A_to_B), "edges\n")
cat("A -> any:", nrow(edges_from_A), "edges\n")
cat("any -> C:", nrow(edges_to_C), "edges\n")
Programmatic Workflows
Batch Processing
# Process multiple samples
samples <- c("sample1", "sample2", "sample3")
# Run each sample inside a function so the per-sample data (expression matrix,
# annotation) stay local instead of overwriting same-named objects in the
# workspace. The returned list is named by sample.
results <- lapply(stats::setNames(samples, samples), function(sample_id) {
  # Load data
  expr <- readRDS(paste0(sample_id, "_expression.rds"))
  ann <- read.csv(paste0(sample_id, "_annotation.csv"))
  # Run analysis
  res <- ExtractEdges(
    expression = expr,
    annotation = ann,
    species = "human"
  )
  cat("Processed", sample_id, ":", nrow(res$edges), "edges\n")
  res
})
# Combine results: stack the per-sample edge tables and record which sample
# each row came from. `idcol` fills the new column from the list names.
all_edges <- data.table::rbindlist(
  lapply(results, function(res) res$edges),
  idcol = "sample"
)
# rbindlist() puts the id column first; move `sample` to the end
data.table::setcolorder(
  all_edges,
  c(setdiff(names(all_edges), "sample"), "sample")
)
Custom Analysis Pipeline
# Run NOVA edge extraction on a Seurat object and attach the results to it.
#
# Arguments:
#   seurat_obj   Object handed to SeuratToNOVA() and AddNOVAResults().
#   cluster_col  Forwarded to SeuratToNOVA() as `cluster_col`.
#   species      Forwarded to ExtractEdges() as `species`.
#   ...          Further arguments forwarded unchanged to ExtractEdges().
#
# Returns a list with two elements: `seurat`, the value returned by
# AddNOVAResults(), and `nova`, the value returned by ExtractEdges().
analyze_communication <- function(seurat_obj,
                                  cluster_col = "cell_type",
                                  species = "human",
                                  ...) {
  # The converted input supplies the expression and annotation arguments
  converted <- SeuratToNOVA(seurat_obj, cluster_col = cluster_col)
  edge_result <- ExtractEdges(
    expression = converted$expression,
    annotation = converted$annotation,
    species = species,
    ...
  )
  list(
    seurat = AddNOVAResults(seurat_obj, edge_result),
    nova = edge_result
  )
}
# Use the pipeline
output <- analyze_communication(
seurat_obj,
cluster_col = "cell_type",
species = "mouse",
min_pct = 0.1
)
Integration with Other Tools
Export for Cytoscape
# Export edges for Cytoscape visualization: one row per edge, labelled
# "<ligand>-<receptor>" and weighted by edge_specificity_mean
edges <- result$edges[, .(
  source = sending_cluster,
  target = target_cluster,
  interaction = paste(ligand, receptor, sep = "-"),
  weight = edge_specificity_mean
)]
write.csv(edges, "cytoscape_edges.csv", row.names = FALSE)
# Export node attributes: every cluster that appears as a source or a target
cluster_ids <- unique(c(edges$source, edges$target))
nodes <- data.frame(
  id = cluster_ids,
  # Repeat the label explicitly: data.frame() does not recycle a length-1
  # value to zero rows, so a bare "cluster" would fail when there are no edges.
  type = rep("cluster", length(cluster_ids))
)
write.csv(nodes, "cytoscape_nodes.csv", row.names = FALSE)
Integration with CellChat/LIANA
# Convert NOVA results to CellChat format.
#
# Arguments:
#   result  Object whose `edges` element provides the columns sending_cluster,
#           target_cluster, ligand, receptor and edge_specificity_mean.
#
# Returns a data.frame with one row per edge and the columns source, target,
# ligand, receptor, prob (copied from edge_specificity_mean) and pval (always
# NA, because NOVA doesn't compute p-values).
nova_to_cellchat <- function(result) {
  edges <- result$edges
  # Create interaction data frame
  data.frame(
    source = edges$sending_cluster,
    target = edges$target_cluster,
    ligand = edges$ligand,
    receptor = edges$receptor,
    prob = edges$edge_specificity_mean,
    # Size the placeholder explicitly: a bare NA has length 1, which makes
    # data.frame() fail on an empty edge table (0 rows vs. 1).
    pval = rep(NA_real_, NROW(edges))
  )
}
Troubleshooting
Common Issues
1. No edges detected
# Check gene overlap with database
lr_db <- GetLRDatabase("lrc2p")
genes_in_data <- rownames(expr)
# Count distinct genes rather than database rows: a ligand or receptor that
# takes part in several pairs is listed once per pair and would otherwise be
# counted repeatedly.
ligand_overlap <- sum(unique(lr_db$ligand) %in% genes_in_data)
receptor_overlap <- sum(unique(lr_db$receptor) %in% genes_in_data)
cat("Ligands found:", ligand_overlap, "\n")
cat("Receptors found:", receptor_overlap, "\n")
# Lower thresholds if needed
result <- ExtractEdges(expr, annotation, species = "human", min_pct = 0)
2. Memory issues
# Use sparse matrix. Convert only when needed: calling as.matrix() on data
# that is already sparse would first expand it to a full dense matrix.
expr_sparse <- if (inherits(expr, "sparseMatrix")) {
  expr
} else {
  Matrix::Matrix(as.matrix(expr), sparse = TRUE)
}
# Process clusters in batches
batch_size <- 5
unique_clusters <- unique(annotation$cluster)
# Group by position with seq_along() so that zero clusters simply yields zero
# batches; seq(1, 0, by = 5) would stop with a "wrong sign in 'by'" error.
cluster_batches <- split(
  unique_clusters,
  ceiling(seq_along(unique_clusters) / batch_size)
)
for (subset_clusters in cluster_batches) {
  # Process subset...
}
3. Species mapping issues
# Check gene name format: print the first few row names of the expression
# matrix for a visual check
head(rownames(expr)) # Should match species conventions
# Verify species parameter
result <- ExtractEdges(expr, annotation, species = "mouse") # Not "Mouse" or "MOUSE"
Session Info
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#>
#> locale:
#> [1] C
#>
#> time zone: Asia/Shanghai
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] data.table_1.18.0 NOVA_1.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] Matrix_1.7-4 gtable_0.3.6 jsonlite_2.0.0 dplyr_1.1.4
#> [5] compiler_4.4.0 Rcpp_1.1.1 tidyselect_1.2.1 parallel_4.4.0
#> [9] dichromat_2.0-0.1 jquerylib_0.1.4 systemfonts_1.3.1 scales_1.4.0
#> [13] textshaping_1.0.4 yaml_2.3.12 fastmap_1.2.0 lattice_0.22-7
#> [17] ggplot2_4.0.1 R6_2.6.1 generics_0.1.4 knitr_1.51
#> [21] htmlwidgets_1.6.4 tibble_3.3.1 desc_1.4.3 bslib_0.9.0
#> [25] pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.1.7 cachem_1.1.0
#> [29] xfun_0.56 fs_1.6.6 sass_0.4.10 S7_0.2.1
#> [33] otel_0.2.0 cli_3.6.5 pkgdown_2.2.0 magrittr_2.0.4
#> [37] digest_0.6.39 grid_4.4.0 lifecycle_1.0.5 vctrs_0.7.1
#> [41] evaluate_1.0.5 glue_1.8.0 farver_2.1.2 ragg_1.5.0
#> [45] rmarkdown_2.30 tools_4.4.0 pkgconfig_2.0.3 htmltools_0.5.9