Advanced Usage

Introduction

This vignette covers advanced NOVA usage including:

Performance optimization
Custom LR databases
Parallel processing
Integration with other tools
Programmatic workflows

Setup

library(NOVA)
library(data.table)

Performance Optimization

Parallel Processing

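NOVA supports parallel computation via the future package. In the snippet below, `expr` and `annotation` stand for your own expression matrix and cell annotation table; a reproducible example of both is constructed in the "Advanced Filtering" section.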

# Enable parallel processing
library(future)

# Leave one core free, but never request fewer than one worker.
# parallel::detectCores() may return NA (or 1, giving zero workers);
# availableCores(), re-exported by future, always returns a positive count.
n_workers <- max(1L, availableCores() - 1L)
plan(multisession, workers = n_workers)

# Run analysis (automatically parallelized)
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human"
)

# Reset to sequential so later code does not keep background workers alive
plan(sequential)

NOVA Options

Configure NOVA's global behavior through R options, which are read with `getOption()` and set with `options()`:

# Inspect the options currently in effect; `default` is the fallback that is
# reported when the option has not been set in this session
cat("Verbose:", getOption("nova.verbose", default = TRUE), "\n")
#> Verbose: TRUE
cat("Parallel:", getOption("nova.parallel", default = TRUE), "\n")
#> Parallel: TRUE
cat(
  "Workers:",
  getOption("nova.workers", default = parallel::detectCores() - 1),
  "\n"
)
#> Workers: 11

# Override the options for the current session
options(
  nova.workers = 4,       # Number of parallel workers
  nova.parallel = TRUE,   # Enable parallelization
  nova.verbose = TRUE     # Print progress messages
)

Memory Efficiency

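For large datasets, store the expression matrix as a sparse matrix. The example below compares the memory footprint of an all-zero 1000 x 1000 matrix in dense and sparse form: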

# Build an all-zero 1000 x 1000 matrix in sparse form, then report how many
# bytes the dense and the sparse representation occupy
expr_example <- Matrix::Matrix(
  matrix(0, nrow = 1000, ncol = 1000),
  sparse = TRUE
)
cat("Dense size:", utils::object.size(as.matrix(expr_example)), "bytes\n")
#> Dense size: 8000216 bytes
cat("Sparse size:", utils::object.size(expr_example), "bytes\n")
#> Sparse size: 9240 bytes

# NOVA automatically handles sparse matrices efficiently

Custom Ligand-Receptor Database

Creating Custom Database

# Assemble a five-pair ligand-receptor table by hand; `source` is a single
# value that data.table recycles to every row
custom_lr <- data.table::data.table(
  ligand = c("CXCL12", "CCL2", "IL6", "TGFB1", "VEGFA"),
  receptor = c("CXCR4", "CCR2", "IL6R", "TGFBR1", "KDR"),
  category = c(
    rep("chemokine", 2),
    "cytokine",
    rep("growth_factor", 2)
  ),
  source = "custom"
)

print(custom_lr)
#>    ligand receptor      category source
#>    <char>   <char>        <char> <char>
#> 1: CXCL12    CXCR4     chemokine custom
#> 2:   CCL2     CCR2     chemokine custom
#> 3:    IL6     IL6R      cytokine custom
#> 4:  TGFB1   TGFBR1 growth_factor custom
#> 5:  VEGFA      KDR growth_factor custom

Using Custom Database

# Use custom database in analysis: same call as the default workflow, with the
# hand-built `custom_lr` table passed through the `lr_database` argument
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human",
  lr_database = custom_lr  # Custom database (the `custom_lr` table built above)
)

Extending Built-in Database

# Start from the bundled "lrc2p" ligand-receptor table
builtin_lr <- GetLRDatabase("lrc2p")

# Extra pairs to append to it
custom_pairs <- data.table::data.table(
  ligand = c("CUSTOM_LIG1", "CUSTOM_LIG2"),
  receptor = c("CUSTOM_REC1", "CUSTOM_REC2")
)

# Stack the two tables, keeping only the columns they share
# (fill = TRUE pads any column missing from one side with NA)
extended_lr <- rbind(
  builtin_lr[, list(ligand, receptor)],
  custom_pairs,
  fill = TRUE
)
cat("Extended database size:", nrow(extended_lr), "pairs\n")
#> Extended database size: 2295 pairs

Filtering and Subsetting

Advanced Filtering

# Simulate a small dataset (100 genes x 200 cells) with a fixed seed
set.seed(42)
n_genes <- 100
n_cells <- 200

# Non-negative expression values arranged as genes (rows) x cells (columns)
expr <- matrix(
  abs(rnorm(n_genes * n_cells)),
  nrow = n_genes,
  ncol = n_cells
)

# Name the rows after the first 50 distinct ligands and the first 50 distinct
# receptors of the "lrc2p" database so the simulated genes match known pairs
lr_db <- GetLRDatabase("lrc2p")
rownames(expr) <- c(
  unique(lr_db$ligand)[seq_len(50)],
  unique(lr_db$receptor)[seq_len(50)]
)
colnames(expr) <- paste0("Cell", seq_len(n_cells))
expr <- Matrix::Matrix(expr, sparse = TRUE)

# Randomly assign every cell to one of three clusters
clusters <- sample(c("A", "B", "C"), size = n_cells, replace = TRUE)
annotation <- data.frame(cell = colnames(expr), cluster = clusters)

result <- ExtractEdges(expr, annotation, species = "human")

# Apply several thresholds in a single FilterEdges() call
filtered <- FilterEdges(
  result,
  min_pct = 0.1,
  min_mean = 0.5,
  min_specificity = 0.2
)

# Compare edge counts before and after filtering
cat("Original edges:", nrow(result$edges), "\n")
cat("Filtered edges:", nrow(filtered$edges), "\n")

Subsetting by Cluster

# Either side of an edge can be constrained on its own, or both together
edges_from_A <- GetEdges(result, sending = "A")
edges_to_C <- GetEdges(result, target = "C")
edges_A_to_B <- GetEdges(result, sending = "A", target = "B")

# Report how many edges each selection returned
cat("A -> B:", nrow(edges_A_to_B), "edges\n")
cat("A -> any:", nrow(edges_from_A), "edges\n")
cat("any -> C:", nrow(edges_to_C), "edges\n")

Programmatic Workflows

Batch Processing

# Process multiple samples
samples <- c("sample1", "sample2", "sample3")

# Pre-allocate a named list so each result is stored under its sample ID
results <- setNames(vector("list", length(samples)), samples)

for (sample_id in samples) {
  # Load data into loop-specific names so the `expr` matrix used by other
  # snippets in this vignette is not overwritten
  sample_expr <- readRDS(paste0(sample_id, "_expression.rds"))
  sample_ann <- read.csv(paste0(sample_id, "_annotation.csv"))

  # Run analysis
  results[[sample_id]] <- ExtractEdges(
    expression = sample_expr,
    annotation = sample_ann,
    species = "human"
  )

  cat("Processed", sample_id, ":", nrow(results[[sample_id]]$edges), "edges\n")
}

# Combine results: `idcol` adds a leading "sample" column filled from the
# names of the list, so each edge table does not need to be tagged by hand
all_edges <- rbindlist(
  lapply(results, function(res) res$edges),
  idcol = "sample"
)

Custom Analysis Pipeline

# Run the Seurat -> NOVA -> Seurat round trip in one reusable function.
#
# Arguments:
#   seurat_obj   object handed to SeuratToNOVA() and AddNOVAResults()
#   cluster_col  forwarded to SeuratToNOVA() as `cluster_col`
#   species      forwarded to ExtractEdges() as `species`
#   ...          any further arguments, forwarded to ExtractEdges()
#
# Returns a list with two elements: `seurat`, the value returned by
# AddNOVAResults(), and `nova`, the value returned by ExtractEdges().
analyze_communication <- function(seurat_obj, 
                                   cluster_col = "cell_type",
                                   species = "human",
                                   ...) {
  # Pull the expression and annotation inputs out of the Seurat object
  converted <- SeuratToNOVA(seurat_obj, cluster_col = cluster_col)

  edge_result <- ExtractEdges(
    expression = converted$expression,
    annotation = converted$annotation,
    species = species,
    ...
  )

  # Attach the result to the Seurat object and hand back both pieces
  list(
    seurat = AddNOVAResults(seurat_obj, edge_result),
    nova = edge_result
  )
}

# Use the pipeline. `seurat_obj` must already exist in the session; `min_pct`
# is not a named parameter of analyze_communication(), so it travels through
# `...` to ExtractEdges().
output <- analyze_communication(
  seurat_obj,
  cluster_col = "cell_type",
  species = "mouse",
  min_pct = 0.1
)

Integration with Other Tools

Export for Cytoscape

# Export edges for Cytoscape visualization.
# Cluster labels are converted to character so that factor columns are written
# (and later combined) as labels rather than integer codes.
edges <- result$edges[, .(
  source = as.character(sending_cluster),
  target = as.character(target_cluster),
  interaction = paste(ligand, receptor, sep = "-"),
  weight = edge_specificity_mean
)]

write.csv(edges, "cytoscape_edges.csv", row.names = FALSE)

# Export node attributes: one row per cluster that appears on either side of
# an edge. `type` is sized explicitly so an empty edge table produces an empty
# node table instead of a data.frame() row-count error.
node_ids <- unique(c(edges$source, edges$target))
nodes <- data.frame(
  id = node_ids,
  type = rep("cluster", length(node_ids))
)
write.csv(nodes, "cytoscape_nodes.csv", row.names = FALSE)

Integration with CellChat/LIANA

# Convert NOVA results to a CellChat-style interaction table.
#
# Arguments:
#   result  a list whose `edges` element provides the columns
#           sending_cluster, target_cluster, ligand, receptor and
#           edge_specificity_mean
#
# Returns a data.frame with columns source, target, ligand, receptor, prob
# and pval (one row per edge; zero rows when `edges` is empty).
nova_to_cellchat <- function(result) {
  edges <- result$edges
  n_edges <- length(edges$sending_cluster)

  # Create interaction data frame. `pval` is built at full length as a numeric
  # NA: a scalar NA cannot be recycled to zero rows, so the scalar form makes
  # data.frame() fail on an empty edge table.
  data.frame(
    source = edges$sending_cluster,
    target = edges$target_cluster,
    ligand = edges$ligand,
    receptor = edges$receptor,
    prob = edges$edge_specificity_mean,
    pval = rep(NA_real_, n_edges)  # NOVA doesn't compute p-values
  )
}

Troubleshooting

Common Issues

1. No edges detected

# Check gene overlap with database
lr_db <- GetLRDatabase("lrc2p")
genes_in_data <- rownames(expr)

# Count distinct gene symbols. A ligand or receptor can appear in several
# pairs, so counting matching database rows would overstate the overlap.
ligand_overlap <- length(intersect(lr_db$ligand, genes_in_data))
receptor_overlap <- length(intersect(lr_db$receptor, genes_in_data))

cat("Ligands found:", ligand_overlap, "\n")
cat("Receptors found:", receptor_overlap, "\n")

# Lower thresholds if needed
result <- ExtractEdges(expr, annotation, species = "human", min_pct = 0)

2. Memory issues

# Use sparse matrix. Only convert when `expr` is not sparse already: calling
# as.matrix() on a sparse matrix first expands it to a dense copy, which is
# exactly the allocation this step is meant to avoid.
expr_sparse <- if (inherits(expr, "sparseMatrix")) {
  expr
} else {
  Matrix::Matrix(as.matrix(expr), sparse = TRUE)
}

# Process clusters in batches of `batch_size`. split() yields an empty list
# when there are no clusters, so the loop is skipped instead of failing.
batch_size <- 5
unique_clusters <- unique(annotation$cluster)
cluster_batches <- split(
  unique_clusters,
  ceiling(seq_along(unique_clusters) / batch_size)
)
for (subset_clusters in cluster_batches) {
  # Process subset...
}

3. Species mapping issues

# Check gene name format: print the first few row names of the expression
# matrix and compare them with the gene symbols expected for the species
head(rownames(expr))  # Should match species conventions

# Verify species parameter: pass the species name in lower case
result <- ExtractEdges(expr, annotation, species = "mouse")  # Not "Mouse" or "MOUSE"

Session Info

# Record the R version, platform and package versions used to build this page
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
#> 
#> locale:
#> [1] C
#> 
#> time zone: Asia/Shanghai
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] data.table_1.18.0 NOVA_1.0.0       
#> 
#> loaded via a namespace (and not attached):
#>  [1] Matrix_1.7-4       gtable_0.3.6       jsonlite_2.0.0     dplyr_1.1.4       
#>  [5] compiler_4.4.0     Rcpp_1.1.1         tidyselect_1.2.1   parallel_4.4.0    
#>  [9] dichromat_2.0-0.1  jquerylib_0.1.4    systemfonts_1.3.1  scales_1.4.0      
#> [13] textshaping_1.0.4  yaml_2.3.12        fastmap_1.2.0      lattice_0.22-7    
#> [17] ggplot2_4.0.1      R6_2.6.1           generics_0.1.4     knitr_1.51        
#> [21] htmlwidgets_1.6.4  tibble_3.3.1       desc_1.4.3         bslib_0.9.0       
#> [25] pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.1.7        cachem_1.1.0      
#> [29] xfun_0.56          fs_1.6.6           sass_0.4.10        S7_0.2.1          
#> [33] otel_0.2.0         cli_3.6.5          pkgdown_2.2.0      magrittr_2.0.4    
#> [37] digest_0.6.39      grid_4.4.0         lifecycle_1.0.5    vctrs_0.7.1       
#> [41] evaluate_1.0.5     glue_1.8.0         farver_2.1.2       ragg_1.5.0        
#> [45] rmarkdown_2.30     tools_4.4.0        pkgconfig_2.0.3    htmltools_0.5.9

Author

Zaoqu Liu

Email: liuzaoqu@163.com
GitHub: Zaoqu-Liu

Zaoqu Liu

2026-01-26