Introduction
NOVA supports cell-cell communication analysis across 21 species through integration with NCBI HomoloGene. This enables:
- Analysis of model organism data (mouse, rat, zebrafish, etc.)
- Cross-species comparative studies
- Translational research applications
Supported Species
library(NOVA)
# View all supported species
species <- supported_species()
print(species)
#> human mouse chimpanzee
#> "9606" "10090" "9598"
#> dog monkey cattle
#> "9615" "9544" "9913"
#> rat chicken frog
#> "10116" "9031" "8364"
#> zebrafish fruitfly mosquito
#> "7955" "7227" "7165"
#> nematode thalecress rice
#> "6239" "3702" "4530"
#> riceblastfungus bakeryeast neurosporacrassa
#> "318829" "4932" "5141"
#> fissionyeast eremotheciumgossypii kluyveromyceslactis
#> "4896" "33169" "28985"
Homology Mapping
How It Works
NOVA uses NCBI HomoloGene to map gene symbols between species:
- Query species genes → HomoloGene IDs
- HomoloGene IDs → Target species orthologs
- Apply mapping to ligand-receptor database
# Get homology mapping from mouse to human
mapping <- GetHomologyMapping(from = "mouse", to = "human")
head(mapping)
#> from_symbol to_symbol
#> <char> <char>
#> 1: Acadm ACADM
#> 2: Acadvl ACADVL
#> 3: Acat1 ACAT1
#> 4: Acvr1 ACVR1
#> 5: Sgca SGCA
#> 6: Adsl ADSL
cat("\nTotal mappings:", nrow(mapping), "\n")
#>
#> Total mappings: 16766
Converting Gene Symbols
# Example mouse genes
mouse_genes <- c("Cd4", "Cd8a", "Ptprc", "Itgam", "Cd19")
# Convert to human symbols
human_genes <- ConvertGeneSymbols(mouse_genes, from = "mouse", to = "human")
print(data.frame(mouse = mouse_genes, human = human_genes))
#> mouse human
#> Cd4 Cd4 CD4
#> Cd8a Cd8a CD8A
#> Ptprc Ptprc PTPRC
#> Itgam Itgam ITGAM
#> Cd19 Cd19 CD19
Analyzing Mouse Data
Standard Workflow
set.seed(123)

# Simulate mouse single-cell data
n_genes <- 200
n_cells <- 300

# Create expression matrix: a random 25% of the entries get a positive
# value, every other entry stays 0
expr <- matrix(0, nrow = n_genes, ncol = n_cells)
expressed <- sample(length(expr), size = length(expr) * 0.25)
expr[expressed] <- abs(rnorm(length(expressed), mean = 2, sd = 1))

# Get the LR pairs and the human -> mouse ortholog table
# (`from_symbol` = human symbol, `to_symbol` = mouse symbol)
lr_db <- GetLRDatabase("lrc2p")
mouse_mapping <- GetHomologyMapping("human", "mouse")

# Translate human symbols to mouse symbols, dropping genes without an
# ortholog and collapsing repeats so the result can be used as row names
to_mouse <- function(human_symbols) {
  hits <- mouse_mapping$to_symbol[
    match(human_symbols, mouse_mapping$from_symbol)
  ]
  unique(as.character(hits[!is.na(hits)]))
}

# Take up to 20 ligands and 20 receptors. head() never pads with NA when
# fewer are available, and setdiff() keeps a gene that is both a ligand
# and a receptor from appearing twice.
mouse_ligands <- head(to_mouse(lr_db$ligand), 20)
mouse_receptors <- head(setdiff(to_mouse(lr_db$receptor), mouse_ligands), 20)

# Set gene names: real LR genes first, then placeholder genes so that the
# total always equals n_genes (MouseGene41, ... when 40 LR genes were found)
lr_genes <- c(mouse_ligands, mouse_receptors)
n_filler <- n_genes - length(lr_genes)
gene_names <- c(
  lr_genes,
  paste0("MouseGene", length(lr_genes) + seq_len(n_filler))
)
stopifnot(
  length(gene_names) == n_genes,
  anyDuplicated(gene_names) == 0L
)
rownames(expr) <- gene_names
colnames(expr) <- paste0("Cell", seq_len(n_cells))

# Create annotation: one randomly assigned cluster label per cell
clusters <- sample(
  c("T_cells", "B_cells", "Macrophages", "Fibroblasts"),
  n_cells,
  replace = TRUE
)
annotation <- data.frame(
  cell = colnames(expr),
  cluster = clusters
)

# Run analysis specifying mouse
result <- ExtractEdges(
  expression = Matrix::Matrix(expr, sparse = TRUE),
  annotation = annotation,
  species = "mouse", # Specify species
  database = "lrc2p",
  min_pct = 0.05
)
print(result)
Cross-Species Comparison
Comparative Study Design
When comparing communication across species:
# Run the same ExtractEdges() call once per species, then intersect the
# ligand-receptor pair labels of the two results.
# NOTE: human_expr, human_ann, mouse_expr and mouse_ann are not defined in
# the code shown here; supply your own expression matrices and annotations.
# Human analysis
human_result <- ExtractEdges(
expression = human_expr,
annotation = human_ann,
species = "human",
database = "lrc2p"
)
# Mouse analysis (genes auto-converted)
mouse_result <- ExtractEdges(
expression = mouse_expr,
annotation = mouse_ann,
species = "mouse",
database = "lrc2p"
)
# Compare conserved interactions
# Each edge is encoded as the string "<ligand>-<receptor>" so that the set
# operation below can compare whole pairs.
# NOTE(review): intersect() matches strings exactly, so pairs are only
# found if both results report symbols in the same namespace (e.g. the
# mouse edges are reported as human symbols) - confirm what
# ExtractEdges(species = "mouse") stores in $edges.
human_pairs <- paste(human_result$edges$ligand,
human_result$edges$receptor, sep = "-")
mouse_pairs <- paste(mouse_result$edges$ligand,
mouse_result$edges$receptor, sep = "-")
conserved <- intersect(human_pairs, mouse_pairs)
cat("Conserved LR interactions:", length(conserved), "\n")
Gene ID Types
NOVA supports multiple gene identifier types:
# View supported ID types
id_types <- supported_id_types()
print(id_types)
#> [1] "symbol" "entrez" "ensembl" "uniprot" "hgnc" "mgi" "custom"
Converting Between ID Types
# Convert Ensembl IDs to symbols
ensembl_ids <- c("ENSG00000153563", "ENSG00000010610")
symbols <- ConvertGeneIDs(ensembl_ids, from = "ensembl", to = "symbol", species = "human")
Special Considerations
1. One-to-Many Mappings
Some genes have multiple orthologs. The check below counts source symbols that appear more than once in the mapping (0 in this run):
# Check for duplicated mappings
mapping <- GetHomologyMapping("mouse", "human")
dup_genes <- mapping$from_symbol[duplicated(mapping$from_symbol)]
cat("Genes with multiple human orthologs:", length(unique(dup_genes)), "\n")
#> Genes with multiple human orthologs: 0
2. Missing Orthologs
Not all genes have orthologs:
# Example: genes without orthologs
all_mouse_genes <- c("Actb", "Gapdh", "NoOrtholog123")
converted <- ConvertGeneSymbols(all_mouse_genes, "mouse", "human")
print(data.frame(mouse = all_mouse_genes, human = converted))
#> mouse human
#> Actb Actb ACTB
#> Gapdh Gapdh GAPDH
#> NoOrtholog123 NoOrtholog123 <NA>
Best Practices
Workflow Recommendations
- Start with human database: The LR database is human-centric
- Verify ortholog coverage: Check how many genes map successfully
- Report unmapped genes: Document genes that couldn’t be mapped
- Validate key interactions: Confirm important findings in species-specific literature
Quality Control
# Check ortholog mapping rate
# The LR database uses human symbols, so request the human -> mouse table:
# its `from_symbol` column then holds the human symbols to match against.
mapping <- GetHomologyMapping("human", "mouse")
lr_db <- GetLRDatabase("lrc2p")

# Count each gene once so numerator and denominator use the same unit
ligands <- unique(lr_db$ligand)
receptors <- unique(lr_db$receptor)

# How many ligands / receptors have a mouse ortholog?
ligand_mapped <- sum(ligands %in% mapping$from_symbol)
receptor_mapped <- sum(receptors %in% mapping$from_symbol)

cat("Ligands mappable to mouse:", ligand_mapped, "/", length(ligands), "\n")
cat("Receptors mappable to mouse:", receptor_mapped, "/", length(receptors), "\n")
# (re-render the vignette to display the resulting counts)
Session Info
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#>
#> locale:
#> [1] C
#>
#> time zone: Asia/Shanghai
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] NOVA_1.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] Matrix_1.7-4 gtable_0.3.6 jsonlite_2.0.0 dplyr_1.1.4
#> [5] compiler_4.4.0 Rcpp_1.1.1 tidyselect_1.2.1 parallel_4.4.0
#> [9] dichromat_2.0-0.1 jquerylib_0.1.4 systemfonts_1.3.1 scales_1.4.0
#> [13] textshaping_1.0.4 yaml_2.3.12 fastmap_1.2.0 lattice_0.22-7
#> [17] ggplot2_4.0.1 R6_2.6.1 generics_0.1.4 knitr_1.51
#> [21] htmlwidgets_1.6.4 tibble_3.3.1 desc_1.4.3 bslib_0.9.0
#> [25] pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.1.7 cachem_1.1.0
#> [29] xfun_0.56 fs_1.6.6 sass_0.4.10 S7_0.2.1
#> [33] otel_0.2.0 cli_3.6.5 pkgdown_2.2.0 magrittr_2.0.4
#> [37] digest_0.6.39 grid_4.4.0 lifecycle_1.0.5 vctrs_0.7.1
#> [41] data.table_1.18.0 evaluate_1.0.5 glue_1.8.0 farver_2.1.2
#> [45] ragg_1.5.0 rmarkdown_2.30 tools_4.4.0 pkgconfig_2.0.3
#> [49] htmltools_0.5.9