Best Practices and Troubleshooting
Zaoqu Liu
2026-01-23
Source: vignettes/best-practices.Rmd
Data Preparation
Input Requirements
Connectome requires a properly processed Seurat object:
library(Seurat)
library(Connectome)
# Check your object
seurat_obj
# Required slots
Assays(seurat_obj) # Should include "RNA"
GetAssayData(seurat_obj, "data") # Normalized data
GetAssayData(seurat_obj, "scale.data") # Scaled data (optional but recommended)
Idents(seurat_obj) # Cell identities
Preprocessing Checklist
# 1. Quality control (before Connectome)
seurat_obj <- subset(seurat_obj,
nFeature_RNA > 200 &
nFeature_RNA < 5000 &
percent.mt < 20)
# 2. Normalize
seurat_obj <- NormalizeData(seurat_obj)
# 3. Scale (recommended for scaled edge weights)
seurat_obj <- FindVariableFeatures(seurat_obj)
seurat_obj <- ScaleData(seurat_obj)
# 4. Set identities
Idents(seurat_obj) <- "cell_type" # Your cell type column
Cell Type Considerations
Minimum cells per cluster:
# Check cell counts
table(Idents(seurat_obj))
# Recommended: at least 50-100 cells per cluster
# Use min.cells.per.ident to exclude small populations
connectome <- CreateConnectome(
seurat_obj,
species = "human",
min.cells.per.ident = 50
)
Cell type naming:
Parameter Optimization
CreateConnectome Parameters
| Parameter | Default | Recommendation |
|---|---|---|
| `min.cells.per.ident` | `NULL` | 50-100 for robust estimates |
| `max.cells.per.ident` | `NULL` | 500-1000 for large datasets |
| `p.values` | `TRUE` | `FALSE` for exploratory analysis |
| `calculate.DOR` | `FALSE` | `TRUE` for specificity analysis |
| `include.putative` | `TRUE` | `FALSE` for high-confidence only |
# Standard analysis
connectome <- CreateConnectome(
seurat_obj,
species = "human",
min.cells.per.ident = 50,
p.values = TRUE,
include.putative = TRUE
)
# High-stringency analysis
connectome_strict <- CreateConnectome(
seurat_obj,
species = "human",
min.cells.per.ident = 100,
p.values = TRUE,
include.putative = FALSE, # Literature-supported only
calculate.DOR = TRUE
)
# Fast exploratory analysis
connectome_fast <- CreateConnectome(
seurat_obj,
species = "human",
max.cells.per.ident = 500, # Downsample
p.values = FALSE,
include.putative = TRUE
)
FilterConnectome Parameters
| Parameter | Description | Typical Range |
|---|---|---|
| `min.pct` | Minimum expression fraction | 0.05 - 0.25 |
| `min.z` | Minimum z-score | 0 - 1 |
| `min.exp` | Minimum expression level | 0.1 - 0.5 |
| `max.p` | Maximum adjusted p-value | 0.01 - 0.05 |
| `min.DOR` | Minimum log-DOR | 0 - 2 |
# Lenient filtering (discovery)
conn_lenient <- FilterConnectome(
connectome,
min.pct = 0.05,
min.z = 0
)
# Standard filtering
conn_standard <- FilterConnectome(
connectome,
min.pct = 0.1,
min.z = 0.25,
max.p = 0.05
)
# Stringent filtering (high confidence)
conn_stringent <- FilterConnectome(
connectome,
min.pct = 0.2,
min.z = 0.5,
max.p = 0.01,
min.DOR = 1
)
Performance Optimization
Large Datasets
# Downsample before analysis
connectome <- CreateConnectome(
seurat_obj,
species = "human",
max.cells.per.ident = 500, # Downsample to 500 cells/cluster
p.values = FALSE # Skip p-values for speed
)
# Check memory usage
format(object.size(connectome), units = "MB")
Parallel Processing
# For p-value calculation (requires future packages)
connectome <- CreateConnectome(
seurat_obj,
species = "human",
p.values = TRUE,
parallel = TRUE,
n.cores = 4
)
Common Issues and Solutions
Issue 1: No edges after filtering
# Check raw connectome size
nrow(connectome)
# Check available values
summary(connectome$percent.source)
summary(connectome$ligand.scale)
# Solution: Relax filtering parameters
conn_filtered <- FilterConnectome(
connectome,
min.pct = 0.05, # Lower threshold
min.z = -Inf # Accept all z-scores
)
Issue 2: Missing cell types in visualization
# Check which cell types are in the connectome
unique(c(connectome$source, connectome$target))
# Solution: Use include.all.nodes in NetworkPlot
NetworkPlot(connectome, include.all.nodes = TRUE)
Issue 3: Memory errors
# Solution 1: Downsample
seurat_small <- subset(seurat_obj,
cells = WhichCells(seurat_obj, downsample = 500))
# Solution 2: Process in chunks
cell_types <- unique(Idents(seurat_obj))
conn_list <- list()
for (i in seq_along(cell_types)) {
for (j in seq_along(cell_types)) {
# Process pairs individually
}
}
Issue 4: Species mismatch
# Check gene names
head(rownames(seurat_obj))
# For mouse data with human gene symbols
# Use species = "human" if genes are in human format
# For proper mouse symbols
connectome <- CreateConnectome(seurat_obj, species = "mouse")
Issue 5: Custom ligand-receptor database
# Create custom database
my_lr_db <- data.frame(
ligand = c("MYL9", "VEGFA", "IL6"),
receptor = c("ITGA1", "KDR", "IL6R"),
mode = c("integrin", "growth_factor", "cytokine")
)
connectome <- CreateConnectome(
seurat_obj,
LR.database = "custom",
custom.list = my_lr_db
)
Quality Control
Sanity Checks
# 1. Check connectome dimensions
dim(connectome)
# Expected: (n_celltypes^2 * n_lr_pairs) rows × ~15 columns
# 2. Check for missing values
sum(is.na(connectome$ligand.expression))
sum(is.na(connectome$percent.source))
# 3. Verify cell types
setequal(
unique(c(connectome$source, connectome$target)),
unique(as.character(Idents(seurat_obj)))
)
# 4. Check expression distributions
hist(connectome$ligand.expression, breaks = 50, main = "Ligand Expression")
hist(connectome$ligand.scale, breaks = 50, main = "Ligand Z-scores")
Biological Validation
# Check known interactions
known_interactions <- subset(
connectome,
(ligand == "VEGFA" & receptor == "KDR") |
(ligand == "IL6" & receptor == "IL6R") |
(ligand == "TNF" & receptor == "TNFRSF1A")
)
# These should have reasonable expression in relevant cell types
print(known_interactions[, c("source", "target", "pair",
"ligand.expression", "recept.expression")])
Reproducibility
Setting Seeds
# For downsampling reproducibility
set.seed(42)
seurat_obj <- subset(seurat_obj,
cells = WhichCells(seurat_obj, downsample = 500))
set.seed(42)
connectome <- CreateConnectome(seurat_obj, species = "human")
Saving Results
# Save connectome object
saveRDS(connectome, "connectome_analysis.rds")
# Export as CSV for external tools
write.csv(connectome, "connectome_edges.csv", row.names = FALSE)
# Save filtered version
conn_filtered <- FilterConnectome(connectome, min.pct = 0.1, min.z = 0.25)
write.csv(conn_filtered, "connectome_filtered.csv", row.names = FALSE)
Session Documentation
# Document analysis parameters
analysis_params <- list(
date = Sys.Date(),
species = "human",
n_cells = ncol(seurat_obj),
n_clusters = length(unique(Idents(seurat_obj))),
filter_params = list(min.pct = 0.1, min.z = 0.25, max.p = 0.05),
n_edges_raw = nrow(connectome),
n_edges_filtered = nrow(conn_filtered)
)
saveRDS(analysis_params, "analysis_parameters.rds")
Session Info
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#>
#> locale:
#> [1] C
#>
#> time zone: Asia/Shanghai
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.39 desc_1.4.3 R6_2.6.1 fastmap_1.2.0
#> [5] xfun_0.56 cachem_1.1.0 knitr_1.51 htmltools_0.5.9
#> [9] rmarkdown_2.30 lifecycle_1.0.5 cli_3.6.5 sass_0.4.10
#> [13] pkgdown_2.1.3 textshaping_1.0.4 jquerylib_0.1.4 systemfonts_1.3.1
#> [17] compiler_4.4.0 tools_4.4.0 ragg_1.5.0 bslib_0.9.0
#> [21] evaluate_1.0.5 yaml_2.3.12 otel_0.2.0 jsonlite_2.0.0
#> [25] rlang_1.1.7 fs_1.6.6 htmlwidgets_1.6.4