Skip to contents

Data Preparation

Input Requirements

Connectome requires a properly processed Seurat object:

library(Seurat)
library(Connectome)

# Check your object
seurat_obj

# Required slots
Assays(seurat_obj)  # Should include "RNA"

# Name the `slot` argument explicitly. Under Seurat v5 the second positional
# argument of GetAssayData() is `assay`, so GetAssayData(obj, "data") would
# look for an assay called "data". (On Seurat v5, `layer =` is the preferred
# spelling; `slot =` still works there with a deprecation warning.)
GetAssayData(seurat_obj, slot = "data")        # Normalized data
GetAssayData(seurat_obj, slot = "scale.data")  # Scaled data (optional but recommended)
Idents(seurat_obj)  # Cell identities

Preprocessing Checklist

# 1. Quality control (before Connectome)
# `percent.mt` is not created automatically; compute it when the metadata
# column is missing, otherwise subset() fails on an unknown variable.
# "^MT-" matches human mitochondrial genes; use "^mt-" for mouse symbols.
if (!"percent.mt" %in% colnames(seurat_obj[[]])) {
  seurat_obj[["percent.mt"]] <- PercentageFeatureSet(
    seurat_obj,
    pattern = "^MT-"
  )
}
seurat_obj <- subset(
  seurat_obj,
  subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 20
)

# 2. Normalize
seurat_obj <- NormalizeData(seurat_obj)

# 3. Scale (recommended for scaled edge weights)
# ScaleData() scales only the variable features by default, which would leave
# most ligand/receptor genes without scaled values. Scale every gene instead
# (or pass only the ligand/receptor genes to `features` to save memory).
seurat_obj <- FindVariableFeatures(seurat_obj)
seurat_obj <- ScaleData(seurat_obj, features = rownames(seurat_obj))

# 4. Set identities
Idents(seurat_obj) <- "cell_type"  # Your cell type column

Cell Type Considerations

Minimum cells per cluster:

# Tabulate the number of cells in each identity class
table(Idents(seurat_obj))

# Aim for at least 50-100 cells per cluster.
# min.cells.per.ident excludes populations smaller than the given size.
connectome <- CreateConnectome(
  seurat_obj,
  min.cells.per.ident = 50,
  species = "human"
)

Cell type naming:

# Avoid special characters in cell type names
# Good: "T_cell", "Macrophage_M1", "Epithelial"
# Bad: "T cell", "Macrophage/Monocyte", "Epi (type 1)"

# Fix naming issues
# Rename on the identity factor itself: `levels(seurat_obj) <- ...` only
# reorders existing levels and errors when handed names that do not exist yet.
idents <- Idents(seurat_obj)

# Collapse every run of non-alphanumeric characters to "_" and trim stray
# leading/trailing underscores, e.g. "Epi (type 1)" -> "Epi_type_1".
clean_names <- gsub("[^A-Za-z0-9]+", "_", levels(idents))
clean_names <- gsub("^_+|_+$", "", clean_names)

# Refuse to continue if cleaning would merge two cell types (e.g. "CD4+" and
# "CD4-") or leave an empty name; rename those by hand first.
stopifnot(!anyDuplicated(clean_names), all(nzchar(clean_names)))

levels(idents) <- clean_names
Idents(seurat_obj) <- idents

Parameter Optimization

CreateConnectome Parameters

| Parameter | Default | Recommendation |
|---|---|---|
| min.cells.per.ident | NULL | 50-100 for robust estimates |
| max.cells.per.ident | NULL | 500-1000 for large datasets |
| p.values | TRUE | FALSE for exploratory analysis |
| calculate.DOR | FALSE | TRUE for specificity analysis |
| include.putative | TRUE | FALSE for high-confidence only |
# Standard analysis: p-values on, putative pairs kept, clusters under 50 cells
# excluded
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  include.putative = TRUE,
  p.values = TRUE,
  min.cells.per.ident = 50
)

# High-stringency analysis: larger minimum cluster size, literature-supported
# pairs only, and DOR calculated for specificity analysis
connectome_strict <- CreateConnectome(
  seurat_obj,
  species = "human",
  include.putative = FALSE,
  calculate.DOR = TRUE,
  p.values = TRUE,
  min.cells.per.ident = 100
)

# Fast exploratory analysis: downsample each cluster to 500 cells and skip
# p-values
connectome_fast <- CreateConnectome(
  seurat_obj,
  species = "human",
  include.putative = TRUE,
  p.values = FALSE,
  max.cells.per.ident = 500
)

FilterConnectome Parameters

| Parameter | Description | Typical Range |
|---|---|---|
| min.pct | Minimum expression fraction | 0.05 - 0.25 |
| min.z | Minimum z-score | 0 - 1 |
| min.exp | Minimum expression level | 0.1 - 0.5 |
| max.p | Maximum adjusted p-value | 0.01 - 0.05 |
| min.DOR | Minimum log-DOR | 0 - 2 |
# Lenient filtering (discovery): low expression-fraction floor, any
# non-negative z-score
conn_lenient <- FilterConnectome(connectome, min.z = 0, min.pct = 0.05)

# Standard filtering: moderate thresholds plus an adjusted p-value cut-off
conn_standard <- FilterConnectome(
  connectome,
  max.p = 0.05,
  min.z = 0.25,
  min.pct = 0.1
)

# Stringent filtering (high confidence): tighter thresholds and a minimum
# log-DOR
conn_stringent <- FilterConnectome(
  connectome,
  min.DOR = 1,
  max.p = 0.01,
  min.z = 0.5,
  min.pct = 0.2
)

Performance Optimization

Large Datasets

# Keep large datasets tractable: cap every cluster at 500 cells and skip the
# p-value calculation for speed
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  p.values = FALSE,
  max.cells.per.ident = 500
)

# Report the in-memory size of the result in megabytes
format(object.size(x = connectome), units = "MB")

Parallel Processing

# Parallelise the p-value calculation (requires future packages)
# NOTE(review): confirm that `parallel` and `n.cores` are accepted by the
# installed Connectome version before relying on this call.
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  n.cores = 4,
  parallel = TRUE,
  p.values = TRUE
)

Common Issues and Solutions

Issue 1: No edges after filtering

# How many edges does the unfiltered connectome hold?
nrow(connectome)

# Inspect the distributions that the filter thresholds are compared against
summary(connectome[["percent.source"]])
summary(connectome[["ligand.scale"]])

# Solution: relax the filter (lower expression-fraction threshold, accept all
# z-scores)
conn_filtered <- FilterConnectome(connectome, min.z = -Inf, min.pct = 0.05)

Issue 2: Missing cell types in visualization

# Check which cell types are in the connectome
# Coerce to character before combining: if `source`/`target` are factors,
# c() returns their integer codes on R < 4.1 instead of the cell type names.
unique(c(as.character(connectome$source), as.character(connectome$target)))

# Solution: Use include.all.nodes in NetworkPlot
NetworkPlot(connectome, include.all.nodes = TRUE)

Issue 3: Memory errors

# Solution 1: Downsample via WhichCells(downsample = 500)
cells_to_keep <- WhichCells(seurat_obj, downsample = 500)
seurat_small <- subset(seurat_obj, cells = cells_to_keep)

# Solution 2: Process in chunks
# Skeleton only: iterate over every ordered pair of identities; the per-pair
# work is left to the reader.
cell_types <- unique(Idents(seurat_obj))
conn_list <- list()
for (source_idx in seq_along(cell_types)) {
  for (target_idx in seq_along(cell_types)) {
    # Process pairs individually
  }
}

Issue 4: Species mismatch

# Look at the first few gene names to see which naming convention is used
head(rownames(seurat_obj), n = 6L)

# Mouse data whose genes carry human-format symbols:
# pass species = "human" so the gene names match

# Data with proper mouse symbols
connectome <- CreateConnectome(species = "mouse", object = seurat_obj)

Issue 5: Custom ligand-receptor database

# Build a custom ligand-receptor table: one row per pair, with the ligand,
# its receptor, and a signalling mode label
lr_ligands <- c("MYL9", "VEGFA", "IL6")
lr_receptors <- c("ITGA1", "KDR", "IL6R")
lr_modes <- c("integrin", "growth_factor", "cytokine")
my_lr_db <- data.frame(
  ligand = lr_ligands,
  receptor = lr_receptors,
  mode = lr_modes
)

# Pass the table through custom.list and select it with LR.database = "custom"
connectome <- CreateConnectome(
  seurat_obj,
  custom.list = my_lr_db,
  LR.database = "custom"
)

Quality Control

Sanity Checks

# 1. Check connectome dimensions
dim(connectome)
# Expected: (n_celltypes^2 * n_lr_pairs) rows × ~15 columns

# 2. Check for missing values
sum(is.na(connectome$ligand.expression))
sum(is.na(connectome$percent.source))

# 3. Verify cell types
# Coerce to character before combining: if `source`/`target` are factors,
# c() returns integer codes on R < 4.1 and the comparison is always FALSE.
# FALSE is also expected when small clusters were excluded with
# min.cells.per.ident.
setequal(
  unique(c(as.character(connectome$source), as.character(connectome$target))),
  unique(as.character(Idents(seurat_obj)))
)

# 4. Check expression distributions
hist(connectome$ligand.expression, breaks = 50, main = "Ligand Expression")
hist(connectome$ligand.scale, breaks = 50, main = "Ligand Z-scores")

Biological Validation

# Pull the edges for three well-known ligand-receptor pairs
known_interactions <- subset(
  connectome,
  (ligand == "VEGFA" & receptor == "KDR") |
    (ligand == "IL6" & receptor == "IL6R") |
    (ligand == "TNF" & receptor == "TNFRSF1A")
)

# These should show reasonable expression in the relevant cell types
columns_to_show <- c(
  "source", "target", "pair",
  "ligand.expression", "recept.expression"
)
print(known_interactions[, columns_to_show])

Reproducibility

Setting Seeds

# For downsampling reproducibility
# WhichCells() has its own `seed` argument and re-seeds the RNG internally, so
# a preceding set.seed() does not control which cells are kept. Pass the seed
# directly instead.
seurat_obj <- subset(
  seurat_obj,
  cells = WhichCells(seurat_obj, downsample = 500, seed = 42)
)

# Fix the RNG state in case CreateConnectome draws random numbers
set.seed(42)
connectome <- CreateConnectome(seurat_obj, species = "human")

Saving Results

# Persist the full connectome as an R object
saveRDS(connectome, file = "connectome_analysis.rds")

# Plain-text copy of the edge list for external tools
write.csv(connectome, file = "connectome_edges.csv", row.names = FALSE)

# Filter, then export the filtered edge list as well
conn_filtered <- FilterConnectome(connectome, min.z = 0.25, min.pct = 0.1)
write.csv(conn_filtered, file = "connectome_filtered.csv", row.names = FALSE)

Session Documentation

# Document analysis parameters
analysis_params <- list(
  date = Sys.Date(),
  species = "human",
  n_cells = ncol(seurat_obj),
  n_clusters = length(unique(Idents(seurat_obj))),
  # Record exactly the thresholds used to build `conn_filtered`
  # (min.pct = 0.1, min.z = 0.25). No p-value cut-off was applied there, so
  # none is listed here.
  filter_params = list(min.pct = 0.1, min.z = 0.25),
  n_edges_raw = nrow(connectome),
  n_edges_filtered = nrow(conn_filtered)
)
saveRDS(analysis_params, "analysis_parameters.rds")

Session Info

sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS 15.6.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
#> 
#> locale:
#> [1] C
#> 
#> time zone: Asia/Shanghai
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.39     desc_1.4.3        R6_2.6.1          fastmap_1.2.0    
#>  [5] xfun_0.56         cachem_1.1.0      knitr_1.51        htmltools_0.5.9  
#>  [9] rmarkdown_2.30    lifecycle_1.0.5   cli_3.6.5         sass_0.4.10      
#> [13] pkgdown_2.1.3     textshaping_1.0.4 jquerylib_0.1.4   systemfonts_1.3.1
#> [17] compiler_4.4.0    tools_4.4.0       ragg_1.5.0        bslib_0.9.0      
#> [21] evaluate_1.0.5    yaml_2.3.12       otel_0.2.0        jsonlite_2.0.0   
#> [25] rlang_1.1.7       fs_1.6.6          htmlwidgets_1.6.4