- Deconvolution of Visium data using snRNA-seq data
# Load necessary libraries
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.4.1 but the current version is
## 4.4.2; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(Matrix)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spacexr)
library(ggplot2)
# Define paths
data_dir <- "/Users/landonwellendorf/Desktop/STAT530/Final/GSE223066_split-pipe_outputs"
spatial_dir <- "/Users/landonwellendorf/Desktop/STAT530/Final/"
# Load snRNA-seq data files
count_matrix <- readMM(file.path(data_dir, "DGE.mtx"))
genes <- read.csv(file.path(data_dir, "all_genes.csv"))
cell_metadata <- read.csv(file.path(data_dir, "cell_metadata.csv"))
# Filter for SOR1 sample only
sor1_cells <- cell_metadata$sample == "SOR1"
cell_metadata <- cell_metadata[sor1_cells, ]
count_matrix <- count_matrix[sor1_cells, ]
# Set up count matrix with proper row and column names
count_matrix <- t(count_matrix) # Genes should be rows, cells should be columns
rownames(count_matrix) <- make.unique(genes$gene_name)
colnames(count_matrix) <- cell_metadata$bc_wells
# Remove duplicate gene names if any
count_matrix <- count_matrix[!duplicated(rownames(count_matrix)), ]
# Create Seurat object and perform standard preprocessing
sn_obj <- CreateSeuratObject(counts = count_matrix, meta.data = cell_metadata)
## Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
sn_obj[["percent.mt"]] <- PercentageFeatureSet(sn_obj, pattern = "^mt-")
# QC filtering
sn_obj <- subset(sn_obj, subset = nCount_RNA < 35000 & percent.mt < 10)
# Normalize, find variable features, scale data, and run PCA
sn_obj <- NormalizeData(sn_obj)
## Normalizing layer: counts
sn_obj <- FindVariableFeatures(sn_obj, nfeatures = 2000)
## Finding variable features for layer counts
sn_obj <- ScaleData(sn_obj)
## Centering and scaling data matrix
sn_obj <- RunPCA(sn_obj, verbose = FALSE)
# Find neighbors and clusters
sn_obj <- FindNeighbors(sn_obj, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
sn_obj <- FindClusters(sn_obj, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7237
## Number of edges: 291449
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9051
## Number of communities: 19
## Elapsed time: 0 seconds
# Run UMAP for visualization
sn_obj <- RunUMAP(sn_obj, dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:43:15 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:43:15 Read 7237 rows and found 30 numeric columns
## 12:43:15 Using Annoy for neighbor search, n_neighbors = 30
## 12:43:15 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:43:16 Writing NN index file to temp file /var/folders/3_/kq6fv1_d4gl_n25bl_c016gc0000gn/T//RtmpBu5mIu/file87f48d948b3
## 12:43:16 Searching Annoy index using 1 thread, search_k = 3000
## 12:43:17 Annoy recall = 100%
## 12:43:17 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:43:18 Initializing from normalized Laplacian + noise (using RSpectra)
## 12:43:18 Commencing optimization for 500 epochs, with 320164 positive edges
## 12:43:18 Using rng type: pcg
## 12:43:27 Optimization finished
# Define cell type markers
# Excitatory pyramidal neuron markers (associated with theta oscillations)
pyr_markers <- c("Neurod6", "Camk2a", "Slc17a7", "Satb2")
# Inhibitory interneuron markers (associated with gamma oscillations)
int_markers <- c("Gad1", "Gad2", "Sst", "Vip")
# Oscillation-related markers
theta_marker <- "Hcn1"
gamma_marker <- "Pvalb"
# Ensure all markers are present in the dataset
all_markers <- c(pyr_markers, int_markers, theta_marker, gamma_marker)
missing_markers <- all_markers[!all_markers %in% rownames(sn_obj)]
if (length(missing_markers) > 0) {
print(paste("Warning: The following markers are missing from the dataset:",
paste(missing_markers, collapse = ", ")))
}
# Run differential expression to identify cluster markers
cluster_markers <- FindAllMarkers(sn_obj,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
## Calculating cluster 0
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
# Find clusters enriched for pyramidal and interneuron markers
pyr_enriched <- cluster_markers %>%
filter(gene %in% pyr_markers & p_val_adj < 0.05) %>%
group_by(cluster) %>%
summarize(marker_count = n()) %>%
arrange(desc(marker_count))
int_enriched <- cluster_markers %>%
filter(gene %in% int_markers & p_val_adj < 0.05) %>%
group_by(cluster) %>%
summarize(marker_count = n()) %>%
arrange(desc(marker_count))
# Identify the top clusters for each cell type
pyr_cluster <- as.character(pyr_enriched$cluster[1])
int_cluster <- as.character(int_enriched$cluster[1])
print(paste("Pyramidal neuron cluster:", pyr_cluster))
## [1] "Pyramidal neuron cluster: 2"
print(paste("Interneuron cluster:", int_cluster))
## [1] "Interneuron cluster: 5"
# Check correlation between cell type markers and oscillation markers
# in single-nucleus data first
DefaultAssay(sn_obj) <- "RNA"
gene_expr <- GetAssayData(sn_obj, slot = "data")
# Define the genes to explore
pyr_expr <- colMeans(gene_expr[pyr_markers, , drop = FALSE])
int_expr <- colMeans(gene_expr[int_markers, , drop = FALSE])
hcn1_expr <- gene_expr[theta_marker, ]
pvalb_expr <- gene_expr[gamma_marker, ]
# Add to metadata
sn_obj$pyr_score <- pyr_expr
sn_obj$int_score <- int_expr
# Calculate correlations
pyr_hcn1_cor <- cor(pyr_expr, hcn1_expr, method = "spearman")
int_pvalb_cor <- cor(int_expr, pvalb_expr, method = "spearman")
print(paste("Single-nucleus correlation - Pyramidal score vs Hcn1:", pyr_hcn1_cor))
## [1] "Single-nucleus correlation - Pyramidal score vs Hcn1: 0.151936090927992"
print(paste("Single-nucleus correlation - Interneuron score vs Pvalb:", int_pvalb_cor))
## [1] "Single-nucleus correlation - Interneuron score vs Pvalb: 0.116912961262091"
# Now set up for spatial deconvolution with RCTD
# Create reference for RCTD
ref <- Reference(sn_obj[["RNA"]]$counts, sn_obj$seurat_clusters)
# Load spatial data
spatial_data <- Read10X_h5(file.path(spatial_dir, "filtered_feature_bc_matrix.h5"))
spatial_positions <- read.csv(file.path(spatial_dir, "spatial/tissue_positions_list.csv"),
header = FALSE)
colnames(spatial_positions) <- c("barcode", "in_tissue", "array_row", "array_col", "x", "y")
rownames(spatial_positions) <- spatial_positions$barcode
# Create spatial RNA object
coords <- spatial_positions[, c("x", "y")]
visium_obj <- SpatialRNA(coords, spatial_data)
## Warning in SpatialRNA(coords, spatial_data): SpatialRNA: some barcodes in nUMI,
## coords, or counts were not mutually shared. Such barcodes were removed.
# Create and run RCTD
my_rctd <- create.RCTD(visium_obj, ref, max_cores = 4) # Adjust cores based on your system
## Begin: process_cell_type_info
## process_cell_type_info: number of cells in reference: 7237
## process_cell_type_info: number of genes in reference: 54232
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 1667 1484 1397 599 419 274 264 170 161 132 129 100 97 78 74 72
## 16 17 18
## 66 29 25
## End: process_cell_type_info
## create.RCTD: getting regression differentially expressed genes:
## get_de_genes: 0 found DE genes: 110
## get_de_genes: 1 found DE genes: 107
## get_de_genes: 2 found DE genes: 91
## get_de_genes: 3 found DE genes: 88
## get_de_genes: 4 found DE genes: 264
## get_de_genes: 5 found DE genes: 122
## get_de_genes: 6 found DE genes: 445
## get_de_genes: 7 found DE genes: 111
## get_de_genes: 8 found DE genes: 219
## get_de_genes: 9 found DE genes: 127
## get_de_genes: 10 found DE genes: 124
## get_de_genes: 11 found DE genes: 149
## get_de_genes: 12 found DE genes: 70
## get_de_genes: 13 found DE genes: 169
## get_de_genes: 14 found DE genes: 155
## get_de_genes: 15 found DE genes: 374
## get_de_genes: 16 found DE genes: 271
## get_de_genes: 17 found DE genes: 494
## get_de_genes: 18 found DE genes: 148
## get_de_genes: total DE genes: 2321
## create.RCTD: getting platform effect normalization differentially expressed genes:
## get_de_genes: 0 found DE genes: 288
## get_de_genes: 1 found DE genes: 273
## get_de_genes: 2 found DE genes: 236
## get_de_genes: 3 found DE genes: 236
## get_de_genes: 4 found DE genes: 479
## get_de_genes: 5 found DE genes: 262
## get_de_genes: 6 found DE genes: 752
## get_de_genes: 7 found DE genes: 260
## get_de_genes: 8 found DE genes: 428
## get_de_genes: 9 found DE genes: 292
## get_de_genes: 10 found DE genes: 279
## get_de_genes: 11 found DE genes: 440
## get_de_genes: 12 found DE genes: 285
## get_de_genes: 13 found DE genes: 334
## get_de_genes: 14 found DE genes: 341
## get_de_genes: 15 found DE genes: 709
## get_de_genes: 16 found DE genes: 637
## get_de_genes: 17 found DE genes: 992
## get_de_genes: 18 found DE genes: 344
## get_de_genes: total DE genes: 4013
my_rctd <- run.RCTD(my_rctd, doublet_mode = "doublet")
## fitBulk: decomposing bulk
## chooseSigma: using initial Q_mat with sigma = 1
## Likelihood value: 2236460.73617801
## Sigma value: 0.84
## Likelihood value: 2197603.83078177
## Sigma value: 0.69
## Likelihood value: 2168217.51478491
## Sigma value: 0.61
## Likelihood value: 2156411.80067811
## Sigma value: 0.53
## Likelihood value: 2148004.34521578
## Sigma value: 0.45
## Likelihood value: 2143592.43495127
## Sigma value: 0.42
## Likelihood value: 2143089.36638795
## Sigma value: 0.41
## Likelihood value: 2143069.90306945
## Sigma value: 0.41
## [1] "gather_results: finished 1000"
## [1] "gather_results: finished 2000"
# Extract results
results <- my_rctd@results
norm_weights <- normalize_weights(results$weights)
# Get cell type proportions
pyr_props <- norm_weights[, pyr_cluster]
int_props <- norm_weights[, int_cluster]
# Load spatial data as Seurat object for visualization
vis_seurat <- Load10X_Spatial(spatial_dir)
vis_seurat <- NormalizeData(vis_seurat)
## Normalizing layer: counts
selected_spots <- rownames(norm_weights)
# Add cell type proportions to the Seurat object
vis_seurat$pyr_prop <- NA
vis_seurat$int_prop <- NA
vis_seurat@meta.data[selected_spots, "pyr_prop"] <- pyr_props
vis_seurat@meta.data[selected_spots, "int_prop"] <- int_props
# Visualize cell type proportions
p1 <- SpatialFeaturePlot(vis_seurat, features = "pyr_prop", pt.size.factor = 2) +
ggtitle("Pyramidal Neuron Proportion")
p2 <- SpatialFeaturePlot(vis_seurat, features = "int_prop", pt.size.factor = 2) +
ggtitle("Interneuron Proportion")
print(p1 + p2)

- Spearman to test for correlation
# Add gene expression for Hcn1 and Pvalb to the spatial object
DefaultAssay(vis_seurat) <- "Spatial"
# Get normalized expression values for the markers
hcn1_spatial <- GetAssayData(vis_seurat, slot = "data")[theta_marker, ]
pvalb_spatial <- GetAssayData(vis_seurat, slot = "data")[gamma_marker, ]
# Add to metadata for correlation analysis
vis_seurat$Hcn1_expr <- NA
vis_seurat$Pvalb_expr <- NA
vis_seurat@meta.data[names(hcn1_spatial), "Hcn1_expr"] <- hcn1_spatial
vis_seurat@meta.data[names(pvalb_spatial), "Pvalb_expr"] <- pvalb_spatial
# Identify spots that have both cell type proportions and gene expression data
selected_spots_with_data <- intersect(selected_spots, rownames(vis_seurat@meta.data))
# Calculate Spearman correlations
pyr_hcn1_cor <- cor(
vis_seurat@meta.data[selected_spots_with_data, "pyr_prop"],
vis_seurat@meta.data[selected_spots_with_data, "Hcn1_expr"],
method = "spearman",
use = "complete.obs"
)
int_pvalb_cor <- cor(
vis_seurat@meta.data[selected_spots_with_data, "int_prop"],
vis_seurat@meta.data[selected_spots_with_data, "Pvalb_expr"],
method = "spearman",
use = "complete.obs"
)
# Print the results
print(paste("Spatial correlation - Pyramidal cells vs Hcn1:", pyr_hcn1_cor))
## [1] "Spatial correlation - Pyramidal cells vs Hcn1: 0.231929467295915"
print(paste("Spatial correlation - Interneurons vs Pvalb:", int_pvalb_cor))
## [1] "Spatial correlation - Interneurons vs Pvalb: 0.195541555842559"