This tutorial demonstrates how to apply RareQ to data generated from the 10x Xenium In Situ platform. The example dataset is from a mouse brain coronal section.
library(Seurat)
library(RareQ) # core method for rare-cell detection
library(dplyr)
library(ggplot2)
# Load the example object shipped with the tutorial
# Please replace the path with your local "Tutorial_example" directory
dat <- readRDS("Tutorial_example/data/Xenium_Mouse_brain.RDS")
meta.info <- dat@meta.data      # Xenium metadata (coordinates, QC metrics, etc.)
count <- dat@assays$RNA@counts  # raw gene × cell matrix
We preprocess the data, perform dimensionality reduction, and compute cell neighborhoods using standard workflows for data produced by the Xenium platform. Xenium uses targeted spatial transcriptomics panels, typically measuring hundreds rather than thousands of genes, which leads to preprocessing considerations that differ from those for scRNA-seq or multiome datasets.
# Create Seurat object
# The `counts` argument is spelled out in full instead of relying on R's
# partial argument matching (`count=`).
sc_object <- CreateSeuratObject(counts = count, project = "sc_object", min.cells = 3)

# Normalize and scale data
# A lower scale factor is used because the Xenium panel measures few genes.
sc_object <- NormalizeData(sc_object, scale.factor = 80)
sc_object <- ScaleData(sc_object)

# Dimensional reduction
sc_object <- RunPCA(
  sc_object,
  features = rownames(sc_object),  # use all features for dimensional reduction
  npcs = 50
)

# Build the kNN graph
sc_object <- FindNeighbors(
  object = sc_object,
  k.param = 20,
  compute.SNN = FALSE,     # RareQ relies on directed kNN, not SNN
  prune.SNN = 0,
  reduction = "pca",
  dims = 1:20,             # only the first 20 of the 50 computed PCs are used
  force.recalc = FALSE,
  return.neighbor = TRUE   # returned neighbor list is essential for Q propagation
)
Now we use RareQ::FindRare() to derive both major and
rare cell clusters, and store the identified cluster labels in the Seurat
metadata.
# Run RareQ on the preprocessed object to obtain the cluster labels
cluster <- RareQ::FindRare(sc_object)
# Tabulate how many cells fall into each cluster
table(cluster)
## cluster
## 32 83 545 1056 1137 1155 1189 1435 1529 1649 2017
## 633 171 1933 421 2583 230 764 2376 474 98 3717
## 2479 2506 2755 2981 3157 3705 4048 4221 4396 4591 4877
## 104 233 672 603 461 175 228 453 236 4339 244
## 4994 5277 5658 5776 5857 5960 6019 6053 6514 6784 7153
## 334 356 736 123 3560 960 637 223 553 537 9
## 7322 7354 7366 7522 7696 7812 7878 8515 8547 9368 9903
## 35 2356 1571 739 2723 645 435 1199 264 53 287
## 10055 10149 10188 10600 11126 11211 11343 11482 11541 11669 11684
## 257 824 267 165 252 1831 854 4475 3114 916 12737
## 11685 11947 11962 12350 12812 13232 13387 13666 13948 14578 15315
## 41 135 158 585 767 1508 156 325 353 191 110
## 15837 15968 16513 18434 20783 21464 21693 22927 23172 23485 23837
## 255 2388 202 416 10 1929 16 351 666 297 4679
## 24398 24471 25427 25999 26844 27582 28214 28675 29096 29302 29833
## 430 352 400 171 964 285 2050 3177 4820 143 1681
## 30413 30908 31156 31537 31798 31951 33376 35799 35899 35934 36204
## 590 262 16 646 706 47 879 298 1067 922 909
## 36456 39579 40144 40320 40569 40758 41336 41788 42690 42726 43435
## 30 349 366 7120 885 303 326 775 188 567 652
## 44183 44298 44947 45204 45328 45440 45464 45494 45680 47591 48002
## 399 90 118 77 46 290 90 2328 69 2496 128
## 48081 49988 50288 50439 51769 52107 52367 53157 54226 54912 56948
## 34 222 2397 398 121 1759 619 2547 189 2816 213
## 57192 57442 57836 60436 60888 61701 63886 64605 66183 66944 69101
## 186 271 243 350 444 540 111 613 98 833 684
## 69355 69836 71730 72012 72558 72763 74707 76891 77630 78158 80396
## 154 464 544 263 387 749 1001 149 15 58 960
## 81426 82683 82742 82744 82838 82936 82949 86820 89513 92316 99022
## 1627 427 83 165 44 188 53 131 651 1107 407
## 99305 101781 102349 102714 106166 107727 107796 110255 111009 111487 112931
## 385 3547 79 695 1396 1209 57 1123 179 174 86
## 114269 115631 116930 118874 119101 123342 123556 127532 127628 129746 130876
## 171 76 538 62 311 39 1177 77 109 142 129
## 130990 133406 134733 139734 140991 141141 146524 149161 149778 151603 155154
## 339 142 218 871 276 16 563 323 327 116 463
## 157347
## 2083
# Attach the RareQ labels and the spatial coordinates to the Seurat metadata
sc_object$cluster <- cluster
sc_object$X <- meta.info$X  # spatial coordinate X from Xenium
sc_object$Y <- meta.info$Y  # spatial coordinate Y from Xenium
# Define color palette for visualization
cols <- c(
  "#532C8A", "#c19f70", "#f9decf", "#c9a997", "#B51D8D", "#9e6762", "#3F84AA", "#F397C0",
  "#C594BF", "#DFCDE4", "#eda450", "#635547", "#C72228", "#EF4E22", "#f77b59", "#989898",
  "#7F6874", "#8870ad", "#65A83E", "#EF5A9D", "#647a4f", "#FBBE92", "#354E23", "#139992",
  "#C3C388", "#8EC792", "#0F4A9C", "#8DB5CE", "#1A1A1A", "#FACB12", "#C9EBFB", "#DABE99",
  "#ed8f84", "#005579", "#CDE088", "#BBDCA8", "#F6BFCB"
)
# Interpolate a hand-picked subset of the base colors to any requested number
# of colors (there are more clusters than base colors).
getPalette <- colorRampPalette(
  cols[c(2, 3, 1, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34)]
)

# Spatial scatterplot: one point per cell at its (X, Y) position, colored by
# cluster label. The legend, grid, and axes are hidden.
cluster.df <- sc_object@meta.data
n.clusters <- length(unique(cluster.df$cluster))
p.cluster <- ggplot(data = cluster.df) +
  geom_point(aes(x = X, y = Y, color = factor(cluster)), size = 0.001) +
  # theme_minimal() is a complete theme, so a preceding theme_bw() would be
  # fully overridden; only theme_minimal() is applied.
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank()
  ) +
  scale_color_manual(values = getPalette(n.clusters))
p.cluster
We applied RareQ to the dataset and successfully recovered the CA2
population—a hippocampal subregion that was not detected in the original
analysis. In the cluster-level spatial plot (p.cluster), CA2 may appear in
the same color as its neighboring CA3 region, making it hard to tell apart.
Users can locate it using the reference cluster 82936 stored in dat.
# Cells that carry the reference label 82936 in `dat`
is.ca2 <- dat$cluster == 82936
# Count which RareQ cluster labels those cells received
cnt <- table(cluster[is.ca2])
cnt
##
## 82936
## 188
# Plot CA2 region
# Highlight (red) the cells of the RareQ cluster that holds the most
# reference-CA2 cells; every other cell is drawn in grey.
ca2.label <- names(cnt)[which.max(cnt)]
point.col <- ifelse(cluster.df$cluster == ca2.label, "red", "grey")
plot(cluster.df$X, cluster.df$Y, cex = 0.1, col = point.col)