This tutorial demonstrates how to apply RareQ to data generated from the 10x Xenium In Situ platform. The example dataset is from a mouse brain coronal section.

Load the data

library(Seurat)
library(RareQ)   # core method for rare-cell detection
library(dplyr)
library(ggplot2)

# Load the example Seurat object shipped with the tutorial.
# Point the path below at your local copy of the "Tutorial_example" directory.
dat <- readRDS("Tutorial_example/data/Xenium_Mouse_brain.RDS")

# Pull out the two pieces used downstream
count <- dat@assays$RNA@counts  # raw gene x cell count matrix
meta.info <- dat@meta.data      # Xenium metadata (coordinates, QC metrics, etc.)

Preprocess the Xenium data

We preprocess the data, perform dimensionality reduction, and compute cell neighborhoods using a standard workflow for data produced by the Xenium platform. Xenium uses targeted spatial transcriptomics panels that typically measure hundreds rather than thousands of genes, so some preprocessing choices differ from those used for scRNA-seq or multiome datasets.

# Create the Seurat object from the raw count matrix.
# `counts` is the formal argument name of CreateSeuratObject(); it is spelled
# out in full so the call does not depend on R's partial argument matching.
# min.cells = 3 drops features detected in fewer than 3 cells (cells are kept).
sc_object <- CreateSeuratObject(counts = count, project = "sc_object", min.cells = 3)

# Normalization followed by feature scaling.
# The scale factor is lowered to 80 to suit the low gene count of Xenium panels.
sc_object <- NormalizeData(object = sc_object, scale.factor = 80)
sc_object <- ScaleData(object = sc_object)

# PCA on every feature in the object (the targeted panel is small, so no
# variable-feature selection is applied); keep the first 50 components.
sc_object <- RunPCA(
  object = sc_object,
  features = rownames(sc_object),
  npcs = 50
)

# Construct the k-nearest-neighbor structure consumed by RareQ
sc_object <- FindNeighbors(
  sc_object,
  # neighbors are searched in the space of the first 20 principal components
  reduction = "pca",
  dims = 1:20,
  k.param = 20,
  # RareQ relies on the directed kNN, not SNN, so the SNN graph is skipped
  compute.SNN = FALSE,
  prune.SNN = 0,
  # the returned neighbor list is essential for Q propagation
  return.neighbor = TRUE,
  force.recalc = FALSE
)

Run RareQ

Now we use RareQ::FindRare() to derive both major and rare cell clusters, and store the resulting cluster labels in the Seurat object's metadata.

# Run RareQ on the preprocessed object; the labels are tabulated right away
# and attached to the Seurat metadata afterwards.
cluster <- RareQ::FindRare(sc_object)

# Number of cells assigned to each cluster label
table(cluster)
## cluster
##     32     83    545   1056   1137   1155   1189   1435   1529   1649   2017 
##    633    171   1933    421   2583    230    764   2376    474     98   3717 
##   2479   2506   2755   2981   3157   3705   4048   4221   4396   4591   4877 
##    104    233    672    603    461    175    228    453    236   4339    244 
##   4994   5277   5658   5776   5857   5960   6019   6053   6514   6784   7153 
##    334    356    736    123   3560    960    637    223    553    537      9 
##   7322   7354   7366   7522   7696   7812   7878   8515   8547   9368   9903 
##     35   2356   1571    739   2723    645    435   1199    264     53    287 
##  10055  10149  10188  10600  11126  11211  11343  11482  11541  11669  11684 
##    257    824    267    165    252   1831    854   4475   3114    916  12737 
##  11685  11947  11962  12350  12812  13232  13387  13666  13948  14578  15315 
##     41    135    158    585    767   1508    156    325    353    191    110 
##  15837  15968  16513  18434  20783  21464  21693  22927  23172  23485  23837 
##    255   2388    202    416     10   1929     16    351    666    297   4679 
##  24398  24471  25427  25999  26844  27582  28214  28675  29096  29302  29833 
##    430    352    400    171    964    285   2050   3177   4820    143   1681 
##  30413  30908  31156  31537  31798  31951  33376  35799  35899  35934  36204 
##    590    262     16    646    706     47    879    298   1067    922    909 
##  36456  39579  40144  40320  40569  40758  41336  41788  42690  42726  43435 
##     30    349    366   7120    885    303    326    775    188    567    652 
##  44183  44298  44947  45204  45328  45440  45464  45494  45680  47591  48002 
##    399     90    118     77     46    290     90   2328     69   2496    128 
##  48081  49988  50288  50439  51769  52107  52367  53157  54226  54912  56948 
##     34    222   2397    398    121   1759    619   2547    189   2816    213 
##  57192  57442  57836  60436  60888  61701  63886  64605  66183  66944  69101 
##    186    271    243    350    444    540    111    613     98    833    684 
##  69355  69836  71730  72012  72558  72763  74707  76891  77630  78158  80396 
##    154    464    544    263    387    749   1001    149     15     58    960 
##  81426  82683  82742  82744  82838  82936  82949  86820  89513  92316  99022 
##   1627    427     83    165     44    188     53    131    651   1107    407 
##  99305 101781 102349 102714 106166 107727 107796 110255 111009 111487 112931 
##    385   3547     79    695   1396   1209     57   1123    179    174     86 
## 114269 115631 116930 118874 119101 123342 123556 127532 127628 129746 130876 
##    171     76    538     62    311     39   1177     77    109    142    129 
## 130990 133406 134733 139734 140991 141141 146524 149161 149778 151603 155154 
##    339    142    218    871    276     16    563    323    327    116    463 
## 157347 
##   2083
# Attach the RareQ labels and the Xenium spatial coordinates as per-cell
# metadata. Values are assigned by position; no cell-level filter was applied
# when the object was created (min.cells only filters features).
sc_object$cluster <- cluster
sc_object$X <- meta.info$X  # spatial coordinate X from Xenium
sc_object$Y <- meta.info$Y  # spatial coordinate Y from Xenium

Visualization

# Base colors from which the cluster palette is interpolated
cols <- c(
  "#532C8A", "#c19f70", "#f9decf", "#c9a997", "#B51D8D", "#9e6762",
  "#3F84AA", "#F397C0", "#C594BF", "#DFCDE4", "#eda450", "#635547",
  "#C72228", "#EF4E22", "#f77b59", "#989898", "#7F6874", "#8870ad",
  "#65A83E", "#EF5A9D", "#647a4f", "#FBBE92", "#354E23", "#139992",
  "#C3C388", "#8EC792", "#0F4A9C", "#8DB5CE", "#1A1A1A", "#FACB12",
  "#C9EBFB", "#DABE99", "#ed8f84", "#005579", "#CDE088", "#BBDCA8",
  "#F6BFCB"
)

# getPalette(n) returns n colors interpolated along a hand-picked, ordered
# subset of `cols` (the order of the indices defines the ramp).
getPalette <- colorRampPalette(
  cols[c(
    2, 3, 1, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16,
    18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34
  )]
)

# Spatial scatterplot: one point per cell at its (X, Y) position, colored by
# RareQ cluster label.
cluster.df <- sc_object@meta.data
p.cluster <- ggplot(data = cluster.df) +
  geom_point(aes(x = X, y = Y, color = factor(cluster)), size = 0.001) +
  # theme_minimal() is a complete theme, so a preceding theme_bw() would be
  # replaced entirely; only the theme that takes effect is kept.
  theme_minimal() +
  # strip grid, legend and axis decorations so only the tissue layout remains
  theme(
    panel.grid = element_blank(),
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank()
  ) +
  # one interpolated color per distinct cluster label
  scale_color_manual(values = getPalette(length(unique(cluster.df$cluster))))
p.cluster

Example: CA2

We applied RareQ to the dataset and successfully recovered the CA2 population—a hippocampal subregion that was not detected in the original analysis. In the cluster-level spatial plot (p.cluster), CA2 may be drawn in nearly the same color as the neighboring CA3 region, which makes it hard to distinguish. To check it, take the cells labeled as reference cluster 82936 in dat and look up which RareQ cluster they were assigned to.

# RareQ labels of the cells that belong to reference cluster 82936 in `dat`,
# tabulated per RareQ cluster
cnt <- table(cluster[dat$cluster == 82936])
cnt
## 
## 82936 
##   188
# Plot the CA2 region: cells in the RareQ cluster that holds most of the
# reference cells are drawn in red, all remaining cells in grey.
plot(
  cluster.df$X, cluster.df$Y,
  cex = 0.1,
  col = ifelse(
    cluster.df$cluster == names(cnt)[which.max(cnt)],
    'red',
    'grey'
  )
)