Introduction


The following code replicates the analysis presented in the Kandinsky paper to investigate spatial pattern of CD74 gene expression in a colorectal cancer (CRC) sample, using CosMx and Visium spatial transcriptomic data.

CosMx


Raw CosMx data and cell annotation files of CRC sample “CR48” were downloaded from Zenodo.

The dataset consists of 93 fields of view (FOVs) from a human CRC FFPE tissue, profiled using the CosMx Universal Cell Characterization RNA Panel.

Cell type annotation for all single cells in the dataset was generated with InSituType as part of an independent study.


1) Loading data


In order to work with CosMx data using Kandinsky, we first need to set the path to the folder containing raw input data:

cosmx_path = 'Kandinsky/data/CosMx/'
list.files(cosmx_path)
#> [1] "CosMx_S0_processed_metadata_plus_cell_anno.rds"
#> [2] "S0_exprMat_file_CR48.csv.gz"                   
#> [3] "S0_fov_positions_file_CR48.csv.gz"             
#> [4] "S0_metadata_file_CR48.csv.gz"                  
#> [5] "S0_tx_file_CR48.csv.gz"                        
#> [6] "S0-polygons_CR48.csv.gz"

Now we can import and load CosMx data into a Seurat object using the Kandinsky prepare_cosmx_seurat() function, by specifying:

  • the path to the input folder

  • the dataset.id which will be used to name the resulting Seurat object

Additional parameters include:

  • pattern: to selectively import only files whose filenames contain a specific string, useful when the folder includes input files from multiple datasets

  • fovs: to load only the cells located within a specified list of FOVs. To load the full dataset, you can set fovs = NULL (default)

cosmx = prepare_cosmx_seurat(path = cosmx_path, dataset.id = 'CR48', pattern = 'CR48')
#> Preparing gene expression matrix...
#> Loading cell metadata...
#> Loading cell polygon file...
#> Creating Seurat object...
#> Locating cell transcript file...
#> Loading FOV position file...


2) c-NB identification


Now that the new Seurat object is ready, we use Kandinsky to identify cell neighborhoods (c-NBs) based on cell-to-cell relationships using the kandinsky_init() function.

Since we are working with CosMx data, we set the parameter tech = 'cosmx' to ensure Kandinsky correctly interprets the CosMx data format.

For neighborhood identification, we use the membrane distance method (nb.method = 'M') and set the distance threshold to 30um (d.max = 30).

cosmx = kandinsky_init(cosmx, tech = 'cosmx', nb.method = 'M', d.max = 30)
#> Building Kandinsky slot "sf"...
#> Building Kandinsky slot "nb"...
#> Building Kandinsky slot "tx"...
#> Building Kandinsky slot "fov_mask"...
#> Kandinsky data loaded into Seurat object!


3) Loading cell type annotation


We can load the annotation file and store the cell type information within the Seurat object metadata.

# Add cell type annotation to CosMx metadata
anno_file = list.files(cosmx_path, pattern = 'CosMx_S0_processed_metadata_plus_cell_anno', full.names = T)
anno = readRDS(anno_file)
head(anno)
#>                   cell_id slideID_fov orig.ident nCount_negprobes
#> c_1_100_1       c_1_100_1       1_100          c                0
#> c_1_100_100   c_1_100_100       1_100          c                1
#> c_1_100_1000 c_1_100_1000       1_100          c                0
#> c_1_100_1001 c_1_100_1001       1_100          c                0
#> c_1_100_1002 c_1_100_1002       1_100          c                0
#> c_1_100_1003 c_1_100_1003       1_100          c                0
#>              nFeature_negprobes nCount_RNA nFeature_RNA nCount_falsecode
#> c_1_100_1                     0         40           33                2
#> c_1_100_100                   1       1290          264                4
#> c_1_100_1000                  0         67           55                0
#> c_1_100_1001                  0        472          203                0
#> c_1_100_1002                  0         23           22                0
#> c_1_100_1003                  0       1468          297                9
#>              nFeature_falsecode fov  Area AspectRatio x_FOV_px y_FOV_px Width
#> c_1_100_1                     2 100  5870        0.86     1712       52    89
#> c_1_100_100                   4 100 10862        0.91     3845     1027   127
#> c_1_100_1000                  0 100  4119        1.04     1749     1951    82
#> c_1_100_1001                  0 100  3645        0.83      299     1965    73
#> c_1_100_1002                  0 100  4214        0.63     2380     1976    61
#> c_1_100_1003                  9 100  7842        0.97     1022     1974   101
#>              Height Mean.PanCK Max.PanCK Mean.CD68 Max.CD68 Mean.Membrane
#> c_1_100_1       104       1022      3832       555     1656           379
#> c_1_100_100     139       2691      6255       164      480         12494
#> c_1_100_1000     79        845      3697       158      622          8378
#> c_1_100_1001     88        817      1806       128      337          4991
#> c_1_100_1002     97       1231      2448       202      775          9555
#> c_1_100_1003    104       2315      6754       189      769          4296
#>              Max.Membrane Mean.CD45 Max.CD45 Mean.DAPI Max.DAPI assay_type
#> c_1_100_1            2220       543     1435      1846     5589        RNA
#> c_1_100_100         36909       313      893      2721     5858        RNA
#> c_1_100_1000        23449       196      730      1417     2628        RNA
#> c_1_100_1001        11428       213      601      4970     6559        RNA
#> c_1_100_1002        33289       253      757      5414    10532        RNA
#> c_1_100_1003        21392       276      841      2501     4669        RNA
#>              slide_ID_numeric Run_Tissue_name Panel      cell_ID slide sample
#> c_1_100_1                   1              S0  980p    c_1_100_1    S0   CR48
#> c_1_100_100                 1              S0  980p  c_1_100_100    S0   CR48
#> c_1_100_1000                1              S0  980p c_1_100_1000    S0   CR48
#> c_1_100_1001                1              S0  980p c_1_100_1001    S0   CR48
#> c_1_100_1002                1              S0  980p c_1_100_1002    S0   CR48
#> c_1_100_1003                1              S0  980p c_1_100_1003    S0   CR48
#>                        ID x_global_px y_global_px InSitu_clusters_raw
#> c_1_100_1       c_1_100_1    21900.38    17646.67                   a
#> c_1_100_100   c_1_100_100    24035.62    16671.90                   k
#> c_1_100_1000 c_1_100_1000    21937.41    15746.23                   m
#> c_1_100_1001 c_1_100_1001    20492.89    15734.45                   n
#> c_1_100_1002 c_1_100_1002    22570.12    15726.06                   m
#> c_1_100_1003 c_1_100_1003    21214.96    15726.32                   e
#>              InSitu_clusters_aggr      final_anno n_neighbors        neighbors
#> c_1_100_1         FibroEndoMuscle FibroEndoMuscle           2          653,950
#> c_1_100_100                   Epi             Epi           2        1201,1292
#> c_1_100_1000              Myeloid         Myeloid           1              564
#> c_1_100_1001                  Epi             Epi           4 11,594,1379,1384
#> c_1_100_1002              Myeloid         Myeloid           2          26,1383
#> c_1_100_1003                  Epi             Epi           3     12,1368,1377
#>                              neighbors_label is_epiT nb_tnk is_epionly nb_epi
#> c_1_100_1    FibroEndoMuscle,FibroEndoMuscle       0      0          0      0
#> c_1_100_100                          Epi,Epi       0      0          1      2
#> c_1_100_1000                             Epi       0      0          0      1
#> c_1_100_1001                 Epi,Epi,Epi,Epi       0      0          1      4
#> c_1_100_1002                         Epi,Epi       0      0          0      2
#> c_1_100_1003                     Epi,Epi,Epi       0      0          1      3
#>                   epiType nb_noepi                       geometry
#> c_1_100_1    StromaImmune        2 POLYGON ((21910.67 17699, 2...
#> c_1_100_100       epiOnly        0 POLYGON ((24001.67 16742, 2...
#> c_1_100_1000 StromaImmune        0 POLYGON ((21929.67 15788, 2...
#> c_1_100_1001      epiOnly        0 POLYGON ((20486.67 15778, 2...
#> c_1_100_1002 StromaImmune        0 POLYGON ((22576.67 15772, 2...
#> c_1_100_1003      epiOnly        0 POLYGON ((21191.67 15777, 2...
anno = anno[anno$sample == 'CR48',]
anno = anno[,c('cell_ID','final_anno')]

anno = sf::st_drop_geometry(anno)
anno$cell_ID = gsub("c_1_","",anno$cell_ID)
rownames(anno) = anno$cell_ID
common = intersect(colnames(cosmx),rownames(anno))
cosmx = cosmx[,rownames(anno)]

cosmx = update_kandinsky(cosmx)
#> Updating Kandinsky data
anno = anno[colnames(cosmx),]
cosmx$final_anno = as.character(anno$final_anno)

message('CosMx dataset composed of ',nrow(anno),' cells')
#> CosMx dataset composed of 152100 cells

We can now visualize our dataset using the Kandinsky KanPlot() function, with cells coloured according to their annotated cell types:

KanPlot(cosmx,feature = 'final_anno') + 
  scale_fill_manual(values = c('Epi' = 'green', 'FibroEndoMuscle' = 'magenta', 'Myeloid' = 'cyan', 'T/NK' = 'red', 'Plasma/B' = 'yellow'))
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.

We can assess the size of the identified c-NBs, expressed as number of cells found within each c-NB, using the Kandinsky nbSizePlot() function:

nbSizePlot(cosmx) + theme(aspect.ratio = 2)

Before continuing with the downstream analysis, we need to perform gene expression normalization using Seurat function NormalizeData():

# Normalize counts
cosmx = NormalizeData(cosmx)
#> Normalizing layer: counts


4) Hot/cold areas


We can visualize single cell CD74 expression levels across different FOVs:

KanPlot(cosmx,'CD74', palette_cont = 'viridis')

We can define CD74 hot and cold areas according Getis-Ord Gi statistics calculated using Kandinsky function hotspot_analysis().

In particular, we can specify as parameter:

  • feature: the name of the feature to use to compute Getis-Ord Gi* statistics
  • sim: number of Monte Carlo simulations to be run for estimating Local Gi* coefficients significance
  • padj.thresh: numeric value indicating the significance threshold to be applied to the adjusted p-values resulting from Monte Carlo simulations
cosmx = hotspot_analysis(cosmx, feature = 'CD74', layer = 'data', sim=999, padj.thresh = 0.05)

We can now visualize cells coloured according to whether they are assigned to hot or cold areas based on CD74 expression

KanPlot(cosmx, feature = 'CD74_clust', palette_discrete = c('dodgerblue3','firebrick3','#D9D9D9'))

After selecting a subset of representative FOVs, we can now visualize the same subset of cells coloured according to:

  • their cell type of origin
  • CD74 expression
  • whether they are assigned to hot or cold areas based on CD74 expression
fovs = c(154,157:159)

(KanPlot(cosmx, feature = 'final_anno', fovs = fovs
         , show_border = T, border.size = 0.1, border.col = '#808285') +
    scale_fill_manual(values = c('Epi' = 'green', 'FibroEndoMuscle' = 'magenta', 'Myeloid' = 'cyan', 'T/NK' = 'red', 'Plasma/B' = 'yellow'))) +
  
  KanPlot(cosmx, feature = 'CD74', fovs = fovs
          , palette_cont = 'viridis', show_border = T, border.size = 0.1, border.col = '#808285') +
  
  KanPlot(cosmx,feature = 'CD74_clust', fovs = fovs
          , palette_discrete = c('#1A6AA0','#A94349','#D9D9D9'), show_border = T, border.size = 0.1, border.col = '#808285')
#> Updating Kandinsky data
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Updating Kandinsky data
#> 
#> Updating Kandinsky data

We can test significant enrichment of annotated cell types within CD74 hot and cold areas using one-tailed Fisher’s exact test:

cd74 = model.matrix(data = cosmx@meta.data, ~ 0 + CD74_clust) %>% as.data.frame()
colnames(cd74) = c('Cold','Hot','NS')

immune = model.matrix(data = cosmx@meta.data, ~0 + final_anno) %>% as.data.frame()
colnames(immune) = c('Epi','FibroEndoMuscle','Myeloid','Plasma/B','TNK')

immune  = immune[,c('Epi','Myeloid','TNK')]

all_res = lapply(colnames(immune),function(x){
  fisher_hot = fisher.test(table(cd74$Hot, immune[[x]]), alternative = 'greater')$p.value
  return(data.frame(cell_type = x
                    ,hot_pval = fisher_hot))
})

all_res = purrr::reduce(all_res, rbind)


cosmx_hot = list()
for(c in c('Epi','Myeloid','TNK')){
  fisher_hot = all_res[all_res$cell_type == c,]$hot_pval
  
  if(fisher_hot < 2.2*(10^-16)){
    fisher_hot = expression('Fisher p < 2.2x10'^-16)
  }else{
    fisher_hot = paste0('Fisher p = ', format(fisher_hot, scientific = T, digits = 3))
  }
  
  hot_props = table(cd74$Hot,immune[[c]]) %>% as.data.frame() %>% 
    group_by(Var1) %>% 
    mutate(prop = Freq/sum(Freq)) %>% ungroup() %>% 
    filter(Var2 == 1)
  
  cosmx_hot[[c]] = ggplot(hot_props, aes(x = Var1, y = prop))+
    theme_classic() + geom_bar(stat = 'identity', fill = ifelse(
      c == 'Epi','green',ifelse(c == 'Myeloid','cyan','red')
    ), color = 'black') +
    scale_x_discrete(breaks = c('1','0'), limits = c('1','0'), labels = c('Hot','Rest')) +
    labs(x = 'CD74 areas', y = paste0('Area (%)')) +
    scale_y_continuous(limits = c(0, (max(hot_props$prop)*1.1)), breaks = c(0,max(hot_props$prop)), labels = round(c(0,100*(max(hot_props$prop))), 1))+
    annotate('text', x = 1.5, y = max(hot_props$prop)*1.1, label = fisher_hot) +
    guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) + 
    theme(axis.text = element_text(color='black'), aspect.ratio = 1)
}

wrap_plots(cosmx_hot[c('Myeloid','TNK','Epi')], ncol = 1)

The results from Fisher’s tests indicate an enrichment of both myeloid and T/NK cells within CD74 hot areas compared to other regions of the sample.

Although CRC epithelial cells are not enriched in CD74 hot areas, findings from Acha-Sagredo et al suggest that CRC cells can be induced to overexpress CD74 as a result of proximity to T/NK cell.

We can corroborate this observation by dividing our CRC cells into two groups, based on whether or not at least one T/NK cells is present within their c-NBs.
We can then compare CD74 expression between these two groups of CRC cells via Wilcoxon Rank test.

cosmx = nnMat(cosmx, label = 'final_anno')
#> Creating neighbour matrix using pre-defined neighbour network
cosmx = nn_query(cosmx
                 , query = "`T/NK` >0, final_anno == 'Epi'"
                 , anno_name = 'Epi_type'
                 , diffexpr = F
                 , label = 'final_anno'
                 , which = 'Epi')

cosmx$CD74_expr = FetchData(cosmx, 'CD74', clean = 'none')[,1]

cosmx_cd74_data = cosmx@meta.data[cosmx@meta.data$final_anno == 'Epi',c('Epi_type','CD74_expr')]

pval = wilcox.test(CD74_expr ~ Epi_type, data = cosmx_cd74_data)$p.value

if(pval < 2.2e-16){
  pval = expression('Wilcoxon p-value < 2.2x10'^-16)
}else{
  pval = format(pval, scientific = T, digits = 3)
}


n_epi1 = nrow(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'N',])
n_epi2 = nrow(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'Y',])

dens1 = density(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'N',]$CD74_expr)$y
dens2 = density(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'Y',]$CD74_expr)$y
max_dens = max(c(dens1,dens2))
min_dens = min(c(dens1,dens2))
median1 = median(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'N',]$CD74_expr)
median2 = median(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'Y',]$CD74_expr)

ggplot(cosmx_cd74_data, aes(x = CD74_expr, fill = Epi_type)) +
  theme_classic() + geom_density(alpha = 0.4,color=NA)+
  scale_x_continuous(breaks = c(min(cosmx_cd74_data$CD74_expr), median1, median2, max(cosmx_cd74_data$CD74_expr))
                     , labels = round(c(min(cosmx_cd74_data$CD74_expr), median1, median2, max(cosmx_cd74_data$CD74_expr)), 1)) +
  scale_y_continuous(breaks = c(min_dens, max_dens)
                     , labels = round(c(min_dens, max_dens),2)) +
  guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) +
  labs(x = 'CD74 normalized expression', y = 'Density', fill = 'Epi type') +
  annotate('text', x = median(cosmx_cd74_data$CD74_expr), y = max_dens, label = pval) +
  scale_fill_manual(values = c('#2E8B57','#80E580')
                    , labels = c(paste0('Rest of CRC cells (', n_epi1, ')'), paste0('CRC cells near T/NK (', n_epi2, ')'))) +
  geom_vline(xintercept = median1, linetype = 'dashed', color = '#2E8B57') +
  geom_vline(xintercept = median2, linetype = 'dashed', color = '#80E580') + 
  coord_fixed(10)

Indeed, CRC cells with at least one T/NK cell within their c-NBs show a significantly higher CD74 expression compared to the rest of CRC cells.


Visium


The Visium dataset analysed in this vignette has been downloaded from Zenodo.

It consists of a distinct tissue regions obtained from the same human CRC FFPE sample previously profiled with the CosMx Universal Cell Characterization RNA Panel.

The Visium Human Transcriptome Probe kit (v1) was used to capture RNA molecules from the FFPE sample after tissue permeabilization, enabling gene expression quantification for ~18,000 human genes.

After sequencing, Visium FASTQ files were processed with spaceranger (v2.0). Loupe Browser (v6.2) was used to visualize Visium spot-level gene expression data and the associated H&E image, to check for the presence of “in-tissue” spots located in empty regions of the tissue slide.

A separate spot annotation file A1_CR48_TissueType_Anno.csv was generated.

1) Loading data


We first need to specify the path to the folder containing raw Visium input data.
Next, we import Visium data into R as a Seurat object using the Load10X_Spatial() function.

Once the data is loaded, we perform initial quality checks. Specifically:

  • Visium spots mapped to empty tissue regions are removed based on the A1_CR48_TissueType_Anno.csv annotation file
  • Spots with fewer than 500 transcripts (UMI) and fewer than 300 unique genes detected are discarded
  • Genes detected in fewer than 10 spots (after filtering out low-quality spots) are removed
visium_path = 'Kandinsky/data/Visium/'
visium = Load10X_Spatial(
  , data.dir = visium_path
  , filename = "filtered_feature_bc_matrix.h5"
  , assay = "Spatial"
  , slice = 'CR48'
  , filter.matrix = TRUE
)   
genes = rownames(visium)

message("Total spots: ",ncol(visium))
#> Total spots: 3106

# Load spot annotation file (used to remove 'empty' spots)
anno_spots = read.delim(paste0(visium_path,'A1_CR48_TissueType_Anno.csv'), sep = ',')
rownames(anno_spots) = anno_spots$Barcode

# Remove spots without tissue annotation (corresponding to empty regions)
anno_spots = anno_spots[which(anno_spots$Location != ""),]


# Remove spots corresponding to empty regions (if any)
if(ncol(visium) > nrow(anno_spots)){
  message("Removing ", ncol(visium) - nrow(anno_spots), " empty spots")
  visium = visium[,which(colnames(visium) %in% rownames(anno_spots))]
} else {
  message("No empty spots detected")
}
#> Removing 124 empty spots

anno_spots = anno_spots[colnames(visium),]
visium = AddMetaData(visium, anno_spots$Location, col.name = "Location")


# Remove low-quality spots
visium = subset(x = visium, subset = nCount_Spatial >= 500)
message(ncol(visium)," spots with at least 500 UMIs")
#> 2955 spots with at least 500 UMIs

visium = subset(x = visium, subset = nFeature_Spatial >= 300)
message(ncol(visium)," spots with at least 300 genes detected")
#> 2955 spots with at least 300 genes detected
cell_id = colnames(visium)

keep_genes = which(rowSums(LayerData(visium,'counts') > 0) >= 10)
visium = subset(visium, features = keep_genes)
dim(visium)
#> [1] 14487  2955


2) s-NB identification


Using Kandinsky kandinsky_init() function, we define spot neighbourhoods (s-NBs) based on the Queen contiguity method (nb.method = ‘Q’), selecting the first layer of neighboring spots around each spot

We then use the Kandinsky nb_expand() function to include the second closest layer of spots in each s-NB.

visium = kandinsky_init(visium
                        , tech ='visium'
                        , nb.method = 'Q'
                        , res = 'high'
                        , img = paste0(visium_path,'spatial/tissue_hires_image.png')
)
#> Building Kandinsky slot "img"...
#> Building Kandinsky slot "sf"...
#> Building Kandinsky slot "nb"...
#> Queen neighbour method is designed to work with Visium / Visium HD data only
#> Building Kandinsky slot "tx"...
#> Building Kandinsky slot "fov_mask"...
#> Kandinsky data loaded into Seurat object!

# Expand neighbourhood definition by including the second ring of surrounding spots
visium = nb_expand(visium, maxorder = 2)

Moreover, we can assess the size of identified s-NBs, expressed as number of cells found within each s-NB, using the Kandinsky nbSizePlot() function:

nbSizePlot(visium) + theme(aspect.ratio = 2)

The H&E image associated with the Visium sample is automatically stored in the Kandinsky slot img.
For visualization purposes, the user can create a mask of the H&E tissue slide, retaining only the image area covered by tissue, using the Kandinsky he_mask() function.

# Create a mask image starting from visium H&E staining image
visium = he_mask(visium, stretch = T, sd_thresh = 7)

SlidePlot(visium, img_type = 'img') + coord_fixed(1) +
SlidePlot(visium, img_type = 'mask') + coord_fixed(1)
#> SpatRaster resampled to ncells = 1001000
#> SpatRaster resampled to ncells = 1001325

Before continuing with the downstream analysis, we need to perform gene expression normalization using Seurat function NormalizeData():

visium = NormalizeData(visium)
#> Normalizing layer: counts


3) Hot/cold areas


We can visualize CD74 expression levels across spots:

KanPlot(visium, feature = 'CD74', palette_cont = 'viridis')

Now, we can define CD74 hot and cold areas according to Getis-Ord Gi statistics calculated using Kandinsky hotspot_analysis() function.

# Define hot/cold areas of CD74 expression (default values used are the normalized counts stored in the 'data' layer)
visium = hotspot_analysis(visium, feature='CD74', sim = 999, padj.thresh = 0.05)

KanPlot(visium, feature = 'CD74_clust', palette_discrete = c('#1A6AA0','#A94349','#D9D9D9'))

Since Visium spots cover tissue regions that can generally contain multiple individual cells, we can use cell type gene expression signatures to estimate the abundance of different cell populations within each spot.

We load the list of CRC immune-related signatures described in Acha-Sagredo et al. Based on our analysis of the CosMx CRC dataset, we revealed an association between CD74 hot areas and myeloid and T/NK cells.

Now, we are interested in defining the relationship between CD74 expression and specific subpopulations of tumor-associated macrophages and T/NK cells.

signatures = readRDS('Kandinsky/data/Visium/CRC_LCM_Ext_sigs.rds')

We can calculate gene expression scores across spots for each immune-related signature using UCell.
Next, we define hot and cold areas for each immune signature using Kandinsky hotspot_analysis() function, considering the signature scores calculated with UCell across all spots .

# Test signatures related to TAMs/TNK subtypes or IFN production
signatures = signatures[c(4,5,6,10,12,13,14)]

# Calculate signature scores within each spot using UCell
visium = UCell::AddModuleScore_UCell(visium, features = signatures, maxRank = 5000)

visium = visium %>% 
  FindVariableFeatures(., nfeatures = 5000, selection.method = "vst") %>% 
  ScaleData() %>%
  RunPCA(assay = "Spatial", verbose = FALSE,npcs = 50) %>%
  FindNeighbors(reduction = "pca", dims = 1:30)
#> Finding variable features for layer counts
#> Centering and scaling data matrix
#> Computing nearest neighbor graph
#> Computing SNN

# Smooth UCell scores across spots according to their 20 nearest neighbour in the PCA space
visium = UCell::SmoothKNN(visium, signature.names = paste0(names(signatures),'_UCell'), k = 20)

# Immune sig hot/cold areas
for(s in paste0(names(signatures),'_UCell_kNN')){
  visium = hotspot_analysis(visium, feature = s, sim = 999, padj.thres = 0.05)
}

# Clean column names for hot/cold areas annotation
colnames(visium@meta.data) = gsub('_UCell_kNN' ,'', colnames(visium@meta.data))


When hot and cold areas are identified for more than one variable within the same dataset, the Kandinsky hotspot_overlap function can be used to create a new annotation that reflects the overlap between hot and cold areas for two independent variables, specified using feat.1 and feat.2 parameters.

KanPlot(visium, feature = 'M1_16_clust', palette_discrete = c('#1A6AA0','#A94349','#D9D9D9'))


visium = hotspot_overlap(visium, feat.1 = 'CD74', feat.2 = 'M1_16')

KanPlot(visium, feature = 'CD74_M1_16_clust', palette_discrete = c('dodgerblue4','skyblue1','purple','firebrick4','firebrick1','gray85'), border.col = 'gray95')

KanPlot(visium, feature = 'CD74_M1_16_clust', palette_discrete = c('gray85','skyblue1','gray85','gray85','firebrick1','gray85'), border.col = 'gray95')

We can test the association between CD74 hot and cold areas and each TAM or T/NK related signature using a one-tailed Fisher’s Exact test.
Specifically, we assess the overlap between CD74 hot/cold areas and hot/cold areas for each signature, respectively:

res = list()

for(s in paste0(names(signatures),'_clust')){
  cd74 = model.matrix(data = visium@meta.data, ~ 0 + CD74_clust) %>% as.data.frame()
  colnames(cd74) = c('Cold','Hot','NS')
  
  immune = model.matrix(data = visium@meta.data, formula(paste0("~0+", s))) %>% as.data.frame()
  colnames(immune) = c('Cold','Hot','NS')
  
  fisher_hot = fisher.test(table(cd74$Hot, immune$Hot), alternative = 'greater')$p.value
  fisher_cold = fisher.test(table(cd74$Cold, immune$Cold), alternative = 'greater')$p.value
  tot_res = data.frame(signature = gsub("_clust","", s),
                       hot_hot_pval = fisher_hot,
                       cold_cold_pval = fisher_cold)
  res[[ gsub("_clust","",s)]] = tot_res
}

res = purrr::reduce(res,rbind)

res$hot_FDR = p.adjust(res$hot_hot_pval, method = 'BH')
res$cold_FDR = p.adjust(res$cold_cold_pval, method = 'BH')
rownames(res) = res$signature


summary_res = res %>% dplyr::select(signature, hot_FDR, cold_FDR)
summary_res = reshape2::melt(summary_res)
#> Using signature as id variables

We can now visualize the association between CD74 expression levels and each TAM or T/NK related signature by

plots_hot = list()

for(s in paste0(names(signatures),'_clust')){
  print(s)
  cd74 = model.matrix(data = visium@meta.data, ~ 0 + CD74_clust) %>% as.data.frame()
  colnames(cd74) = c('Cold','Hot','NS')
  
  immune = model.matrix(data = visium@meta.data, formula(paste0("~0+",s))) %>% as.data.frame()
  colnames(immune) = c('Cold','Hot','NS')
  
  fisher_hot = summary_res[summary_res$signature == gsub('_clust','',s) & summary_res$variable == 'hot_FDR',]$value
  
  if(fisher_hot < 2.2e-16){
    fisher_hot = expression('Fisher FDR < 2.2x10'^-16)
  }else{
    fisher_hot = paste0('Fisher FDR = ',format(fisher_hot, scientific = TRUE, digits = 3))
  }
  
  hot_props = table(cd74$Hot,immune$Hot) %>% as.data.frame() %>% 
    group_by(Var1) %>% 
    mutate(prop = Freq/sum(Freq)) %>% ungroup() %>% 
    filter(Var2 == 1)
  
  plots_hot[[s]] = ggplot(hot_props,aes(x = Var1, y = prop)) + 
    theme_classic() + geom_bar(stat = 'identity', fill = '#A94349', color = 'black') +
    scale_x_discrete(breaks = c('1','0'), limits = c('1','0'), labels = c('Hot','Rest')) +
    labs(x = 'CD74 area', y = paste0('Hot for ',gsub('_clust','',s),' (%)'), fill = paste0(gsub('_clust','',s),' area')) +
    scale_y_continuous(limits = c(0,max(hot_props$prop)+0.05)
                       , breaks = c(0,max(hot_props$prop))
                       ,labels = round(100*c(0,max(hot_props$prop)),1)) +
    annotate('text', x = 1.5, y = max(hot_props$prop) + 0.05, label = fisher_hot)+
    guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) + 
    theme(axis.text = element_text(color = 'black'))
}
#> [1] "M0_27_clust"
#> [1] "M1_16_clust"
#> [1] "M2_13_clust"
#> [1] "NK_33_clust"
#> [1] "CD4_17_clust"
#> [1] "T_8_clust"
#> [1] "Cytotoxic_23_clust"

wrap_plots(plots_hot, ncol = 4)


plots_cold = list()
for(s in paste0(names(signatures),'_clust')){
  print(s)
  cd74 = model.matrix(data = visium@meta.data, ~0 + CD74_clust) %>% as.data.frame()
  colnames(cd74) = c('Cold','Hot','NS')
  
  immune = model.matrix(data = visium@meta.data, formula(paste0("~0+",s))) %>% as.data.frame()
  colnames(immune) = c('Cold','Hot','NS')
  
  fisher_cold = summary_res[summary_res$signature == gsub('_clust','',s) & summary_res$variable == 'cold_FDR',]$value
  if(fisher_cold < 2.2e-16){
    fisher_cold = expression('Fisher FDR < 2.2x10'^-16)
  }else{
    fisher_cold = paste0('Fisher FDR = ',format(fisher_cold, scientific = T, digits = 3))
  }
  
  cold_props = table(cd74$Cold, immune$Cold) %>% as.data.frame() %>% 
    group_by(Var1) %>% 
    mutate(prop = Freq/sum(Freq)) %>% ungroup() %>% 
    filter(Var2 == 1)
  
  plots_cold[[s]] = ggplot(cold_props,aes(x = Var1, y = prop)) + 
    theme_classic() + geom_bar(stat = 'identity', fill = '#1A6AA0', color = 'black')+
    scale_x_discrete(breaks = c('1','0')
                     , limits = c('1','0')
                     , labels = c('Cold','Rest')) +
    labs(x = 'CD74 area', y = paste0('Cold for ',gsub('_clust','',s),' (%)'), fill = paste0(gsub('_clust','',s),' area')) +
    scale_y_continuous(limits = c(0, max(cold_props$prop)+0.05)
                       , breaks = c(0, max(cold_props$prop))
                       , labels = round(100*c(0, max(cold_props$prop)),1)) +
    annotate('text', x = 1.5, y = max(cold_props$prop) + 0.05, label = fisher_cold) +
    guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) + 
    theme(axis.text = element_text(color = 'black'))
}
#> [1] "M0_27_clust"
#> [1] "M1_16_clust"
#> [1] "M2_13_clust"
#> [1] "NK_33_clust"
#> [1] "CD4_17_clust"
#> [1] "T_8_clust"
#> [1] "Cytotoxic_23_clust"
wrap_plots(plots_cold, ncol = 4)