The following code replicates the analysis presented in the Kandinsky paper to investigate spatial pattern of CD74 gene expression in a colorectal cancer (CRC) sample, using CosMx and Visium spatial transcriptomic data.
Raw CosMx data and cell annotation files of CRC sample “CR48” were downloaded from Zenodo.
The dataset consists of 93 fields of view (FOVs) from a human CRC FFPE tissue, profiled using the CosMx Universal Cell Characterization RNA Panel.
Cell type annotation for all single cells in the dataset was
generated with InSituType
as part of an independent study.
In order to work with CosMx data using Kandinsky, we first need to set the path to the folder containing raw input data:
list.files(cosmx_path)
#> [1] "CosMx_S0_processed_metadata_plus_cell_anno.rds"
#> [2] "S0_exprMat_file_CR48.csv.gz"
#> [3] "S0_fov_positions_file_CR48.csv.gz"
#> [4] "S0_metadata_file_CR48.csv.gz"
#> [5] "S0_tx_file_CR48.csv.gz"
#> [6] "S0-polygons_CR48.csv.gz"
Now we can import and load CosMx data into a
Seurat object
using the Kandinsky
prepare_cosmx_seurat()
function, by specifying:
the path
to the input folder
the dataset.id
which will be used to name the
resulting Seurat object
Additional parameters include:
pattern
: to selectively import only files whose
filenames contain a specific string, useful when the folder includes
input files from multiple datasets
fovs
: to load only the cells located within a
specified list of FOVs. To load the full dataset, you can set
fovs = NULL
(default)
cosmx = prepare_cosmx_seurat(path = cosmx_path, dataset.id = 'CR48', pattern = 'CR48')
#> Preparing gene expression matrix...
#> Loading cell metadata...
#> Loading cell polygon file...
#> Creating Seurat object...
#> Locating cell transcript file...
#> Loading FOV position file...
Now that the new Seurat object
is ready, we use
Kandinsky to identify cell neighborhoods
(c-NBs) based on cell-to-cell relationships using the
kandinsky_init()
function.
Since we are working with CosMx data, we set the parameter
tech = 'cosmx'
to ensure Kandinsky correctly interprets the
CosMx data format.
For neighborhood identification, we use the membrane distance
method (nb.method = 'M'
) and set the distance
threshold to 30um (d.max = 30
).
cosmx = kandinsky_init(cosmx, tech = 'cosmx', nb.method = 'M', d.max = 30)
#> Building Kandinsky slot "sf"...
#> Building Kandinsky slot "nb"...
#> Building Kandinsky slot "tx"...
#> Building Kandinsky slot "fov_mask"...
#> Kandinsky data loaded into Seurat object!
We can load the annotation file and store the cell type information
within the Seurat object
metadata.
# Add cell type annotation to CosMx metadata
anno_file = list.files(cosmx_path, pattern = 'CosMx_S0_processed_metadata_plus_cell_anno', full.names = T)
anno = readRDS(anno_file)
head(anno)
#> cell_id slideID_fov orig.ident nCount_negprobes
#> c_1_100_1 c_1_100_1 1_100 c 0
#> c_1_100_100 c_1_100_100 1_100 c 1
#> c_1_100_1000 c_1_100_1000 1_100 c 0
#> c_1_100_1001 c_1_100_1001 1_100 c 0
#> c_1_100_1002 c_1_100_1002 1_100 c 0
#> c_1_100_1003 c_1_100_1003 1_100 c 0
#> nFeature_negprobes nCount_RNA nFeature_RNA nCount_falsecode
#> c_1_100_1 0 40 33 2
#> c_1_100_100 1 1290 264 4
#> c_1_100_1000 0 67 55 0
#> c_1_100_1001 0 472 203 0
#> c_1_100_1002 0 23 22 0
#> c_1_100_1003 0 1468 297 9
#> nFeature_falsecode fov Area AspectRatio x_FOV_px y_FOV_px Width
#> c_1_100_1 2 100 5870 0.86 1712 52 89
#> c_1_100_100 4 100 10862 0.91 3845 1027 127
#> c_1_100_1000 0 100 4119 1.04 1749 1951 82
#> c_1_100_1001 0 100 3645 0.83 299 1965 73
#> c_1_100_1002 0 100 4214 0.63 2380 1976 61
#> c_1_100_1003 9 100 7842 0.97 1022 1974 101
#> Height Mean.PanCK Max.PanCK Mean.CD68 Max.CD68 Mean.Membrane
#> c_1_100_1 104 1022 3832 555 1656 379
#> c_1_100_100 139 2691 6255 164 480 12494
#> c_1_100_1000 79 845 3697 158 622 8378
#> c_1_100_1001 88 817 1806 128 337 4991
#> c_1_100_1002 97 1231 2448 202 775 9555
#> c_1_100_1003 104 2315 6754 189 769 4296
#> Max.Membrane Mean.CD45 Max.CD45 Mean.DAPI Max.DAPI assay_type
#> c_1_100_1 2220 543 1435 1846 5589 RNA
#> c_1_100_100 36909 313 893 2721 5858 RNA
#> c_1_100_1000 23449 196 730 1417 2628 RNA
#> c_1_100_1001 11428 213 601 4970 6559 RNA
#> c_1_100_1002 33289 253 757 5414 10532 RNA
#> c_1_100_1003 21392 276 841 2501 4669 RNA
#> slide_ID_numeric Run_Tissue_name Panel cell_ID slide sample
#> c_1_100_1 1 S0 980p c_1_100_1 S0 CR48
#> c_1_100_100 1 S0 980p c_1_100_100 S0 CR48
#> c_1_100_1000 1 S0 980p c_1_100_1000 S0 CR48
#> c_1_100_1001 1 S0 980p c_1_100_1001 S0 CR48
#> c_1_100_1002 1 S0 980p c_1_100_1002 S0 CR48
#> c_1_100_1003 1 S0 980p c_1_100_1003 S0 CR48
#> ID x_global_px y_global_px InSitu_clusters_raw
#> c_1_100_1 c_1_100_1 21900.38 17646.67 a
#> c_1_100_100 c_1_100_100 24035.62 16671.90 k
#> c_1_100_1000 c_1_100_1000 21937.41 15746.23 m
#> c_1_100_1001 c_1_100_1001 20492.89 15734.45 n
#> c_1_100_1002 c_1_100_1002 22570.12 15726.06 m
#> c_1_100_1003 c_1_100_1003 21214.96 15726.32 e
#> InSitu_clusters_aggr final_anno n_neighbors neighbors
#> c_1_100_1 FibroEndoMuscle FibroEndoMuscle 2 653,950
#> c_1_100_100 Epi Epi 2 1201,1292
#> c_1_100_1000 Myeloid Myeloid 1 564
#> c_1_100_1001 Epi Epi 4 11,594,1379,1384
#> c_1_100_1002 Myeloid Myeloid 2 26,1383
#> c_1_100_1003 Epi Epi 3 12,1368,1377
#> neighbors_label is_epiT nb_tnk is_epionly nb_epi
#> c_1_100_1 FibroEndoMuscle,FibroEndoMuscle 0 0 0 0
#> c_1_100_100 Epi,Epi 0 0 1 2
#> c_1_100_1000 Epi 0 0 0 1
#> c_1_100_1001 Epi,Epi,Epi,Epi 0 0 1 4
#> c_1_100_1002 Epi,Epi 0 0 0 2
#> c_1_100_1003 Epi,Epi,Epi 0 0 1 3
#> epiType nb_noepi geometry
#> c_1_100_1 StromaImmune 2 POLYGON ((21910.67 17699, 2...
#> c_1_100_100 epiOnly 0 POLYGON ((24001.67 16742, 2...
#> c_1_100_1000 StromaImmune 0 POLYGON ((21929.67 15788, 2...
#> c_1_100_1001 epiOnly 0 POLYGON ((20486.67 15778, 2...
#> c_1_100_1002 StromaImmune 0 POLYGON ((22576.67 15772, 2...
#> c_1_100_1003 epiOnly 0 POLYGON ((21191.67 15777, 2...
anno = anno[anno$sample == 'CR48',]
anno = anno[,c('cell_ID','final_anno')]
anno = sf::st_drop_geometry(anno)
anno$cell_ID = gsub("c_1_","",anno$cell_ID)
rownames(anno) = anno$cell_ID
common = intersect(colnames(cosmx),rownames(anno))
cosmx = cosmx[,rownames(anno)]
cosmx = update_kandinsky(cosmx)
#> Updating Kandinsky data
anno = anno[colnames(cosmx),]
cosmx$final_anno = as.character(anno$final_anno)
message('CosMx dataset composed of ',nrow(anno),' cells')
#> CosMx dataset composed of 152100 cells
We can now visualize our dataset using the Kandinsky
KanPlot()
function, with cells coloured according to their
annotated cell types:
KanPlot(cosmx,feature = 'final_anno') +
scale_fill_manual(values = c('Epi' = 'green', 'FibroEndoMuscle' = 'magenta', 'Myeloid' = 'cyan', 'T/NK' = 'red', 'Plasma/B' = 'yellow'))
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
We can assess the size of the identified c-NBs,
expressed as number of cells found within each c-NB, using the Kandinsky
nbSizePlot()
function:
Before continuing with the downstream analysis, we need to perform
gene expression normalization using Seurat function
NormalizeData()
:
We can visualize single cell CD74 expression levels across different FOVs:
We can define CD74 hot and cold areas according Getis-Ord Gi
statistics calculated using Kandinsky function
hotspot_analysis()
.
In particular, we can specify as parameter:
feature
: the name of the feature to use to compute
Getis-Ord Gi* statisticssim
: number of Monte Carlo simulations to be run for
estimating Local Gi* coefficients significancepadj.thresh
: numeric value indicating the significance
threshold to be applied to the adjusted p-values resulting from Monte
Carlo simulationsWe can now visualize cells coloured according to whether they are assigned to hot or cold areas based on CD74 expression
After selecting a subset of representative FOVs, we can now visualize the same subset of cells coloured according to:
fovs = c(154,157:159)
(KanPlot(cosmx, feature = 'final_anno', fovs = fovs
, show_border = T, border.size = 0.1, border.col = '#808285') +
scale_fill_manual(values = c('Epi' = 'green', 'FibroEndoMuscle' = 'magenta', 'Myeloid' = 'cyan', 'T/NK' = 'red', 'Plasma/B' = 'yellow'))) +
KanPlot(cosmx, feature = 'CD74', fovs = fovs
, palette_cont = 'viridis', show_border = T, border.size = 0.1, border.col = '#808285') +
KanPlot(cosmx,feature = 'CD74_clust', fovs = fovs
, palette_discrete = c('#1A6AA0','#A94349','#D9D9D9'), show_border = T, border.size = 0.1, border.col = '#808285')
#> Updating Kandinsky data
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
#> Updating Kandinsky data
#>
#> Updating Kandinsky data
We can test significant enrichment of annotated cell types within CD74 hot and cold areas using one-tailed Fisher’s exact test:
cd74 = model.matrix(data = cosmx@meta.data, ~ 0 + CD74_clust) %>% as.data.frame()
colnames(cd74) = c('Cold','Hot','NS')
immune = model.matrix(data = cosmx@meta.data, ~0 + final_anno) %>% as.data.frame()
colnames(immune) = c('Epi','FibroEndoMuscle','Myeloid','Plasma/B','TNK')
immune = immune[,c('Epi','Myeloid','TNK')]
all_res = lapply(colnames(immune),function(x){
fisher_hot = fisher.test(table(cd74$Hot, immune[[x]]), alternative = 'greater')$p.value
return(data.frame(cell_type = x
,hot_pval = fisher_hot))
})
all_res = purrr::reduce(all_res, rbind)
cosmx_hot = list()
for(c in c('Epi','Myeloid','TNK')){
fisher_hot = all_res[all_res$cell_type == c,]$hot_pval
if(fisher_hot < 2.2*(10^-16)){
fisher_hot = expression('Fisher p < 2.2x10'^-16)
}else{
fisher_hot = paste0('Fisher p = ', format(fisher_hot, scientific = T, digits = 3))
}
hot_props = table(cd74$Hot,immune[[c]]) %>% as.data.frame() %>%
group_by(Var1) %>%
mutate(prop = Freq/sum(Freq)) %>% ungroup() %>%
filter(Var2 == 1)
cosmx_hot[[c]] = ggplot(hot_props, aes(x = Var1, y = prop))+
theme_classic() + geom_bar(stat = 'identity', fill = ifelse(
c == 'Epi','green',ifelse(c == 'Myeloid','cyan','red')
), color = 'black') +
scale_x_discrete(breaks = c('1','0'), limits = c('1','0'), labels = c('Hot','Rest')) +
labs(x = 'CD74 areas', y = paste0('Area (%)')) +
scale_y_continuous(limits = c(0, (max(hot_props$prop)*1.1)), breaks = c(0,max(hot_props$prop)), labels = round(c(0,100*(max(hot_props$prop))), 1))+
annotate('text', x = 1.5, y = max(hot_props$prop)*1.1, label = fisher_hot) +
guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) +
theme(axis.text = element_text(color='black'), aspect.ratio = 1)
}
wrap_plots(cosmx_hot[c('Myeloid','TNK','Epi')], ncol = 1)
The results from Fisher’s tests indicate an enrichment of both myeloid and T/NK cells within CD74 hot areas compared to other regions of the sample.
Although CRC epithelial cells are not enriched in CD74 hot areas, findings from Acha-Sagredo et al suggest that CRC cells can be induced to overexpress CD74 as a result of proximity to T/NK cell.
We can corroborate this observation by dividing our CRC cells into
two groups, based on whether or not at least one T/NK cells is present
within their c-NBs.
We can then compare CD74 expression between these two groups of CRC
cells via Wilcoxon Rank test.
cosmx = nnMat(cosmx, label = 'final_anno')
#> Creating neighbour matrix using pre-defined neighbour network
cosmx = nn_query(cosmx
, query = "`T/NK` >0, final_anno == 'Epi'"
, anno_name = 'Epi_type'
, diffexpr = F
, label = 'final_anno'
, which = 'Epi')
cosmx$CD74_expr = FetchData(cosmx, 'CD74', clean = 'none')[,1]
cosmx_cd74_data = cosmx@meta.data[cosmx@meta.data$final_anno == 'Epi',c('Epi_type','CD74_expr')]
pval = wilcox.test(CD74_expr ~ Epi_type, data = cosmx_cd74_data)$p.value
if(pval < 2.2e-16){
pval = expression('Wilcoxon p-value < 2.2x10'^-16)
}else{
pval = format(pval, scientific = T, digits = 3)
}
n_epi1 = nrow(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'N',])
n_epi2 = nrow(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'Y',])
dens1 = density(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'N',]$CD74_expr)$y
dens2 = density(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'Y',]$CD74_expr)$y
max_dens = max(c(dens1,dens2))
min_dens = min(c(dens1,dens2))
median1 = median(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'N',]$CD74_expr)
median2 = median(cosmx_cd74_data[cosmx_cd74_data$Epi_type == 'Y',]$CD74_expr)
ggplot(cosmx_cd74_data, aes(x = CD74_expr, fill = Epi_type)) +
theme_classic() + geom_density(alpha = 0.4,color=NA)+
scale_x_continuous(breaks = c(min(cosmx_cd74_data$CD74_expr), median1, median2, max(cosmx_cd74_data$CD74_expr))
, labels = round(c(min(cosmx_cd74_data$CD74_expr), median1, median2, max(cosmx_cd74_data$CD74_expr)), 1)) +
scale_y_continuous(breaks = c(min_dens, max_dens)
, labels = round(c(min_dens, max_dens),2)) +
guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) +
labs(x = 'CD74 normalized expression', y = 'Density', fill = 'Epi type') +
annotate('text', x = median(cosmx_cd74_data$CD74_expr), y = max_dens, label = pval) +
scale_fill_manual(values = c('#2E8B57','#80E580')
, labels = c(paste0('Rest of CRC cells (', n_epi1, ')'), paste0('CRC cells near T/NK (', n_epi2, ')'))) +
geom_vline(xintercept = median1, linetype = 'dashed', color = '#2E8B57') +
geom_vline(xintercept = median2, linetype = 'dashed', color = '#80E580') +
coord_fixed(10)
Indeed, CRC cells with at least one T/NK cell within their c-NBs show a significantly higher CD74 expression compared to the rest of CRC cells.
The Visium dataset analysed in this vignette has been downloaded from Zenodo.
It consists of a distinct tissue regions obtained from the same human CRC FFPE sample previously profiled with the CosMx Universal Cell Characterization RNA Panel.
The Visium Human Transcriptome Probe kit (v1) was used to capture RNA molecules from the FFPE sample after tissue permeabilization, enabling gene expression quantification for ~18,000 human genes.
After sequencing, Visium FASTQ files were processed with spaceranger (v2.0). Loupe Browser (v6.2) was used to visualize Visium spot-level gene expression data and the associated H&E image, to check for the presence of “in-tissue” spots located in empty regions of the tissue slide.
A separate spot annotation file
A1_CR48_TissueType_Anno.csv
was generated.
We first need to specify the path to the folder
containing raw Visium input data.
Next, we import Visium data into R as a Seurat object
using
the Load10X_Spatial()
function.
Once the data is loaded, we perform initial quality checks. Specifically:
A1_CR48_TissueType_Anno.csv
annotation filevisium = Load10X_Spatial(
, data.dir = visium_path
, filename = "filtered_feature_bc_matrix.h5"
, assay = "Spatial"
, slice = 'CR48'
, filter.matrix = TRUE
)
genes = rownames(visium)
message("Total spots: ",ncol(visium))
#> Total spots: 3106
# Load spot annotation file (used to remove 'empty' spots)
anno_spots = read.delim(paste0(visium_path,'A1_CR48_TissueType_Anno.csv'), sep = ',')
rownames(anno_spots) = anno_spots$Barcode
# Remove spots without tissue annotation (corresponding to empty regions)
anno_spots = anno_spots[which(anno_spots$Location != ""),]
# Remove spots corresponding to empty regions (if any)
if(ncol(visium) > nrow(anno_spots)){
message("Removing ", ncol(visium) - nrow(anno_spots), " empty spots")
visium = visium[,which(colnames(visium) %in% rownames(anno_spots))]
} else {
message("No empty spots detected")
}
#> Removing 124 empty spots
anno_spots = anno_spots[colnames(visium),]
visium = AddMetaData(visium, anno_spots$Location, col.name = "Location")
# Remove low-quality spots
visium = subset(x = visium, subset = nCount_Spatial >= 500)
message(ncol(visium)," spots with at least 500 UMIs")
#> 2955 spots with at least 500 UMIs
visium = subset(x = visium, subset = nFeature_Spatial >= 300)
message(ncol(visium)," spots with at least 300 genes detected")
#> 2955 spots with at least 300 genes detected
cell_id = colnames(visium)
keep_genes = which(rowSums(LayerData(visium,'counts') > 0) >= 10)
visium = subset(visium, features = keep_genes)
dim(visium)
#> [1] 14487 2955
Using Kandinsky kandinsky_init()
function, we define
spot neighbourhoods (s-NBs) based on the Queen contiguity
method (nb.method = ‘Q’
), selecting the first
layer of neighboring spots around each spot
We then use the Kandinsky nb_expand()
function to
include the second closest layer of spots in each
s-NB.
visium = kandinsky_init(visium
, tech ='visium'
, nb.method = 'Q'
, res = 'high'
, img = paste0(visium_path,'spatial/tissue_hires_image.png')
)
#> Building Kandinsky slot "img"...
#> Building Kandinsky slot "sf"...
#> Building Kandinsky slot "nb"...
#> Queen neighbour method is designed to work with Visium / Visium HD data only
#> Building Kandinsky slot "tx"...
#> Building Kandinsky slot "fov_mask"...
#> Kandinsky data loaded into Seurat object!
# Expand neighbourhood definition by including the second ring of surrounding spots
visium = nb_expand(visium, maxorder = 2)
Moreover, we can assess the size of identified
s-NBs, expressed as number of cells found within each s-NB,
using the Kandinsky nbSizePlot()
function:
The H&E image associated with the Visium sample is automatically
stored in the Kandinsky slot img
.
For visualization purposes, the user can create a mask
of the H&E tissue slide, retaining only the image area covered by
tissue, using the Kandinsky he_mask()
function.
# Create a mask image starting from visium H&E staining image
visium = he_mask(visium, stretch = T, sd_thresh = 7)
SlidePlot(visium, img_type = 'img') + coord_fixed(1) +
SlidePlot(visium, img_type = 'mask') + coord_fixed(1)
#> SpatRaster resampled to ncells = 1001000
#> SpatRaster resampled to ncells = 1001325
Before continuing with the downstream analysis, we need to perform
gene expression normalization using Seurat function
NormalizeData()
:
We can visualize CD74 expression levels across spots:
Now, we can define CD74 hot and cold areas according to Getis-Ord Gi
statistics calculated using Kandinsky hotspot_analysis()
function.
# Define hot/cold areas of CD74 expression (default values used are the normalized counts stored in the 'data' layer)
visium = hotspot_analysis(visium, feature='CD74', sim = 999, padj.thresh = 0.05)
KanPlot(visium, feature = 'CD74_clust', palette_discrete = c('#1A6AA0','#A94349','#D9D9D9'))
Since Visium spots cover tissue regions that can generally contain multiple individual cells, we can use cell type gene expression signatures to estimate the abundance of different cell populations within each spot.
We load the list of CRC immune-related signatures described in Acha-Sagredo et al. Based on our analysis of the CosMx CRC dataset, we revealed an association between CD74 hot areas and myeloid and T/NK cells.
Now, we are interested in defining the relationship between CD74 expression and specific subpopulations of tumor-associated macrophages and T/NK cells.
We can calculate gene expression scores across spots for each
immune-related signature using UCell
.
Next, we define hot and cold areas for each immune
signature using Kandinsky hotspot_analysis()
function,
considering the signature scores calculated with UCell
across all spots .
# Test signatures related to TAMs/TNK subtypes or IFN production
signatures = signatures[c(4,5,6,10,12,13,14)]
# Calculate signature scores within each spot using UCell
visium = UCell::AddModuleScore_UCell(visium, features = signatures, maxRank = 5000)
visium = visium %>%
FindVariableFeatures(., nfeatures = 5000, selection.method = "vst") %>%
ScaleData() %>%
RunPCA(assay = "Spatial", verbose = FALSE,npcs = 50) %>%
FindNeighbors(reduction = "pca", dims = 1:30)
#> Finding variable features for layer counts
#> Centering and scaling data matrix
#> Computing nearest neighbor graph
#> Computing SNN
# Smooth UCell scores across spots according to their 20 nearest neighbour in the PCA space
visium = UCell::SmoothKNN(visium, signature.names = paste0(names(signatures),'_UCell'), k = 20)
# Immune sig hot/cold areas
for(s in paste0(names(signatures),'_UCell_kNN')){
visium = hotspot_analysis(visium, feature = s, sim = 999, padj.thres = 0.05)
}
# Clean column names for hot/cold areas annotation
colnames(visium@meta.data) = gsub('_UCell_kNN' ,'', colnames(visium@meta.data))
When hot and cold areas are identified for more than one variable
within the same dataset, the Kandinsky hotspot_overlap
function can be used to create a new annotation that reflects the
overlap between hot and cold areas for two
independent variables, specified using feat.1
and
feat.2
parameters.
visium = hotspot_overlap(visium, feat.1 = 'CD74', feat.2 = 'M1_16')
KanPlot(visium, feature = 'CD74_M1_16_clust', palette_discrete = c('dodgerblue4','skyblue1','purple','firebrick4','firebrick1','gray85'), border.col = 'gray95')
KanPlot(visium, feature = 'CD74_M1_16_clust', palette_discrete = c('gray85','skyblue1','gray85','gray85','firebrick1','gray85'), border.col = 'gray95')
We can test the association between CD74 hot and
cold areas and each TAM or T/NK related signature using a one-tailed
Fisher’s Exact test.
Specifically, we assess the overlap between CD74 hot/cold areas and
hot/cold areas for each signature, respectively:
res = list()
for(s in paste0(names(signatures),'_clust')){
cd74 = model.matrix(data = visium@meta.data, ~ 0 + CD74_clust) %>% as.data.frame()
colnames(cd74) = c('Cold','Hot','NS')
immune = model.matrix(data = visium@meta.data, formula(paste0("~0+", s))) %>% as.data.frame()
colnames(immune) = c('Cold','Hot','NS')
fisher_hot = fisher.test(table(cd74$Hot, immune$Hot), alternative = 'greater')$p.value
fisher_cold = fisher.test(table(cd74$Cold, immune$Cold), alternative = 'greater')$p.value
tot_res = data.frame(signature = gsub("_clust","", s),
hot_hot_pval = fisher_hot,
cold_cold_pval = fisher_cold)
res[[ gsub("_clust","",s)]] = tot_res
}
res = purrr::reduce(res,rbind)
res$hot_FDR = p.adjust(res$hot_hot_pval, method = 'BH')
res$cold_FDR = p.adjust(res$cold_cold_pval, method = 'BH')
rownames(res) = res$signature
summary_res = res %>% dplyr::select(signature, hot_FDR, cold_FDR)
summary_res = reshape2::melt(summary_res)
#> Using signature as id variables
We can now visualize the association between CD74 expression levels and each TAM or T/NK related signature by
plots_hot = list()
for(s in paste0(names(signatures),'_clust')){
print(s)
cd74 = model.matrix(data = visium@meta.data, ~ 0 + CD74_clust) %>% as.data.frame()
colnames(cd74) = c('Cold','Hot','NS')
immune = model.matrix(data = visium@meta.data, formula(paste0("~0+",s))) %>% as.data.frame()
colnames(immune) = c('Cold','Hot','NS')
fisher_hot = summary_res[summary_res$signature == gsub('_clust','',s) & summary_res$variable == 'hot_FDR',]$value
if(fisher_hot < 2.2e-16){
fisher_hot = expression('Fisher FDR < 2.2x10'^-16)
}else{
fisher_hot = paste0('Fisher FDR = ',format(fisher_hot, scientific = TRUE, digits = 3))
}
hot_props = table(cd74$Hot,immune$Hot) %>% as.data.frame() %>%
group_by(Var1) %>%
mutate(prop = Freq/sum(Freq)) %>% ungroup() %>%
filter(Var2 == 1)
plots_hot[[s]] = ggplot(hot_props,aes(x = Var1, y = prop)) +
theme_classic() + geom_bar(stat = 'identity', fill = '#A94349', color = 'black') +
scale_x_discrete(breaks = c('1','0'), limits = c('1','0'), labels = c('Hot','Rest')) +
labs(x = 'CD74 area', y = paste0('Hot for ',gsub('_clust','',s),' (%)'), fill = paste0(gsub('_clust','',s),' area')) +
scale_y_continuous(limits = c(0,max(hot_props$prop)+0.05)
, breaks = c(0,max(hot_props$prop))
,labels = round(100*c(0,max(hot_props$prop)),1)) +
annotate('text', x = 1.5, y = max(hot_props$prop) + 0.05, label = fisher_hot)+
guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) +
theme(axis.text = element_text(color = 'black'))
}
#> [1] "M0_27_clust"
#> [1] "M1_16_clust"
#> [1] "M2_13_clust"
#> [1] "NK_33_clust"
#> [1] "CD4_17_clust"
#> [1] "T_8_clust"
#> [1] "Cytotoxic_23_clust"
wrap_plots(plots_hot, ncol = 4)
plots_cold = list()
for(s in paste0(names(signatures),'_clust')){
print(s)
cd74 = model.matrix(data = visium@meta.data, ~0 + CD74_clust) %>% as.data.frame()
colnames(cd74) = c('Cold','Hot','NS')
immune = model.matrix(data = visium@meta.data, formula(paste0("~0+",s))) %>% as.data.frame()
colnames(immune) = c('Cold','Hot','NS')
fisher_cold = summary_res[summary_res$signature == gsub('_clust','',s) & summary_res$variable == 'cold_FDR',]$value
if(fisher_cold < 2.2e-16){
fisher_cold = expression('Fisher FDR < 2.2x10'^-16)
}else{
fisher_cold = paste0('Fisher FDR = ',format(fisher_cold, scientific = T, digits = 3))
}
cold_props = table(cd74$Cold, immune$Cold) %>% as.data.frame() %>%
group_by(Var1) %>%
mutate(prop = Freq/sum(Freq)) %>% ungroup() %>%
filter(Var2 == 1)
plots_cold[[s]] = ggplot(cold_props,aes(x = Var1, y = prop)) +
theme_classic() + geom_bar(stat = 'identity', fill = '#1A6AA0', color = 'black')+
scale_x_discrete(breaks = c('1','0')
, limits = c('1','0')
, labels = c('Cold','Rest')) +
labs(x = 'CD74 area', y = paste0('Cold for ',gsub('_clust','',s),' (%)'), fill = paste0(gsub('_clust','',s),' area')) +
scale_y_continuous(limits = c(0, max(cold_props$prop)+0.05)
, breaks = c(0, max(cold_props$prop))
, labels = round(100*c(0, max(cold_props$prop)),1)) +
annotate('text', x = 1.5, y = max(cold_props$prop) + 0.05, label = fisher_cold) +
guides(y = guide_axis(cap = "both"), x = guide_axis(cap = "both")) +
theme(axis.text = element_text(color = 'black'))
}
#> [1] "M0_27_clust"
#> [1] "M1_16_clust"
#> [1] "M2_13_clust"
#> [1] "NK_33_clust"
#> [1] "CD4_17_clust"
#> [1] "T_8_clust"
#> [1] "Cytotoxic_23_clust"
wrap_plots(plots_cold, ncol = 4)