Tutorial 5: Integrating RNA and Metabolomics

In this tutorial, we demonstrate how to use SCIGMA to inegrate RNA and metabolomics. This tutorial can be run in under 10 minutes on a GPU-enabled computer.

[1]:
%load_ext autoreload
%autoreload 2
[2]:
from SCIGMA import *
from data import load_data, preprocessing
import numpy as np
import matplotlib.pyplot as plt
from utils import clustering
import scanpy as sc
import anndata
import pandas as pd

Load Data

To run this tutorial, make sure to download the metabolomics data from here: https://data.mendeley.com/datasets/w7nw4km7xd/1

Modify the filepaths accordingly to match the downloaded data filepaths.

[3]:
metadata_path = 'metablomics/metadata.csv'
metadata = pd.read_csv(metadata_path)
metadata
[3]:
Protocol Path.To.File File.Name Data.Type Glass.Type ArrayID Sample.ID Matrix Laser Laser.resolution sample Condition Brain.area Visium.Protocol
0 Sma_TO /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_A1 DHB yL 100.0 mH1 mH Striatum TissueOptimization
1 Sma_TO_iCtrl /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_A2 DHB nL inf mH1 mH Striatum TissueOptimization
2 Sma_TO /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_B1 Norh yL 100.0 mH1 mH Striatum TissueOptimization
3 Sma_TO /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_B2 Norh yL 100.0 mH1 mH Striatum TissueOptimization
4 Sma_TO /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_C1 9-AA yL 100.0 mH1 mH Striatum TissueOptimization
5 Sma_TO /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_C2 9-AA yL 100.0 mH1 mH Striatum TissueOptimization
6 Sma_TO /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_D1 FMP-10 yL 100.0 mH1 mH Striatum TissueOptimization
7 Sma_TO_iCtrl /sma/TO2/qc 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg fluorescence NotCharged TO2 TO2_D2 FMP-10 nL inf mH1 mH Striatum TissueOptimization
8 Sma /sma/V11L12-038/V11L12-038_A1/output_data/V11L... V11L12-038_A1.Visium.RNA RNA NotCharged V11L12-038 V11L12-038_A1 DHB yL 100.0 mPD3 mPD Striatum Standard
9 Sma /sma/V11L12-038/V11L12-038_B1/output_data/V11L... V11L12-038_B1.Visium.RNA RNA NotCharged V11L12-038 V11L12-038_B1 DHB yL 100.0 mPD3 mPD Striatum Standard
10 Visium_iCtrl /sma/V11L12-038/V11L12-038_C1/output_data/V11L... V11L12-038_C1.Visium.RNA RNA NotCharged V11L12-038 V11L12-038_C1 nM nL inf mPD3 mPD Striatum Standard
11 Sma /sma/V11L12-038/V11L12-038_D1/output_data/V11L... V11L12-038_D1.Visium.RNA RNA NotCharged V11L12-038 V11L12-038_D1 9-AA yL 100.0 mPD3 mPD Striatum Standard
12 Sma /sma/V11L12-109/V11L12-109_A1/output_data/V11L... V11L12-109_A1.Visium.RNA RNA NotCharged V11L12-109 V11L12-109_A1 FMP-10 yL 100.0 mPD1 mPD Striatum Standard
13 Sma /sma/V11L12-109/V11L12-109_B1/output_data/V11L... V11L12-109_B1.Visium.RNA RNA NotCharged V11L12-109 V11L12-109_B1 FMP-10 yL 100.0 mPD3 mPD Striatum Standard
14 Sma /sma/V11L12-109/V11L12-109_C1/output_data/V11L... V11L12-109_C1.Visium.RNA RNA NotCharged V11L12-109 V11L12-109_C1 FMP-10 yL 100.0 mPD4 mPD Striatum Standard
15 Visium_iCtrl /sma/V11L12-109/V11L12-109_D1/output_data/V11L... V11L12-109_D1.Visium.RNA RNA NotCharged V11L12-109 V11L12-109_D1 FMP-10 nL inf mPD3 mPD Striatum Standard
16 Sma /sma/V11T16-085/V11T16-085_A1/output_data/V11T... V11T16-085_A1.Visium.RNA RNA NotCharged V11T16-085 V11T16-085_A1 FMP-10 yL 100.0 mPD1 mPD SubNigra Standard
17 Sma /sma/V11T16-085/V11T16-085_B1/output_data/V11T... V11T16-085_B1.Visium.RNA RNA NotCharged V11T16-085 V11T16-085_B1 FMP-10 yL 100.0 mPD3 mPD SubNigra Standard
18 Sma /sma/V11T16-085/V11T16-085_C1/output_data/V11T... V11T16-085_C1.Visium.RNA RNA NotCharged V11T16-085 V11T16-085_C1 FMP-10 yL 100.0 mPD4 mPD SubNigra Standard
19 Visium_iCtrl /sma/V11T16-085/V11T16-085_D1/output_data/V11T... V11T16-085_D1.Visium.RNA RNA NotCharged V11T16-085 V11T16-085_D1 nM nL inf mPD4 mPD SubNigra Standard
20 Sma /sma/V11T17-102/V11T17-102_A1/output_data/V11T... V11T17-102_A1.Visium.RNA RNA NotCharged V11T17-102 V11T17-102_A1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
21 Sma /sma/V11T17-102/V11T17-102_B1/output_data/V11T... V11T17-102_B1.Visium.RNA RNA NotCharged V11T17-102 V11T17-102_B1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
22 Sma /sma/V11T17-102/V11T17-102_C1/output_data/V11T... V11T17-102_C1.Visium.RNA RNA NotCharged V11T17-102 V11T17-102_C1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
23 Sma /sma/V11T17-102/V11T17-102_D1/output_data/V11T... V11T17-102_D1.Visium.RNA RNA NotCharged V11T17-102 V11T17-102_D1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
24 Visium /visium/V11T17-101/V11T17-101_A1/output_data/V... V11T17-101_A1.Visium.RNA RNA NotCharged V11T17-101 V11T17-101_A1 nM nL inf mPD1 mPD SubNigra Standard
25 Visium /visium/V11T17-101/V11T17-101_B1/output_data/V... V11T17-101_B1.Visium.RNA RNA NotCharged V11T17-101 V11T17-101_B1 nM nL inf mPD3 mPD Striatum Standard
26 Visium /visium/V11T17-101/V11T17-101_C1/output_data/V... V11T17-101_C1.Visium.RNA RNA NotCharged V11T17-101 V11T17-101_C1 nM nL inf mH1 mH Striatum Standard
27 Visium /visium/V11T17-101/V11T17-101_D1/output_data/V... V11T17-101_D1.Visium.RNA RNA NotCharged V11T17-101 V11T17-101_D1 nM nL inf hPD1 hPD Striatum Standard
28 Sma /sma/V11L12-038/V11L12-038_A1/output_data/V11L... V11L12-038_A1.Visium.DHB.220826_smamsi.csv lipids NotCharged V11L12-038 V11L12-038_A1 DHB yL 100.0 mPD3 mPD Striatum Standard
29 Sma /sma/V11L12-038/V11L12-038_B1/output_data/V11L... V11L12-038_B1.Visium.DHB.220826_smamsi.csv lipids NotCharged V11L12-038 V11L12-038_B1 DHB yL 100.0 mPD3 mPD Striatum Standard
30 Sma /sma/V11L12-038/V11L12-038_D1/output_data/V11L... V11L12-038_Mouse_D1.Visium.9aa.220826_smamsi.csv metabolites NotCharged V11L12-038 V11L12-038_D1 9-AA yL 100.0 mPD3 mPD Striatum Standard
31 Sma /sma/V11L12-109/V11L12-109_A1/output_data/V11L... V11L12-109_A1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11L12-109 V11L12-109_A1 FMP-10 yL 100.0 mPD1 mPD Striatum Standard
32 Sma /sma/V11L12-109/V11L12-109_B1/output_data/V11L... V11L12-109_B1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11L12-109 V11L12-109_B1 FMP-10 yL 100.0 mPD3 mPD Striatum Standard
33 Sma /sma/V11L12-109/V11L12-109_C1/output_data/V11L... V11L12-109_C1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11L12-109 V11L12-109_C1 FMP-10 yL 100.0 mPD4 mPD Striatum Standard
34 Sma /sma/V11T16-085/V11T16-085_A1/output_data/V11T... V11T17-085_A1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T16-085 V11T16-085_A1 FMP-10 yL 100.0 mPD1 mPD SubNigra Standard
35 Sma /sma/V11T16-085/V11T16-085_B1/output_data/V11T... V11T17-085_B1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T16-085 V11T16-085_B1 FMP-10 yL 100.0 mPD3 mPD SubNigra Standard
36 Sma /sma/V11T16-085/V11T16-085_C1/output_data/V11T... V11T17-085_C1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T16-085 V11T16-085_C1 FMP-10 yL 100.0 mPD4 mPD SubNigra Standard
37 Sma /sma/V11T17-102/V11T17-102_A1/output_data/V11T... V11T17-102_A1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T17-102 V11T17-102_A1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
38 Sma /sma/V11T17-102/V11T17-102_B1/output_data/V11T... V11T17-102_B1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T17-102 V11T17-102_B1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
39 Sma /sma/V11T17-102/V11T17-102_C1/output_data/V11T... V11T17-102_C1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T17-102 V11T17-102_C1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
40 Sma /sma/V11T17-102/V11T17-102_D1/output_data/V11T... V11T17-102_D1.Visium.FMP.220826_smamsi.csv neurotransmitters NotCharged V11T17-102 V11T17-102_D1 FMP-10 yL 100.0 hPD1 hPD Striatum RNArescue
[49]:
# V11L12-109: A_1, B_1, C_1
# V11L12-038: A_1, B_1, D_1
# V11T16-085: A_1, B_1, C_1
# use the metadata information to find what paths to use
rep = "V11L12-109_C1"
folder = 'metablomics/sma/V11L12-109/'
sample = f'{rep}/output_data/'
msi_file = f'{rep}_MSI/V11L12-109_C1.Visium.FMP.220826_smamsi.csv' #FMP
rna_file = f'{rep}_RNA/outs/filtered_feature_bc_matrix.h5'
spatial_locations_file = f'{rep}_RNA/outs/spatial/tissue_positions_list.csv'
[50]:
msi_csv = pd.read_csv(folder+sample+msi_file)
rna_spots = pd.read_csv(folder+sample+spatial_locations_file,header=None)
rna_adata = sc.read_10x_h5(folder+sample+rna_file)
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")

Preprocessing Data

Format the data for SCIGMA

[52]:
# get spots from both modalities that share coodrinates
msi_filtered = msi_csv.merge(rna_spots[[0,2,3]], left_on=['x','y'], right_on=[2,3], how='inner').drop(columns=[2,3])
[53]:
# get spots that were measured
shared_spots = set(msi_filtered[0]).intersection(set(rna_adata.obs_names))
mask = msi_filtered[0].isin(shared_spots)
msi_filtered = msi_filtered[mask]
[54]:
spatial = msi_filtered[['x', 'y']].to_numpy()
[55]:
msi = anndata.AnnData(msi_filtered.drop(columns=['x','y',0]))
msi.obsm['spatial'] = spatial
msi.obs_names = [str(i) for i in range(len(msi))]
/tmp/ipykernel_2076060/3057740423.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  msi = anndata.AnnData(msi_filtered.drop(columns=['x','y',0]))
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
[56]:
rna = rna_adata[msi_filtered[0]]
rna.obsm['spatial'] = spatial
rna.obs_names = [str(i) for i in range(len(rna))]
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
[ ]:
# preprocess into dictionary
data_dict = preprocessing(rna, msi, datatype='MSI',n_neighbors=6, feat_neighbors=6)
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
Preprocessing anndatas
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
Constructing spatial graphs
Constructing feature graphs
Feature graph 1
Feature graph 1 finished
Feature graph 2
Feature graph 2 finished

Model Training

[397]:
seed = 2025
model = SCIGMA(data_dict, dim_output=20, seed_num=seed, device=torch.device('cuda:0'), recon_weight1 = 1, recon_weight2 = 0.1, contrastive_weight1=1e-1, contrastive_weight2=1e-2, clr_weight=0.2, batch_size=len(data_dict['adata_omics1']), learning_rate=1e-3) #connectivities=connectivities)
Num samples pruned: 0
Creating adjacency matrices
Model ready for training!
[ ]:
output = model.train(600)
[ ]:
adata_combined = data_dict['adata_omics1'].copy()
adata_combined.uns['emb_latent'] = output['emb_latent']
adata_combined.uns['emb_recon'] = output['emb_recon']
adata_combined.obsm['emb_combined'] = output['emb_latent_combined']
adata_combined.obs['invtau'] = output['invtau']
adata_combined.obs['tau'] = output['tau']

Spatial Domain Detection

[ ]:
# we use mcluster as clustering tool by default.
tool = 'mclust' # mclust, leiden, and louvain

# clustering
if tool == 'mclust':
    clustering(adata_combined, key='emb_combined', add_key='SCIGMA', n_clusters=12, method=tool, use_pca=False)
elif tool in ['leiden', 'louvain']:
    clustering(adata_combined, key='emb_combined', add_key='SCIGMA', n_clusters=12, method=tool, start=0.1, end=2.0, increment=0.01)
[ ]:
# visualization
fig, ax_list = plt.subplots(1, 1, figsize=(6, 6))
sc.pl.embedding(adata_combined, basis='spatial', color='SCIGMA', ax=ax_list, title='SCIGMA', s=250, show=False, add_outline=False)
plt.gca()
plt.show()
[ ]:
# visualization
fig, ax_list = plt.subplots(1, 2, figsize=(12, 5))
sc.pp.neighbors(adata_combined, use_rep='emb_combined', n_neighbors=10)
sc.tl.umap(adata_combined)

sc.pl.umap(adata_combined, color='SCIGMA', ax=ax_list[0], title='SCIGMA', s=60, show=False)
sc.pl.embedding(adata_combined, basis='spatial', color='SCIGMA', ax=ax_list[1], title='ContrastiveSG', s=80, show=False)

ax_list[0].get_legend().remove()
plt.tight_layout(w_pad=0.3)
plt.show()

Uncertainty Visualization

[ ]:
# uncertainty visualization
sc.pl.embedding(adata_combined,
                basis='spatial',
                color=['invtau'],
                title=['Uncertainty'], s=50, show=False)
plt.gca()
plt.show()