Tutorial 5: Integrating RNA and Metabolomics
In this tutorial, we demonstrate how to use SCIGMA to inegrate RNA and metabolomics. This tutorial can be run in under 10 minutes on a GPU-enabled computer.
[1]:
%load_ext autoreload
%autoreload 2
[2]:
from SCIGMA import *
from data import load_data, preprocessing
import numpy as np
import matplotlib.pyplot as plt
from utils import clustering
import scanpy as sc
import anndata
import pandas as pd
Load Data
To run this tutorial, make sure to download the metabolomics data from here: https://data.mendeley.com/datasets/w7nw4km7xd/1
Modify the filepaths accordingly to match the downloaded data filepaths.
[3]:
metadata_path = 'metablomics/metadata.csv'
metadata = pd.read_csv(metadata_path)
metadata
[3]:
| Protocol | Path.To.File | File.Name | Data.Type | Glass.Type | ArrayID | Sample.ID | Matrix | Laser | Laser.resolution | sample | Condition | Brain.area | Visium.Protocol | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Sma_TO | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_A1 | DHB | yL | 100.0 | mH1 | mH | Striatum | TissueOptimization |
| 1 | Sma_TO_iCtrl | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_A2 | DHB | nL | inf | mH1 | mH | Striatum | TissueOptimization |
| 2 | Sma_TO | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_B1 | Norh | yL | 100.0 | mH1 | mH | Striatum | TissueOptimization |
| 3 | Sma_TO | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_B2 | Norh | yL | 100.0 | mH1 | mH | Striatum | TissueOptimization |
| 4 | Sma_TO | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_C1 | 9-AA | yL | 100.0 | mH1 | mH | Striatum | TissueOptimization |
| 5 | Sma_TO | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_C2 | 9-AA | yL | 100.0 | mH1 | mH | Striatum | TissueOptimization |
| 6 | Sma_TO | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_D1 | FMP-10 | yL | 100.0 | mH1 | mH | Striatum | TissueOptimization |
| 7 | Sma_TO_iCtrl | /sma/TO2/qc | 220221_MS__TO_mousebrain_Cy3_MV_gain5.jpg | fluorescence | NotCharged | TO2 | TO2_D2 | FMP-10 | nL | inf | mH1 | mH | Striatum | TissueOptimization |
| 8 | Sma | /sma/V11L12-038/V11L12-038_A1/output_data/V11L... | V11L12-038_A1.Visium.RNA | RNA | NotCharged | V11L12-038 | V11L12-038_A1 | DHB | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 9 | Sma | /sma/V11L12-038/V11L12-038_B1/output_data/V11L... | V11L12-038_B1.Visium.RNA | RNA | NotCharged | V11L12-038 | V11L12-038_B1 | DHB | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 10 | Visium_iCtrl | /sma/V11L12-038/V11L12-038_C1/output_data/V11L... | V11L12-038_C1.Visium.RNA | RNA | NotCharged | V11L12-038 | V11L12-038_C1 | nM | nL | inf | mPD3 | mPD | Striatum | Standard |
| 11 | Sma | /sma/V11L12-038/V11L12-038_D1/output_data/V11L... | V11L12-038_D1.Visium.RNA | RNA | NotCharged | V11L12-038 | V11L12-038_D1 | 9-AA | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 12 | Sma | /sma/V11L12-109/V11L12-109_A1/output_data/V11L... | V11L12-109_A1.Visium.RNA | RNA | NotCharged | V11L12-109 | V11L12-109_A1 | FMP-10 | yL | 100.0 | mPD1 | mPD | Striatum | Standard |
| 13 | Sma | /sma/V11L12-109/V11L12-109_B1/output_data/V11L... | V11L12-109_B1.Visium.RNA | RNA | NotCharged | V11L12-109 | V11L12-109_B1 | FMP-10 | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 14 | Sma | /sma/V11L12-109/V11L12-109_C1/output_data/V11L... | V11L12-109_C1.Visium.RNA | RNA | NotCharged | V11L12-109 | V11L12-109_C1 | FMP-10 | yL | 100.0 | mPD4 | mPD | Striatum | Standard |
| 15 | Visium_iCtrl | /sma/V11L12-109/V11L12-109_D1/output_data/V11L... | V11L12-109_D1.Visium.RNA | RNA | NotCharged | V11L12-109 | V11L12-109_D1 | FMP-10 | nL | inf | mPD3 | mPD | Striatum | Standard |
| 16 | Sma | /sma/V11T16-085/V11T16-085_A1/output_data/V11T... | V11T16-085_A1.Visium.RNA | RNA | NotCharged | V11T16-085 | V11T16-085_A1 | FMP-10 | yL | 100.0 | mPD1 | mPD | SubNigra | Standard |
| 17 | Sma | /sma/V11T16-085/V11T16-085_B1/output_data/V11T... | V11T16-085_B1.Visium.RNA | RNA | NotCharged | V11T16-085 | V11T16-085_B1 | FMP-10 | yL | 100.0 | mPD3 | mPD | SubNigra | Standard |
| 18 | Sma | /sma/V11T16-085/V11T16-085_C1/output_data/V11T... | V11T16-085_C1.Visium.RNA | RNA | NotCharged | V11T16-085 | V11T16-085_C1 | FMP-10 | yL | 100.0 | mPD4 | mPD | SubNigra | Standard |
| 19 | Visium_iCtrl | /sma/V11T16-085/V11T16-085_D1/output_data/V11T... | V11T16-085_D1.Visium.RNA | RNA | NotCharged | V11T16-085 | V11T16-085_D1 | nM | nL | inf | mPD4 | mPD | SubNigra | Standard |
| 20 | Sma | /sma/V11T17-102/V11T17-102_A1/output_data/V11T... | V11T17-102_A1.Visium.RNA | RNA | NotCharged | V11T17-102 | V11T17-102_A1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 21 | Sma | /sma/V11T17-102/V11T17-102_B1/output_data/V11T... | V11T17-102_B1.Visium.RNA | RNA | NotCharged | V11T17-102 | V11T17-102_B1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 22 | Sma | /sma/V11T17-102/V11T17-102_C1/output_data/V11T... | V11T17-102_C1.Visium.RNA | RNA | NotCharged | V11T17-102 | V11T17-102_C1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 23 | Sma | /sma/V11T17-102/V11T17-102_D1/output_data/V11T... | V11T17-102_D1.Visium.RNA | RNA | NotCharged | V11T17-102 | V11T17-102_D1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 24 | Visium | /visium/V11T17-101/V11T17-101_A1/output_data/V... | V11T17-101_A1.Visium.RNA | RNA | NotCharged | V11T17-101 | V11T17-101_A1 | nM | nL | inf | mPD1 | mPD | SubNigra | Standard |
| 25 | Visium | /visium/V11T17-101/V11T17-101_B1/output_data/V... | V11T17-101_B1.Visium.RNA | RNA | NotCharged | V11T17-101 | V11T17-101_B1 | nM | nL | inf | mPD3 | mPD | Striatum | Standard |
| 26 | Visium | /visium/V11T17-101/V11T17-101_C1/output_data/V... | V11T17-101_C1.Visium.RNA | RNA | NotCharged | V11T17-101 | V11T17-101_C1 | nM | nL | inf | mH1 | mH | Striatum | Standard |
| 27 | Visium | /visium/V11T17-101/V11T17-101_D1/output_data/V... | V11T17-101_D1.Visium.RNA | RNA | NotCharged | V11T17-101 | V11T17-101_D1 | nM | nL | inf | hPD1 | hPD | Striatum | Standard |
| 28 | Sma | /sma/V11L12-038/V11L12-038_A1/output_data/V11L... | V11L12-038_A1.Visium.DHB.220826_smamsi.csv | lipids | NotCharged | V11L12-038 | V11L12-038_A1 | DHB | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 29 | Sma | /sma/V11L12-038/V11L12-038_B1/output_data/V11L... | V11L12-038_B1.Visium.DHB.220826_smamsi.csv | lipids | NotCharged | V11L12-038 | V11L12-038_B1 | DHB | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 30 | Sma | /sma/V11L12-038/V11L12-038_D1/output_data/V11L... | V11L12-038_Mouse_D1.Visium.9aa.220826_smamsi.csv | metabolites | NotCharged | V11L12-038 | V11L12-038_D1 | 9-AA | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 31 | Sma | /sma/V11L12-109/V11L12-109_A1/output_data/V11L... | V11L12-109_A1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11L12-109 | V11L12-109_A1 | FMP-10 | yL | 100.0 | mPD1 | mPD | Striatum | Standard |
| 32 | Sma | /sma/V11L12-109/V11L12-109_B1/output_data/V11L... | V11L12-109_B1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11L12-109 | V11L12-109_B1 | FMP-10 | yL | 100.0 | mPD3 | mPD | Striatum | Standard |
| 33 | Sma | /sma/V11L12-109/V11L12-109_C1/output_data/V11L... | V11L12-109_C1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11L12-109 | V11L12-109_C1 | FMP-10 | yL | 100.0 | mPD4 | mPD | Striatum | Standard |
| 34 | Sma | /sma/V11T16-085/V11T16-085_A1/output_data/V11T... | V11T17-085_A1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T16-085 | V11T16-085_A1 | FMP-10 | yL | 100.0 | mPD1 | mPD | SubNigra | Standard |
| 35 | Sma | /sma/V11T16-085/V11T16-085_B1/output_data/V11T... | V11T17-085_B1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T16-085 | V11T16-085_B1 | FMP-10 | yL | 100.0 | mPD3 | mPD | SubNigra | Standard |
| 36 | Sma | /sma/V11T16-085/V11T16-085_C1/output_data/V11T... | V11T17-085_C1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T16-085 | V11T16-085_C1 | FMP-10 | yL | 100.0 | mPD4 | mPD | SubNigra | Standard |
| 37 | Sma | /sma/V11T17-102/V11T17-102_A1/output_data/V11T... | V11T17-102_A1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T17-102 | V11T17-102_A1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 38 | Sma | /sma/V11T17-102/V11T17-102_B1/output_data/V11T... | V11T17-102_B1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T17-102 | V11T17-102_B1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 39 | Sma | /sma/V11T17-102/V11T17-102_C1/output_data/V11T... | V11T17-102_C1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T17-102 | V11T17-102_C1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
| 40 | Sma | /sma/V11T17-102/V11T17-102_D1/output_data/V11T... | V11T17-102_D1.Visium.FMP.220826_smamsi.csv | neurotransmitters | NotCharged | V11T17-102 | V11T17-102_D1 | FMP-10 | yL | 100.0 | hPD1 | hPD | Striatum | RNArescue |
[49]:
# V11L12-109: A_1, B_1, C_1
# V11L12-038: A_1, B_1, D_1
# V11T16-085: A_1, B_1, C_1
# use the metadata information to find what paths to use
rep = "V11L12-109_C1"
folder = 'metablomics/sma/V11L12-109/'
sample = f'{rep}/output_data/'
msi_file = f'{rep}_MSI/V11L12-109_C1.Visium.FMP.220826_smamsi.csv' #FMP
rna_file = f'{rep}_RNA/outs/filtered_feature_bc_matrix.h5'
spatial_locations_file = f'{rep}_RNA/outs/spatial/tissue_positions_list.csv'
[50]:
msi_csv = pd.read_csv(folder+sample+msi_file)
rna_spots = pd.read_csv(folder+sample+spatial_locations_file,header=None)
rna_adata = sc.read_10x_h5(folder+sample+rna_file)
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
Preprocessing Data
Format the data for SCIGMA
[52]:
# get spots from both modalities that share coodrinates
msi_filtered = msi_csv.merge(rna_spots[[0,2,3]], left_on=['x','y'], right_on=[2,3], how='inner').drop(columns=[2,3])
[53]:
# get spots that were measured
shared_spots = set(msi_filtered[0]).intersection(set(rna_adata.obs_names))
mask = msi_filtered[0].isin(shared_spots)
msi_filtered = msi_filtered[mask]
[54]:
spatial = msi_filtered[['x', 'y']].to_numpy()
[55]:
msi = anndata.AnnData(msi_filtered.drop(columns=['x','y',0]))
msi.obsm['spatial'] = spatial
msi.obs_names = [str(i) for i in range(len(msi))]
/tmp/ipykernel_2076060/3057740423.py:1: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
msi = anndata.AnnData(msi_filtered.drop(columns=['x','y',0]))
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.
warnings.warn("Transforming to str index.", ImplicitModificationWarning)
[56]:
rna = rna_adata[msi_filtered[0]]
rna.obsm['spatial'] = spatial
rna.obs_names = [str(i) for i in range(len(rna))]
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
[ ]:
# preprocess into dictionary
data_dict = preprocessing(rna, msi, datatype='MSI',n_neighbors=6, feat_neighbors=6)
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
Preprocessing anndatas
/users/schang59/anaconda/glue/lib/python3.8/site-packages/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
Constructing spatial graphs
Constructing feature graphs
Feature graph 1
Feature graph 1 finished
Feature graph 2
Feature graph 2 finished
Model Training
[397]:
seed = 2025
model = SCIGMA(data_dict, dim_output=20, seed_num=seed, device=torch.device('cuda:0'), recon_weight1 = 1, recon_weight2 = 0.1, contrastive_weight1=1e-1, contrastive_weight2=1e-2, clr_weight=0.2, batch_size=len(data_dict['adata_omics1']), learning_rate=1e-3) #connectivities=connectivities)
Num samples pruned: 0
Creating adjacency matrices
Model ready for training!
[ ]:
output = model.train(600)
[ ]:
adata_combined = data_dict['adata_omics1'].copy()
adata_combined.uns['emb_latent'] = output['emb_latent']
adata_combined.uns['emb_recon'] = output['emb_recon']
adata_combined.obsm['emb_combined'] = output['emb_latent_combined']
adata_combined.obs['invtau'] = output['invtau']
adata_combined.obs['tau'] = output['tau']
Spatial Domain Detection
[ ]:
# we use mcluster as clustering tool by default.
tool = 'mclust' # mclust, leiden, and louvain
# clustering
if tool == 'mclust':
clustering(adata_combined, key='emb_combined', add_key='SCIGMA', n_clusters=12, method=tool, use_pca=False)
elif tool in ['leiden', 'louvain']:
clustering(adata_combined, key='emb_combined', add_key='SCIGMA', n_clusters=12, method=tool, start=0.1, end=2.0, increment=0.01)
[ ]:
# visualization
fig, ax_list = plt.subplots(1, 1, figsize=(6, 6))
sc.pl.embedding(adata_combined, basis='spatial', color='SCIGMA', ax=ax_list, title='SCIGMA', s=250, show=False, add_outline=False)
plt.gca()
plt.show()
[ ]:
# visualization
fig, ax_list = plt.subplots(1, 2, figsize=(12, 5))
sc.pp.neighbors(adata_combined, use_rep='emb_combined', n_neighbors=10)
sc.tl.umap(adata_combined)
sc.pl.umap(adata_combined, color='SCIGMA', ax=ax_list[0], title='SCIGMA', s=60, show=False)
sc.pl.embedding(adata_combined, basis='spatial', color='SCIGMA', ax=ax_list[1], title='ContrastiveSG', s=80, show=False)
ax_list[0].get_legend().remove()
plt.tight_layout(w_pad=0.3)
plt.show()
Uncertainty Visualization
[ ]:
# uncertainty visualization
sc.pl.embedding(adata_combined,
basis='spatial',
color=['invtau'],
title=['Uncertainty'], s=50, show=False)
plt.gca()
plt.show()