Integration of scRNA and multi-omic datasets

[1]:
import numpy as np
import anndata as ad
import scanpy as sc
import scvelo as scv
import torch
from sklearn.decomposition import PCA
import umap
import os
import multivelovae as vv
from datetime import datetime
[2]:
torch.cuda.is_available()
[2]:
True
[3]:
now = datetime.now()
date = now.strftime("%m_%d_%Y")

Read in processed data and define places to store output

[4]:
adata = sc.read_h5ad('3423-MV-2-8489-MV-1-GSM5460410_adata_postpro_concat.h5ad')
adata_atac = sc.read_h5ad('3423-MV-2-8489-MV-1-GSM5460410_adata_atac_postpro_concat.h5ad')
[5]:
gene_plot = ['AZU1', 'HBB', 'HDC', 'LYZ', 'PF4', 'SPINK2']
np.isin(gene_plot, adata.var_names)
[5]:
array([ True,  True,  True,  True,  True,  True])
[6]:
'PROM1' in adata.var_names
[6]:
True
[7]:
dataset = "3423-MV-2-8489-MV-1-GSM5460410"
model_path_base = f"checkpoints/{dataset}"
figure_path_base = f"figures/{dataset}"
data_path_base = f"data/{dataset}"
[8]:
adata, adata_atac
[8]:
(AnnData object with n_obs × n_vars = 27841 × 1044
     obs: 'total_unspliced', 'total_spliced', 'log1p_total_unspliced', 'log1p_total_spliced', 'fraction_u', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_20_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_rb', 'log1p_total_counts_rb', 'pct_counts_rb', 'total_counts_hla', 'log1p_total_counts_hla', 'pct_counts_hla', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'outlier', 'initial_size_unspliced', 'initial_size_spliced', 'initial_size', 'n_counts', 'S_score', 'G2M_score', 'phase', 'CC_difference', 'total_unspliced2', 'total_spliced2', 'log1p_total_unspliced2', 'log1p_total_spliced2', 'fraction_u2', 'n_c', 'n_Mu', 'n_Ms', 'leiden', 'total_chromatin', 'total_unspliced3', 'total_spliced3', 'log1p_total_unspliced3', 'log1p_total_spliced3', 'fraction_u3', 'total_Mc', 'total_Mu', 'total_Ms', 'batch', 'n_Mc'
     var: 'gene_ids', 'feature_types', 'mt', 'rb', 'hla', 'hb', 'HVG-Multiome 1', 'n_cells-Multiome 1', 'n_cells_by_counts-Multiome 1', 'mean_counts-Multiome 1', 'log1p_mean_counts-Multiome 1', 'pct_dropout_by_counts-Multiome 1', 'total_counts-Multiome 1', 'log1p_total_counts-Multiome 1', 'gene_count_corr-Multiome 1', 'highly_variable-Multiome 1', 'mean-Multiome 1', 'std-Multiome 1', 'HVG-Multiome 2', 'n_cells-Multiome 2', 'n_cells_by_counts-Multiome 2', 'mean_counts-Multiome 2', 'log1p_mean_counts-Multiome 2', 'pct_dropout_by_counts-Multiome 2', 'total_counts-Multiome 2', 'log1p_total_counts-Multiome 2', 'gene_count_corr-Multiome 2', 'highly_variable-Multiome 2', 'mean-Multiome 2', 'std-Multiome 2', 'HVG-scRNA', 'n_cells-scRNA', 'n_cells_by_counts-scRNA', 'mean_counts-scRNA', 'log1p_mean_counts-scRNA', 'pct_dropout_by_counts-scRNA', 'total_counts-scRNA', 'log1p_total_counts-scRNA', 'gene_count_corr-scRNA', 'highly_variable-scRNA', 'mean-scRNA', 'std-scRNA'
     uns: 'batch_colors', 'leiden_colors', 'neighbors', 'umap'
     obsm: 'X_pca', 'X_umap'
     layers: 'Ms', 'Mu', 'spliced', 'unspliced'
     obsp: 'connectivities', 'distances',
 AnnData object with n_obs × n_vars = 27841 × 1044
     obs: 'n_counts', 'batch'
     layers: 'Mc')
[9]:
adata.obs['leiden'].cat.categories
[9]:
Index(['CMP', 'DC', 'Erythrocyte', 'GMP', 'Granulocyte', 'HSC', 'LMPP', 'MEP',
       'Mast Cells', 'Megakaryocyte', 'Platelet', 'Prog B', 'Prog DC',
       'Prog MK'],
      dtype='object')
[10]:
adata.obs['batch'].cat.categories
[10]:
Index(['Multiome 1', 'Multiome 2', 'scRNA'], dtype='object')
[11]:
os.makedirs(figure_path_base, exist_ok=True)
scv.pl.scatter(adata, basis='umap', color='leiden', legend_loc='right margin')
scv.pl.scatter(adata, basis='umap', color='batch', legend_loc='right margin')
_images/HSPC_scRNA_Multiome_Integration_12_0.png
_images/HSPC_scRNA_Multiome_Integration_12_1.png
[12]:
figure_path = figure_path_base+'/'+date
model_path = model_path_base+'/'+date
data_path = data_path_base

Initialize and train a MultiVeloVAE

[13]:
key = "vae"
[14]:
torch.manual_seed(2022)
np.random.seed(2022)
model = vv.VAEChrom(adata,
                    adata_atac,
                    batch_key='batch',
                    ref_batch=1,
                    batch_hvg_key='highly_variable',
                    var_to_regress=['total_unspliced3'],
                    device='cuda:0',
                    rna_only_idx=2,
                    plot_init=False,
                    gene_plot=gene_plot,
                    cluster_key="leiden",
                    figure_path=figure_path,
                    embed="umap")

model.train(plot=False,
            gene_plot=gene_plot,
            figure_path=figure_path,
            embed="umap")

model.save_model(model_path)
model.save_anndata(data_path, file_name="out.h5ad")
Running in mixed RNA-only mode.
CVAE enabled. Performing batch effect correction.
Reference batch set to 1 (Multiome 2).
Latent dimension set to 9.
KL weights set to 4.00 4.00.
Found ['total_unspliced3'] in obs. Regressing them out.
Batch 0 sparsity: 0.095
Batch 1 sparsity: 0.108
Batch 2 sparsity: 0.169
Learning rate set to 7.5e-4 based on data sparsity.
Early stop threshold set to 2.6.
Using Gaussian Prior.
Initializing using the steady-state and dynamical models.
956 out of 1044 = 91.6% genes have good ellipse fits.
KS-test result: [1. 1. 1. 1. 1. 1. 1.]
Assigning cluster 5 to repressive.
Initial induction: 809, repression: 235 out of 1044.
Computing scaling factors for each batch class.
-------------------------- Train a MultiVeloVAE -------------------------
*********      Creating Training and Validation Datasets      *********
Total Number of Iterations Per Epoch: 77, test iteration: 152
*********                      Finished.                      *********
*********                      Stage  1                       *********
Epoch 1: Train ELBO = 119.073, Test ELBO = -778723.808          Total Time =   0 h :  0 m :  1 s
Epoch 50: Train ELBO = 1407.613, Test ELBO = 1407.692           Total Time =   0 h :  0 m : 29 s
Epoch 100: Train ELBO = 1428.686, Test ELBO = 1423.719          Total Time =   0 h :  0 m : 58 s
*********     Stage 1: Early Stop Triggered at epoch 121.     *********
*********     Retrieving best model from iteration 9121.      *********
*********                      Stage  2                       *********
Cell-wise KNN estimation.
Using 616 latent neighbors to select ancestors.
Percentage of Invalid Sets: 0.000
Average Set Size: 100
Finished. Actual Time:   0 h :  0 m : 24 s
*********             Velocity Refinement Round 1             *********
Epoch 127: Train ELBO = 1305.002, Test ELBO = 1308.716          Total Time =   0 h :  1 m : 37 s
*********     Round 1: Early Stop Triggered at epoch 127.     *********
*********     Retrieving best model from iteration 9702.      *********
Cell-wise KNN estimation.
Finished. Actual Time:   0 h :  0 m :  3 s
*********             Velocity Refinement Round 2             *********
Epoch 131: Train ELBO = 1273.187, Test ELBO = 1277.815          Total Time =   0 h :  1 m : 44 s
*********     Round 2: Early Stop Triggered at epoch 131.     *********
*********     Retrieving best model from iteration 10002.      *********
Change in x0: 0.210
Cell-wise KNN estimation.
Finished. Actual Time:   0 h :  0 m :  3 s
*********             Velocity Refinement Round 3             *********
Epoch 135: Train ELBO = 1243.057, Test ELBO = 1247.244          Total Time =   0 h :  1 m : 50 s
*********     Round 3: Early Stop Triggered at epoch 135.     *********
*********     Retrieving best model from iteration 10302.      *********
Change in x0: 0.163
Cell-wise KNN estimation.
Finished. Actual Time:   0 h :  0 m :  3 s
*********             Velocity Refinement Round 4             *********
Epoch 139: Train ELBO = 1217.538, Test ELBO = 1221.508          Total Time =   0 h :  1 m : 57 s
*********     Round 4: Early Stop Triggered at epoch 139.     *********
*********     Retrieving best model from iteration 10602.      *********
Change in x0: 0.131
Cell-wise KNN estimation.
Finished. Actual Time:   0 h :  0 m :  3 s
*********             Velocity Refinement Round 5             *********
Epoch 143: Train ELBO = 1194.960, Test ELBO = 1199.444          Total Time =   0 h :  2 m :  4 s
*********     Round 5: Early Stop Triggered at epoch 143.     *********
*********     Retrieving best model from iteration 10902.      *********
Change in x0: 0.124
Cell-wise KNN estimation.
Finished. Actual Time:   0 h :  0 m :  3 s
*********             Velocity Refinement Round 6             *********
Epoch 147: Train ELBO = 1172.169, Test ELBO = 1176.171          Total Time =   0 h :  2 m : 10 s
*********     Round 6: Early Stop Triggered at epoch 147.     *********
*********     Retrieving best model from iteration 11202.      *********
Change in x0: 0.121
Final: Train ELBO = 1172.169,   Test ELBO = 1176.171
*********      Finished. Total Time =   0 h :  2 m : 11 s     *********
Using reference batch for latent variable computation.
Computing velocity.
Selected 776 velocity genes.
Writing anndata output to file.

Downstream analyses

[15]:
z = adata.obsm[f'{key}_z']
t = adata.obs[f'{key}_time'].to_numpy()
rho = adata.layers[f'{key}_rho']
kc = adata.layers[f'{key}_kc']
[16]:
# UMAP embedding from latent cell space using reference label
umap_obj = umap.UMAP(n_neighbors=30, n_components=2, min_dist=0.25, random_state=2022)
z_umap = umap_obj.fit_transform(z)

# UMAP embedding from latent cell space using original batch labels
z_batch = adata.obsm[f'{key}_z_batch']
umap_obj = umap.UMAP(n_neighbors=30, n_components=2, min_dist=0.25, random_state=2022)
z_umap2 = umap_obj.fit_transform(z_batch)
[17]:
scv.pl.scatter(adata, basis='umap', color=['leiden', 'batch', f'{key}_time'], legend_loc='right', color_map='gnuplot', ncols=1, size=30)
adata.obsm['X_z_umap'] = z_umap
scv.pl.scatter(adata, basis='z_umap', color=['leiden', 'batch', f'{key}_time'], legend_loc='right', color_map='gnuplot', ncols=1, size=30, save=f'{figure_path}/{dataset}_z_batch_time_{key}.png')
adata.obsm['X_z_umap2'] = z_umap2
scv.pl.scatter(adata, basis='z_umap2', color=['leiden', 'batch', f'{key}_time'], legend_loc='right', color_map='gnuplot', ncols=1, size=30, save=f'{figure_path}/{dataset}_z_batch_time2_{key}.png')
_images/HSPC_scRNA_Multiome_Integration_20_0.png
saving figure to file figures/3423-MV-2-8489-MV-1-GSM5460410/06_04_2025/3423-MV-2-8489-MV-1-GSM5460410_z_batch_time_vae.png
_images/HSPC_scRNA_Multiome_Integration_20_2.png
saving figure to file figures/3423-MV-2-8489-MV-1-GSM5460410/06_04_2025/3423-MV-2-8489-MV-1-GSM5460410_z_batch_time2_vae.png
_images/HSPC_scRNA_Multiome_Integration_20_4.png
[18]:
adata.obsm[f'{key}_latent'] = np.concatenate((adata.obsm[f'{key}_z'], adata.obsm[f'{key}_t']), axis=1)
sc.pp.neighbors(adata, use_rep=f'{key}_latent', n_neighbors=50)
[19]:
# Compute velocity graph on Z UMAP and visualize it, together with batch labels and inferred latent time
adata.obsm['X_z_umap'] = z_umap
vv.model.velocity_graph(adata, key=key, batch_corrected=True)
vv.velocity_embedding_stream(adata, key=key, basis='z_umap', color='leiden', title="", figsize=(8,6), legend_loc='right margin')
vv.velocity_embedding_stream(adata, key=key, basis='z_umap', color='batch', title="", figsize=(8,6), legend_loc='right margin')
vv.velocity_embedding_stream(adata, key=key, basis='z_umap', color=f'{key}_time', color_map='gnuplot', title="", figsize=(8,6), legend_loc='right margin')
computing velocity graph (using 1/8 cores)
    finished (0:01:17) --> added
    'vae_velocity_norm_graph', sparse matrix with cosine correlations (adata.uns)
computing velocity embedding
    finished (0:00:03) --> added
    'vae_velocity_norm_z_umap', embedded velocity vectors (adata.obsm)
_images/HSPC_scRNA_Multiome_Integration_22_3.png
_images/HSPC_scRNA_Multiome_Integration_22_4.png
_images/HSPC_scRNA_Multiome_Integration_22_5.png
[20]:
# HSC marker gene CD133 (PROM1)
scv.pl.scatter(adata, color='PROM1', basis='z_umap', title='CD133')
_images/HSPC_scRNA_Multiome_Integration_23_0.png
[21]:
adata.obs['clusters'] = adata.obs['leiden']
[22]:
# Subset to scRNA cells only for later use
adata_sub = adata[adata.obs['batch']=='scRNA',:].copy()
adata_atac_sub = adata_atac[adata.obs['batch']=='scRNA',:].copy()
[23]:
# Dynamics plots for selected genes with velocity arrows, colored by cell types
np.random.seed(2022)
for gene in gene_plot:
    vv.plot_vel(adata_sub.obs[f'{key}_time'],
                adata_sub[:,gene].layers[f'{key}_chat'],
                adata_sub[:,gene].layers[f'{key}_uhat'],
                adata_sub[:,gene].layers[f'{key}_shat'],
                adata_sub[:,gene].layers[f'{key}_velocity_c'],
                adata_sub[:,gene].layers[f'{key}_velocity_u'],
                adata_sub[:,gene].layers[f'{key}_velocity'],
                cell_labels=adata_sub.obs['leiden'].to_numpy(),
                cell_type_colors=model.cell_type_colors,
                title=gene,
                frame_on=False,
                axis_on=False,
                legend=False)
_images/HSPC_scRNA_Multiome_Integration_26_0.png
_images/HSPC_scRNA_Multiome_Integration_26_1.png
_images/HSPC_scRNA_Multiome_Integration_26_2.png
_images/HSPC_scRNA_Multiome_Integration_26_3.png
_images/HSPC_scRNA_Multiome_Integration_26_4.png
_images/HSPC_scRNA_Multiome_Integration_26_5.png
[24]:
# Dynamics plots for selected genes with velocity arrows, colored by batch labels
batch_raw = adata.obs['batch'].cat.categories.to_numpy()
batch_colors = {batch_raw[i]: adata.uns['batch_colors'][i] for i in range(len(batch_raw))}
np.random.seed(2022)
for gene in gene_plot:
    vv.plot_vel(adata.obs[f'{key}_time'],
                adata[:,gene].layers[f'{key}_chat'],
                adata[:,gene].layers[f'{key}_uhat'],
                adata[:,gene].layers[f'{key}_shat'],
                adata[:,gene].layers[f'{key}_velocity_c'],
                adata[:,gene].layers[f'{key}_velocity_u'],
                adata[:,gene].layers[f'{key}_velocity'],
                cell_labels=adata.obs['batch'].to_numpy(),
                cell_type_colors=batch_colors,
                title=gene,
                frame_on=False,
                axis_on=False,
                legend=False)
_images/HSPC_scRNA_Multiome_Integration_27_0.png
_images/HSPC_scRNA_Multiome_Integration_27_1.png
_images/HSPC_scRNA_Multiome_Integration_27_2.png
_images/HSPC_scRNA_Multiome_Integration_27_3.png
_images/HSPC_scRNA_Multiome_Integration_27_4.png
_images/HSPC_scRNA_Multiome_Integration_27_5.png
[25]:
# Dynamic plots of modality level by latent time for genes of interest
vv.dynamic_plot(adata, adata_atac, gene_plot, color_by='leiden', batch_correction=True)
_images/HSPC_scRNA_Multiome_Integration_28_0.png
[26]:
# Dynamic plots of modality level by latent time for genes of interest, for scRNA cells only
vv.dynamic_plot(adata_sub, adata_atac_sub, gene_plot, color_by='leiden', batch_correction=True)
_images/HSPC_scRNA_Multiome_Integration_29_0.png

Plot phase portraits of genes with the highest likelihoods

[27]:
top_genes = adata[:, adata.var[f'{key}_velocity_genes']].var[f'{key}_likelihood'].sort_values(ascending=False).index
[28]:
scv.pl.scatter(adata, basis=top_genes[:20], x=f'{key}_uhat', y=f'{key}_chat', color='batch', ncols=5)
_images/HSPC_scRNA_Multiome_Integration_32_0.png
[29]:
adata.layers['Mc'] = adata_atac.layers['Mc']
scv.pl.scatter(adata, basis=top_genes[:20], x='Mu', y='Mc', color='batch', ncols=5)
_images/HSPC_scRNA_Multiome_Integration_33_0.png

Plot phase portraits of selected marker genes

[30]:
scv.pl.scatter(adata, basis=['HBB', 'HDC', 'LYZ'], x='Mu', y='Mc', color='leiden', ncols=3, frameon=False, fontsize=30)
scv.pl.scatter(adata, basis=['HBB', 'HDC', 'LYZ'], x=f'{key}_uhat', y=f'{key}_chat', color='leiden', ncols=3, frameon=False, fontsize=30)
_images/HSPC_scRNA_Multiome_Integration_35_0.png
_images/HSPC_scRNA_Multiome_Integration_35_1.png
[31]:
scv.pl.scatter(adata, basis=['HBB', 'HDC', 'LYZ'], x='Mu', y='Mc', color='batch', ncols=3, frameon=False, fontsize=30)
scv.pl.scatter(adata, basis=['HBB', 'HDC', 'LYZ'], x=f'{key}_uhat', y=f'{key}_chat', color='batch', ncols=3, frameon=False, fontsize=30)
_images/HSPC_scRNA_Multiome_Integration_36_0.png
_images/HSPC_scRNA_Multiome_Integration_36_1.png
[32]:
vv.scatter_plot(adata, adata_atac, genes=['HBB', 'HDC', 'LYZ'], by='cus', modalities_pred=['Mc', 'Mu', 'Ms'], color_by='batch', n_cols=3, show_pred_only=True, frame_on=True, axis_on=False, pointsize=0.5)
_images/HSPC_scRNA_Multiome_Integration_37_0.png
[33]:
vv.scatter_plot(adata, adata_atac, genes=['HBB', 'HDC', 'LYZ'], by='cus', modalities=[f'{key}_chat', f'{key}_uhat', f'{key}_shat'], color_by='batch', n_cols=3, show_pred_only=True, frame_on=True, axis_on=False, pointsize=0.5)
_images/HSPC_scRNA_Multiome_Integration_38_0.png
[34]:
# Phase portraits of genes, original, reconstructed and batch-corrected, and reconstructed and not-batch-corrected
scv.pl.scatter(adata, basis=top_genes[:20], color='leiden', ncols=5)
scv.pl.scatter(adata, basis=top_genes[:20], x=f'{key}_shat', y=f'{key}_uhat', color='leiden', ncols=5)
scv.pl.scatter(adata, basis=top_genes[:20], x=f'{key}_shat_batch', y=f'{key}_uhat_batch', color='leiden', ncols=5)
_images/HSPC_scRNA_Multiome_Integration_39_0.png
_images/HSPC_scRNA_Multiome_Integration_39_1.png
_images/HSPC_scRNA_Multiome_Integration_39_2.png
[35]:
scv.pl.scatter(adata, basis=top_genes[:20], color='batch', ncols=5)
scv.pl.scatter(adata, basis=top_genes[:20], x=f'{key}_shat', y=f'{key}_uhat', color='batch', ncols=5)
scv.pl.scatter(adata, basis=top_genes[:20], x=f'{key}_shat_batch', y=f'{key}_uhat_batch', color='batch', ncols=5)
_images/HSPC_scRNA_Multiome_Integration_40_0.png
_images/HSPC_scRNA_Multiome_Integration_40_1.png
_images/HSPC_scRNA_Multiome_Integration_40_2.png

In silico perturbation of PU.1 and GATA1

[38]:
'SPI1' in adata.var_names, 'GATA1' in adata.var_names
[38]:
(True, True)
[39]:
scv.pl.scatter(adata, color='SPI1', basis='z_umap', size=10, title='')
scv.pl.scatter(adata, color='GATA1', basis='z_umap', size=10, title='')
_images/HSPC_scRNA_Multiome_Integration_43_0.png
_images/HSPC_scRNA_Multiome_Integration_43_1.png
[40]:
c1 = adata_atac.layers['Mc'].copy()
u1 = adata.layers['Mu'].copy()
s1 = adata.layers['Ms'].copy()
c2 = adata_atac.layers['Mc'].copy()
u2 = adata.layers['Mu'].copy()
s2 = adata.layers['Ms'].copy()
[41]:
c1[:, np.where(adata.var_names == 'SPI1')[0][0]] = 0
u1[:, np.where(adata.var_names == 'SPI1')[0][0]] = 0
s1[:, np.where(adata.var_names == 'SPI1')[0][0]] = 0
c2[:, np.where(adata.var_names == 'GATA1')[0][0]] = 0
u2[:, np.where(adata.var_names == 'GATA1')[0][0]] = 0
s2[:, np.where(adata.var_names == 'GATA1')[0][0]] = 0
[42]:
spi1_ko_set = model.prepare_dataset(c1, u1, s1, only_full=True)
gata1_ko_set = model.prepare_dataset(c2, u2, s2, only_full=True)
[43]:
# Get PU.1 KO results
out1 = model.test(spi1_ko_set, covar=model.var_to_regress.detach().cpu().numpy())
chat1, uhat1, shat1, _, _, _, z1, std_z1, t1, std_t1, t_1, kc1, rho1 = out1
Using reference batch for latent variable computation.
[44]:
adata_spi1 = ad.AnnData(np.zeros((adata.n_obs, adata.n_vars*3)), obs=adata.obs)
adata_spi1.uns = adata.uns
adata_spi1.obsm = adata.obsm
adata_spi1.obsp = adata.obsp
[45]:
norm_c = np.linalg.norm(adata.layers[f'{key}_chat'][:,adata.var[f'{key}_velocity_genes'].values], axis=1).reshape(-1, 1) + 1e-10
norm_u = np.linalg.norm(adata.layers[f'{key}_uhat'][:,adata.var[f'{key}_velocity_genes'].values], axis=1).reshape(-1, 1) + 1e-10
norm_s = np.linalg.norm(adata.layers[f'{key}_shat'][:,adata.var[f'{key}_velocity_genes'].values], axis=1).reshape(-1, 1) + 1e-10
[46]:
adata_spi1.layers['cus'] = np.concatenate((adata.layers[f'{key}_chat']/norm_c, adata.layers[f'{key}_uhat']/norm_u, adata.layers[f'{key}_shat']/norm_s), 1)
adata_spi1.layers['spi1_velocity'] = np.concatenate((chat1/norm_c, uhat1/norm_u, shat1/norm_s), 1) - adata_spi1.layers['cus']
adata_spi1.var['spi1_velocity_genes'] = np.concatenate((adata.var[f'{key}_velocity_genes'].values, adata.var[f'{key}_velocity_genes'].values, adata.var[f'{key}_velocity_genes'].values))
adata_spi1.uns["spi1_velocity_params"] = {'mode': 'dynamical'}
adata_spi1.obsm['X_z_umap'] = z_umap
vv.model.velocity_graph(adata_spi1, key='spi1', xkey='cus', batch_corrected=True)
computing velocity graph (using 1/8 cores)
    finished (0:03:39) --> added
    'spi1_velocity_norm_graph', sparse matrix with cosine correlations (adata.uns)
[47]:
# Plot perturbation force
vv.velocity_embedding_stream(adata_spi1, key='spi1', basis='z_umap', color='leiden', title="", figsize=(8,6), legend_loc='none')
vv.velocity_embedding_stream(adata_spi1, key='spi1', basis='z_umap', color='batch', title="", figsize=(8,6), legend_loc='none')
computing velocity embedding
    finished (0:00:03) --> added
    'spi1_velocity_norm_z_umap', embedded velocity vectors (adata.obsm)
_images/HSPC_scRNA_Multiome_Integration_51_1.png
_images/HSPC_scRNA_Multiome_Integration_51_2.png
[48]:
adata.obs['spi1_ko_time_diff'] = np.abs(t_1 - adata.obs[f'{key}_time'])
[49]:
# Plot time shift
scv.pl.scatter(adata, basis='z_umap', color='spi1_ko_time_diff', legend_loc='right', color_map='gnuplot', size=20, title='')
_images/HSPC_scRNA_Multiome_Integration_53_0.png
[50]:
# Plot latent space shift
pca = PCA(n_components=1, random_state=42)
z_pca = pca.fit_transform(np.abs(z1-adata.obsm[f'{key}_z']))
adata.obs['z_pca_spi1_ko'] = z_pca
scv.pl.scatter(adata, color='z_pca_spi1_ko', basis='z_umap', size=10, title='')
_images/HSPC_scRNA_Multiome_Integration_54_0.png
[51]:
# Get GATA1 KO results
out2 = model.test(gata1_ko_set, covar=model.var_to_regress.detach().cpu().numpy())
chat2, uhat2, shat2, _, _, _, z2, std_z2, t2, std_t2, t_2, kc2, rho2 = out2
Using reference batch for latent variable computation.
[52]:
adata_gata1 = ad.AnnData(np.zeros((adata.n_obs, adata.n_vars*3)), obs=adata.obs)
adata_gata1.uns = adata.uns
adata_gata1.obsm = adata.obsm
adata_gata1.obsp = adata.obsp
[53]:
adata_gata1.layers['cus'] = np.concatenate((adata.layers[f'{key}_chat']/norm_c, adata.layers[f'{key}_uhat']/norm_u, adata.layers[f'{key}_shat']/norm_s), 1)
adata_gata1.layers['gata1_velocity'] = np.concatenate((chat2/norm_c, uhat2/norm_u, shat2/norm_s), 1) - adata_gata1.layers['cus']
adata_gata1.var['gata1_velocity_genes'] = np.concatenate((adata.var[f'{key}_velocity_genes'].values, adata.var[f'{key}_velocity_genes'].values, adata.var[f'{key}_velocity_genes'].values))
adata_gata1.uns["gata1_velocity_params"] = {'mode': 'dynamical'}
adata_gata1.obsm['X_z_umap'] = z_umap
vv.model.velocity_graph(adata_gata1, key='gata1', xkey='cus', batch_corrected=True)
computing velocity graph (using 1/8 cores)
    finished (0:03:42) --> added
    'gata1_velocity_norm_graph', sparse matrix with cosine correlations (adata.uns)
[54]:
# Plot perturbation force
vv.velocity_embedding_stream(adata_gata1, key='gata1', basis='z_umap', color='leiden', title="", figsize=(8,6), legend_loc='none')
vv.velocity_embedding_stream(adata_gata1, key='gata1', basis='z_umap', color='batch', title="", figsize=(8,6), legend_loc='none')
computing velocity embedding
    finished (0:00:03) --> added
    'gata1_velocity_norm_z_umap', embedded velocity vectors (adata.obsm)
_images/HSPC_scRNA_Multiome_Integration_58_1.png
_images/HSPC_scRNA_Multiome_Integration_58_2.png
[55]:
adata.obs['gata1_ko_time_diff'] = np.abs(t_2 - adata.obs[f'{key}_time'])
[56]:
# Plot time shift
scv.pl.scatter(adata, basis='z_umap', color='gata1_ko_time_diff', legend_loc='right', color_map='gnuplot', size=20, title='')
_images/HSPC_scRNA_Multiome_Integration_60_0.png
[57]:
# Plot latent space shift
pca = PCA(n_components=1, random_state=42)
z_pca = pca.fit_transform(np.abs(z2-adata.obsm[f'{key}_z']))
adata.obs['z_pca_gata1_ko'] = z_pca
scv.pl.scatter(adata, color='z_pca_gata1_ko', basis='z_umap', size=10, title='', perc=[0,99.9])
_images/HSPC_scRNA_Multiome_Integration_61_0.png

Save the final result

[58]:
adata.write_h5ad(data_path_base+"/final.h5ad")
[ ]: