################### ### Description ### ################### # This script is the for checking how sensitive our N/s estimates are to the number of samples analysed # Relevant figures: Supp. Fig. 2c # Inputs: # - Anndata object of methylation beta values, with CpGs as obs and samples as vars # Outputs: # - Model outputs (traces) saved as pk files for each number of sites analysed ############## ### Author ### ############## # Sam Crofts (sam.crofts@ed.ac.uk) ################################## ################################## import sys sys.path.append("..") # fix to import modules from root from src.general_imports import * import pymc.sampling.jax as pmjax import jax # Variable for site selection (spearman = based on mean change, white = based on variance change) selection_method = "white" #other parameters sample_size = 200 #set random seed np.random.seed(13) random.seed(13) for n_sites in [50, 75, 100, 150, 200, 250, 500, 1000, 2000]: # Load adata file = open('../data/genscot_full_with_site_details.pk', 'rb') adata = pickle.load(file) file.close() #reset ages to start at 0 adata.var.age = adata.var.age - adata.var.age.min() # Replace 0 and 1 adata.X = np.where(adata.X <= 0, 0.0001, adata.X) adata.X = np.where(adata.X >= 1, 0.9999, adata.X) #keep only sites that have a single peak adata = adata[(adata.obs["n_peaks"]==1)] #only keep sites increasing in variance adata = adata[adata.obs["var_change"] > 0] #make sure mean methylation is between 0.1 and 0.9 adata = adata[(adata.obs.mean_meth > 0.1) & (adata.obs.mean_meth < 0.9)] #set random seed np.random.seed(11) random.seed(11) adata = sample_to_uniform_age(adata, sample_size) if selection_method == "spearman": #Get absolute value of spearman stat adata.obs['spearman_stat'] = np.abs(adata.obs['spearman_stat']) adata = adata[adata.obs.sort_values('spearman_stat', ascending=False).index] adata = adata[:n_sites] elif selection_method == "white": #take the lowest white pval sites adata = adata[adata.obs.sort_values('white_pval', ascending=True).index] adata = adata[:n_sites] #make model sym_model = make_mcmc_order1(adata) with sym_model: trace = pmjax.sample_numpyro_nuts(chains=2, progressbar=True, target_accept=0.95, tune=5000, chain_method='vectorized', random_seed=16) filename = "nsites" + str(n_sites) + "_ss" + str(sample_size) + "_" + selection_method with open('../exports/model_outputs/humans/sensitivity_analyses/n_sites/'+filename+'.pk', 'wb') as f: pickle.dump(trace, f)