library("DiffBind")
library("DESeq2")
library("GenomicRanges")
library("limma")
########## Functions used
############ Genomic overlap
to_make_gr_object_chr_base_loc = function (chr_base_loc_vec,column_name) {
chr_base_loc = unique(chr_base_loc_vec)
chr_base_loc_gr = GRanges(chr_base_loc)
chr_base_loc_gr$meta_col = chr_base_loc
names(mcols(chr_base_loc_gr)) = column_name
return (chr_base_loc_gr) }
##### to_find overlap between gr
to_overlap_grange_object = function (gr1, gr2, gr1_col, gr2_col) {
query_gr = gr1
subject_gr = gr2
query_col = gr1_col
subject_col = gr2_col
overlap = findOverlaps(query_gr,subject_gr)
query_row_ids = queryHits(overlap)
subject_rowids = subjectHits(overlap)
query_overlap_df = as.data.frame(elementMetadata(query_gr)[query_row_ids,query_col])
subject_overlap_df= as.data.frame(elementMetadata(subject_gr)[subject_rowids, subject_col])
### to find the interesection of overlaps
overlap_width = width(pintersect(query_gr[queryHits(overlap)],subject_gr[subjectHits(overlap )]))
Overlap_df = cbind.data.frame(query_overlap_df,subject_overlap_df)
Overlap_df["width"] = overlap_width
return(Overlap_df)
}
######## Outfn
grn_evidence_out_fn = function(grn_name,evidence_vector) {
evidence_all = paste(evidence_vector,collapse = "_")
grn_out_fn = paste(grn_name,evidence_all,sep = "_")
return(grn_out_fn)
}
### Overlapping T-cell GRN with molecular disease evidence
grn_peak_overalp_with_cols = function(grn, grn_peak_col, disease_freq_df, evidence_col, evidence_name) {
####### Making col_name
#evidence_id = paste(evidence_name,"id",sep = "_")
###### To make gr object for the overlap
input_grn_peak_gr = to_make_gr_object_chr_base_loc(chr_base_loc_vec = grn[,grn_peak_col],column_name = "Location")
input_evidence_gr = to_make_gr_object_chr_base_loc(chr_base_loc_vec = disease_freq_df[,evidence_col],column_name = "Location")
grn_evidence_overlap_df = to_overlap_grange_object(gr1 = input_grn_peak_gr,gr2 = input_evidence_gr,
gr1_col = "Location",gr2_col = "Location")
names(grn_evidence_overlap_df) = c("grn_peak_id","Evidence_id")
############### Sub-setting grn for the overlapping region
col_select_grn = c("peak","TF","ENSEMBL","SYMBOL")
grn_sub = grn[grn[,grn_peak_col]%in%grn_evidence_overlap_df[,"grn_peak_id"],col_select_grn]
####### Merging grn_sub with overlap df to include evidence id
grn_sub_merge = merge(grn_sub,grn_evidence_overlap_df,by.x = "peak",by.y = "grn_peak_id")
####### To add disease info of evidence (like diffpeak, coloc region) on merge df
disease_col = c("Disease")
grn_sub_merge[,disease_col] = disease_freq_df[match(grn_sub_merge[,"Evidence_id"],disease_freq_df[,evidence_col]),disease_col]
###### Adding the evidence col to grn_sub_merge
grn_sub_merge$Evidence = evidence_name
return(grn_sub_merge)
}
### to get key freq df
to_get_key_freq_category_df_mod = function (key_value_list,key_name,value_name) {
#key_value_list = hQTL_disease_list
#key_name = "hQTL_id"
#value_name = "Disease"
###### Names of the columns in df
value_freq_name = paste(value_name,"freq",sep = "_")
value_category = paste(value_name,"category",sep = "_")
######### To make key- value freq df from key-value list
key_vector = names(unlist(lapply(X = key_value_list,FUN = length)))
value_freq_vector = unname(unlist(lapply(X = key_value_list,FUN = length)))
key_value_freq_df = data.frame(key = key_vector,freq = value_freq_vector)
names(key_value_freq_df) = c(key_name,value_freq_name)
###### To add cols
key_value_freq_df$category = NA
names(key_value_freq_df)[3] = value_category
key_value_freq_df[,value_name] = NA
####### Adding the disease column for every key
for (i in seq(1:nrow(key_value_freq_df))) {
key = as.character(key_value_freq_df[i,key_name])
value = paste(key_value_list[[key]],collapse = "_")
key_value_freq_df[i,value_name] = value
}
###### To add the category as "common" and "unique)
key_value_freq_df[,value_freq_name] = as.numeric(key_value_freq_df[,value_freq_name])
key_value_freq_df[,value_category] = ifelse(key_value_freq_df[,value_freq_name] == 1,"Unique","Common")
return (key_value_freq_df)
}
to_make_master_freq_all_val_table = function(master_disease_grn_table, key_name, value_vector,merging = TRUE) {
#disease_pattern = paste0(".*",disease_name,".*")
#disease_specific_grn = master_disease_grn_table[grep(pattern = disease_pattern,x = master_disease_grn_table[,"Disease"]),]
key_value_freq_df_list = list()
#key_value_list_all = list()
for( value_name in value_vector) {
key_value_list = to_key_value_list_from_df(long_df = master_disease_grn_table,key_name = key_name,value_name = value_name)
key_value_freq_df = to_get_key_freq_category_df_mod(key_value_list = key_value_list,key_name = key_name,value_name = value_name)
key_value_freq_df_list[[value_name]] = key_value_freq_df
#key_value_list_all[[value_name]] = key_value_list
}
##### Merge freq dataframe
if (merging) {
key_all_value_freq_df = Reduce(f = function(x,y) {z = merge (x,y,by.x = key_name,by.y = key_name); return (z)}, x = key_value_freq_df_list)
}
else{
key_all_value_freq_df = key_value_freq_df_list
}
return (key_all_value_freq_df)
}
generate_freq_df_grn_list_from_master_grn_corrected = function (input_master_grn){
##### master table for gene, TF, peak
TF_master_table = input_master_grn[grepl(pattern = "diffTF",x = input_master_grn$Evidence),]
peak_master_table = input_master_grn[grepl(pattern = "diffpeak|gwas|coloc",x = input_master_grn$Evidence),]
#### Process
freq_list = list()
value_vector = c("Disease","Evidence","Evidence_id")
### To make master targte genes df
if(nrow(input_master_grn) > 0){
target_genes_all_values_freq_df = to_make_master_freq_all_val_table(master_disease_grn_table = input_master_grn,key_name = "SYMBOL",value_vector = value_vector)
freq_list[["target_genes_all_values_freq_df"]] = target_genes_all_values_freq_df}
if(nrow(peak_master_table) > 0){
peaks_all_values_freq_df = to_make_master_freq_all_val_table(master_disease_grn_table = peak_master_table ,key_name = "peak",value_vector = value_vector)
freq_list[["peaks_all_values_freq_df"]] = peaks_all_values_freq_df}
if(nrow(TF_master_table) > 0){
TF_all_values_freq_df = to_make_master_freq_all_val_table(master_disease_grn_table = TF_master_table,key_name = "TF",value_vector = value_vector)
freq_list[["TF_all_values_freq_df"]] = TF_all_values_freq_df}
return(freq_list)}
making_grn_from_master_grn = function (input_master_grn) {
input_master_grn$TF_peak_gene = paste(input_master_grn$peak,
input_master_grn$TF,
input_master_grn$SYMBOL,sep = "_")
grn_cols_select = c("peak","TF","ENSEMBL","SYMBOL")
grn_unique = input_master_grn[!(duplicated(input_master_grn$TF_peak_gene)),grn_cols_select]
return(grn_unique) }
to_find_same_direction_percentage = function (x){
### if dorection is postive or negative
if(all(x ==1)|all(x == -1)) {
same_direction_percent = 100
}else{
same_direction_percent = round((mean(x == 1))*100,2)}
return(same_direction_percent)
}