AD-Enhancer-Remodelling / kmeans_clustering_all_diseases.R
kmeans_clustering_all_diseases.R
Raw
source("/Users/daga/Documents/Projects/Universal_scripts/Diffbind_visualization_functions.R")
setwd("/Users/daga/Documents/Projects/Autoimmune_all/Scripts/")

library(factoextra)
library(tidyverse)
library(cluster)
library(factoextra)
library(pheatmap)

load("../Diff_expression_results/resNorm_l2FC_all_disease_withJIASF.Rdata")

########Function 
######### Normalises across rowa
cal_z_score <- function(x){
  (x - mean(x,na.rm = TRUE)) / sd(x,na.rm = TRUE)
}

####### datasets
### Full dattset
resNorm_l2FC_all_disease_mat = as.matrix(resNorm_l2FC_all_disease_df)

##### Condition applied
condition_vec_any = apply(resNorm_l2FC_all_disease_mat,MARGIN = 1,FUN = function(x) any(x < -log2(1.1) | x > log2(1.1)))
resNorm_l2FC_cond_any = resNorm_l2FC_all_disease_mat[condition_vec_any,]

########### Normalised datasets
resNorm_l2FC_cond_any_norm = apply(resNorm_l2FC_cond_any, 2, cal_z_score)

######################## Intialising input mat 
input_mat = resNorm_l2FC_cond_any_norm


########## Analysis
plot(density(as.numeric(input_mat)))


set.seed(123)
km10.res <- kmeans(input_mat, iter.max = 1000,centers = 10 ,
                   nstart = 100,algorithm="Lloyd")


########### for pheatmap annotation 
km.res = km10.res

peak_cluster_df = as.data.frame(km.res$cluster)
peak_cluster_df$peak_id = rownames(peak_cluster_df)
peak_cluster_df = peak_cluster_df[order(peak_cluster_df$`km.res$cluster`),]
peak_cluster_df$peak_id  = NULL
peak_cluster_df$`km.res$cluster` = factor(peak_cluster_df$`km.res$cluster`)

####### Order according to cluster
input_mat_order = input_mat[rownames(peak_cluster_df),]

########## Gap separation 
sum_cluster_size = numeric()
sum_cluster = 0 

for (i in 1:(length(km.res$size)-1)) {
  
  row_gap = km.res$size[i] 
  sum_cluster = sum_cluster + row_gap 
  
  sum_cluster_size = append(sum_cluster_size,sum_cluster)
  #print (sum_cluster)
  
}

row_sep = sum_cluster_size + 1

#### Check
peak_cluster_df[row_sep,]
peak_cluster_df[(row_sep-1),]



#### any norm condition
breaks = unique(c(min(input_mat),seq(-4.5,4.5,length.out = 200),max(input_mat)))
hmcol_rdbu <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(201))

####### Colour
hmcol = hmcol_rdbu

############# For small size figure
colnames = c("SLE","Psoriasis","Graves","JIA")
pdf("../Analysis_plot/K_means_clusters/all_disease_with_JIASF/resNorm_l2FC_subset_norm_kmeans10_with_legend.pdf",width = 3,height = 3)

pheatmap(mat = input_mat_order,cluster_rows = F,cluster_cols = F,
         show_rownames = F,legend = T,labels_col = colnames,angle_col = 90,fontsize_col = 8,
         annotation_row  = peak_cluster_df,color = hmcol,
         breaks = breaks,gaps_row = row_sep,annotation_legend = T,annotation_names_row = F)
dev.off()