source("/Users/daga/Documents/Projects/Universal_scripts/Diffbind_visualization_functions.R")
setwd("/Users/daga/Documents/Projects/Autoimmune_all/Scripts/")
library(factoextra)
library(tidyverse)
library(cluster)
library(factoextra)
library(pheatmap)
load("../Diff_expression_results/resNorm_l2FC_all_disease_withJIASF.Rdata")
########Function
######### Normalises across rowa
cal_z_score <- function(x){
(x - mean(x,na.rm = TRUE)) / sd(x,na.rm = TRUE)
}
####### datasets
### Full dattset
resNorm_l2FC_all_disease_mat = as.matrix(resNorm_l2FC_all_disease_df)
##### Condition applied
condition_vec_any = apply(resNorm_l2FC_all_disease_mat,MARGIN = 1,FUN = function(x) any(x < -log2(1.1) | x > log2(1.1)))
resNorm_l2FC_cond_any = resNorm_l2FC_all_disease_mat[condition_vec_any,]
########### Normalised datasets
resNorm_l2FC_cond_any_norm = apply(resNorm_l2FC_cond_any, 2, cal_z_score)
######################## Intialising input mat
input_mat = resNorm_l2FC_cond_any_norm
########## Analysis
plot(density(as.numeric(input_mat)))
set.seed(123)
km10.res <- kmeans(input_mat, iter.max = 1000,centers = 10 ,
nstart = 100,algorithm="Lloyd")
########### for pheatmap annotation
km.res = km10.res
peak_cluster_df = as.data.frame(km.res$cluster)
peak_cluster_df$peak_id = rownames(peak_cluster_df)
peak_cluster_df = peak_cluster_df[order(peak_cluster_df$`km.res$cluster`),]
peak_cluster_df$peak_id = NULL
peak_cluster_df$`km.res$cluster` = factor(peak_cluster_df$`km.res$cluster`)
####### Order according to cluster
input_mat_order = input_mat[rownames(peak_cluster_df),]
########## Gap separation
sum_cluster_size = numeric()
sum_cluster = 0
for (i in 1:(length(km.res$size)-1)) {
row_gap = km.res$size[i]
sum_cluster = sum_cluster + row_gap
sum_cluster_size = append(sum_cluster_size,sum_cluster)
#print (sum_cluster)
}
row_sep = sum_cluster_size + 1
#### Check
peak_cluster_df[row_sep,]
peak_cluster_df[(row_sep-1),]
#### any norm condition
breaks = unique(c(min(input_mat),seq(-4.5,4.5,length.out = 200),max(input_mat)))
hmcol_rdbu <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(201))
####### Colour
hmcol = hmcol_rdbu
############# For small size figure
colnames = c("SLE","Psoriasis","Graves","JIA")
pdf("../Analysis_plot/K_means_clusters/all_disease_with_JIASF/resNorm_l2FC_subset_norm_kmeans10_with_legend.pdf",width = 3,height = 3)
pheatmap(mat = input_mat_order,cluster_rows = F,cluster_cols = F,
show_rownames = F,legend = T,labels_col = colnames,angle_col = 90,fontsize_col = 8,
annotation_row = peak_cluster_df,color = hmcol,
breaks = breaks,gaps_row = row_sep,annotation_legend = T,annotation_names_row = F)
dev.off()