src/83_validation_RNAscope_PDGFRA_RBFOX1.R · Luise-Seeker-Human-WM-Glia

#Quantification analysis for RNAscope measurements 
library(dplyr)
library(ggplot2)
#library(ggpubr)
library(here)
library(ggsci)

# make colour palette
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)

mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)

#Read in
dat <- read.csv(here("data", 
                     "validation_data",
                     "20221011_RNAscope_PDGFRA_EBF1_AF.csv"))

#select detected cells only
dat <- subset(dat, dat$Name =="PathCellObject")

#select only the columns of interest

t <- select(dat, 
            Image, 
            Parent, 
            dummy_id,
            tissue,
            age_group,
            Subcellular_Channel2_Num_spots_estimated, 
            Subcellular_Channel_3_Num_spots_estimated )

#rename columns
colnames(t) [colnames(t)== "Subcellular_Channel2_Num_spots_estimated"] <- "EBF1_dots"
colnames(t) [colnames(t)== "Subcellular_Channel_3_Num_spots_estimated"] <- "PDGFRA_dots"
t$EBF1_dots <- as.numeric(t$EBF1_dots)
t$PDGFRA_dots <- as.numeric(t$PDGFRA_dots)



#select columns again
t<- select(t, Parent, dummy_id, age_group, tissue, EBF1_dots, PDGFRA_dots)

#save the reduced dataset
#write.csv(t, file = "M:/PhD/Year 4/Validation/ISH/ISH_PDGFRA_EBF1_df.csv")
#t <- read.csv("M:/PhD/Year 4/Validation/ISH/ISH_PDGFRA_EBF1_df.csv")
#Assign unique ID for each FOV - check that this works properly 
split <- split(t, t$dummy_id)
#for (i in 1:length(names(split))) {
#  temp <-split[[i]]
#  temp$FOV <- paste(names(split)[i], temp$ROI)
#  if(i == 1){
#    t <- temp
#  }else{
#    t <- rbind(temp, t)}
#}

#Add area for calculating density
t$ROI_a_um2 <- "250000"




#add logical columns 
t<- mutate(t, EBF1 = t$EBF1_dots >=1)
t<- mutate(t, PDGFRA = t$PDGFRA_dots >=3)
t<- mutate(t, dPos= t$EBF1 == TRUE & t$PDGFRA == TRUE)

#sum up positive/double positive cells per FOV (== Parent)
EBF1 <- t %>% group_by(Parent) %>%
  summarise(tot_EBF1 = sum(EBF1)) 
PDGFRA <- t %>% group_by(Parent) %>% summarise(tot_PDGFRA = sum(PDGFRA)) 
dPos <-  t %>% group_by(Parent) %>%
  summarise(tot_dPos = sum(dPos)) 
tot_nl <- t %>% group_by(Parent) %>%
  summarise(tot_nl = length(Parent))

#create df with counts per FOV
df<- merge(PDGFRA, EBF1, by = "Parent", all = TRUE)        
df<- merge(df,dPos, by = "Parent", all = TRUE) 
df <- merge(df, tot_nl, by = "Parent", all = TRUE)
t2 <- merge(df, t, by = "Parent", all = TRUE)

t2<- t2[!duplicated(t2$Parent),]


#express double pos as proportion of PDGFRA-pos
t2$prop_dPos <- t2$tot_dPos/t2$tot_PDGFRA
#calculate density 
t2$dP_density <- t2$tot_dPos/as.numeric(t2$ROI_a_um2)
t2$EBF1_density <- t2$tot_EBF1/as.numeric(t2$ROI_a_um2)
t2$PDGFRA_density <- t2$tot_PDGFRA/as.numeric(t2$ROI_a_um2)

###UPDATED##
#ANOVA works for looking at tissue and condition - data is normal so yaay
shapiro.test(t2$prop_dPos)

lin_mod <- lm(prop_dPos ~ age_group + tissue, data = t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group+ tissue, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)

t2$age_group <- factor(t2$age_group, levels = c("young", "old"))
t2$percentage <- t2$prop_dPos *100

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(color = dummy_id),
                                 position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12)) +
  scale_color_manual(values = mycoloursP)

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(color = dummy_id, shape = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12)) +
  scale_color_manual(values = mycoloursP)

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(shape = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12)) +
  scale_color_manual(values = mycoloursP)


ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(color = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12)) +
  scale_color_manual(values = c(mycoloursP[8],mycoloursP[9]) )



# Repeat, but remove auto fluorescence signal this time
# Below I am harsh and remove any cell that has an autofluorescent signal 
# regardless of co-localisation with EBF1 signal or not.



#select only the columns of interest

t <- select(dat, 
            Image, 
            Parent, 
            dummy_id,
            age_group,
            tissue,
            Subcellular_Channel1_Num_spots_estimated,
            Subcellular_Channel2_Num_spots_estimated, 
            Subcellular_Channel_3_Num_spots_estimated )

#rename columns
colnames(t) [colnames(t)== "Subcellular_Channel1_Num_spots_estimated"] <- "AF_dots"
colnames(t) [colnames(t)== "Subcellular_Channel2_Num_spots_estimated"] <- "EBF1_dots"
colnames(t) [colnames(t)== "Subcellular_Channel_3_Num_spots_estimated"] <- "PDGFRA_dots"
t$AF_dots <- as.numeric(t$AF_dots)
t$EBF1_dots <- as.numeric(t$EBF1_dots)
t$PDGFRA_dots <- as.numeric(t$PDGFRA_dots)


#select columns again
t<- select(t, Parent, dummy_id, age_group, tissue, EBF1_dots, PDGFRA_dots, AF_dots)

#save the reduced dataset
#write.csv(t, file = "M:/PhD/Year 4/Validation/ISH/ISH_PDGFRA_EBF1_df.csv")
#t <- read.csv("M:/PhD/Year 4/Validation/ISH/ISH_PDGFRA_EBF1_df.csv")
#Assign unique ID for each FOV - check that this works properly 
split <- split(t, t$dummy_id)


#Add area for calculating density
t$ROI_a_um2 <- "250000"




#add logical columns 
t<- mutate(t, AF = t$AF_dots >=2)
t<- mutate(t, EBF1 = t$EBF1_dots >=1)
t<- mutate(t, PDGFRA = t$PDGFRA_dots >=3)
t<- mutate(t, dPos= t$EBF1 == TRUE & t$PDGFRA == TRUE)

t<- mutate(t, tripPos= t$EBF1 == TRUE & t$PDGFRA == TRUE & t$AF == TRUE)

bool <- t$tripPos == "FALSE"

summary(bool)
subs_t <- t[bool,]

#sum up positive/double positive cells per FOV (== Parent)
EBF1 <- subs_t %>% group_by(Parent) %>%
  summarise(tot_EBF1 = sum(EBF1)) 
PDGFRA <- subs_t %>% group_by(Parent) %>% summarise(tot_PDGFRA = sum(PDGFRA)) 
dPos <-  subs_t %>% group_by(Parent) %>%
  summarise(tot_dPos = sum(dPos)) 
tot_nl <- subs_t %>% group_by(Parent) %>%
  summarise(tot_nl = length(Parent))

#create df with counts per FOV
df<- merge(PDGFRA, EBF1, by = "Parent", all = TRUE)        
df<- merge(df,dPos, by = "Parent", all = TRUE) 
df <- merge(df, tot_nl, by = "Parent", all = TRUE)
t2 <- merge(df, t, by = "Parent", all = TRUE)

t2<- t2[!duplicated(t2$Parent),]


#express double pos as proportion of PDGFRA-pos
t2$prop_dPos <- t2$tot_dPos/t2$tot_PDGFRA
#calculate density 
t2$dP_density <- t2$tot_dPos/as.numeric(t2$ROI_a_um2)
t2$EBF1_density <- t2$tot_EBF1/as.numeric(t2$ROI_a_um2)
t2$PDGFRA_density <- t2$tot_PDGFRA/as.numeric(t2$ROI_a_um2)

###UPDATED##
#ANOVA works for looking at tissue and condition - data is normal so yaay
shapiro.test(t2$prop_dPos)

lin_mod <- lm(prop_dPos ~ age_group + tissue, data = t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group + tissue, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)


lin_mod <- lm(prop_dPos ~ age_group * tissue, data = t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group * tissue, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)

t2$age_group <- factor(t2$age_group, levels = c("young", "old"))
t2$percentage <- t2$prop_dPos *100

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(colour = dummy_id),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(colour = dummy_id, shape = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes( shape = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)

# Removal of cells that contain auto-fluorescent signal makes the data
# distribution non-normal and also reduces the effect size 

wilcox.test(prop_dPos ~ age_group, data = t2)



#####
####
#### CB only
cb_t2 <- subset(t2, t2$tissue == "CB")

cb_t2$prop_dPos <- cb_t2$tot_dPos/cb_t2$tot_PDGFRA
#calculate density 
cb_t2$dP_density <- cb_t2$tot_dPos/as.numeric(cb_t2$ROI_a_um2)
cb_t2$EBF1_density <- cb_t2$tot_EBF1/as.numeric(cb_t2$ROI_a_um2)
cb_t2$PDGFRA_density <- cb_t2$tot_PDGFRA/as.numeric(cb_t2$ROI_a_um2)

###UPDATED##
#ANOVA works for looking at tissue and condition - data is normal so yaay
shapiro.test(cb_t2$prop_dPos)

lin_mod <- lm(prop_dPos ~ age_group, data = cb_t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)


cb_t2$age_group <- factor(cb_t2$age_group, levels = c("young", "old"))
cb_t2$percentage <- cb_t2$prop_dPos *100

ggplot(cb_t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(colour = dummy_id),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)


#### CSC only
csc_t2 <- subset(t2, t2$tissue == "CSC")

csc_t2$prop_dPos <- csc_t2$tot_dPos/csc_t2$tot_PDGFRA
#calculate density 
csc_t2$dP_density <- csc_t2$tot_dPos/as.numeric(csc_t2$ROI_a_um2)
csc_t2$EBF1_density <- csc_t2$tot_EBF1/as.numeric(csc_t2$ROI_a_um2)
csc_t2$PDGFRA_density <- csc_t2$tot_PDGFRA/as.numeric(csc_t2$ROI_a_um2)


shapiro.test(csc_t2$prop_dPos)

lin_mod <- lm(prop_dPos ~ age_group, data = csc_t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)


csc_t2$age_group <- factor(csc_t2$age_group, levels = c("young", "old"))
csc_t2$percentage <- csc_t2$prop_dPos *100

ggplot(csc_t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(colour = dummy_id),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)




###### Test other autofluorescence markers to correct for 




t <- select(dat, 
            Image, 
            Parent, 
            dummy_id,
            tissue,
            age_group,
            Subcellular_Channel1_Num_spots_estimated,
            Subcellular_Channel2_Num_spots_estimated, 
            Subcellular_Channel_3_Num_spots_estimated,
            Nucleus_Alexa_488_narrow_sum,
            Nucleus_Alexa_488_narrow_std_dev,
            Nucleus_Alexa_488_narrow_max,
            Nucleus_Alexa_488_narrow_min,
            Nucleus_Alexa_488_narrow_range)




#rename columns
colnames(t) [colnames(t)== "Subcellular_Channel1_Num_spots_estimated"] <- "AF_dots"
colnames(t) [colnames(t)== "Subcellular_Channel2_Num_spots_estimated"] <- "EBF1_dots"
colnames(t) [colnames(t)== "Subcellular_Channel_3_Num_spots_estimated"] <- "PDGFRA_dots"
colnames(t)[colnames(t)== "Nucleus_Alexa_488_narrow_sum"] <- "nuc_AF_sum"
colnames(t)[colnames(t)== "Nucleus_Alexa_488_narrow_std_dev"] <- "nuc_AF_sd"
colnames(t)[colnames(t)== "Nucleus_Alexa_488_narrow_max"] <- "nuc_AF_max"
colnames(t)[colnames(t)== "Nucleus_Alexa_488_narrow_min"] <- "nuc_AF_min"
colnames(t)[colnames(t)== "Nucleus_Alexa_488_narrow_range"] <- "nuc_AF_range"

t$EBF1_dots <- as.numeric(t$EBF1_dots)
t$PDGFRA_dots <- as.numeric(t$PDGFRA_dots)
t$AF_dots <- as.numeric(t$AF_dots)
t$nuc_AF_sum <- as.numeric(t$nuc_AF_sum)
t$nuc_AF_sd <- as.numeric(t$nuc_AF_sd)
t$nuc_AF_max <- as.numeric(t$nuc_AF_max)
t$nuc_AF_min <- as.numeric(t$nuc_AF_min)
t$nuc_AF_range <- as.numeric(t$nuc_AF_range)



ggplot(t, aes(x = age_group, y = EBF1_dots))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = PDGFRA_dots))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = AF_dots))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = nuc_AF_sum))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = nuc_AF_sd))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = nuc_AF_max))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = nuc_AF_min))+
  geom_boxplot()+
  geom_jitter()

ggplot(t, aes(x = age_group, y = nuc_AF_range))+
  geom_boxplot()+
  geom_jitter()

t$mean_AF <- t$nuc_AF_range/ t$nuc_AF_sum

ggplot(t, aes(x = age_group, y = mean_AF, fill = age_group))+
  geom_boxplot()+
  geom_jitter(size = 0.1)+
  theme_minimal()+
  ylab("Mean autofluorescence")+
  xlab("Age Group")

lmod <- lm(mean_AF ~ age_group, data = t)
summary(lmod)
#select columns again
t<- select(t, Parent, dummy_id, age_group, tissue, AF_dots, 
           EBF1_dots, PDGFRA_dots, mean_AF)

#save the reduced dataset
#write.csv(t, file = "M:/PhD/Year 4/Validation/ISH/ISH_PDGFRA_EBF1_df.csv")
#t <- read.csv("M:/PhD/Year 4/Validation/ISH/ISH_PDGFRA_EBF1_df.csv")
#Assign unique ID for each FOV - check that this works properly 
split <- split(t, t$dummy_id)

#Add area for calculating density
t$ROI_a_um2 <- "250000"




#add logical columns 



t<- mutate(t, AF = t$AF_dots >=1)
t<- mutate(t, EBF1 = t$EBF1_dots >=1)
t<- mutate(t, PDGFRA = t$PDGFRA_dots >=3)
t<- mutate(t, dPos= t$EBF1 == TRUE & t$PDGFRA == TRUE)

t<- mutate(t, tripPos= t$EBF1 == TRUE & t$PDGFRA == TRUE & t$AF == TRUE)

#bool <- t$tripPos == "FALSE"

#summary(bool)
#subs_t <- t[bool,]

#sum up positive/double positive cells per FOV (== Parent)
EBF1 <- t %>% group_by(Parent) %>%
  summarise(tot_EBF1 = sum(EBF1)) 
PDGFRA <- t %>% group_by(Parent) %>% summarise(tot_PDGFRA = sum(PDGFRA)) 
dPos <-  t %>% group_by(Parent) %>%
  summarise(tot_dPos = sum(dPos)) 
tot_nl <- t %>% group_by(Parent) %>%
  summarise(tot_nl = length(Parent))

mean_af <- t %>% group_by(Parent) %>%
  summarise(tot_mean_af = mean(mean_AF))

#create df with counts per FOV
df<- merge(PDGFRA, EBF1, by = "Parent", all = TRUE)        
df<- merge(df,dPos, by = "Parent", all = TRUE) 
df <- merge(df, tot_nl, by = "Parent", all = TRUE)
df <- merge(df, t, by = "Parent", all = TRUE)
t2 <- merge(df, mean_af, by = "Parent")

t2<- t2[!duplicated(t2$Parent),]


#express double pos as proportion of PDGFRA-pos
t2$prop_dPos <- t2$tot_dPos/t2$tot_PDGFRA
#calculate density 
t2$dP_density <- t2$tot_dPos/as.numeric(t2$ROI_a_um2)
t2$EBF1_density <- t2$tot_EBF1/as.numeric(t2$ROI_a_um2)
t2$PDGFRA_density <- t2$tot_PDGFRA/as.numeric(t2$ROI_a_um2)

###UPDATED##
#ANOVA works for looking at tissue and condition - data is normal so yaay
shapiro.test(t2$prop_dPos)

lin_mod <- lm(prop_dPos ~ age_group + tissue + tot_mean_af, data = t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group + tissue + tot_mean_af, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)




lin_mod <- lm(prop_dPos ~ age_group * tissue+ tot_mean_af, data = t2)
summary(lin_mod)
res.aov2 <- aov(prop_dPos ~ age_group * tissue+ tot_mean_af, data = t2)
summary(res.aov2)
tukey.test <- TukeyHSD(res.aov2)

t2$age_group <- factor(t2$age_group, levels = c("young", "old"))
t2$percentage <- t2$prop_dPos *100

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(colour = dummy_id),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes(colour = dummy_id, shape = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)

ggplot(t2, aes(x = age_group, y = percentage)) +
  geom_boxplot()+ 
  geom_jitter(aes( shape = tissue),
              position=position_dodge(width=0.75)) +
  theme_minimal() + 
  ylab("% EBF1+ PDGFRA+ of PDGFRA+") + 
  xlab("Age Group") + 
  theme(axis.text = element_text(size = 15))  + 
  theme(legend.text = element_text(size = 12))  + 
  theme(legend.title = element_text(size = 12))+
  scale_color_manual(values = mycoloursP)