Luise-Seeker-Human-WM-Glia / src / 74_Validation_RBFOX1_OPALIN_OLIG2_IF_FOV_thresholds.Rmd
74_Validation_RBFOX1_OPALIN_OLIG2_IF_FOV_thresholds.Rmd
Raw
---
title: "Validation_rbfox1_opalin_olig2_IF"
author: "Luise A. Seeker"
date: "09/08/2022"
output: html_document
---
RBFOX1, OPALIN and OLIG2 co-labelling in 3 BA4 and 3 CSC samples to validate 
clusters using immuno-fluorescence.


```{r}

library(ggplot2)
library(lme4)
library(lmerTest)
library(here)
library(tidyr)
library(ggsci)
library(Seurat)
```

```{r, echo = F}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)

mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)

```




```{r}
rbfox1_opalin_olig2 <- read.csv(here("data", 
                   "validation_data", 
                   "OPALIN_RBFOX1_OLIG2_quant.csv"))

```


```{r}
names(rbfox1_opalin_olig2)

rbfox1_opalin_olig2$tissue <- as.factor(rbfox1_opalin_olig2$Tissue)


rbfox1_opalin_olig2$sample_id <- factor(rbfox1_opalin_olig2$sample_id,
                                     levels = c("SD012_15_BA4",
                                                "SD021_17_BA4",
                                                "SD042_18_BA4",
                                                "SD011_18_CSC",
                                                "SD015_12_CSC",
                                                "SD042_18_CSC"))
```

Convert from wide to long format to see how many single, double and triple
positive cells are in each sample

```{r}

keycol <- "sample_id"
valuecol <- "count"
gathercols <- c("Num_Detections",
                "Num_OLIG2",
                "Num_OPALIN", 
                "Num_RBFOX1",
                "Num_OPALIN_OLIG2",
                "Num_OLIG2_RBFOX1",
                "Num_OPALIN_RBFOX1",
                "Num.OPALIN..OLIG2..RBFOX1")

long_data <- gather(rbfox1_opalin_olig2, keycol, valuecol, gathercols)

head(long_data)
nrow(long_data)
names(long_data)
```






```{r}


ggplot(long_data, aes(x = sample_id , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="stack", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_fill_manual(values= mycoloursP[25:50])





ggplot(long_data, aes(x = sample_id , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="fill", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +scale_fill_manual(values= mycoloursP[25:50])


```
Remove cells that are negative for all tested markers:


```{r}

keycol <- "sample_id"
valuecol <- "count"
gathercols <- c("Num_OLIG2",
                "Num_OPALIN", 
                "Num_RBFOX1",
                "Num_OPALIN_OLIG2",
                "Num_OLIG2_RBFOX1",
                "Num_OPALIN_RBFOX1",
                "Num.OPALIN..OLIG2..RBFOX1")

long_data <- gather(rbfox1_opalin_olig2, keycol, valuecol, gathercols)

head(long_data)
nrow(long_data)
names(long_data)


ggplot(long_data, aes(x = sample_id , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="stack", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_fill_manual(values= mycoloursP[25:50])





ggplot(long_data, aes(x = sample_id , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="fill", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +scale_fill_manual(values= mycoloursP[25:50])

```


Exclude cells that are not positive for OLIG2
```{r}

keycol <- "sample_id"
valuecol <- "count"
gathercols <- c("Num_OLIG2",
                "Num_OPALIN_OLIG2",
                "Num_OLIG2_RBFOX1",
                "Num.OPALIN..OLIG2..RBFOX1")

long_data <- gather(rbfox1_opalin_olig2, keycol, valuecol, gathercols)

ggplot(long_data, aes(x = sample_id , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="stack", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_fill_manual(values= mycoloursP[25:50])





ggplot(long_data, aes(x = sample_id , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="fill", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +scale_fill_manual(values= mycoloursP[25:50])


```
collapse tissue
```{r}

ggplot(long_data, aes(x = tissue , 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="stack", stat="identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_fill_manual(values= mycoloursP[25:50])





ggplot(long_data, aes(x = tissue, 
                          y = valuecol, 
                          fill = keycol)) +
  geom_bar(position="fill", stat="identity")+ 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  scale_fill_manual(values= mycoloursP[25:50])


```

```{r}
rbfox1_opalin_olig2$perc_opalin_OLIG2 <- (rbfox1_opalin_olig2$Num_OPALIN_OLIG2/
                                            (rbfox1_opalin_olig2$Num_OLIG2 +
                                               rbfox1_opalin_olig2$Num_OPALIN_OLIG2+
                                               rbfox1_opalin_olig2$Num_OLIG2_RBFOX1+
                                               rbfox1_opalin_olig2$Num.OPALIN..OLIG2..RBFOX1))*100

rbfox1_opalin_olig2$perc_rbfox1_olig2 <- (rbfox1_opalin_olig2$Num_OLIG2_RBFOX1/
                                            (rbfox1_opalin_olig2$Num_OLIG2 +
                                               rbfox1_opalin_olig2$Num_OLIG2_RBFOX1+
                                               rbfox1_opalin_olig2$Num_OPALIN_OLIG2+
                                               rbfox1_opalin_olig2$Num.OPALIN..OLIG2..RBFOX1))*100

rbfox1_opalin_olig2$perc_double_pos_olig <- (rbfox1_opalin_olig2$Num.OPALIN..OLIG2..RBFOX1/
                                            (rbfox1_opalin_olig2$Num_OLIG2 +
                                               rbfox1_opalin_olig2$Num_OLIG2_RBFOX1+
                                               rbfox1_opalin_olig2$Num_OPALIN_OLIG2+
                                               rbfox1_opalin_olig2$Num.OPALIN..OLIG2..RBFOX1))*100

rbfox1_opalin_olig2$perc_olig2 <- (rbfox1_opalin_olig2$Num_OLIG2/
                                            (rbfox1_opalin_olig2$Num_OLIG2 +
                                               rbfox1_opalin_olig2$Num_OLIG2_RBFOX1+
                                               rbfox1_opalin_olig2$Num_OPALIN_OLIG2+
                                               rbfox1_opalin_olig2$Num.OPALIN..OLIG2..RBFOX1))*100




keycol <- "sample_id"
valuecol <- "percentage"
gathercols <- c("perc_opalin_OLIG2",
                "perc_rbfox1_olig2", 
                "perc_double_pos_olig",
                "perc_olig2")

df_protein <- gather(rbfox1_opalin_olig2, keycol, valuecol, gathercols)

df_protein$gene_expr <- ifelse(df_protein$keycol == "perc_rbfox1_olig2",
                               "RBFOX1+", ifelse(df_protein$keycol == 
                                                   "perc_opalin_OLIG2",
                                                 "OPALIN+", 
                                                 ifelse(df_protein$keycol ==
                                                          "perc_olig2", "OLIG2+",
                                                        "RBFOX1+OPALIN+")))


df_protein$valuecol <- as.numeric(df_protein$valuecol)


# This one is over 100% because it is the added pecentage across all FOVs and 
# samples within a tissue


ggplot(df_protein, aes(x = Tissue, y = valuecol, fill = gene_expr))+
  geom_bar(stat="identity")+scale_fill_manual(values= mycoloursP[25:50])+
  ylab("Percentage")+
  theme(legend.title= element_blank()) 



ggplot(df_protein, aes(x = Tissue, y=valuecol, fill = gene_expr))+
  geom_bar(position="fill", 
           stat="identity")+scale_fill_manual(values= mycoloursP[25:50])+
  ylab("Percentage")+
  theme(legend.title= element_blank()) 

```


```{r}
ggplot(rbfox1_opalin_olig2, aes(x = tissue, y = perc_opalin_OLIG2)) +
  geom_boxplot()+ geom_jitter(width = 0.2, aes(colour = sample_id)) +
  theme_bw(19) + ylab("OPALIN+OLIG2+HOECHST+ (%)") + xlab("Tissue")

```


```{r}
ggplot(rbfox1_opalin_olig2, aes(x = tissue, y = perc_rbfox1_olig2)) +
  geom_boxplot()+ geom_jitter(width = 0.2, aes(colour = sample_id)) +
  theme_bw(19) + ylab("RBFOX1+OLIG2+HOECHST+ (%)") + xlab("Tissue")

```


```{r}
ggplot(rbfox1_opalin_olig2, aes(x = tissue, y = perc_double_pos_olig)) +
  geom_boxplot()+ geom_jitter(width = 0.2, aes(colour = sample_id)) +
  theme_bw(11) + ylab("RBFOX1+OPALIN+OLIG2+HOECHST+ (%)") + xlab("Tissue")

```


```{r}

lm_prot_double <- lm(rbfox1_opalin_olig2$perc_double_pos_olig~ rbfox1_opalin_olig2$tissue)
summary(lm_prot_double)
anova(lm_prot_double)
```

```{r}

lm_prot_rbfox1 <- lm(rbfox1_opalin_olig2$perc_rbfox1_olig2~ rbfox1_opalin_olig2$tissue)
summary(lm_prot_rbfox1)
anova(lm_prot_rbfox1)
```

```{r}

lm_prot_opalin <- lm(rbfox1_opalin_olig2$perc_opalin_OLIG2~ rbfox1_opalin_olig2$tissue)
summary(lm_prot_opalin)
anova(lm_prot_opalin)
```


```{r}

ti_csc <- subset(rbfox1_opalin_olig2, rbfox1_opalin_olig2$tissue == "CSC")
ti_ba4 <- subset(rbfox1_opalin_olig2, rbfox1_opalin_olig2$tissue == "BA4")

t.test(ti_csc$perc_double_pos_olig, ti_ba4$perc_double_pos_olig)
t.test(ti_csc$perc_opalin_OLIG2, ti_ba4$perc_opalin_OLIG2)
t.test(ti_csc$perc_rbfox1_olig2, ti_ba4$perc_rbfox1_olig2)
```

```{r}

ti_csc <- subset(rbfox1_opalin_olig2, rbfox1_opalin_olig2$tissue == "CSC")
ti_ba4 <- subset(rbfox1_opalin_olig2, rbfox1_opalin_olig2$tissue == "BA4")

t.test(ti_csc$perc_double_pos_olig, ti_ba4$perc_double_pos_olig)
t.test(ti_csc$perc_opalin_OLIG2, ti_ba4$perc_opalin_OLIG2)
t.test(ti_csc$perc_rbfox1_olig2, ti_ba4$perc_rbfox1_olig2)
```



```{r}
kruskal.test(perc_double_pos_olig ~ tissue, data = rbfox1_opalin_olig2)
kruskal.test(perc_opalin_OLIG2 ~ tissue, data = rbfox1_opalin_olig2)
kruskal.test(perc_rbfox1_olig2 ~ tissue, data = rbfox1_opalin_olig2)

```

```{r}
nad_ol <-  readRDS(here("data", 
                           "single_nuc_data", 
                           "oligodendroglia",
                           "srt_oligos_and_opcs_LS.RDS"))

Idents(nad_ol) <- "ol_clusters_named"


Idents(nad_ol) <- "Tissue"

csc_srt <- subset(nad_ol, idents = "CSC")
cb_srt <- subset(nad_ol, idents = "CB")
ba4_srt <- subset(nad_ol, idents = "BA4")

cd_genes <- c("RBFOX1")


a_csc <- DotPlot(object = csc_srt, features = cd_genes)
a_csc$data
mean(a_csc$data$pct.exp)


a_cb <- DotPlot(object = cb_srt, features = cd_genes)
a_cb$data
mean(a_cb$data$pct.exp)

a_ba4 <- DotPlot(object = ba4_srt, features = cd_genes)
a_ba4$data
mean(a_ba4$data$pct.exp)


df <- data.frame(modality = rep(c("RBFOX1_neg", "RBFOX1_pos"),3), 
                 tissue = c("BA4", "BA4", "CB", "CB", "CSC", "CSC"),
                 percentage = c(100- mean(a_ba4$data$pct.exp),
                                mean(a_ba4$data$pct.exp), 
                                100- mean(a_cb$data$pct.exp),
                                mean(a_cb$data$pct.exp), 
                                100- mean(a_csc$data$pct.exp),
                                mean(a_csc$data$pct.exp)))



ggplot(df, aes(x = tissue, y=percentage, fill = modality))+
  geom_bar(stat="identity")+scale_fill_manual(values= mycoloursP[25:50])

```
OPALIN expression in snRNAseq

```{r}
cd_genes <- c("OPALIN")


a_csc <- DotPlot(object = csc_srt, features = cd_genes)
a_csc$data
mean(a_csc$data$pct.exp)


a_cb <- DotPlot(object = cb_srt, features = cd_genes)
a_cb$data
mean(a_cb$data$pct.exp)

a_ba4 <- DotPlot(object = ba4_srt, features = cd_genes)
a_ba4$data
mean(a_ba4$data$pct.exp)


df <- data.frame(modality = rep(c("OPALIN_neg", "OPALIN_pos"),3), 
                 tissue = c("BA4", "BA4", "CB", "CB", "CSC", "CSC"),
                 percentage = c(100- mean(a_ba4$data$pct.exp),
                                mean(a_ba4$data$pct.exp), 
                                100- mean(a_cb$data$pct.exp),
                                mean(a_cb$data$pct.exp), 
                                100- mean(a_csc$data$pct.exp),
                                mean(a_csc$data$pct.exp)))



ggplot(df, aes(x = tissue, y=percentage, fill = modality))+
  geom_bar(stat="identity")+scale_fill_manual(values= mycoloursP[25:50])

```

OPALIN and RBFOX1 co-expression in snRNAseq

```{r}
Idents(nad_ol) <- "ol_clusters_named"
oligos <- subset(nad_ol, idents = c("Oligo_A",
                                    "Oligo_B",
                                    "Oligo_C",
                                    "Oligo_D",
                                    "Oligo_E",
                                    "Oligo_F"))
Idents(oligos) <- "Tissue"
ba4_ol <-subset(oligos, idents = "BA4")
cb_ol <-subset(oligos, idents = "CB")
csc_ol <-subset(oligos, idents = "CSC")


rbfox1 <- 'RBFOX1' 
opalin <- 'OPALIN' 
rbfox1.cutoff <- 1 
opalin.cutoff <- 1 

#BA4
rbfox1_cells_ba4 <- length(which(FetchData(ba4_ol, vars = rbfox1) > rbfox1.cutoff & 
                                      FetchData(ba4_ol, vars = opalin) < opalin.cutoff))
opalin_cells_ba4 <- length(which(FetchData(ba4_ol, vars = opalin) > opalin.cutoff& 
                                      FetchData(ba4_ol, vars = rbfox1) < rbfox1.cutoff))
rbfox1_opalin_cells_ba4 <- length(which(FetchData(ba4_ol, vars = opalin) > opalin.cutoff & 
                                      FetchData(ba4_ol, vars = rbfox1) > rbfox1.cutoff))
all_cells_incluster_ba4 <- table(ba4_ol@active.ident)
rbfox1_ba4 <-rbfox1_cells_ba4/all_cells_incluster_ba4 * 100 
# Percentage of cells in dataset that express rbfox1

opalin_ba4 <- opalin_cells_ba4/all_cells_incluster_ba4 * 100 
#Percentage of cells in dataset that express opalin
double_ba4 <- rbfox1_opalin_cells_ba4/all_cells_incluster_ba4 * 100 
#Percentage of cells in dataset that co-express rbfox1 + opalin

#CB
rbfox1_cells_cb <- length(which(FetchData(cb_ol, vars = rbfox1) > rbfox1.cutoff& 
                                      FetchData(cb_ol, vars = opalin) < opalin.cutoff))
opalin_cells_cb <- length(which(FetchData(cb_ol, vars = opalin) > opalin.cutoff& 
                                      FetchData(cb_ol, vars = rbfox1) < rbfox1.cutoff))
rbfox1_opalin_cells_cb <- length(which(FetchData(cb_ol, vars = opalin) > 
                                         opalin.cutoff & FetchData(cb_ol, vars = rbfox1) >
                                         rbfox1.cutoff))
all_cells_incluster_cb <- table(cb_ol@active.ident)
rbfox1_cb <- rbfox1_cells_cb/all_cells_incluster_cb * 100 # Percentage of cells in dataset that express rbfox1
opalin_cb <- opalin_cells_cb/all_cells_incluster_cb * 100 #Percentage of cells in dataset that express opalin
double_cb <- rbfox1_opalin_cells_cb/all_cells_incluster_cb * 100 #Percentage of cells in dataset that co-express rbfox1 + opalin

#CSC
rbfox1_cells_csc <- length(which(FetchData(csc_ol, vars = rbfox1) > rbfox1.cutoff& 
                                      FetchData(csc_ol, vars = opalin) < opalin.cutoff))
opalin_cells_csc <- length(which(FetchData(csc_ol, vars = opalin) > opalin.cutoff& 
                                      FetchData(csc_ol, vars = rbfox1) < rbfox1.cutoff))
rbfox1_opalin_cells_csc <- length(which(FetchData(csc_ol, vars = opalin) > 
                                          opalin.cutoff & FetchData(csc_ol, vars = rbfox1) >
                                          rbfox1.cutoff))
all_cells_incluster_csc <- table(csc_ol@active.ident)
rbfox1_csc <- rbfox1_cells_csc/all_cells_incluster_csc * 100 
# Percentage of cells in dataset that express rbfox1
opalin_csc <- opalin_cells_csc/all_cells_incluster_csc * 100 
#Percentage of cells in dataset that express opalin
double_csc <- rbfox1_opalin_cells_csc/all_cells_incluster_csc * 100 
#Percentage of cells in dataset that co-express rbfox1 + opalin

df_snrna_seq <- data.frame(Tissue = c(rep("BA4", 4), 
                                     rep("CB", 4),
                                     rep("CSC", 4)),
                           gene_expr = rep(c("RBFOX1+", 
                                             "OPALIN+",
                                             "RBFOX1+OPALIN+",
                                             "Other oligodendrocytes"), 3),
                           percentage = c(as.numeric(rbfox1_ba4), 
                                          as.numeric(opalin_ba4),
                                          as.numeric(double_ba4),
                                          100 - (as.numeric(rbfox1_ba4)+ 
                                          as.numeric(opalin_ba4)+
                                          as.numeric(double_ba4)),
                                          
                                          as.numeric(rbfox1_cb), 
                                          as.numeric(opalin_cb),
                                          as.numeric(double_cb),
                                          100 - (as.numeric(rbfox1_cb)+ 
                                          as.numeric(opalin_cb)+
                                          as.numeric(double_cb)),
                                          
                                          
                                          as.numeric(rbfox1_csc), 
                                          as.numeric(opalin_csc),
                                          as.numeric(double_csc),
                                          100 - (as.numeric(rbfox1_csc)+ 
                                          as.numeric(opalin_csc)+
                                          as.numeric(double_csc))))


ggplot(df_snrna_seq, aes(x = Tissue, y=percentage, fill = gene_expr))+
  geom_bar(stat="identity")+scale_fill_manual(values= mycoloursP[25:50])
```
```{r}

subs_df_snrna_seq <- subset(df_snrna_seq, df_snrna_seq$gene_expr != "Other oligodendrocytes")

ggplot(subs_df_snrna_seq, aes(x = Tissue, y=percentage, fill = gene_expr))+
  geom_bar(stat="identity")+scale_fill_manual(values= mycoloursP[25:50])

ggplot(subs_df_snrna_seq, aes(x = Tissue, y=percentage, fill = gene_expr))+
  geom_bar(stat="identity", position = "fill")+scale_fill_manual(values= mycoloursP[25:50])


```



Conclusion:
I stained for RBFOX1, OPALIN and OLIG2 in immuno fluorescence stainings and 
found a larger proportion of double positive cells than expected from snRNAseq
data. 

```{r}
sessionInfo()

```