###############################################################################################
## Load protein locations
###############################################################################################

# Interactive session on the cluster:
# srun -p interactive --pty bash

# NOTE(review): the "path/..." literals below look like redacted placeholders;
# supply the real input/output paths before running.

# Load packages ----
library(tidyverse)
library(data.table)
library(pacman)
p_load(tidyverse, data.table, magrittr, tools, ggpubr)

# Batch info ----
# Load batch info and list of Olink protein index files from Ben.
# Every column is read as character; the result is kept as a plain data.frame.
batchinfo <- fread("path/...", colClasses = c("character")) %>%
  as_tibble() %>%
  as.data.frame()

# Number of distinct identifiers per ID column
# (trailing numbers are the counts recorded by the original author).
length(unique(batchinfo$SampleID)) # 60463
length(unique(batchinfo$App_26041)) # 54309
length(unique(batchinfo$pseudo_ind_id)) # 54309
length(unique(batchinfo$UKBPPP_SampleID)) # 60463

# Olink protein index ----
# List of Olink protein measurements available (1472 protein observations):
# one row per distinct (UKBPPP_ProteinID, Panel) pair.
olink_proteins <- fread("path/...", colClasses = c("character")) %>%
  distinct(UKBPPP_ProteinID, Panel) %>%
  as_tibble() %>%
  as.data.frame()

# Replace every ':' in the first column with '.'. The identifiers then have the
# same dotted form as the column names used on `olink_internal` below
# (e.g. NPPB.P16860.OID20049.v1). `fixed = TRUE` treats ':' as a literal and
# handles any number of separators, instead of three chained sub() calls.
# NOTE(review): column 1 is presumably UKBPPP_ProteinID -- confirm.
olink_proteins[[1]] <- gsub(":", ".", olink_proteins[[1]], fixed = TRUE)

# P = everything before the first '.', i.e. the leading component of the ID.
olink_proteins$P <- gsub("\\..*", "", olink_proteins[[1]])

# Rows whose P value has already appeared earlier in the table.
# NOTE(review): the name `t` masks base::t() as a variable (function calls to
# t() still resolve correctly); kept because later code may refer to it.
t <- olink_proteins[duplicated(olink_proteins$P), ]

write.csv(olink_proteins, "path/...", row.names = FALSE)

# Protein measurements ----
# Protein measurements for 54,189 individuals and 1,474 protein levels
olink_internal <- read.csv("path/...")

# Check dist
hist(olink_internal$NPPB.P16860.OID20049.v1)

# Check missingness across one protein measurement
table(is.na(olink_internal$NPPB.P16860.OID20049.v1))