# Join per-read coordinates with per-read tail sequences and write a TSV.
#
# Command-line options:
#   -f / --file   tab-separated, headerless file with columns: id, chr, pos
#   -n / --file2  tab-separated, headerless file with columns: id_R2, tail_seq
#                 (only the part of id_R2 before the first space is used as
#                 the join key)
#   -c / --code   value written to the "code" column of every output row
#   -o / --out    output path (default: results2.csv)
#
# Output: headerless, unquoted TSV with columns id, tail_seq, chr, pos, code.

library("biomaRt")
library("tidyr")
library("stringr")
library("dplyr")
library("sqldf")
library("stringi")
library("optparse")

option_list <- list(
  make_option(c("-f", "--file"), type = "character", default = NULL,
              help = "file name", metavar = "character"),
  make_option(c("-n", "--file2"), type = "character", default = NULL,
              help = "file name2", metavar = "character"),
  make_option(c("-c", "--code"), type = "character", default = NULL,
              help = "code", metavar = "character"),
  make_option(c("-o", "--out"), type = "character", default = "results2.csv",
              help = "output file name with full path [default= %default]",
              metavar = "character")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# All three of these are needed further down; fail early with a clear message
# instead of an obscure error from read.csv() or select().
required_opts <- c("file", "file2", "code")
is_missing <- vapply(
  required_opts,
  function(nm) is.null(opt[[nm]]),
  logical(1)
)
if (any(is_missing)) {
  print_help(opt_parser)
  stop(
    "Missing required option(s): ",
    paste0("--", required_opts[is_missing], collapse = ", "),
    ".\n",
    call. = FALSE
  )
}

f <- opt[["file"]]
f2 <- opt[["file2"]]
filetowrite <- opt[["out"]]
code <- opt[["code"]]

print("Reading file")
df_raw <- read.csv(f, sep = "\t", header = FALSE,
                   na.strings = c("", " ", "NA"))
colnames(df_raw) <- c("id", "chr", "pos")
# Strip every space from the id so it can match the key derived from file 2.
df_raw$id <- gsub(" ", "", df_raw$id, fixed = TRUE)

print("Reading file 2")
df <- read.csv(f2, sep = "\t", header = FALSE,
               na.strings = c("", " ", "NA"))
colnames(df) <- c("id_R2", "tail_seq")

# Keep only the text before the first space as the join key. `extra`/`fill`
# give the same values as the defaults but without a warning for ids that
# contain zero or more than one space.
df2 <- df %>%
  separate(id_R2, c("id_R", "garbage"), sep = " ",
           extra = "drop", fill = "right")
df2$garbage <- NULL

# Inner join on the read id (SQL semantics: NA ids never match).
merged <- sqldf("
  SELECT *
  FROM df_raw d1
  JOIN df2 d2 ON d1.id = d2.id_R
")

# rep() keeps this valid when the join is empty; assigning a length-1 value
# to a zero-row data frame is an error.
merged$code <- rep(code, nrow(merged))

df3 <- select(merged, "id", "tail_seq", "chr", "pos", "code")
write.table(df3, filetowrite, sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = FALSE)