Last updated: 2020-03-17

Checks: 6 0

Knit directory: m6AQTL_reproducibleDocument/

This reproducible R Markdown analysis was created with workflowr (version 1.3.0). The Checks tab describes the reproducibility checks that were applied when the results were created. The Past versions tab lists the development history.


Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.

Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.

The command set.seed(20200317) was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.

Great job! Recording the operating system, R version, and package versions is critical for reproducibility.

Nice! There were no cached chunks for this analysis, so you can be confident that you successfully produced the results during this run.

Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated.

Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use wflow_publish or wflow_git_commit). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:


Untracked files:
    Untracked:  analysis/Re-analysis_of_Wang_2015_cell.Rmd

Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.


There are no past versions. Publish this analysis with wflow_publish() to start tracking its development.


1.Test enrichment of annotation using Torus

library(MeRIPtools)
allChr.est <- readRDS("~/m6AQTL/m6A_QTL_results/linear_model/m6AQTL.m6APeak_logOR_GC.IP.adjusted_qqnorm.15PCs.fastQTL.nominals.rds")
LCLs <- readRDS( "~/m6AQTL/peakcalling/MeRIPtools/jointPeak_threshold5_MeRIP.RDS")

vcf <- readRDS("~/m6AQTL/variant_data/sixtySample_hg19/MAF_filtered_allChr_vcf.RDS")


QTL.est <- data.frame(SNP = allChr.est$SNP, gene = allChr.est$PEAK, beta = allChr.est$beta, t.stat = allChr.est$statistic, p.value = allChr.est$pvalue )
colnames(QTL.est) <- c("SNP",   "gene", "beta", "t-stat",   "p-value")
m6Apeaks <- jointPeak(LCLs)
m6Apeaks <- m6Apeaks[which(paste0(m6Apeaks$chr,":",m6Apeaks$start,"-",m6Apeaks$end,"_",m6Apeaks$name,"_",m6Apeaks$strand ) %in% unique(allChr.est$PEAK)),]
gene.Map <- data.frame(Id = paste0(m6Apeaks$chr,":",m6Apeaks$start,"-",m6Apeaks$end,"_",m6Apeaks$name,"_",m6Apeaks$strand ),
                       Chr = m6Apeaks$chr,
                       TSS = ifelse(m6Apeaks$strand == "+",m6Apeaks$start,m6Apeaks$end),
                       TES = ifelse(m6Apeaks$strand == "+",m6Apeaks$start,m6Apeaks$end)
                       )


SNP.Map <- data.frame(ID = paste(vcf$CHROM,vcf$POS,sep = ":"), CHROM = vcf$CHROM, POS = vcf$POS)
SNP.Map <- SNP.Map[SNP.Map$ID %in% QTL.est$SNP,]
SNP.gr <- GRanges(seqnames = SNP.Map$CHROM,ranges = IRanges(start = SNP.Map$POS,end = SNP.Map$POS ) )

write.table(QTL.est, "~/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary", sep = "\t", col.names = T,row.names = F,quote = F)
write.table(gene.Map, "~/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map", sep = "\t", col.names = T,row.names = F,quote = F)
write.table(SNP.Map, "~/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map", sep = "\t", col.names = T,row.names = F,quote = F)
system("gzip ~/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary")
system("gzip ~/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map")
system("gzip ~/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map")

Run torus for each RBP

summaryFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary.gz"
snpMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map.gz"
geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"

registerDoParallel(30)
eachRBP.enrich <- foreach(rbp = unique(RBP.gr$V4), .combine = rbind )%dopar%{
  tmpRBP.gr <- RBP.gr[RBP.gr$V4 == rbp]
  
  tmpRBP.flag <-   unique( queryHits(findOverlaps(SNP.gr,tmpRBP.gr))  )
  tmpAnno <- data.frame(SNP = SNP.Map$ID , RBPs_d = 0)
  tmpAnno$RBPs_d[tmpRBP.flag] <- 1
  
  tmpAnnoFile <- tempfile(fileext = ".txt")
  write.table(tmpAnno, tmpAnnoFile,  sep = "\t", col.names = T,row.names = F,quote = F )
  system(paste0("gzip ",tmpAnnoFile))
  
  tmpEnrich <-  tempfile(fileext = ".est")
  system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
  tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[1]][3:5]
  
  unlink(tmpAnnoFile)
  unlink(tmpEnrich)
  
  data.frame(RBP = rbp, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
write.table(eachRBP.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedRBPs.txt",sep = "\t", col.names = T,row.names = F,quote = F)

Run torus for each microRNA-binding

miRNAbinding <- read.table("~/m6AQTL/mircoRNA/miRNA_target_in_YRI_LCLs.bed", sep = "\t", header = T)
miRNAbinding.gr <- makeGRangesFromDataFrame(miRNAbinding, keep.extra.columns = TRUE)

registerDoParallel(30)
eachMicro.enrich <- foreach( micro = unique(miRNAbinding$miRNA) , .combine = rbind )%dopar%{
  tmpMicro.gr <- miRNAbinding.gr[ miRNAbinding.gr$miRNA == micro ]
  
  tmpMicro.flag <- unique( queryHits(findOverlaps(SNP.gr,tmpMicro.gr))  )
  tmpAnno <- data.frame(SNP = SNP.Map$ID , micro_d = 0)
  if(length(tmpMicro.flag)==0){return(NULL)}
  tmpAnno$micro_d[ tmpMicro.flag ] <- 1
  
  tmpAnnoFile <- tempfile(fileext = ".txt")
  write.table(tmpAnno, tmpAnnoFile,  sep = "\t", col.names = T,row.names = F,quote = F )
  system(paste0("gzip ",tmpAnnoFile))
  
  tmpEnrich <-  tempfile(fileext = ".est")
  system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
  tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[1]][3:5]
  
  unlink(tmpAnnoFile)
  unlink(tmpEnrich)
  
  data.frame( microRNA = micro, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3] )
}

eachMicro.enrich <- eachMicro.enrich[order( as.numeric(as.character(eachMicro.enrich$logOddRatio)), decreasing = TRUE),]

write.table(eachMicro.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedMiroRNAs.txt",sep = "\t", col.names = T,row.names = F,quote = F)

Run torus for each TFs

summaryFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary.gz"
snpMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map.gz"
geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"

registerDoParallel(15)
eachTF.enrich <- foreach(tf = unique(TFs.gr$V4), .combine = rbind )%dopar%{
  tmpTF.gr <- TFs.gr[TFs.gr$V4 == tf]
  
  tmpTF.flag <-   unique( queryHits(findOverlaps(SNP.gr,tmpTF.gr))  )
  tmpAnno <- data.frame(SNP = SNP.Map$ID , TFs_d = 0)
  tmpAnno$TFs_d[tmpTF.flag] <- 1
  
  tmpAnnoFile <- tempfile(fileext = ".txt")
  write.table(tmpAnno, tmpAnnoFile,  sep = "\t", col.names = T,row.names = F,quote = F )
  system(paste0("gzip ",tmpAnnoFile))
  
  tmpEnrich <-  tempfile(fileext = ".est")
  system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
  tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[1]][3:5]
  
  unlink(tmpAnnoFile)
  unlink(tmpEnrich)
  
  data.frame(TF = tf, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
write.table(eachTF.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedTFs.txt",sep = "\t", col.names = T,row.names = F,quote = F)

each TF conditioned on POLR2A or H3K27ac

POLR2A.gr <- POLII.gr[POLII.gr$V4 == "POLR2A" ]
POLR2A.flag <- unique(queryHits( findOverlaps(SNP.gr,POLR2A.gr)  ) )


registerDoParallel(15)
eachTF_POLII.enrich <- foreach(tf = unique(TFs.gr$V4), .combine = rbind )%dopar%{
  tmpTF.gr <- TFs.gr[TFs.gr$V4 == tf]
  
  tmpTF.flag <-   unique( queryHits(findOverlaps(SNP.gr,tmpTF.gr))  )
  tmpAnno <- data.frame(SNP = SNP.Map$ID ,POLR2A_d = 0, TFs_d = 0)
  tmpAnno$TFs_d[tmpTF.flag] <- 1
  tmpAnno$POLR2A_d[POLR2A.flag] <- 1
  
  tmpAnnoFile <- tempfile(fileext = ".txt")
  write.table(tmpAnno, tmpAnnoFile,  sep = "\t", col.names = T,row.names = F,quote = F )
  system(paste0("gzip ",tmpAnnoFile))
  
  tmpEnrich <-  tempfile(fileext = ".est")
  system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
  tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[2]][3:5]
  
  unlink(tmpAnnoFile)
  unlink(tmpEnrich)
  
  data.frame(TF = tf, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
write.table(eachTF_POLII.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedTFs_POLII.txt",sep = "\t", col.names = T,row.names = F,quote = F)
##################################################

H3K27ac.gr <- HK.gr[HK.gr$V4 == Histones[1]]
H3K27ac.flag <- unique(queryHits( findOverlaps(SNP.gr,H3K27ac.gr)  ))

registerDoParallel(15)
eachTF_H3K27ac.enrich <- foreach(tf = unique(TFs.gr$V4), .combine = rbind )%dopar%{
  tmpTF.gr <- TFs.gr[TFs.gr$V4 == tf]
  
  tmpTF.flag <-   unique( queryHits(findOverlaps(SNP.gr,tmpTF.gr))  )
  tmpAnno <- data.frame(SNP = SNP.Map$ID ,H3K27ac_d = 0, TFs_d = 0)
  tmpAnno$TFs_d[tmpTF.flag] <- 1
  tmpAnno$H3K27ac_d[H3K27ac.flag] <- 1
  
  tmpAnnoFile <- tempfile(fileext = ".txt")
  write.table(tmpAnno, tmpAnnoFile,  sep = "\t", col.names = T,row.names = F,quote = F )
  system(paste0("gzip ",tmpAnnoFile))
  
  tmpEnrich <-  tempfile(fileext = ".est")
  system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
  tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[2]][3:5]
  
  unlink(tmpAnnoFile)
  unlink(tmpEnrich)
  
  data.frame(TF = tf, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
write.table(eachTF_H3K27ac.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedTFs_H3K27ac.txt",sep = "\t", col.names = T,row.names = F,quote = F)

Test enrichment of annotation using matched control SNPs and Fisher’s exact test

Take the independent SNPs from the SuSiE fine-mapping result.

## take top SNP per Credible set to get independent SNPs
SusieResult <- readRDS("/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/fineMapping/Susie_noPrior/SusieResult_qvalue_0.1.RDS")
SusieResult <- SusieResult[order(SusieResult$PIP, decreasing = T),]
sig.m6AQTL <- SusieResult[!duplicated(paste(SusieResult$PEAK,SusieResult$CS)) & !is.na(SusieResult$CS),]


InputSNPs <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieSNPs.inCS_qvalue_0.1/snpsnap_anno_input_exist.txt", sep = "\t", header = T, stringsAsFactors = F)
InputSNPs <- InputSNPs[which(InputSNPs$snpID %in%  gsub("chr","",sig.m6AQTL$SNP) ),]

InputSNPs.gr <- GRanges(seqnames = paste0("chr",unlist(lapply(strsplit(InputSNPs$snpID,":"),function(x){x[1]}))),IRanges(start = as.integer(unlist(lapply(strsplit(InputSNPs$snpID,":"),function(x){x[2]}))),end = as.integer(unlist(lapply(strsplit(InputSNPs$snpID,":"),function(x){x[2]})))))
values(InputSNPs.gr) <- DataFrame(snpID = InputSNPs$snpID)

CtlSNPs <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieSNPs.inCS_qvalue_0.1/SNPmatch_100_anno_gene_matchedSNPs.txt",sep = "\t",header = F, stringsAsFactors = F)
CtlSNPs <- CtlSNPs[!duplicated(CtlSNPs$V1),]
CtlSNPs <- CtlSNPs[CtlSNPs$V1 %in%  gsub("chr","",sig.m6AQTL$SNP) ,]
CtlSNPs_sets <- CtlSNPs[match(CtlSNPs$V1, InputSNPs$snpID),4:103]
colnames(CtlSNPs_sets) <- 1:100
names(InputSNPs.gr) <- rownames(CtlSNPs_sets)<- InputSNPs$snpID

CtlSNPs_sets <- CtlSNPs_sets[- which( apply(CtlSNPs_sets,1,function(x){any(is.na(x))}) ),]

CtlSNPs.gr <- foreach( i = 1:100, .combine = c)%do%{
  tmp_chr <- unlist(lapply(strsplit(CtlSNPs_sets[,i],":"),function(x){x[1]}))
  tmp_pos <-  as.integer(unlist(lapply(strsplit(CtlSNPs_sets[,i],":"),function(x){x[2]})) )
  tmp_gr <- GRanges(paste0("chr",tmp_chr),IRanges(tmp_pos,tmp_pos))
  values(tmp_gr) <- DataFrame(snpID = CtlSNPs_sets[,i] ,InputSNP = rownames(CtlSNPs_sets), matchSet = i )
  tmp_gr
}
save(InputSNPs,InputSNPs.gr,CtlSNPs_sets,CtlSNPs.gr,file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/matchedSNPs_forSusieSNPs.inCS_qvalue_0.1.RData")

RBP

RBP.bed <- read.table("~/m6AQTL/RBP_near_peaks/all.RBP.intersect.hg19.bed",sep = "\t",header = F, stringsAsFactors = F)
RBP.gr <- makeGRangesFromDataFrame(RBP.bed,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)

QTLs_RBP <- findOverlaps(InputSNPs.gr,RBP.gr)
Ctl_RBP <- findOverlaps(CtlSNPs.gr, RBP.gr)


ENCODE_RBP_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_RBP)) ) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_RBP)) )  ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_RBP))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_RBP))) ) ),
      alternative = "two.sided"
      )

CHIPseq.bed <- read.table("~/m6AQTL/TF_binding/all_TFs.bed",sep = "\t",header = F, stringsAsFactors = F)
Histones <- unique(CHIPseq.bed$V4)[c(grep("H3K",unique(CHIPseq.bed$V4)),grep("H4K",unique(CHIPseq.bed$V4)))]
POLII <- unique(CHIPseq.bed$V4)[c(grep("POLR",unique(CHIPseq.bed$V4)) ) ]
TFs.bed <- dplyr::filter( CHIPseq.bed, ! V4 %in% c(Histones,POLII) )
TFs.gr <- makeGRangesFromDataFrame(TFs.bed,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)

QTLs_TF <- findOverlaps(InputSNPs.gr,TFs.gr)
Ctl_TF <- findOverlaps(CtlSNPs.gr, TFs.gr)

ENCODE_TF_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_TF))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_TF))) ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_TF))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_TF))) ) ),
      alternative = "two.sided"
      )

HK.gr <- makeGRangesFromDataFrame(CHIPseq.bed[CHIPseq.bed$V4%in%Histones,] ,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)
Histone_fisher <- foreach(i = 1:length(Histones), .combine = rbind)%do%{
  tmp_gr <- HK.gr[HK.gr$V4 == Histones[i]]
  QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
  Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
  tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
      alternative = "two.sided"
      )
  data.frame(feature = Histones[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}

POLII.gr <- makeGRangesFromDataFrame(CHIPseq.bed[CHIPseq.bed$V4%in%POLII,] ,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)
POLII_fisher <- foreach(i = 1:length(POLII), .combine = rbind)%do%{
  tmp_gr <- POLII.gr[POLII.gr$V4 == POLII[i]]
  QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
  Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
  tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
      alternative = "two.sided"
      )
  data.frame(feature = POLII[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}

saveRDS(rbind(data.frame(feature = "RBP CLIPseq peak",oddRatio = ENCODE_RBP_fisher$estimate, CI05 = ENCODE_RBP_fisher$conf.int[1], CI95 = ENCODE_RBP_fisher$conf.int[2], pvalue = ENCODE_RBP_fisher$p.value, row.names = NULL),
data.frame(feature = "TFs ChIPseq peak",oddRatio = ENCODE_TF_fisher$estimate, CI05 = ENCODE_TF_fisher$conf.int[1], CI95 = ENCODE_TF_fisher$conf.int[2], pvalue = ENCODE_TF_fisher$p.value, row.names = NULL),
Histone_fisher, POLII_fisher[1:3,]), file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieNoPrior_m6AQTL_enrichInFeatures_fisherTest.RDS" )

each RBP binding from eCLIPseq peaks

registerDoParallel(10)
RBPs_fisher <- foreach(i = 1:length(unique(RBP.gr$V4)), .combine = rbind)%dopar%{
  tmp_gr <- RBP.gr[RBP.gr$V4 == unique(RBP.gr$V4)[i]]
  QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
  Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
  tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
      alternative = "two.sided"
      )
  data.frame(feature = unique(RBP.gr$V4)[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
RBPs_fisher <- RBPs_fisher[order(RBPs_fisher$pvalue, decreasing = F),]
RBPs_fisher <- RBPs_fisher[RBPs_fisher$oddRatio!=0,]
RBPs_fisher$p.ajd <- p.adjust(RBPs_fisher$pvalue,method = "fdr")
save(RBPs_fisher, file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieNoPrior_m6AQTL_enrich_RBP_encode.RData")

m6A consensus motif (RRACH)

library(motifbreakR)
library(BSgenome.Hsapiens.UCSC.hg19)
library(SNPlocs.Hsapiens.dbSNP144.GRCh37)
library(MotifDb)
library(dplyr)

## try to get rsid from matched SNPs
write.table(data.frame(chr = gsub("chr","",seqnames(CtlSNPs.gr)), start(CtlSNPs.gr)),"~/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/SusieNoPrior_q0.1_matched_SNPs.txt",sep = "\t",col.names = F,row.names = F,quote = F)

system("vcftools --gzvcf /home/zijiezhang/m6AQTL/variant_data/common_all_20180423.vcf.gz --out /home/zijiezhang/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/matchedSNPs_vcf --positions /home/zijiezhang/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/SusieNoPrior_q0.1_matched_SNPs.txt --recode" )


match.vcf <- read.table("/home/zijiezhang/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/matchedSNPs_vcf.recode.vcf",sep = "\t",col.names = c("CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO"))
## Filter for biallelic and single nucleotide SNPs. 
match.vcf <- match.vcf[nchar(as.character(match.vcf$REF))==1 & nchar(as.character(match.vcf$ALT))==1,]

CtlSNPs.gr$name <- paste(match.vcf$CHROM,match.vcf$POS,match.vcf$REF,match.vcf$ALT,sep = ":")[match(CtlSNPs.gr$snpID,paste(match.vcf$CHROM,match.vcf$POS,sep = ":"))]

vcf <- readRDS("~/m6AQTL/variant_data/sixtySample_hg19/MAF_filtered_allChr_vcf.RDS")
sig.m6AQTL_vcf <-  vcf[paste0(vcf$CHROM,":",vcf$POS) %in% unique(sig.m6AQTL$SNP), ]
## filter for biallelic input snps
sig.m6AQTL_vcf <- sig.m6AQTL_vcf[nchar(as.character(sig.m6AQTL_vcf$REF))==1 & nchar(as.character(sig.m6AQTL_vcf$ALT))==1,]
InputSNPs.gr$name <- paste(gsub("chr","",sig.m6AQTL_vcf$CHROM),sig.m6AQTL_vcf$POS,sig.m6AQTL_vcf$REF,sig.m6AQTL_vcf$ALT,sep = ":")[match(InputSNPs.gr$snpID, paste(gsub("chr","",sig.m6AQTL_vcf$CHROM),sig.m6AQTL_vcf$POS,sep = ":"))]

## Prepare data for match motif
Ctl_snps_filter <- CtlSNPs.gr[!is.na(CtlSNPs.gr$name)]
numSNP <- table(Ctl_snps_filter$matchSet)

Input_snps_filter <- InputSNPs.gr[!is.na(InputSNPs.gr$name)]


## load transcribed region
geneModel <- m6Amonster::gtfToGeneModel(gtf = "~/Database/genome/hg19/hg19_UCSC.gtf")
transcribed_region <- range(geneModel)


motifMetadata <- DataFrame(providerName = paste0("m6A_Concensus_motif",1:3),
                           providerId = paste0("m6A_Concensus_motif",1:3),
                           dataSource = "Homer2_de_novo",
                           geneSymbol = "m6A",
                           geneId =NA, geneIdType = NA, proteinId = NA, proteinIdType = NA, 
                           organism = "Hsapiens",
                           sequenceCount = 6312, 
                           bindingSequence = NA, bindingDomain = NA, tfFamily = NA, experimentType = "high-throughput methods", 
                           pubmedID = NA
                           )
motif_ppm <- list( t( read.table(paste0("~/m6AQTL/m6A_QTL_results/linear_model/Homer2/homerResults/motif",1,".motif"), header = F, comment.char = ">",col.names = c("A","C","G","T"), row.names = paste(1:7)) ),
                   t( read.table(paste0("~/m6AQTL/m6A_QTL_results/linear_model/Homer2/homerResults/motif",2,".motif"), header = F, comment.char = ">",col.names = c("A","C","G","T"), row.names = paste(1:7)) ),
                   t( read.table(paste0("~/m6AQTL/m6A_QTL_results/linear_model/Homer2/homerResults/motif",3,".motif"), header = F, comment.char = ">",col.names = c("A","C","G","T"), row.names = paste(1:6)) ) )
names(motif_ppm) <- paste(motifMetadata$organism,motifMetadata$dataSource,motifMetadata$providerName,sep = "-")


MotifList_m6A <- new("MotifList",elementMetadata = motifMetadata ,manuallyCuratedGeneMotifAssociationTable = data.frame(), listData = motif_ppm )


## match motif for each control set
for( i in 1:100){
  batchSNPs <- Ctl_snps_filter[Ctl_snps_filter$matchSet == i ]
  batchSNPs_bed <- as.data.frame(batchSNPs)[,c(1:3,9,4,5)]
  batchSNPs_bed$start <- batchSNPs_bed$start - 1
  batchSNPs_bed <- mutate_if(batchSNPs_bed,is.numeric, as.integer)
  write.table(batchSNPs_bed,file = "~/m6AQTL/m6AQTL_analysis/data/tmpControlQTL_bed",sep = "\t",col.names = F, row.names = F, quote = F )
  batchSNP.mb <- snps.from.file( file = "~/m6AQTL/m6AQTL_analysis/data/tmpControlQTL_bed", dbSNP = SNPlocs.Hsapiens.dbSNP144.GRCh37 ,search.genome = BSgenome.Hsapiens.UCSC.hg19, format = "bed" )
  file.remove("~/m6AQTL/m6AQTL_analysis/data/tmpControlQTL_bed")
  

  batchResult <- motifbreakR(snpList = batchSNP.mb, filterp = TRUE,
                             pwmList = MotifList_m6A,
                             threshold = 1e-3,
                             method = "ic",
                             bkg = c(A=0.25, C=0.25, G=0.25, T=0.25),
                             BPPARAM = BiocParallel::MulticoreParam(workers = 10) 
                             )
  saveRDS(batchResult, file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_controlQTL",i,"_motifBreak_GGACU.rds"))
}
allCtlSNPs <- foreach(i = 1:100, .combine = c)%do%{
  readRDS(paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_controlQTL",i,"_motifBreak_GGACU.rds"))
}

LCLs <- readRDS( "~/m6AQTL/peakcalling/MeRIPtools/jointPeak_threshold5_MeRIP.RDS")
m6Apeaks <- jointPeak(LCLs)

m6APeaks <- m6Apeaks[which(paste0(m6Apeaks$chr,":",m6Apeaks$start,"-",m6Apeaks$end,"_",m6Apeaks$name,"_",m6Apeaks$strand ) %in% unique(SusieResult$PEAK)),]

m6APeaks.GRList <- MeRIPtools:::.peakToGRangesList(m6APeaks)
onPeak.id <- unique( queryHits( findOverlaps(allCtlSNPs,m6APeaks.GRList ) )  )
saveRDS(allCtlSNPs[onPeak.id], file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allControlQTL_motifBreak_GGACU_motif.rds"))

## QTLs set

Input_snps_filter_peak.bed <- as.data.frame(Input_snps_filter[unique(queryHits( findOverlaps(Input_snps_filter, m6APeaks.GRList) ))])[,c(1:3,7,4,5)]
Input_snps_filter_peak.bed$start <- Input_snps_filter_peak.bed$start - 1 
Input_snps_filter_peak.bed <- mutate_if(Input_snps_filter_peak.bed,is.numeric, as.integer)
write.table(Input_snps_filter_peak.bed,file = "~/m6AQTL/m6AQTL_analysis/data/tmpAllInputQTL_bed",sep = "\t",col.names = F, row.names = F, quote = F )
batchSNP.mb <- snps.from.file( file = "~/m6AQTL/m6AQTL_analysis/data/tmpAllInputQTL_bed", dbSNP = SNPlocs.Hsapiens.dbSNP144.GRCh37 ,search.genome = BSgenome.Hsapiens.UCSC.hg19, format = "bed" )
file.remove("~/m6AQTL/m6AQTL_analysis/data/tmpAllInputQTL_bed")
batchResult <- motifbreakR(snpList = batchSNP.mb, filterp = TRUE,
                             pwmList = MotifList_m6A,
                             threshold = 1e-3,
                             method = "ic",
                             bkg = c(A=0.25, C=0.25, G=0.25, T=0.25),
                             BPPARAM = BiocParallel::MulticoreParam(workers = 10) 
                             )

onPeak.id <- unique( queryHits( findOverlaps(batchResult,m6APeaks.GRList ) )  )
saveRDS(batchResult[onPeak.id], file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allInputQTL_motifBreak_GGACU_motif.rds"))
load("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/matchedSNPs_forSusieSNPs.inCS_qvalue_0.1.RData")
GGACUmotif_QTL <- readRDS( file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allInputQTL_motifBreak_GGACU_motif.rds"))
GGACUmotif_Ctl <- readRDS(file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allControlQTL_motifBreak_GGACU_motif.rds"))

Ctl_snps_filter_noDup <- Ctl_snps_filter[!duplicated(Ctl_snps_filter$snpID)]
Input_snps_filter_noDup <- Input_snps_filter[!duplicated(Input_snps_filter$snpID)]

Ctl_snps_filter_noDup_onTrans <- Ctl_snps_filter_noDup[unique( queryHits( findOverlaps(Ctl_snps_filter_noDup,transcribed_region ) )  )]
Input_snps_filter_noDup_onTrans <- Input_snps_filter_noDup[unique( queryHits( findOverlaps(Input_snps_filter_noDup,transcribed_region ) )  )]


GGACU_motifBreak_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(names(GGACUmotif_QTL))) , length(Input_snps_filter_noDup_onTrans) - length(unique(names(GGACUmotif_QTL))) ),
      "CtlSNPs" = c(length(unique(names(GGACUmotif_Ctl))), length(Ctl_snps_filter_noDup_onTrans) - length(unique(names(GGACUmotif_Ctl))) ) ),
      alternative = "two.sided"
      )
saveRDS(data.frame(feature = "GGACU motifBreak",oddRatio = GGACU_motifBreak_fisher$estimate, CI05 = GGACU_motifBreak_fisher$conf.int[1], CI95 = GGACU_motifBreak_fisher$conf.int[2], pvalue = GGACU_motifBreak_fisher$p.value, row.names = NULL),file =  "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_m6AQTL_enrichInGGACUmotifBreak_fisherTest.RDS")

microRNA binding sites

miRNAbinding <- read.table("~/m6AQTL/mircoRNA/miRNA_target_in_YRI_LCLs.bed", sep = "\t", header = T)
miRNAbinding.gr <- makeGRangesFromDataFrame(miRNAbinding)


QTLs_miRNA <- findOverlaps(InputSNPs.gr,miRNAbinding.gr)
Ctl_miRNA <- findOverlaps(CtlSNPs.gr, miRNAbinding.gr)


ENCODE_miRNA_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_miRNA)) ) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_miRNA)) )  ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_miRNA))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_miRNA))) ) ),
      alternative = "two.sided"
      )

ribo-SNitch

RiboSNitch <- read.table("~/m6AQTL/RiboSNitch/riboSNitches.hg19.csv", sep = "\t", header = T)
RiboSNitch.gr <- makeGRangesFromDataFrame(RiboSNitch, start.field = "position", end.field = "position", ignore.strand = T)


QTLs_riboSNitch <- findOverlaps(InputSNPs.gr,RiboSNitch.gr)
Ctl_riboSNitch <- findOverlaps(CtlSNPs.gr, RiboSNitch.gr)


ENCODE_riboSNitch_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_riboSNitch)) ) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_riboSNitch)) )  ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_riboSNitch))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_riboSNitch))) ) ),
      alternative = "two.sided"
      )

#save(ENCODE_miRNA_fisher,ENCODE_riboSNitch_fisher,ENCODE_RBP_fisher, RBP_motifBreak_fisher,GGACU_motifBreak_fisher,RBP_motifBreak_inPeak_fisher,file = "~/m6AQTL/m6A_QTL_results/beta-binomial/7PCs/matchedSNPs/enrichRNAfeatures.RData")
save(ENCODE_miRNA_fisher,ENCODE_riboSNitch_fisher,ENCODE_RBP_fisher,GGACU_motifBreak_fisher,file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_enrichRNAfeatures.RData")

TF binding from encode ChIPseq peaks

registerDoParallel(10)
TFs_fisher <- foreach(i = 1:length(unique(TFs.gr$V4)), .combine = rbind)%dopar%{
  tmp_gr <- TFs.gr[TFs.gr$V4 == unique(TFs.gr$V4)[i]]
  QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
  Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
  tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
      "CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
      alternative = "two.sided"
      )
  data.frame(feature = unique(TFs.gr$V4)[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
TFs_fisher <- TFs_fisher[order(TFs_fisher$pvalue, decreasing = F),]
TFs_fisher <- TFs_fisher[TFs_fisher$oddRatio != 0 ,]
TFs_fisher$p.ajd <- p.adjust(TFs_fisher$pvalue,method = "fdr")
save(TFs_fisher, file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_SusieDistOnly_m6AQTL_enrich_TF_encode.RData")

sessionInfo()
R version 3.5.3 (2019-03-11)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 17.10

Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] workflowr_1.3.0 Rcpp_1.0.1      digest_0.6.18   rprojroot_1.3-2
 [5] backports_1.1.4 git2r_0.25.2    magrittr_1.5    evaluate_0.13  
 [9] stringi_1.4.3   fs_1.3.0        rmarkdown_1.12  tools_3.5.3    
[13] stringr_1.4.0   glue_1.3.1      xfun_0.6        yaml_2.2.0     
[17] compiler_3.5.3  htmltools_0.3.6 knitr_1.22