Last updated: 2020-03-17
Checks: 6 0
Knit directory: m6AQTL_reproducibleDocument/
This reproducible R Markdown analysis was created with workflowr (version 1.3.0). The Checks tab describes the reproducibility checks that were applied when the results were created. The Past versions tab lists the development history.
Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.
Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.
The command set.seed(20200317)
was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.
Great job! Recording the operating system, R version, and package versions is critical for reproducibility.
Nice! There were no cached chunks for this analysis, so you can be confident that you successfully produced the results during this run.
Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated.
Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use wflow_publish
or wflow_git_commit
). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:
Untracked files:
Untracked: analysis/Re-analysis_of_Wang_2015_cell.Rmd
Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.
There are no past versions. Publish this analysis with wflow_publish()
to start tracking its development.
library(MeRIPtools)
allChr.est <- readRDS("~/m6AQTL/m6A_QTL_results/linear_model/m6AQTL.m6APeak_logOR_GC.IP.adjusted_qqnorm.15PCs.fastQTL.nominals.rds")
LCLs <- readRDS( "~/m6AQTL/peakcalling/MeRIPtools/jointPeak_threshold5_MeRIP.RDS")
vcf <- readRDS("~/m6AQTL/variant_data/sixtySample_hg19/MAF_filtered_allChr_vcf.RDS")
QTL.est <- data.frame(SNP = allChr.est$SNP, gene = allChr.est$PEAK, beta = allChr.est$beta, t.stat = allChr.est$statistic, p.value = allChr.est$pvalue )
colnames(QTL.est) <- c("SNP", "gene", "beta", "t-stat", "p-value")
m6Apeaks <- jointPeak(LCLs)
m6Apeaks <- m6Apeaks[which(paste0(m6Apeaks$chr,":",m6Apeaks$start,"-",m6Apeaks$end,"_",m6Apeaks$name,"_",m6Apeaks$strand ) %in% unique(allChr.est$PEAK)),]
gene.Map <- data.frame(Id = paste0(m6Apeaks$chr,":",m6Apeaks$start,"-",m6Apeaks$end,"_",m6Apeaks$name,"_",m6Apeaks$strand ),
Chr = m6Apeaks$chr,
TSS = ifelse(m6Apeaks$strand == "+",m6Apeaks$start,m6Apeaks$end),
TES = ifelse(m6Apeaks$strand == "+",m6Apeaks$start,m6Apeaks$end)
)
SNP.Map <- data.frame(ID = paste(vcf$CHROM,vcf$POS,sep = ":"), CHROM = vcf$CHROM, POS = vcf$POS)
SNP.Map <- SNP.Map[SNP.Map$ID %in% QTL.est$SNP,]
SNP.gr <- GRanges(seqnames = SNP.Map$CHROM,ranges = IRanges(start = SNP.Map$POS,end = SNP.Map$POS ) )
write.table(QTL.est, "~/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary", sep = "\t", col.names = T,row.names = F,quote = F)
write.table(gene.Map, "~/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map", sep = "\t", col.names = T,row.names = F,quote = F)
write.table(SNP.Map, "~/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map", sep = "\t", col.names = T,row.names = F,quote = F)
system("gzip ~/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary")
system("gzip ~/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map")
system("gzip ~/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map")
summaryFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary.gz"
snpMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map.gz"
geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
registerDoParallel(30)
eachRBP.enrich <- foreach(rbp = unique(RBP.gr$V4), .combine = rbind )%dopar%{
tmpRBP.gr <- RBP.gr[RBP.gr$V4 == rbp]
tmpRBP.flag <- unique( queryHits(findOverlaps(SNP.gr,tmpRBP.gr)) )
tmpAnno <- data.frame(SNP = SNP.Map$ID , RBPs_d = 0)
tmpAnno$RBPs_d[tmpRBP.flag] <- 1
tmpAnnoFile <- tempfile(fileext = ".txt")
write.table(tmpAnno, tmpAnnoFile, sep = "\t", col.names = T,row.names = F,quote = F )
system(paste0("gzip ",tmpAnnoFile))
tmpEnrich <- tempfile(fileext = ".est")
system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[1]][3:5]
unlink(tmpAnnoFile)
unlink(tmpEnrich)
data.frame(RBP = rbp, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
write.table(eachRBP.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedRBPs.txt",sep = "\t", col.names = T,row.names = F,quote = F)
miRNAbinding <- read.table("~/m6AQTL/mircoRNA/miRNA_target_in_YRI_LCLs.bed", sep = "\t", header = T)
miRNAbinding.gr <- makeGRangesFromDataFrame(miRNAbinding, keep.extra.columns = TRUE)
registerDoParallel(30)
eachMicro.enrich <- foreach( micro = unique(miRNAbinding$miRNA) , .combine = rbind )%dopar%{
tmpMicro.gr <- miRNAbinding.gr[ miRNAbinding.gr$miRNA == micro ]
tmpMicro.flag <- unique( queryHits(findOverlaps(SNP.gr,tmpMicro.gr)) )
tmpAnno <- data.frame(SNP = SNP.Map$ID , micro_d = 0)
if(length(tmpMicro.flag)==0){return(NULL)}
tmpAnno$micro_d[ tmpMicro.flag ] <- 1
tmpAnnoFile <- tempfile(fileext = ".txt")
write.table(tmpAnno, tmpAnnoFile, sep = "\t", col.names = T,row.names = F,quote = F )
system(paste0("gzip ",tmpAnnoFile))
tmpEnrich <- tempfile(fileext = ".est")
system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[1]][3:5]
unlink(tmpAnnoFile)
unlink(tmpEnrich)
data.frame( microRNA = micro, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3] )
}
eachMicro.enrich <- eachMicro.enrich[order( as.numeric(as.character(eachMicro.enrich$logOddRatio)), decreasing = TRUE),]
write.table(eachMicro.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedMiroRNAs.txt",sep = "\t", col.names = T,row.names = F,quote = F)
summaryFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/allChr.summary.gz"
snpMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/snp.map.gz"
geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
registerDoParallel(15)
eachTF.enrich <- foreach(tf = unique(TFs.gr$V4), .combine = rbind )%dopar%{
tmpTF.gr <- TFs.gr[TFs.gr$V4 == tf]
tmpTF.flag <- unique( queryHits(findOverlaps(SNP.gr,tmpTF.gr)) )
tmpAnno <- data.frame(SNP = SNP.Map$ID , TFs_d = 0)
tmpAnno$TFs_d[tmpTF.flag] <- 1
tmpAnnoFile <- tempfile(fileext = ".txt")
write.table(tmpAnno, tmpAnnoFile, sep = "\t", col.names = T,row.names = F,quote = F )
system(paste0("gzip ",tmpAnnoFile))
tmpEnrich <- tempfile(fileext = ".est")
system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[1]][3:5]
unlink(tmpAnnoFile)
unlink(tmpEnrich)
data.frame(TF = tf, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
write.table(eachTF.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedTFs.txt",sep = "\t", col.names = T,row.names = F,quote = F)
POLR2A.gr <- POLII.gr[POLII.gr$V4 == "POLR2A" ]
POLR2A.flag <- unique(queryHits( findOverlaps(SNP.gr,POLR2A.gr) ) )
registerDoParallel(15)
eachTF_POLII.enrich <- foreach(tf = unique(TFs.gr$V4), .combine = rbind )%dopar%{
tmpTF.gr <- TFs.gr[TFs.gr$V4 == tf]
tmpTF.flag <- unique( queryHits(findOverlaps(SNP.gr,tmpTF.gr)) )
tmpAnno <- data.frame(SNP = SNP.Map$ID ,POLR2A_d = 0, TFs_d = 0)
tmpAnno$TFs_d[tmpTF.flag] <- 1
tmpAnno$POLR2A_d[POLR2A.flag] <- 1
tmpAnnoFile <- tempfile(fileext = ".txt")
write.table(tmpAnno, tmpAnnoFile, sep = "\t", col.names = T,row.names = F,quote = F )
system(paste0("gzip ",tmpAnnoFile))
tmpEnrich <- tempfile(fileext = ".est")
system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[2]][3:5]
unlink(tmpAnnoFile)
unlink(tmpEnrich)
data.frame(TF = tf, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
write.table(eachTF_POLII.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedTFs_POLII.txt",sep = "\t", col.names = T,row.names = F,quote = F)
##################################################
H3K27ac.gr <- HK.gr[HK.gr$V4 == Histones[1]]
H3K27ac.flag <- unique(queryHits( findOverlaps(SNP.gr,H3K27ac.gr) ))
registerDoParallel(15)
eachTF_H3K27ac.enrich <- foreach(tf = unique(TFs.gr$V4), .combine = rbind )%dopar%{
tmpTF.gr <- TFs.gr[TFs.gr$V4 == tf]
tmpTF.flag <- unique( queryHits(findOverlaps(SNP.gr,tmpTF.gr)) )
tmpAnno <- data.frame(SNP = SNP.Map$ID ,H3K27ac_d = 0, TFs_d = 0)
tmpAnno$TFs_d[tmpTF.flag] <- 1
tmpAnno$H3K27ac_d[H3K27ac.flag] <- 1
tmpAnnoFile <- tempfile(fileext = ".txt")
write.table(tmpAnno, tmpAnnoFile, sep = "\t", col.names = T,row.names = F,quote = F )
system(paste0("gzip ",tmpAnnoFile))
tmpEnrich <- tempfile(fileext = ".est")
system2("torus",paste0("-d ",summaryFile," -smap ",snpMapFile," -gmap ",geneMapFile <- "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/annotation/gene.map.gz"
," -annot ",tmpAnnoFile,".gz -est > ",tmpEnrich) )
tmpEst <- strsplit(system2("awk",paste0("-F' +' -v OFS='\ ' '{sub(/ +$/,\"\");$1=$1}1' ",tmpEnrich," |awk 'NR>2{print$0}' "), stdout = TRUE),split = " ")[[2]][3:5]
unlink(tmpAnnoFile)
unlink(tmpEnrich)
data.frame(TF = tf, logOddRatio = tmpEst[1], CI05 = tmpEst[2], CI95 = tmpEst[3])
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
write.table(eachTF_H3K27ac.enrich, file = "/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/enrichedTFs_H3K27ac.txt",sep = "\t", col.names = T,row.names = F,quote = F)
Take the independent SNPs from the SuSiE fine-mapping result.
## take top SNP per Credible set to get independent SNPs
SusieResult <- readRDS("/home/zijiezhang/m6AQTL/m6A_QTL_results/linear_model/fineMapping/Susie_noPrior/SusieResult_qvalue_0.1.RDS")
SusieResult <- SusieResult[order(SusieResult$PIP, decreasing = T),]
sig.m6AQTL <- SusieResult[!duplicated(paste(SusieResult$PEAK,SusieResult$CS)) & !is.na(SusieResult$CS),]
InputSNPs <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieSNPs.inCS_qvalue_0.1/snpsnap_anno_input_exist.txt", sep = "\t", header = T, stringsAsFactors = F)
InputSNPs <- InputSNPs[which(InputSNPs$snpID %in% gsub("chr","",sig.m6AQTL$SNP) ),]
InputSNPs.gr <- GRanges(seqnames = paste0("chr",unlist(lapply(strsplit(InputSNPs$snpID,":"),function(x){x[1]}))),IRanges(start = as.integer(unlist(lapply(strsplit(InputSNPs$snpID,":"),function(x){x[2]}))),end = as.integer(unlist(lapply(strsplit(InputSNPs$snpID,":"),function(x){x[2]})))))
values(InputSNPs.gr) <- DataFrame(snpID = InputSNPs$snpID)
CtlSNPs <- read.table("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieSNPs.inCS_qvalue_0.1/SNPmatch_100_anno_gene_matchedSNPs.txt",sep = "\t",header = F, stringsAsFactors = F)
CtlSNPs <- CtlSNPs[!duplicated(CtlSNPs$V1),]
CtlSNPs <- CtlSNPs[CtlSNPs$V1 %in% gsub("chr","",sig.m6AQTL$SNP) ,]
CtlSNPs_sets <- CtlSNPs[match(CtlSNPs$V1, InputSNPs$snpID),4:103]
colnames(CtlSNPs_sets) <- 1:100
names(InputSNPs.gr) <- rownames(CtlSNPs_sets)<- InputSNPs$snpID
CtlSNPs_sets <- CtlSNPs_sets[- which( apply(CtlSNPs_sets,1,function(x){any(is.na(x))}) ),]
CtlSNPs.gr <- foreach( i = 1:100, .combine = c)%do%{
tmp_chr <- unlist(lapply(strsplit(CtlSNPs_sets[,i],":"),function(x){x[1]}))
tmp_pos <- as.integer(unlist(lapply(strsplit(CtlSNPs_sets[,i],":"),function(x){x[2]})) )
tmp_gr <- GRanges(paste0("chr",tmp_chr),IRanges(tmp_pos,tmp_pos))
values(tmp_gr) <- DataFrame(snpID = CtlSNPs_sets[,i] ,InputSNP = rownames(CtlSNPs_sets), matchSet = i )
tmp_gr
}
save(InputSNPs,InputSNPs.gr,CtlSNPs_sets,CtlSNPs.gr,file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/matchedSNPs_forSusieSNPs.inCS_qvalue_0.1.RData")
RBP.bed <- read.table("~/m6AQTL/RBP_near_peaks/all.RBP.intersect.hg19.bed",sep = "\t",header = F, stringsAsFactors = F)
RBP.gr <- makeGRangesFromDataFrame(RBP.bed,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)
QTLs_RBP <- findOverlaps(InputSNPs.gr,RBP.gr)
Ctl_RBP <- findOverlaps(CtlSNPs.gr, RBP.gr)
ENCODE_RBP_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_RBP)) ) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_RBP)) ) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_RBP))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_RBP))) ) ),
alternative = "two.sided"
)
CHIPseq.bed <- read.table("~/m6AQTL/TF_binding/all_TFs.bed",sep = "\t",header = F, stringsAsFactors = F)
Histones <- unique(CHIPseq.bed$V4)[c(grep("H3K",unique(CHIPseq.bed$V4)),grep("H4K",unique(CHIPseq.bed$V4)))]
POLII <- unique(CHIPseq.bed$V4)[c(grep("POLR",unique(CHIPseq.bed$V4)) ) ]
TFs.bed <- dplyr::filter( CHIPseq.bed, ! V4 %in% c(Histones,POLII) )
TFs.gr <- makeGRangesFromDataFrame(TFs.bed,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)
QTLs_TF <- findOverlaps(InputSNPs.gr,TFs.gr)
Ctl_TF <- findOverlaps(CtlSNPs.gr, TFs.gr)
ENCODE_TF_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_TF))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_TF))) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_TF))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_TF))) ) ),
alternative = "two.sided"
)
HK.gr <- makeGRangesFromDataFrame(CHIPseq.bed[CHIPseq.bed$V4%in%Histones,] ,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)
Histone_fisher <- foreach(i = 1:length(Histones), .combine = rbind)%do%{
tmp_gr <- HK.gr[HK.gr$V4 == Histones[i]]
QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
alternative = "two.sided"
)
data.frame(feature = Histones[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}
POLII.gr <- makeGRangesFromDataFrame(CHIPseq.bed[CHIPseq.bed$V4%in%POLII,] ,seqnames.field = "V1",start.field = "V2",end.field = "V3",strand.field = "V6",keep.extra.columns = T)
POLII_fisher <- foreach(i = 1:length(POLII), .combine = rbind)%do%{
tmp_gr <- POLII.gr[POLII.gr$V4 == POLII[i]]
QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
alternative = "two.sided"
)
data.frame(feature = POLII[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}
saveRDS(rbind(data.frame(feature = "RBP CLIPseq peak",oddRatio = ENCODE_RBP_fisher$estimate, CI05 = ENCODE_RBP_fisher$conf.int[1], CI95 = ENCODE_RBP_fisher$conf.int[2], pvalue = ENCODE_RBP_fisher$p.value, row.names = NULL),
data.frame(feature = "TFs ChIPseq peak",oddRatio = ENCODE_TF_fisher$estimate, CI05 = ENCODE_TF_fisher$conf.int[1], CI95 = ENCODE_TF_fisher$conf.int[2], pvalue = ENCODE_TF_fisher$p.value, row.names = NULL),
Histone_fisher, POLII_fisher[1:3,]), file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieNoPrior_m6AQTL_enrichInFeatures_fisherTest.RDS" )
registerDoParallel(10)
RBPs_fisher <- foreach(i = 1:length(unique(RBP.gr$V4)), .combine = rbind)%dopar%{
tmp_gr <- RBP.gr[RBP.gr$V4 == unique(RBP.gr$V4)[i]]
QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
alternative = "two.sided"
)
data.frame(feature = unique(RBP.gr$V4)[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
RBPs_fisher <- RBPs_fisher[order(RBPs_fisher$pvalue, decreasing = F),]
RBPs_fisher <- RBPs_fisher[RBPs_fisher$oddRatio!=0,]
RBPs_fisher$p.ajd <- p.adjust(RBPs_fisher$pvalue,method = "fdr")
save(RBPs_fisher, file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/SusieNoPrior_m6AQTL_enrich_RBP_encode.RData")
library(motifbreakR)
library(BSgenome.Hsapiens.UCSC.hg19)
library(SNPlocs.Hsapiens.dbSNP144.GRCh37)
library(MotifDb)
library(dplyr)
## try to get rsid from matched SNPs
write.table(data.frame(chr = gsub("chr","",seqnames(CtlSNPs.gr)), start(CtlSNPs.gr)),"~/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/SusieNoPrior_q0.1_matched_SNPs.txt",sep = "\t",col.names = F,row.names = F,quote = F)
system("vcftools --gzvcf /home/zijiezhang/m6AQTL/variant_data/common_all_20180423.vcf.gz --out /home/zijiezhang/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/matchedSNPs_vcf --positions /home/zijiezhang/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/SusieNoPrior_q0.1_matched_SNPs.txt --recode" )
match.vcf <- read.table("/home/zijiezhang/m6AQTL/variant_data/SusieNoPrior_q0.1_linearModel/matchedSNPs_vcf.recode.vcf",sep = "\t",col.names = c("CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO"))
## Filter for biallelic and single nucleotide SNPs.
match.vcf <- match.vcf[nchar(as.character(match.vcf$REF))==1 & nchar(as.character(match.vcf$ALT))==1,]
CtlSNPs.gr$name <- paste(match.vcf$CHROM,match.vcf$POS,match.vcf$REF,match.vcf$ALT,sep = ":")[match(CtlSNPs.gr$snpID,paste(match.vcf$CHROM,match.vcf$POS,sep = ":"))]
vcf <- readRDS("~/m6AQTL/variant_data/sixtySample_hg19/MAF_filtered_allChr_vcf.RDS")
sig.m6AQTL_vcf <- vcf[paste0(vcf$CHROM,":",vcf$POS) %in% unique(sig.m6AQTL$SNP), ]
## filter for biallelic input snps
sig.m6AQTL_vcf <- sig.m6AQTL_vcf[nchar(as.character(sig.m6AQTL_vcf$REF))==1 & nchar(as.character(sig.m6AQTL_vcf$ALT))==1,]
InputSNPs.gr$name <- paste(gsub("chr","",sig.m6AQTL_vcf$CHROM),sig.m6AQTL_vcf$POS,sig.m6AQTL_vcf$REF,sig.m6AQTL_vcf$ALT,sep = ":")[match(InputSNPs.gr$snpID, paste(gsub("chr","",sig.m6AQTL_vcf$CHROM),sig.m6AQTL_vcf$POS,sep = ":"))]
## Prepare data for match motif
Ctl_snps_filter <- CtlSNPs.gr[!is.na(CtlSNPs.gr$name)]
numSNP <- table(Ctl_snps_filter$matchSet)
Input_snps_filter <- InputSNPs.gr[!is.na(InputSNPs.gr$name)]
## load transcribed region
geneModel <- m6Amonster::gtfToGeneModel(gtf = "~/Database/genome/hg19/hg19_UCSC.gtf")
transcribed_region <- range(geneModel)
motifMetadata <- DataFrame(providerName = paste0("m6A_Concensus_motif",1:3),
providerId = paste0("m6A_Concensus_motif",1:3),
dataSource = "Homer2_de_novo",
geneSymbol = "m6A",
geneId =NA, geneIdType = NA, proteinId = NA, proteinIdType = NA,
organism = "Hsapiens",
sequenceCount = 6312,
bindingSequence = NA, bindingDomain = NA, tfFamily = NA, experimentType = "high-throughput methods",
pubmedID = NA
)
motif_ppm <- list( t( read.table(paste0("~/m6AQTL/m6A_QTL_results/linear_model/Homer2/homerResults/motif",1,".motif"), header = F, comment.char = ">",col.names = c("A","C","G","T"), row.names = paste(1:7)) ),
t( read.table(paste0("~/m6AQTL/m6A_QTL_results/linear_model/Homer2/homerResults/motif",2,".motif"), header = F, comment.char = ">",col.names = c("A","C","G","T"), row.names = paste(1:7)) ),
t( read.table(paste0("~/m6AQTL/m6A_QTL_results/linear_model/Homer2/homerResults/motif",3,".motif"), header = F, comment.char = ">",col.names = c("A","C","G","T"), row.names = paste(1:6)) ) )
names(motif_ppm) <- paste(motifMetadata$organism,motifMetadata$dataSource,motifMetadata$providerName,sep = "-")
MotifList_m6A <- new("MotifList",elementMetadata = motifMetadata ,manuallyCuratedGeneMotifAssociationTable = data.frame(), listData = motif_ppm )
## match motif for each control set
for( i in 1:100){
batchSNPs <- Ctl_snps_filter[Ctl_snps_filter$matchSet == i ]
batchSNPs_bed <- as.data.frame(batchSNPs)[,c(1:3,9,4,5)]
batchSNPs_bed$start <- batchSNPs_bed$start - 1
batchSNPs_bed <- mutate_if(batchSNPs_bed,is.numeric, as.integer)
write.table(batchSNPs_bed,file = "~/m6AQTL/m6AQTL_analysis/data/tmpControlQTL_bed",sep = "\t",col.names = F, row.names = F, quote = F )
batchSNP.mb <- snps.from.file( file = "~/m6AQTL/m6AQTL_analysis/data/tmpControlQTL_bed", dbSNP = SNPlocs.Hsapiens.dbSNP144.GRCh37 ,search.genome = BSgenome.Hsapiens.UCSC.hg19, format = "bed" )
file.remove("~/m6AQTL/m6AQTL_analysis/data/tmpControlQTL_bed")
batchResult <- motifbreakR(snpList = batchSNP.mb, filterp = TRUE,
pwmList = MotifList_m6A,
threshold = 1e-3,
method = "ic",
bkg = c(A=0.25, C=0.25, G=0.25, T=0.25),
BPPARAM = BiocParallel::MulticoreParam(workers = 10)
)
saveRDS(batchResult, file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_controlQTL",i,"_motifBreak_GGACU.rds"))
}
allCtlSNPs <- foreach(i = 1:100, .combine = c)%do%{
readRDS(paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_controlQTL",i,"_motifBreak_GGACU.rds"))
}
LCLs <- readRDS( "~/m6AQTL/peakcalling/MeRIPtools/jointPeak_threshold5_MeRIP.RDS")
m6Apeaks <- jointPeak(LCLs)
m6APeaks <- m6Apeaks[which(paste0(m6Apeaks$chr,":",m6Apeaks$start,"-",m6Apeaks$end,"_",m6Apeaks$name,"_",m6Apeaks$strand ) %in% unique(SusieResult$PEAK)),]
m6APeaks.GRList <- MeRIPtools:::.peakToGRangesList(m6APeaks)
onPeak.id <- unique( queryHits( findOverlaps(allCtlSNPs,m6APeaks.GRList ) ) )
saveRDS(allCtlSNPs[onPeak.id], file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allControlQTL_motifBreak_GGACU_motif.rds"))
## QTLs set
Input_snps_filter_peak.bed <- as.data.frame(Input_snps_filter[unique(queryHits( findOverlaps(Input_snps_filter, m6APeaks.GRList) ))])[,c(1:3,7,4,5)]
Input_snps_filter_peak.bed$start <- Input_snps_filter_peak.bed$start - 1
Input_snps_filter_peak.bed <- mutate_if(Input_snps_filter_peak.bed,is.numeric, as.integer)
write.table(Input_snps_filter_peak.bed,file = "~/m6AQTL/m6AQTL_analysis/data/tmpAllInputQTL_bed",sep = "\t",col.names = F, row.names = F, quote = F )
batchSNP.mb <- snps.from.file( file = "~/m6AQTL/m6AQTL_analysis/data/tmpAllInputQTL_bed", dbSNP = SNPlocs.Hsapiens.dbSNP144.GRCh37 ,search.genome = BSgenome.Hsapiens.UCSC.hg19, format = "bed" )
file.remove("~/m6AQTL/m6AQTL_analysis/data/tmpAllInputQTL_bed")
batchResult <- motifbreakR(snpList = batchSNP.mb, filterp = TRUE,
pwmList = MotifList_m6A,
threshold = 1e-3,
method = "ic",
bkg = c(A=0.25, C=0.25, G=0.25, T=0.25),
BPPARAM = BiocParallel::MulticoreParam(workers = 10)
)
onPeak.id <- unique( queryHits( findOverlaps(batchResult,m6APeaks.GRList ) ) )
saveRDS(batchResult[onPeak.id], file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allInputQTL_motifBreak_GGACU_motif.rds"))
load("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/matchedSNPs_forSusieSNPs.inCS_qvalue_0.1.RData")
GGACUmotif_QTL <- readRDS( file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allInputQTL_motifBreak_GGACU_motif.rds"))
GGACUmotif_Ctl <- readRDS(file = paste0("~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_allControlQTL_motifBreak_GGACU_motif.rds"))
Ctl_snps_filter_noDup <- Ctl_snps_filter[!duplicated(Ctl_snps_filter$snpID)]
Input_snps_filter_noDup <- Input_snps_filter[!duplicated(Input_snps_filter$snpID)]
Ctl_snps_filter_noDup_onTrans <- Ctl_snps_filter_noDup[unique( queryHits( findOverlaps(Ctl_snps_filter_noDup,transcribed_region ) ) )]
Input_snps_filter_noDup_onTrans <- Input_snps_filter_noDup[unique( queryHits( findOverlaps(Input_snps_filter_noDup,transcribed_region ) ) )]
GGACU_motifBreak_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(names(GGACUmotif_QTL))) , length(Input_snps_filter_noDup_onTrans) - length(unique(names(GGACUmotif_QTL))) ),
"CtlSNPs" = c(length(unique(names(GGACUmotif_Ctl))), length(Ctl_snps_filter_noDup_onTrans) - length(unique(names(GGACUmotif_Ctl))) ) ),
alternative = "two.sided"
)
saveRDS(data.frame(feature = "GGACU motifBreak",oddRatio = GGACU_motifBreak_fisher$estimate, CI05 = GGACU_motifBreak_fisher$conf.int[1], CI95 = GGACU_motifBreak_fisher$conf.int[2], pvalue = GGACU_motifBreak_fisher$p.value, row.names = NULL),file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_m6AQTL_enrichInGGACUmotifBreak_fisherTest.RDS")
miRNAbinding <- read.table("~/m6AQTL/mircoRNA/miRNA_target_in_YRI_LCLs.bed", sep = "\t", header = T)
miRNAbinding.gr <- makeGRangesFromDataFrame(miRNAbinding)
QTLs_miRNA <- findOverlaps(InputSNPs.gr,miRNAbinding.gr)
Ctl_miRNA <- findOverlaps(CtlSNPs.gr, miRNAbinding.gr)
ENCODE_miRNA_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_miRNA)) ) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_miRNA)) ) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_miRNA))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_miRNA))) ) ),
alternative = "two.sided"
)
RiboSNitch <- read.table("~/m6AQTL/RiboSNitch/riboSNitches.hg19.csv", sep = "\t", header = T)
RiboSNitch.gr <- makeGRangesFromDataFrame(RiboSNitch, start.field = "position", end.field = "position", ignore.strand = T)
QTLs_riboSNitch <- findOverlaps(InputSNPs.gr,RiboSNitch.gr)
Ctl_riboSNitch <- findOverlaps(CtlSNPs.gr, RiboSNitch.gr)
ENCODE_riboSNitch_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_riboSNitch)) ) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_riboSNitch)) ) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_riboSNitch))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_riboSNitch))) ) ),
alternative = "two.sided"
)
#save(ENCODE_miRNA_fisher,ENCODE_riboSNitch_fisher,ENCODE_RBP_fisher, RBP_motifBreak_fisher,GGACU_motifBreak_fisher,RBP_motifBreak_inPeak_fisher,file = "~/m6AQTL/m6A_QTL_results/beta-binomial/7PCs/matchedSNPs/enrichRNAfeatures.RData")
save(ENCODE_miRNA_fisher,ENCODE_riboSNitch_fisher,ENCODE_RBP_fisher,GGACU_motifBreak_fisher,file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_enrichRNAfeatures.RData")
registerDoParallel(10)
TFs_fisher <- foreach(i = 1:length(unique(TFs.gr$V4)), .combine = rbind)%dopar%{
tmp_gr <- TFs.gr[TFs.gr$V4 == unique(TFs.gr$V4)[i]]
QTLs_tmp <- findOverlaps(InputSNPs.gr,tmp_gr)
Ctl_tmp <- findOverlaps(CtlSNPs.gr, tmp_gr)
tmp_fisher <- fisher.test(cbind("m6AQTL"=c(length(unique(queryHits(QTLs_tmp))) , length(InputSNPs.gr) - length(unique(queryHits(QTLs_tmp))) ),
"CtlSNPs" = c(length(unique(queryHits(Ctl_tmp))), length(CtlSNPs.gr) - length(unique(queryHits(Ctl_tmp))) ) ),
alternative = "two.sided"
)
data.frame(feature = unique(TFs.gr$V4)[i],oddRatio = tmp_fisher$estimate, CI05 = tmp_fisher$conf.int[1], CI95 = tmp_fisher$conf.int[2], pvalue = tmp_fisher$p.value, row.names = NULL)
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
TFs_fisher <- TFs_fisher[order(TFs_fisher$pvalue, decreasing = F),]
TFs_fisher <- TFs_fisher[TFs_fisher$oddRatio != 0 ,]
TFs_fisher$p.ajd <- p.adjust(TFs_fisher$pvalue,method = "fdr")
save(TFs_fisher, file = "~/m6AQTL/m6A_QTL_results/linear_model/fineMapping/matched_SNPs/MotifBreaks/SusieNoPrior_q0.1_SusieDistOnly_m6AQTL_enrich_TF_encode.RData")
sessionInfo()
R version 3.5.3 (2019-03-11)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 17.10
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] workflowr_1.3.0 Rcpp_1.0.1 digest_0.6.18 rprojroot_1.3-2
[5] backports_1.1.4 git2r_0.25.2 magrittr_1.5 evaluate_0.13
[9] stringi_1.4.3 fs_1.3.0 rmarkdown_1.12 tools_3.5.3
[13] stringr_1.4.0 glue_1.3.1 xfun_0.6 yaml_2.2.0
[17] compiler_3.5.3 htmltools_0.3.6 knitr_1.22