library(RADAR)
samplenames = c("Ctl1","Ctl4","Ctl8","Ctl16","Ctl17","Ctl18","Ctl20","T2D1","T2D4","T2D5","T2D6","T2D7","T2D9","T2D11","T2D12")
RADAR <- countReads(samplenames = samplenames,
gtf = "~/Database/genome/hg38/hg38_UCSC.gtf",
bamFolder = "~/Rohit_T2D/bam_files",
outputDir = "~/Rohit_T2D",
modification = "IP",
threads = 25
)
monster <- RADAR
Summary of read count
library(RADAR)
library(qvalue)
#load("~/Rohit_T2D/RADAR.RData")
load("~/Rohit_T2D/RADAR_analysis.RData")
countSummary <- rbind("Input"= colSums(RADAR$reads)[1:length(RADAR$samplenames)], "IP" =colSums(RADAR$reads)[(1:length(RADAR$samplenames))+length(RADAR$samplenames)] )
colnames(countSummary) <- RADAR$samplenames
knitr::kable(countSummary,format = "pandoc" )
Ctl1 | Ctl4 | Ctl8 | Ctl16 | Ctl17 | Ctl18 | Ctl20 | T2D1 | T2D4 | T2D5 | T2D6 | T2D7 | T2D9 | T2D11 | T2D12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Input | 18320414 | 17749453 | 17923796 | 11508719 | 15118237 | 25923296 | 20302596 | 16803517 | 16105448 | 14146820 | 17202593 | 14336611 | 17758817 | 19782144 | 18779123 |
IP | 30287782 | 25105375 | 21908869 | 23197918 | 21409901 | 31653712 | 24753657 | 27733641 | 26634012 | 28457739 | 22539106 | 36100747 | 27434039 | 22053013 | 19402847 |
X <- c(rep("Ctl",7),rep("T2D",8))
RADAR <- normalizeLibrary(RADAR,X)
RADAR <- RADAR::adjustExprLevel(RADAR)
RADAR <- filterBins(RADAR,minCountsCutOff = 15)
RADAR_pos <- RADAR::adjustExprLevel(RADAR[1:12], adjustBy = "pos")
RADAR_pos <- filterBins(RADAR_pos,minCountsCutOff = 15)
Plot distribution of number of reads in each 50 bp bins
hist(log10(rowMeans(RADAR$reads[rowMeans(RADAR$reads[,grep("input",colnames(RADAR$reads))])>1,grep("input",colnames(RADAR$reads))]) ),xlab = "log10 read count",main = "Distribution of INPUT read count in bins",xlim = c(0,3), col =rgb(0.2,0.2,0.2,0.5),cex.main = 2,cex.axis =2,cex.lab=2)
axis(side = 1, lwd = 2,cex.axis =2)
axis(side = 2, lwd = 2,cex.axis =2)
Compute the within group variability of INPUT geneSum VS INPUT local read count.
var.coef <- function(x){sd(as.numeric(x))/mean(as.numeric(x))}
## filter expressed genes
geneSum <- RADAR$geneSum[rowSums(RADAR$geneSum)>16,]
## within group variability
relative.var <- t( apply(geneSum,1,tapply,RADAR$X,var.coef) )
geneSum.var <- c(relative.var)
## For each gene used above, random sample a 50bp bin within this gene
set.seed(1)
r.bin50 <- tapply(rownames(RADAR$norm.input)[which(RADAR$geneBins$gene %in% rownames(geneSum))],as.character( RADAR$geneBins$gene[which(RADAR$geneBins$gene %in% rownames(geneSum))]) ,function(x){
n <- sample(1:length(x),1)
return(x[n])
})
relative.var <- apply(RADAR$norm.input[r.bin50,],1,tapply,RADAR$X,var.coef)
bin50.var <- c(relative.var)
bin50.var <- bin50.var[!is.na(bin50.var)]
## 100bp bins
r.bin100 <- tapply(rownames(RADAR$norm.input)[which(RADAR$geneBins$gene %in% rownames(geneSum))],as.character( RADAR$geneBins$gene[which(RADAR$geneBins$gene %in% rownames(geneSum))]) ,function(x){
n <- sample(1:(length(x)-2),1)
return(x[n:(n+1)])
})
relative.var <- lapply(r.bin100, function(x){ tapply( colSums(RADAR$norm.input[x,]),RADAR$X,var.coef ) })
bin100.var <- unlist(relative.var)
bin100.var <- bin100.var[!is.na(bin100.var)]
## 200bp bins
r.bin200 <- tapply(rownames(RADAR$norm.input)[which(RADAR$geneBins$gene %in% rownames(geneSum))],as.character( RADAR$geneBins$gene[which(RADAR$geneBins$gene %in% rownames(geneSum))]) ,function(x){
if(length(x)>4){
n <- sample(1:(length(x)-3),1)
return(x[n:(n+3)])
}else{
return(NULL)
}
})
r.bin200 <- r.bin200[which(!unlist(lapply(r.bin200,is.null)) ) ]
relative.var <- lapply(r.bin200, function(x){ tapply( colSums(RADAR$norm.input[x,]),RADAR$X,var.coef ) })
bin200.var <- unlist(relative.var)
bin200.var <- bin200.var[!is.na(bin200.var)]
relative.var <- data.frame(group=c(rep("geneSum",length(geneSum.var)),rep("bin-50bp",length(bin50.var)),rep("bin-100bp",length(bin100.var)),rep("bin-200bp",length(bin200.var))), variance=c(geneSum.var,bin50.var,bin100.var,bin200.var))
ggplot(relative.var,aes(variance,colour=group))+geom_density()+xlab("Relative variation")+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black",size = 1),
axis.title.x=element_text(size=20, face="bold", hjust=0.5,family = "arial"),
axis.title.y=element_text(size=20, face="bold", vjust=0.4, angle=90,family = "arial"),
legend.position = c(0.88,0.88),legend.title=element_blank(),legend.text = element_text(size = 14, face = "bold",family = "arial"),
axis.text = element_text(size = 15,face = "bold",family = "arial",colour = "black") )
var.coef <- function(x){sd(as.numeric(x))/mean(as.numeric(x))}
ip_coef_var <- t(apply(RADAR$ip_adjExpr_filtered,1,tapply,X,var.coef))
ip_coef_var <- ip_coef_var[!apply(ip_coef_var,1,function(x){return(any(is.na(x)))}),]
#hist(c(ip_coef_var),main = "M14KO mouse liver\n m6A-IP",xlab = "within group coefficient of variation",breaks = 50)
###
gene_coef_var <- t(apply(RADAR$geneSum,1,tapply,X,var.coef))
gene_coef_var <- gene_coef_var[!apply(gene_coef_var,1,function(x){return(any(is.na(x)))}),]
#hist(c(gene_coef_var),main = "M14KO mouse liver\n RNA-seq",xlab = "within group coefficient of variation",breaks = 50)
coef_var <- list('RNA-seq'=c(gene_coef_var),'m6A-IP'=c(ip_coef_var))
nn<-sapply(coef_var, length)
rs<-cumsum(nn)
re<-rs-nn+1
grp <- factor(rep(names(coef_var), nn), levels=names(coef_var))
coef_var.df <- data.frame(coefficient_var = c(c(gene_coef_var),c(ip_coef_var)),label = grp)
ggplot(data = coef_var.df,aes(coefficient_var,colour = grp))+geom_density()+ theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black",size = 1),
axis.title.x=element_text(size=20, face="bold", hjust=0.5,family = "arial"),
axis.title.y=element_text(size=20, face="bold", vjust=0.4, angle=90,family = "arial"),
legend.position = c(0.9,0.9),legend.title=element_blank(),legend.text = element_text(size = 18, face = "bold",family = "arial"),
axis.text = element_text(size = 15,face = "bold",family = "arial",colour = "black") )+
xlab("Coefficient of variation")
ip_var <- t(apply(RADAR$ip_adjExpr_filtered,1,tapply,X,var))
#ip_var <- ip_coef_var[!apply(ip_var,1,function(x){return(any(is.na(x)))}),]
ip_mean <- t(apply(RADAR$ip_adjExpr_filtered,1,tapply,X,mean))
###
gene_var <- t(apply(RADAR$geneSum,1,tapply,X,var))
#gene_var <- gene_var[!apply(gene_var,1,function(x){return(any(is.na(x)))}),]
gene_mean <- t(apply(RADAR$geneSum,1,tapply,X,mean))
all_var <- list('RNA-seq'=c(gene_var),'m6A-IP'=c(ip_var))
nn<-sapply(all_var, length)
rs<-cumsum(nn)
re<-rs-nn+1
group <- factor(rep(names(all_var), nn), levels=names(all_var))
all_var.df <- data.frame(variance = c(c(gene_var),c(ip_var)),mean= c(c(gene_mean),c(ip_mean)),label = group)
ggplot(data = all_var.df,aes(x=mean,y=variance,colour = group,shape = group))+geom_point(size = I(0.2))+stat_smooth(se = T,show.legend = F)+stat_smooth(se = F)+theme_bw() +xlab("Mean")+ylab("Variance") +theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black",size = 1),
axis.title.x=element_text(size=22, face="bold", hjust=0.5,family = "arial"),
axis.title.y=element_text(size=22, face="bold", vjust=0.4, angle=90,family = "arial"),
legend.position = c(0.85,0.95),legend.title=element_blank(),legend.text = element_text(size = 20, face = "bold",family = "arial"),
axis.text = element_text(size = 15,face = "bold",family = "arial",colour = "black") ) +
scale_x_continuous(limits = c(0,5000))+scale_y_continuous(limits = c(0,4e6))
inputCount <- as.data.frame(RADAR$reads[,1:length(RADAR$samplenames)])
logInputCount <- as.data.frame(log( RADAR$geneSum ) )
ggplot(logInputCount, aes(x = `Ctl18` , y = `T2D6` ))+geom_point()+geom_abline(intercept = 0,slope = 1, lty = 2, colour = "red")+xlab(paste("Log counts Ctl18") )+ylab(paste("Log counts T2D6") )+theme_classic(base_line_size = 0.8)+theme(axis.title = element_text(size = 12,face = "bold"), axis.text = element_text(size = 10,face = "bold",colour = "black") )
ggplot(logInputCount, aes(x = `Ctl18` , y = `T2D5` ))+geom_point()+geom_abline(intercept = 0,slope = 1, lty = 2, colour = "red")+xlab(paste("Log counts Ctl18") )+ylab(paste("Log counts T2D5") )+theme_classic(base_line_size = 0.8)+theme(axis.title = element_text(size = 12,face = "bold"), axis.text = element_text(size = 10,face = "bold",colour = "black") )
After geting the filtered and normalized IP read count for testing, we can use PCA analysis to check unkown variation.
top_bins <- RADAR$ip_adjExpr_filtered[order(rowMeans(RADAR$ip_adjExpr_filtered))[1:15000],]
We plot PCA colored by Ctl vs T2D
plotPCAfromMatrix(top_bins,group = X)+scale_color_discrete(name = "Group")
(Norm by position)
plotPCAfromMatrix(RADAR_pos$ip_adjExpr_filtered[order(rowMeans(RADAR_pos$ip_adjExpr_filtered))[1:15000],],group = X )+scale_color_discrete(name = "Group")
To check if samples are separated by different sequencing lanes, label the samples by lane batch.
batch <- c("A","A","A","B","B","C","C","A","A","B","A","B","B","A","C")
plotPCAfromMatrix(top_bins,group = paste0(batch))+scale_color_discrete(name = "Batch")
We can see that the samples are clearly separated by batches and we need to account for batch effect in the test. Thus we incorporate batch and gender as known covariates in the test.
Check the PCA after r egressing out the batch and gender.
library(rafalib)
X2 <- as.fumeric(c("A","A","A","B","B","A","A","A","A","B","A","B","B","A","A"))-1 # batch as covariates
X3 <- as.fumeric(c("A","A","A","A","A","C","C","A","A","A","A","A","A","A","C"))-1 # batch as covariates
X4 <- as.fumeric(c("M","F","M","M","M","F","M","M","M","F","M","M","M","M","F"))-1 # sex as covariates
registerDoParallel(cores = 10)
m6A.cov.out <- foreach(i = 1:nrow(top_bins), .combine = rbind) %dopar% {
Y = top_bins[i,]
resi <- residuals( lm(log(Y+1) ~ X2 + X3 + X4 ) )
resi
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
rownames(m6A.cov.out) <- rownames(top_bins)
plotPCAfromMatrix(m6A.cov.out,X,loglink = FALSE)+scale_color_discrete(name = "Group")
After regressing out the covariates, the samples are separated by diseases status in the PCA plot.
cov <- cbind(X2,X3,X4)
RADAR <- diffIP_parallel(RADAR,Covariates = cov,thread = 20)
RADAR_pos <- diffIP_parallel(RADAR_pos[1:14],thread = 25, Covariates = cov,plotPvalue = F)
In order to compare performance of other method on this data set, we run other three methods on default mode on this dataset.
deseq2.res <- DESeq2(countdata = RADAR$ip_adjExpr_filtered,pheno = matrix(c(rep(0,7),rep(1,8)),ncol = 1),covariates = cov)
edgeR.res <- edgeR(countdata = RADAR$ip_adjExpr_filtered,pheno = matrix(c(rep(0,7),rep(1,8)),ncol = 1),covariates = cov)
filteredBins <- rownames(RADAR$all.est)
QNB.res <- QNB::qnbtest(control_ip = RADAR$reads[filteredBins,16:22],
treated_ip = RADAR$reads[filteredBins,23:30],
control_input = RADAR$reads[filteredBins,1:7],
treated_input = RADAR$reads[filteredBins,8:15],plot.dispersion = FALSE)
deseq2.res_pos <- DESeq2(countdata = RADAR_pos$ip_adjExpr_filtered,pheno = matrix(X,ncol = 1),covariates = cov )
edgeR.res_pos <- edgeR(countdata = RADAR_pos$ip_adjExpr_filtered,pheno = matrix(X,ncol = 1),covariates = cov )
Compare distribution of p value.
pvalues <- data.frame(pvalue = c(RADAR$all.est[,"p_value3"],deseq2.res$pvalue,edgeR.res$pvalue,QNB.res$pvalue),
method = factor(rep(c("RADAR","DESeq2","edgeR","QNB"),c(length(RADAR$all.est[,"p_value3"]),
length(deseq2.res$pvalue),
length(edgeR.res$pvalue),
length(QNB.res$pvalue)
)
),levels =c("RADAR","DESeq2","edgeR","QNB")
)
)
ggplot(pvalues, aes(x = pvalue))+geom_histogram(breaks = seq(0,1,0.04),col=I("black"))+facet_grid(.~method)+theme_bw()+xlab("p-value")+theme( axis.title = element_text(size = 22, face = "bold"),strip.text = element_text(size = 25, face = "bold"),axis.text.x = element_text(size = 18, face = "bold",colour = "black"),axis.text.y = element_text(size = 15, face = "bold",colour = "black",angle = 90),panel.grid = element_blank(), axis.line = element_line(size = 0.7 ,colour = "black"),axis.ticks = element_line(colour = "black"), panel.spacing = unit(0.4, "lines") )+ scale_x_continuous(breaks = seq(0,1,0.2),labels=function(x) sprintf("%.1f", x))
Compare the distibution of beta
par(mfrow = c(1,4))
hist(RADAR$all.est[,"beta1"],main = "RADAR",xlab = "beta",breaks = 50,col =rgb(0.2,0.2,0.2,0.5),cex.main = 2.5,cex.axis =2,cex.lab=2)
hist(deseq2.res$log2FC, main = "DESeq2",xlab = "beta",breaks = 100,col =rgb(0.2,0.2,0.2,0.5),cex.main = 2.5,cex.axis =2,cex.lab=2)
hist(edgeR.res$log2FC,main = "edgeR", xlab = "beta",breaks = 50,col =rgb(0.2,0.2,0.2,0.5),cex.main = 2.5,cex.axis =2,cex.lab=2)
hist(QNB.res$log2.OR, main = "QNB",xlab = "beta",breaks = 50,col =rgb(0.2,0.2,0.2,0.5),cex.main = 2.5,cex.axis =2,cex.lab=2)
Compare norm by geneSum with norm by position
par(mfrow = c(1,3))
tmp <- hist(c(RADAR_pos$all.est[,"p_value3"]),plot = F)
tmp$counts <- tmp$counts*(nrow(RADAR$ip_adjExpr_filtered)/nrow(RADAR_pos$ip_adjExpr_filtered))
hist(RADAR$all.est[,"p_value3"],main = "RADAR",xlab = "p value",col =rgb(1,0,0,0.4),cex.main = 2.5,cex.axis =2,cex.lab=2, ylim = c(0,max(hist(RADAR$all.est[,"p_value3"],plot = F)$counts,tmp$counts)) )
plot(tmp,col=rgb(0,0,1,0.5),add = T)
axis(side = 1, lwd = 1,cex.axis =2)
axis(side = 2, lwd = 1,cex.axis =2)
legend("topright", c("By geneSum", "By position"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 10,bty="n",text.font = 2)
y.scale <- max(hist(RADAR$all.est[,"p_value3"],plot = F)$counts)
tmp <- hist(c(deseq2.res_pos$pvalue),plot = F)
tmp$counts <- tmp$counts*(nrow(RADAR$ip_adjExpr_filtered)/nrow(RADAR_pos$ip_adjExpr_filtered))
hist(deseq2.res$pvalue,main = "DESeq2",xlab = "p value",col =rgb(1,0,0,0.4),cex.main = 2.5,cex.axis =2,cex.lab=2, ylim = c(0,y.scale) )
plot(tmp,col=rgb(0,0,1,0.5),add = T)
axis(side = 1, lwd = 1,cex.axis =2)
axis(side = 2, lwd = 1,cex.axis =2)
legend("topright", c("By geneSum", "By position"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 10,bty="n",text.font = 2)
tmp <- hist(c(edgeR.res_pos$pvalue),plot = F)
tmp$counts <- tmp$counts*(nrow(RADAR$ip_adjExpr_filtered)/nrow(RADAR_pos$ip_adjExpr_filtered))
hist(edgeR.res$pvalue,main = "edgeR",xlab = "p value",col =rgb(1,0,0,0.4),cex.main = 2.5,cex.axis =2,cex.lab=2, ylim = c(0,max(hist(edgeR.res$pvalue,plot = F)$counts,tmp$counts,y.scale)) )
plot(tmp,col=rgb(0,0,1,0.5),add = T)
axis(side = 1, lwd = 1,cex.axis =2)
axis(side = 2, lwd = 1,cex.axis =2)
legend("topright", c("By geneSum", "By position"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 10,bty="n",text.font = 2)
Number of significant bins detected at qvalue < 0.1
sigBins <- apply(cbind("RADAR"=RADAR$all.est[,"p_value3"],"DESeq2"=deseq2.res$pvalue,"edgeR"=edgeR.res$pvalue,"QNB"=QNB.res$pvalue),2, function(x){
length( which( qvalue::qvalue(x)$qvalue < 0.1 ) )
})
print(sigBins)
## RADAR DESeq2 edgeR QNB
## 28031 17 7531 27
Plot overlap of significant hits
plot.new()
MyTools::venn_diagram4( names(which( qvalue::qvalue(RADAR$all.est[,"p_value3"])$qvalue < 0.1 ) ),
rownames(RADAR$all.est)[which( qvalue::qvalue(deseq2.res$pvalue)$qvalue < 0.1 ) ],
rownames(RADAR$all.est)[which( qvalue::qvalue(edgeR.res$pvalue)$qvalue < 0.1 ) ],
rownames(RADAR$all.est)[which( qvalue::qvalue(QNB.res$pvalue)$qvalue < 0.1 ) ],
"RADAR","DESeq2","edgeR","QNB")
load("~/PoissonGamma_Method/T2D_data/selfFDR.RData")
ggplot(data = selfFDR, aes(x=method, y= self_FDR, colour = qvalue_cutoff))+geom_boxplot()+theme_bw() +xlab("Method")+ylab("Self-FDR")+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black",size = 1),
axis.title.x=element_text(size=20, face="bold", hjust=0.5),
axis.title.y=element_text(size=20, face="bold", vjust=0.4, angle=90),
legend.title=element_text(size = 15,face = "bold"),legend.text = element_text(size = 18, face = "bold",family = "arial"),
axis.text = element_text(size = 15,face = "bold",colour = "black") , axis.ticks = element_line(colour = "black",size = 1) )+scale_color_discrete(name = "qvalue\ncutoff")
selfFDR_pos <- NULL
samplenames <- RADAR$samplenames
set.seed(3)
for(i in 1:20){
## Sample subset
sample_id <- c(sample(x = 1:7, size = 5,replace = F),7+sample(x = 1:8, size = 6,replace = F) )
Truth_id <- setdiff(1:12,sample_id)
if(length(unique(X2[sample_id])) == 1 ){ ## samples all from one batch
## run radar
radar_sample <- diffIP_parallel(RADAR_pos[1:14] ,Covariates = cov[,2:3],exclude = samplenames[setdiff(1:15,sample_id)],thread = 15,plotPvalue = F)$all.est
radar_sample <- cbind(radar_sample, "qvalue"= qvalue::qvalue(radar_sample[,"p_value3"])$qvalue )
radar_truth <- RADAR_pos$all.est
radar_truth <- cbind(radar_truth, "qvalue"= qvalue::qvalue(radar_truth[,"p_value3"])$qvalue )
radar_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(radar_sample[,"qvalue"]<x) %in% which(radar_truth[,"qvalue"]<0.1) ) )/length(which(radar_sample[,"qvalue"]<x))})
## run DESeq2
deseq_sample <- DESeq2(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,2:3] )
deseq_sample$qvalue <- qvalue::qvalue( deseq_sample$pvalue )$qvalue
deseq_truth <- deseq2.res_pos
deseq_truth$qvalue <- qvalue::qvalue( deseq_truth$pvalue )$qvalue
deseq_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(deseq_sample[,"qvalue"]<x) %in% which(deseq_truth[,"qvalue"]<0.1) ) )/length(which(deseq_sample[,"qvalue"]<x))})
## run edgeR
edgeR_sample <- edgeR(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,2:3] )
edgeR_sample$qvalue <- qvalue::qvalue( edgeR_sample$pvalue )$qvalue
edgeR_truth <- edgeR.res_pos
edgeR_truth$qvalue <- qvalue::qvalue( edgeR_truth$pvalue )$qvalue
edgeR_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(edgeR_sample[,"qvalue"]<x) %in% which(edgeR_truth[,"qvalue"]<0.1) ) )/length(which(edgeR_sample[,"qvalue"]<x))})
fdr <- rbind( data.frame(self_FDR = radar_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("RADAR",3) ),
data.frame(self_FDR = deseq_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("DESeq2",3) ),
data.frame(self_FDR = edgeR_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("edgeR",3) )
)
selfFDR_pos <- rbind(selfFDR_pos,fdr)
}else if(length(unique(X3[sample_id])) == 1 ){
## run radar
radar_sample <- diffIP_parallel(RADAR_pos[1:14] ,Covariates = cov[,c(1,3)],exclude = samplenames[setdiff(1:15,sample_id)],thread = 15,plotPvalue = F)$all.est
radar_sample <- cbind(radar_sample, "qvalue"= qvalue::qvalue(radar_sample[,"p_value3"])$qvalue )
radar_truth <- RADAR_pos$all.est
radar_truth <- cbind(radar_truth, "qvalue"= qvalue::qvalue(radar_truth[,"p_value3"])$qvalue )
radar_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(radar_sample[,"qvalue"]<x) %in% which(radar_truth[,"qvalue"]<0.1) ) )/length(which(radar_sample[,"qvalue"]<x))})
## run DESeq2
deseq_sample <- DESeq2(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,c(1,3)] )
deseq_sample$qvalue <- qvalue::qvalue( deseq_sample$pvalue )$qvalue
deseq_truth <- deseq2.res_pos
deseq_truth$qvalue <- qvalue::qvalue( deseq_truth$pvalue )$qvalue
deseq_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(deseq_sample[,"qvalue"]<x) %in% which(deseq_truth[,"qvalue"]<0.1) ) )/length(which(deseq_sample[,"qvalue"]<x))})
## run edgeR
edgeR_sample <- edgeR(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,c(1,3)] )
edgeR_sample$qvalue <- qvalue::qvalue( edgeR_sample$pvalue )$qvalue
edgeR_truth <- edgeR.res_pos
edgeR_truth$qvalue <- qvalue::qvalue( edgeR_truth$pvalue )$qvalue
edgeR_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(edgeR_sample[,"qvalue"]<x) %in% which(edgeR_truth[,"qvalue"]<0.1) ) )/length(which(edgeR_sample[,"qvalue"]<x))})
fdr <- rbind( data.frame(self_FDR = radar_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("RADAR",3) ),
data.frame(self_FDR = deseq_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("DESeq2",3) ),
data.frame(self_FDR = edgeR_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("edgeR",3) )
)
selfFDR_pos <- rbind(selfFDR_pos,fdr)
}else if(length(unique(X4[sample_id])) == 1){
## run radar
radar_sample <- diffIP_parallel(RADAR_pos[1:14] ,Covariates = cov[,c(1,2)],exclude = samplenames[setdiff(1:15,sample_id)],thread = 15,plotPvalue = F)$all.est
radar_sample <- cbind(radar_sample, "qvalue"= qvalue::qvalue(radar_sample[,"p_value3"])$qvalue )
radar_truth <- RADAR_pos$all.est
radar_truth <- cbind(radar_truth, "qvalue"= qvalue::qvalue(radar_truth[,"p_value3"])$qvalue )
radar_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(radar_sample[,"qvalue"]<x) %in% which(radar_truth[,"qvalue"]<0.1) ) )/length(which(radar_sample[,"qvalue"]<x))})
## run DESeq2
deseq_sample <- DESeq2(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,c(1,2)] )
deseq_sample$qvalue <- qvalue::qvalue( deseq_sample$pvalue )$qvalue
deseq_truth <- deseq2.res_pos
deseq_truth$qvalue <- qvalue::qvalue( deseq_truth$pvalue )$qvalue
deseq_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(deseq_sample[,"qvalue"]<x) %in% which(deseq_truth[,"qvalue"]<0.1) ) )/length(which(deseq_sample[,"qvalue"]<x))})
## run edgeR
edgeR_sample <- edgeR(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,c(1,2)] )
edgeR_sample$qvalue <- qvalue::qvalue( edgeR_sample$pvalue )$qvalue
edgeR_truth <- edgeR.res_pos
edgeR_truth$qvalue <- qvalue::qvalue( edgeR_truth$pvalue )$qvalue
edgeR_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(edgeR_sample[,"qvalue"]<x) %in% which(edgeR_truth[,"qvalue"]<0.1) ) )/length(which(edgeR_sample[,"qvalue"]<x))})
fdr <- rbind( data.frame(self_FDR = radar_selfFDR, qvalue_cuttoff=c("0.1","0.05","0.01"), method = rep("RADAR",3) ),
data.frame(self_FDR = deseq_selfFDR, qvalue_cuttoff=c("0.1","0.05","0.01"), method = rep("DESeq2",3) ),
data.frame(self_FDR = edgeR_selfFDR, qvalue_cuttoff=c("0.1","0.05","0.01"), method = rep("edgeR",3) )
)
selfFDR_pos <- rbind(selfFDR_pos,fdr)
}else{
## run radar
radar_sample <- diffIP_parallel(RADAR_pos[1:14],Covariates = cov ,exclude = samplenames[setdiff(1:15,sample_id)],thread = 15,plotPvalue = F)$all.est
radar_sample <- cbind(radar_sample, "qvalue"= qvalue::qvalue(radar_sample[,"p_value3"])$qvalue )
radar_truth <- RADAR_pos$all.est
radar_truth <- cbind(radar_truth, "qvalue"= qvalue::qvalue(radar_truth[,"p_value3"])$qvalue )
radar_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(radar_sample[,"qvalue"]<x) %in% which(radar_truth[,"qvalue"]<0.1) ) )/length(which(radar_sample[,"qvalue"]<x))})
## run DESeq2
deseq_sample <- DESeq2(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,] )
deseq_sample$qvalue <- qvalue::qvalue( deseq_sample$pvalue )$qvalue
deseq_truth <- deseq2.res_pos
deseq_truth$qvalue <- qvalue::qvalue( deseq_truth$pvalue )$qvalue
deseq_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(deseq_sample[,"qvalue"]<x) %in% which(deseq_truth[,"qvalue"]<0.1) ) )/length(which(deseq_sample[,"qvalue"]<x))})
## run edgeR
edgeR_sample <- edgeR(countdata = RADAR_pos$ip_adjExpr_filtered[,sample_id],pheno = matrix(c(rep(0,7),rep(1,8))[sample_id],ncol = 1),covariates = cov[sample_id,] )
edgeR_sample$qvalue <- qvalue::qvalue( edgeR_sample$pvalue )$qvalue
edgeR_truth <- edgeR.res_pos
edgeR_truth$qvalue <- qvalue::qvalue( edgeR_truth$pvalue )$qvalue
edgeR_selfFDR <- sapply(c(0.1,0.05,0.01),function(x){length( which(! which(edgeR_sample[,"qvalue"]<x) %in% which(edgeR_truth[,"qvalue"]<0.1) ) )/length(which(edgeR_sample[,"qvalue"]<x))})
fdr <- rbind( data.frame(self_FDR = radar_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("RADAR",3) ),
data.frame(self_FDR = deseq_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("DESeq2",3) ),
data.frame(self_FDR = edgeR_selfFDR, qvalue_cutoff=c("0.1","0.05","0.01"), method = rep("edgeR",3) )
)
selfFDR_pos <- rbind(selfFDR_pos,fdr)
}
}
selfFDR_pos$qvalue_cutoff <- factor(selfFDR_pos$qvalue_cutoff,levels = c("0.1","0.05","0.01"))
save(selfFDR_pos, file = "~/PoissonGamma_Method/T2D_data/selfFDR_pos.RData")
Self-FDR norm by local bin read count
load("~/PoissonGamma_Method/T2D_data/selfFDR_pos.RData")
selfFDR_all <- cbind(rbind(selfFDR, selfFDR_pos), "NormBy"= c(rep("By gene-level count",nrow(selfFDR)),rep("By bin-level count",nrow(selfFDR_pos))) )
selfFDR_all$NormBy <- factor(selfFDR_all$NormBy,levels = c("By gene-level count","By bin-level count"))
ggplot(data = selfFDR_all, aes(x=method, y= self_FDR, colour = qvalue_cutoff))+geom_boxplot()+facet_grid(~NormBy)+theme_bw() +xlab("Method")+ylab("Self-FDR")+ theme( panel.grid.major = element_blank(),
axis.line = element_line(colour = "black",size = 1),
axis.title.x=element_text(size=20, face="bold", hjust=0.5,family = "arial"),
axis.title.y=element_text(size=20, face="bold", vjust=0.4, angle=90,family = "arial"),
legend.title=element_text(size = 15,face = "bold"),legend.text = element_text(size = 18, face = "bold",family = "arial"),
axis.text = element_text(size = 18,face = "bold",family = "arial",colour = "black") ,strip.text.x = element_text(size = 20,face = "bold") )+scale_color_discrete(name = "qvalue\ncutoff")
radar_permuted_P <-NULL
deseq_permuted_P <- NULL
edgeR_permuted_P <- NULL
set.seed(51)
for(i in 1:10){
permute_X <- unlist( tapply(RADAR$X,batch,function(x){sample(x,length(x))}) )[match(RADAR$samplenames,unlist( tapply(RADAR$samplenames,batch,function(x)return(x)) ))]
if ( all( permute_X == RADAR$X) | all( permute_X == c(rep("T2D",8),rep("Ctl",7))) ){
cat("Duplicated with original design...\n")
}else{
radar_permute <- diffIP_parallel(c(RADAR[1:11],list("X"=permute_X),RADAR[13:14]) ,thread = 15,Covariates = cov,plotPvalue = F)$all.est[,"p_value3"]
deseq_permute <- DESeq2(countdata = RADAR$ip_adjExpr_filtered,pheno = matrix(permute_X,ncol = 1),covariates = cov )$pvalue
edgeR_permute <- edgeR(countdata = RADAR$ip_adjExpr_filtered,pheno = matrix(permute_X,ncol = 1),covariates = cov )$pvalue
radar_permuted_P <- cbind(radar_permuted_P, radar_permute)
deseq_permuted_P <- cbind(deseq_permuted_P,deseq_permute)
edgeR_permuted_P <- cbind(edgeR_permuted_P,edgeR_permute)
}
}
save(radar_permuted_P ,deseq_permuted_P ,edgeR_permuted_P, file= "~/PoissonGamma_Method/T2D_data/permutation.RData")
load( "~/PoissonGamma_Method/T2D_data/permutation.RData")
for( i in 1:ncol(radar_permuted_P)){
par(mfrow=c(1,3))
hist(RADAR$all.est[,"p_value3"], col =rgb(1,0,0,0.4),main = "RADAR", xlab = "p value",cex.main = 2.5,cex.axis =2,cex.lab=2)
hist(radar_permuted_P[,i],col=rgb(0,0,1,0.5),add = T)
legend("topright", c("Original", "Permuted"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 12,bty="n",text.font = 2, cex = 1.5)
hist(deseq2.res$pvalue, col =rgb(1,0,0,0.4),main = "DESeq2",xlab = "p value",cex.main = 2.5,cex.axis =2,cex.lab=2, ylim =c(0,max(hist(deseq_permuted_P[,i],plot = F)$counts) ) )
hist(deseq_permuted_P[,i],col=rgb(0,0,1,0.5),add = T)
legend("topright", c("Original", "Permuted"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 12,bty="n",text.font = 2, cex = 1.5)
hist(edgeR.res$pvalue, col =rgb(1,0,0,0.4),main = "edgeR",xlab = "p value",cex.main = 2.5,cex.axis =2,cex.lab=2)
hist(edgeR_permuted_P[,i],col=rgb(0,0,1,0.5),add = T)
legend("topright", c("Original", "Permuted"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 12,bty="n",text.font = 2, cex = 1.5)
}
par(mfrow=c(1,2))
hist(RADAR$all.est[,"p_value3"], col =rgb(1,0,0,0.4),main = "RADAR", xlab = "p value",font=2, cex.lab=1.5, font.lab=2 ,cex.main = 2,cex.axis =1, lwd = 2.5)
tmp <- hist(c(radar_permuted_P),plot = F)
tmp$counts <- tmp$counts/10
plot(tmp,col=rgb(0,0,1,0.5),add = T)
legend("topright", c("Original", "Permuted"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 12,bty="n",text.font = 2, cex = 1)
y.scale <- max(hist(RADAR$all.est[,"p_value3"],plot = F)$counts)
#hist(deseq2.res$pvalue, col =rgb(1,0,0,0.4),main = "DESeq2",xlab = "p value")
#tmp <- hist(c(deseq_permuted_P),plot = F)
#tmp$counts <- tmp$counts/10
#plot(tmp,col=rgb(0,0,1,0.5),add = T)
#legend("topright", c("Original", "Permuted"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 10,bty="n")
hist(edgeR.res$pvalue, col =rgb(1,0,0,0.4),main = "edgeR",xlab = "p value",font=2, cex.lab=1.5, font.lab=2 ,cex.main = 2 , cex.axis =1, ylim = c(0,y.scale), lwd = 2.5)
tmp <- hist(c(edgeR_permuted_P),plot = F)
tmp$counts <- tmp$counts/10
plot(tmp,col=rgb(0,0,1,0.5),add = T)
legend("topright", c("Original", "Permuted"), col=c(rgb(1,0,0,0.4), rgb(0,0,1,0.5)), lwd = 12,bty="n",text.font = 2, cex = 1)
Plot Heat map of significant bins by RADAR
topBins <- RADAR$ip_adjExpr_filtered[which(qvalue(RADAR$all.est[,"p_value3"])$qvalue<0.1),]
log_topBins <- log(topBins+1)
registerDoParallel(cores = 4)
cov.out.RADAR <- foreach(i = 1:nrow(log_topBins), .combine = rbind) %dopar% {
Y = log_topBins[i,]
tmp_data <- as.data.frame(cbind(Y,cov))
resi <- residuals( lm(Y~.,data=tmp_data ) )
resi
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
rownames(cov.out.RADAR) <- rownames(log_topBins)
colnames(cov.out.RADAR) <- colnames(log_topBins)
dist.pear <- function(x) as.dist(1-cor(t(x)))
hclust.ave <- function(x) hclust(x, method="average")
gplots::heatmap.2(cov.out.RADAR,scale="row",trace="none",labRow=NA,main = "Methylation level of significant bins\n(Covariates regressed out)",
distfun=dist.pear, hclustfun=hclust.ave,col=rev(RColorBrewer::brewer.pal(9,"RdBu")))
Plot heap map of significant bins by edgeR
topBins <- RADAR$ip_adjExpr_filtered[which(qvalue(edgeR.res$pvalue)$qvalue<0.1),]
log_topBins <- log(topBins+1)
registerDoParallel(cores = 4)
cov.out.edgeR <- foreach(i = 1:nrow(log_topBins), .combine = rbind) %dopar% {
Y = log_topBins[i,]
tmp_data <- as.data.frame(cbind(Y,cov))
resi <- residuals( lm(Y~.,data=tmp_data ) )
resi
}
rm(list=ls(name=foreach:::.foreachGlobals), pos=foreach:::.foreachGlobals)
rownames(cov.out.edgeR) <- rownames(log_topBins)
colnames(cov.out.edgeR) <- colnames(log_topBins)
dist.pear <- function(x) as.dist(1-cor(t(x)))
hclust.ave <- function(x) hclust(x, method="average")
gplots::heatmap.2(cov.out.edgeR,scale="row",trace="none",labRow=NA,main = "Methylation level of significant bins\n(Covariates regressed out)",
distfun=dist.pear, hclustfun=hclust.ave,col=rev(RColorBrewer::brewer.pal(9,"RdBu")))
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 17.10
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] VennDiagram_1.6.20 futile.logger_1.4.3
## [3] qvalue_2.10.0 RADAR_0.1.5
## [5] RcppArmadillo_0.9.100.5.0 Rcpp_0.12.19
## [7] RColorBrewer_1.1-2 gplots_3.0.1
## [9] doParallel_1.0.14 iterators_1.0.10
## [11] foreach_1.4.4 ggplot2_3.0.0
## [13] Rsamtools_1.30.0 Biostrings_2.46.0
## [15] XVector_0.18.0 GenomicFeatures_1.30.3
## [17] AnnotationDbi_1.40.0 Biobase_2.38.0
## [19] GenomicRanges_1.30.3 GenomeInfoDb_1.14.0
## [21] IRanges_2.12.0 S4Vectors_0.16.0
## [23] BiocGenerics_0.24.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-131.1 bitops_1.0-6
## [3] matrixStats_0.54.0 bit64_0.9-7
## [5] progress_1.2.0 httr_1.3.1
## [7] rprojroot_1.3-2 tools_3.4.4
## [9] backports_1.1.2 R6_2.2.2
## [11] KernSmooth_2.23-15 mgcv_1.8-23
## [13] DBI_1.0.0 lazyeval_0.2.1
## [15] colorspace_1.3-2 withr_2.1.2
## [17] gridExtra_2.3 tidyselect_0.2.4
## [19] prettyunits_1.0.2 RMySQL_0.10.15
## [21] bit_1.1-14 compiler_3.4.4
## [23] formatR_1.5 DelayedArray_0.4.1
## [25] labeling_0.3 rtracklayer_1.38.3
## [27] caTools_1.17.1 scales_0.5.0
## [29] stringr_1.3.1 digest_0.6.15
## [31] rmarkdown_1.10 DOSE_3.4.0
## [33] pkgconfig_2.0.1 htmltools_0.3.6
## [35] highr_0.7 rlang_0.2.1
## [37] RSQLite_2.1.1 bindr_0.1.1
## [39] BiocParallel_1.12.0 gtools_3.8.1
## [41] GOSemSim_2.4.1 dplyr_0.7.6
## [43] RCurl_1.95-4.10 magrittr_1.5
## [45] GO.db_3.5.0 GenomeInfoDbData_1.0.0
## [47] Matrix_1.2-12 munsell_0.5.0
## [49] stringi_1.2.3 yaml_2.1.19
## [51] SummarizedExperiment_1.8.1 zlibbioc_1.24.0
## [53] plyr_1.8.4 blob_1.1.1
## [55] gdata_2.18.0 DO.db_2.9
## [57] crayon_1.3.4 lattice_0.20-35
## [59] splines_3.4.4 hms_0.4.2
## [61] knitr_1.20 pillar_1.2.3
## [63] igraph_1.2.1 fgsea_1.4.1
## [65] reshape2_1.4.3 codetools_0.2-15
## [67] biomaRt_2.34.2 futile.options_1.0.1
## [69] fastmatch_1.1-0 XML_3.98-1.11
## [71] glue_1.2.0 evaluate_0.10.1
## [73] lambda.r_1.2.3 data.table_1.11.4
## [75] tidyr_0.8.1 gtable_0.2.0
## [77] purrr_0.2.5 assertthat_0.2.0
## [79] tibble_1.4.2 clusterProfiler_3.6.0
## [81] rvcheck_0.1.0 GenomicAlignments_1.14.2
## [83] memoise_1.1.0 MyTools_0.0.0
## [85] bindrcpp_0.2.2
This R Markdown site was created with workflowr