Compare effect size of lead m6A-QTLs with other molecular QTLs (ascertained m6A-QTLs) stratified by RBP binding sites

Last updated: 2020-03-17

Checks: 7 passed, 0 failed

Knit directory: m6AQTL_reproducibleDocument/

This reproducible R Markdown analysis was created with workflowr (version 1.6.0). The Checks tab describes the reproducibility checks that were applied when the results were created. The Past versions tab lists the development history.

R Markdown file: up-to-date

Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.

Environment: empty

Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproducibility it’s best to always run the code in an empty environment.

Seed: set.seed(20200317)

The command set.seed(20200317) was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.

Session information: recorded

Great job! Recording the operating system, R version, and package versions is critical for reproducibility.

Cache: none

Nice! There were no cached chunks for this analysis, so you can be confident that you successfully produced the results during this run.

File paths: relative

Great job! Using relative paths to the files within your workflowr project makes it easier to run your code on other machines.

Repository version: 5c01c10

Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated.

Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use wflow_publish or wflow_git_commit). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:


Ignored files:
    Ignored:    .Rproj.user/

Unstaged changes:
    Modified:   analysis/index.Rmd

Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.

These are the previous versions of the R Markdown and HTML files. If you’ve configured a remote Git repository (see ?wflow_git_remote), click on the hyperlinks in the table below to view them.

File	Version	Author	Date	Message
Rmd	5c01c10	kevinlkx	2020-03-17	wflow_publish(“analysis/cor_effectsize_byRBPs_jointLCLs.Rmd”)

source: m6AQTL_workflowr/analysis/cor_effectsize_byRBPs_scatterplots_logOR.adjusted_YangVCF_noPCs_SNPlevelQvalue0.2_APAdist_noTE.Rmd

Functions

# options(scipen = 999)
suppressPackageStartupMessages(library(GenomicRanges))
library(ggplot2)
library(gplots)
library(RColorBrewer)
library(reshape2)
library(foreach)
library(doParallel)
library(qvalue)
library(BH)

#' Correlate and regress the effect sizes of two phenotypes
#'
#' Keeps the rows where both columns are non-missing and
#' `abs(x) <= filter_beta`, then computes the Pearson correlation of the two
#' columns and fits `lm(y ~ x)`.
#'
#' @param effectsize_joint.df Table of effect sizes, one column per phenotype.
#' @param phenotype_x Column used as the predictor (x).
#' @param phenotype_y Column used as the response (y).
#' @param filter_beta Largest absolute value of x that is kept
#'   (default `Inf`, i.e. no filtering).
#' @return Named numeric vector with elements `cor_Pearson`, `r.squared`,
#'   `slope`, `pvalue` and `n` (number of points used). The statistics are
#'   `NA` when fewer than two points remain; `slope` and `pvalue` are also
#'   `NA` when the slope cannot be estimated (e.g. constant x).
effect_cor <- function(effectsize_joint.df, phenotype_x, phenotype_y, filter_beta = Inf){
  x <- effectsize_joint.df[, phenotype_x]
  y <- effectsize_joint.df[, phenotype_y]

  idx_included <- which(abs(x) <= filter_beta & !is.na(x) & !is.na(y))
  x <- x[idx_included]
  y <- y[idx_included]

  # Default every statistic to NA so that the result always has the same
  # five elements, whichever branch is taken below.
  cor_Pearson <- NA_real_
  r.squared <- NA_real_
  slope <- NA_real_
  pvalue <- NA_real_

  if(length(idx_included) >= 2){
    cor_Pearson <- cor(x, y)
    lm.summary <- summary(lm(y ~ x))
    r.squared <- lm.summary$r.squared

    # summary.lm() drops coefficients that could not be estimated, so the
    # "x" row is absent when x is constant.
    lm.coefs <- lm.summary$coefficients
    if("x" %in% rownames(lm.coefs)){
      slope <- lm.coefs["x", "Estimate"]
      pvalue <- lm.coefs["x", "Pr(>|t|)"]
    }
  }

  cor_summary <- c(cor_Pearson = cor_Pearson, r.squared = r.squared, slope = slope, pvalue = pvalue, n = length(x))
  return(cor_summary)
}

Settings

m6A-QTL qvalue < 0.2
no PCs

## Analysis settings ----

## m6A peak set and the m6A phenotype used throughout this report
m6A_version <- "jointPeak_threshold5_MeRIP_HISAT2Map"
m6A_phenotype_name <- "m6APeak_logOR_GC.IP.adjusted_qqnorm"

## choice of SNP-gene pairs for APA-QTLs
type_apaQTL <- "dist"

## number of PCs: m6A-QTL mapping / joint effect size comparison
num_PCs_m6AQTL <- 0
num_PCs_joint <- 0

## significance thresholds: m6A-QTL qvalue / FDR over multiple correlations
thresh_qvalue <- 0.2
thresh_FDR <- 0.1

## parallel backend used by foreach
registerDoParallel(cores = 6)

## report the settings in the rendered output
cat("m6A version: ",
    m6A_version, "\n", sep = " ")
cat("m6A phenotype: ",
    m6A_phenotype_name, "\n", sep = " ")
cat("Mapping m6AQTLs: qvalue: ",
    thresh_qvalue, ", with", num_PCs_m6AQTL, "PCs. \n", sep = " ")
cat("Effect size comparison: ",
    num_PCs_joint, "PCs, choose SNP-gene pairs for APA-QTLs by: ", type_apaQTL, "\n", sep = " ")
cat("FDR threshold for testing multiple correlations: ",
    thresh_FDR*100, "%\n", sep = " ")

Load RBP binding sites (intersect of two replicates) from eCLIP data

## Load the RBP binding sites (stored as an RDS object) and list the RBPs in it.
dir_m6AQTL <- "/project2/xinhe/m6A/m6A_seq/m6A_QTL"
RBPs.gr <- readRDS(paste0(dir_m6AQTL, "/RBP_data/hg19/all.RBP.intersect.hg19.bed.gr.rds"))

RBP_list <- unique(RBPs.gr$name)
## End the list with a newline; otherwise the last RBP name runs straight into
## the count printed by the next cat() call.
cat("list of RBPs with eCLIP data: ", RBP_list, "\n")
cat(length(RBP_list), "RBPs \n")

Gene symbol list

## Read the gene symbol table (tab-delimited, with header) and index its rows
## by gene symbol. rownames<- requires the symbols to be unique.
dir_gene_info <- "/project2/xinhe/m6A/m6A_seq/m6A_QTL/gene_info/"
gene_list_unique <- read.table(paste0(dir_gene_info, "/geneSymbol_ensembl_unique.txt"),
                               header = TRUE, sep = "\t", stringsAsFactors = FALSE)
rownames(gene_list_unique) <- gene_list_unique$symbol

Load lead m6AQTLs and combined effect size from molecular traits

dir_m6AQTL_results <- paste0("/project2/xinhe/m6A/m6A_seq/m6A_QTL/results/hg19/m6A_QTLs/", m6A_version)

## Pick the file suffix of the lead m6A-QTL table:
## 15 PCs vs. 0 PCs (any value other than 15 falls back to the 0-PC file),
## and the qvalue-0.2 table vs. the unfiltered nominal table.
if(num_PCs_m6AQTL == 15){
  suffix_lead_m6AQTLs <- if(thresh_qvalue == 0.2){
    ".15PCs.fastQTL.nominals.qvalue_0.2.rds"
  }else{
    ".15PCs.fastQTL.nominals.rds"
  }
}else{
  suffix_lead_m6AQTLs <- if(thresh_qvalue == 0.2){
    ".0PCs.fastQTL.nominals.qvalue_0.2.rds"
  }else{
    ".0PCs.fastQTL.nominals.rds"
  }
}

m6AQTLs_lead.df <- readRDS(paste0(dir_m6AQTL_results, "/fastQTL_YangGeno/", m6A_phenotype_name, "/lead.m6AQTL.", m6A_phenotype_name, suffix_lead_m6AQTLs))

## key identifying each peak-SNP pair
m6AQTLs_lead.df$peak_snp_pair <- paste(m6AQTLs_lead.df$PEAK, m6AQTLs_lead.df$SNP, sep = "|")
cat(nrow(m6AQTLs_lead.df), "lead m6A-QTLs (", num_PCs_m6AQTL, "PCs in m6A-QTL mapping, qvalue <", thresh_qvalue, ")\n")

## Load the combined QTL summary statistics and keep the rows that correspond
## to the lead m6A-QTLs (matched by peak-SNP pair).
cat("Load combined QTL summary stats (Yang's genotype data,", num_PCs_joint, "PCs in joint analysis) ... \n")
if(num_PCs_joint == 0){
  dir_combined_data <- paste0(dir_m6AQTL_results, "/jointLCLs_analysis/", m6A_phenotype_name, "/m6AQTLs_full_noPCs_APA", type_apaQTL)
}else{
  dir_combined_data <- paste0(dir_m6AQTL_results, "/jointLCLs_analysis/", m6A_phenotype_name, "/m6AQTLs_full_PCs_APA", type_apaQTL)
}

m6AQTLs_info_full.df <- readRDS(paste0(dir_combined_data, "/m6AQTLs_info_full.rds"))
m6AQTLs_info_full.df$peak_snp_pair <- paste(m6AQTLs_info_full.df$PEAK, m6AQTLs_info_full.df$SNP, sep = "|")

## row indices (in the full tables) of the lead pairs; unmatched pairs are dropped
idx_sig_peak_snp_pair <- na.omit(match(m6AQTLs_lead.df$peak_snp_pair, m6AQTLs_info_full.df$peak_snp_pair))
m6AQTLs_sig.df <- m6AQTLs_info_full.df[idx_sig_peak_snp_pair, ]

## Check for duplicates BEFORE the pairs are used as row names: assigning
## duplicated row names to a data frame fails with a generic error, which
## would hide this more informative message.
if(anyDuplicated(m6AQTLs_sig.df$peak_snp_pair) > 0){stop("Duplicated peak-SNP pairs!")}

cat(nrow(m6AQTLs_sig.df), "lead m6A-QTLs matched in Yang's genotype data \n")

## NOTE(review): the beta and p-value tables are indexed with row positions
## taken from m6AQTLs_info_full.df, so they are assumed to have the same row
## order as that table -- confirm against the script that writes these files.
beta_joint_m6AQTLs_full.df <- readRDS(paste0(dir_combined_data, "/beta_jointLCLs_m6AQTLs_full.rds"))
beta_joint_m6AQTLs_sig.df <- beta_joint_m6AQTLs_full.df[idx_sig_peak_snp_pair, ]
rownames(beta_joint_m6AQTLs_sig.df) <- m6AQTLs_sig.df$peak_snp_pair

pvalue_joint_m6AQTLs_full.df <- readRDS(paste0(dir_combined_data, "/pvalue_jointLCLs_m6AQTLs_full.rds"))
pvalue_joint_m6AQTLs_sig.df <- pvalue_joint_m6AQTLs_full.df[idx_sig_peak_snp_pair, ]
rownames(pvalue_joint_m6AQTLs_sig.df) <- m6AQTLs_sig.df$peak_snp_pair

## free the full tables
rm(m6AQTLs_info_full.df)
rm(beta_joint_m6AQTLs_full.df)
rm(pvalue_joint_m6AQTLs_full.df)

Load m6A peaks, save in BED12 format and convert to BED6 exons

## Build (once) the BED6 file of m6A peaks, then load it and keep the peaks of
## the significant m6A-QTLs.
dir_peakcalling <- paste0("/project2/xinhe/m6A/m6A_seq/m6A_QTL/peakcalling/", m6A_version)
file_peaks_bed12 <- paste0(dir_peakcalling, "/peak_logOR_MeRIPdata.jointPeaks.bed12")
file_peaks_bed6 <- paste0(dir_peakcalling, "/peak_logOR_MeRIPdata.jointPeaks.bed6")

if(!file.exists(file_peaks_bed6)){
  cat("load m6A peaks, save in BED12 format and convert to BED6 exons \n")
  library(MeRIPtools)

  peak_logOR_MeRIPdata <- readRDS(paste0(dir_peakcalling, "/peak_logOR_MeRIPdata.rds"))
  peaks_bed12 <- jointPeak(peak_logOR_MeRIPdata$MeRIPdata)
  ## peak name: "<chr>:<start>-<end>_<name>_<strand>"
  peaks_bed12$name <- paste0(peaks_bed12$chr, ":", peaks_bed12$start, "-",peaks_bed12$end, "_", peaks_bed12$name, "_", peaks_bed12$strand)
  ## autosomes only
  peaks_bed12 <- peaks_bed12[peaks_bed12$chr %in% paste0("chr", 1:22),]

  if(length(setdiff(m6AQTLs_sig.df$PEAK, peaks_bed12$name)) > 0){
    stop("Not all m6A-QTL peaks exist in bed12 peaks!")
  }

  colnames(peaks_bed12)[1] <- "#chr"
  write.table(peaks_bed12, file_peaks_bed12, col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")

  ## The shell redirection creates the BED6 file even when bed12tobed6 fails,
  ## and the file.exists() test above would then skip regeneration on every
  ## later run. Check the exit status and remove the partial output on failure.
  status_bed12tobed6 <- system(paste("bed12tobed6 -i", shQuote(file_peaks_bed12), ">", shQuote(file_peaks_bed6)))
  if(status_bed12tobed6 != 0){
    unlink(file_peaks_bed6)
    stop("bed12tobed6 failed with exit status ", status_bed12tobed6)
  }

}

m6A_peaks_bed6 <- read.table(file_peaks_bed6, header = FALSE, sep = "\t", comment.char = "", stringsAsFactors = FALSE)
colnames(m6A_peaks_bed6) <- c("chr", "start", "end", "PEAK", "score", "strand")

## keep sig peaks
m6A_sig_peaks_bed6 <- m6A_peaks_bed6[m6A_peaks_bed6$PEAK %in% m6AQTLs_sig.df$PEAK, ]

m6A_sig_peaks_bed6.gr <- makeGRangesFromDataFrame(m6A_sig_peaks_bed6, keep.extra.columns = TRUE)

cat(length(unique(m6A_sig_peaks_bed6.gr$PEAK)), "(", length(unique(m6A_sig_peaks_bed6.gr$PEAK))/nrow(m6AQTLs_sig.df)*100, "% )", "sig.m6A peaks in BED6 list \n")

Overlap m6A-QTL peaks with RBP binding sites

RBP within m6A peaks
RBP binding site on the same strand of m6A peaks (transcript)

## overlap m6A peaks with RBP (RBP within m6A peaks)
## type = "within": the RBP site (query) must lie entirely inside the peak
## interval (subject); ignore.strand = FALSE: strands must be compatible.
RBP_peak_overlaps_within <- as.data.frame(findOverlaps(query = RBPs.gr, subject = m6A_sig_peaks_bed6.gr, ignore.strand = FALSE, type = "within"))
colnames(RBP_peak_overlaps_within) <- c("idx_RBP", "idx_peak")

RBP_peak_overlaps_within$RBP <- RBPs.gr[RBP_peak_overlaps_within$idx_RBP]$name
RBP_peak_overlaps_within$PEAK <- m6A_sig_peaks_bed6.gr[RBP_peak_overlaps_within$idx_peak]$PEAK

## setdiff() returns a character vector; any() rejects a non-empty character
## argument, so test the length of the difference instead.
peaks_not_in_bed6 <- setdiff(m6AQTLs_sig.df$PEAK, m6A_sig_peaks_bed6$PEAK)
if(length(peaks_not_in_bed6) > 0){
  cat("Peaks not in bed6 peak list:", peaks_not_in_bed6, "\n")
}

## For every peak with overlaps, collapse its unique RBP names into
## "RBP1,RBP2,...," (every name, including the last, is followed by a comma).
RBPs_by_peak <- vapply(
  split(as.character(RBP_peak_overlaps_within$RBP), RBP_peak_overlaps_within$PEAK),
  function(rbps) paste(c(unique(rbps), ""), collapse = ","),
  character(1)
)

## Look up each m6A-QTL peak by name; peaks without any overlapping RBP get "".
RBPs_overlapped_within <- unname(RBPs_by_peak[as.character(m6AQTLs_sig.df$PEAK)])
RBPs_overlapped_within[is.na(RBPs_overlapped_within)] <- ""
m6AQTLs_sig.df$RBPs_overlapped_within <- RBPs_overlapped_within

Number of peaks bound by each RBP

cat(length(which(m6AQTLs_sig.df$RBPs_overlapped_within != "")), "out of", length(m6AQTLs_sig.df$PEAK), "(",
    length(which(m6AQTLs_sig.df$RBPs_overlapped_within != ""))/length(m6AQTLs_sig.df$PEAK)*100,"% ) sig peaks have RBP binding sites within peaks. \n\n")

## count the number of peaks and genes that each RBP bind to
## RBPs_overlapped_within is a comma-terminated list ("RBP1,RBP2,"). Prefixing
## it with a comma lets each name be matched as the literal ",NAME,", so an RBP
## whose name is the suffix of another one (or contains regex metacharacters)
## is not matched by mistake.
RBPs_overlapped_delimited <- paste0(",", m6AQTLs_sig.df$RBPs_overlapped_within)

m6AsigPeaks_RBP_overlap_counts <- vapply(RBP_list, function(x){
  idx_RBP_overlapped_within <- grep(paste0(",", x, ","), RBPs_overlapped_delimited, fixed = TRUE)
  num_peaks <- length(unique(m6AQTLs_sig.df[idx_RBP_overlapped_within, "PEAK"]))
  num_genes <- length(unique(m6AQTLs_sig.df[idx_RBP_overlapped_within, "gene_name"]))
  c(num_peaks, num_genes)
}, integer(2))
rownames(m6AsigPeaks_RBP_overlap_counts) <- c("num_peaks", "num_genes")

## RBPs ordered by the number of significant peaks they bind
RBP_m6AsigPeaks_within <- sort(m6AsigPeaks_RBP_overlap_counts["num_peaks",], decreasing = TRUE)
print(RBP_m6AsigPeaks_within)

select RBPs with at least 50 gene(peak)-SNP pairs

## minimum number of peaks an RBP must bind to be kept
thresh_RBP <- 50
cat("Show RBPs with at least", thresh_RBP, "gene(peak)-SNP pairs (overlap == within). \n\n")

## per-RBP peak counts that reach the threshold (printed), and their names
RBP_counts_passing_thresh <- RBP_m6AsigPeaks_within[RBP_m6AsigPeaks_within >= thresh_RBP]
RBP_counts_passing_thresh
RBPnames_filtered_m6AsigPeaks_within <- names(RBP_counts_passing_thresh)

Compute effect size correlations, ascertained m6A-QTLs by RBPs

## For each selected RBP: take the m6A-QTLs whose peak contains a binding site
## of that RBP, save their joint effect sizes / p-values, and correlate the m6A
## effect size with the effect size of each other molecular phenotype.
dir_combined_data_RBPs <- paste0(dir_combined_data, "/effectsize_jointLCLs_byRBPs/")
dir.create(dir_combined_data_RBPs, showWarnings = FALSE, recursive = TRUE)

RBP_list_included <- sort(RBPnames_filtered_m6AsigPeaks_within)
cat(length(RBP_list_included), "RBPs included. \n ")

phenotype_list <- c("Expression", "Ribosome", "Protein", "Decay", "APA")

## Template with one row per (RBP, phenotype) combination.
cor_m6APeakAnno.m <- matrix(NA, nrow = length(RBP_list_included), ncol = length(phenotype_list))
colnames(cor_m6APeakAnno.m) <- phenotype_list
rownames(cor_m6APeakAnno.m) <- RBP_list_included

Cor_ByRBP <- melt(cor_m6APeakAnno.m)
Cor_ByRBP <- data.frame(anno = Cor_ByRBP[,1], phenotype = Cor_ByRBP[,2], 
                        cor = NA, r.squared = NA, slope = NA, pvalue = NA, n = NA)

## RBPs_overlapped_within is a comma-terminated list ("RBP1,RBP2,"). Prefixing
## it with a comma lets each RBP be matched as the literal ",NAME,", so a name
## that is the suffix of another RBP name (or contains regex metacharacters)
## does not pull in the wrong peaks.
RBPs_overlapped_delimited <- paste0(",", m6AQTLs_sig.df$RBPs_overlapped_within)

for(i in seq_along(RBP_list_included)){
  RBP_name <- RBP_list_included[i]
  
  idx_matched <- grep(paste0(",", RBP_name, ","), RBPs_overlapped_delimited, fixed = TRUE)
  
  m6AQTLs_sig_RBP.df <- m6AQTLs_sig.df[idx_matched, ]
  ## Sort by p-value so that !duplicated() keeps the strongest association.
  m6AQTLs_sig_RBP.df <- m6AQTLs_sig_RBP.df[order(m6AQTLs_sig_RBP.df$pvalue), ]
  ## Select lead SNPs
  m6AQTLs_sig_RBP.df <- m6AQTLs_sig_RBP.df[!duplicated(m6AQTLs_sig_RBP.df$PEAK), ]
  ## Select the strongest associated peak-snp pair for each gene-snp pair
  m6AQTLs_sig_RBP.df <- m6AQTLs_sig_RBP.df[!duplicated(m6AQTLs_sig_RBP.df$gene_snp_pair), ]
  
  if( length(idx_matched) != nrow(m6AQTLs_sig_RBP.df) ){
    cat(RBP_name, ": ", length(idx_matched), "peak-SNP pairs --> ", nrow(m6AQTLs_sig_RBP.df), "gene-SNP pairs \n")
  }else{
    cat(RBP_name, ": ", nrow(m6AQTLs_sig_RBP.df), "gene-snp pairs \n")
  }
  
  ## Rows are looked up by name: both tables use peak_snp_pair as row names.
  effectsize_joint.df <- beta_joint_m6AQTLs_sig.df[m6AQTLs_sig_RBP.df$peak_snp_pair,]
  pvalue_joint.df <- pvalue_joint_m6AQTLs_sig.df[m6AQTLs_sig_RBP.df$peak_snp_pair,]
  saveRDS(effectsize_joint.df, paste0(dir_combined_data_RBPs, "/effectjoint_", RBP_name, "_overlapWithin.rds"))
  saveRDS(pvalue_joint.df, paste0(dir_combined_data_RBPs, "/pvaluejoint_", RBP_name, "_overlapWithin.rds"))

  for(phenotype in phenotype_list){
    cor_summary <- effect_cor(effectsize_joint.df, "m6A", phenotype)
    ## Columns 3..ncol are cor, r.squared, slope, pvalue, n -- the same order
    ## as the elements returned by effect_cor().
    Cor_ByRBP[Cor_ByRBP$anno == RBP_name & Cor_ByRBP$phenotype == phenotype, 3:ncol(Cor_ByRBP)] <- cor_summary
  }
  
}

Cor_ByRBP$anno <- factor(Cor_ByRBP$anno, levels = rev(RBP_list_included))
Cor_ByRBP$phenotype <- factor(Cor_ByRBP$phenotype, levels = phenotype_list)
saveRDS(Cor_ByRBP, paste0(dir_combined_data, "/cor_effectsize_jointLCLs_RBPs_overlapWithin.rds"))

Plot significant RBPs, adjusting for multiple testing across all five selected phenotypes

## Reload the per-RBP correlation table and adjust the regression p-values for
## multiple testing in three ways (Bonferroni, BH, qvalue).
Cor_ByRBP <- readRDS(paste0(dir_combined_data, "/cor_effectsize_jointLCLs_RBPs_overlapWithin.rds"))

## -log10(p), carrying the sign of the correlation
Cor_ByRBP$sign_nlogP <- sign(Cor_ByRBP$cor) * -log10(Cor_ByRBP$pvalue)

phenotype_selected <- c("APA", "Expression", "Decay", "Ribosome", "Protein")

cat("selected phenotypes: ",phenotype_selected, "\n")

## keep the selected phenotypes, in the order listed above
is_selected_phenotype <- Cor_ByRBP$phenotype %in% phenotype_selected
Cor_ByRBP <- Cor_ByRBP[is_selected_phenotype, ]
Cor_ByRBP$phenotype <- factor(Cor_ByRBP$phenotype, levels = phenotype_selected )

cat("FDR threshold for testing multiple correlations: ", thresh_FDR*100, "%\n")

## adjust over all (RBP, phenotype) tests at once
Cor_ByRBP$p_bonferroni <- p.adjust(Cor_ByRBP$pvalue, method = "bonferroni")
Cor_ByRBP$p_BH <- p.adjust(Cor_ByRBP$pvalue, method = "BH")
Cor_ByRBP$qvalue <- qvalue(Cor_ByRBP$pvalue)$qvalues

## For each method: an RBP is significant when at least one of its phenotypes
## passes the threshold; all rows of such an RBP are kept.

## Bonferroni
anno_sig_bonferroni <- Cor_ByRBP$anno[which(Cor_ByRBP$p_bonferroni < thresh_FDR)]
Cor_ByRBP_sig_bonferroni <- Cor_ByRBP[Cor_ByRBP$anno %in% anno_sig_bonferroni, ]
cat(length(unique(Cor_ByRBP_sig_bonferroni$anno)), "RBPs with adjusted pvalue (Bonferroni method) <", thresh_FDR, "\n")

print(unique(as.character(Cor_ByRBP_sig_bonferroni$anno)))

## Benjamini-Hochberg
anno_sig_BH <- Cor_ByRBP$anno[which(Cor_ByRBP$p_BH < thresh_FDR)]
Cor_ByRBP_sig_BH <- Cor_ByRBP[Cor_ByRBP$anno %in% anno_sig_BH, ]
cat(length(unique(Cor_ByRBP_sig_BH$anno)), "RBPs with BH FDR <", thresh_FDR, "\n")

print(unique(as.character(Cor_ByRBP_sig_BH$anno)))

## qvalue
anno_sig_qvalue <- Cor_ByRBP$anno[which(Cor_ByRBP$qvalue < thresh_FDR)]
Cor_ByRBP_sig_qvalue <- Cor_ByRBP[Cor_ByRBP$anno %in% anno_sig_qvalue, ]
cat(length(unique(Cor_ByRBP_sig_qvalue$anno)), "RBPs with qvalue <", thresh_FDR, "\n")

print(unique(as.character(Cor_ByRBP_sig_qvalue$anno)))

dot heat map for BH FDR

## Dot heat map (BH FDR): all phenotypes of every RBP that has at least one
## correlation with BH-adjusted p-value below thresh_FDR.
Cor_ByRBP_sig <- Cor_ByRBP[Cor_ByRBP$anno %in% Cor_ByRBP[ which(Cor_ByRBP$p_BH < thresh_FDR),"anno"] ,]
cat(length(unique(Cor_ByRBP_sig$anno)), "significant RBPs with BH FDR <", thresh_FDR, "\n")

Cor_ByRBP_sig$phenotype <- factor(Cor_ByRBP_sig$phenotype, levels = phenotype_selected)

if(nrow(Cor_ByRBP_sig) > 0){
  # print(Cor_ByRBP_sig[which(Cor_ByRBP_sig$p_BH < thresh_FDR), ])
  cat("Range of correlations:")
  ## na.rm = TRUE: a phenotype with too few points has cor = NA, which would
  ## otherwise turn the printed range and the colour limits into NA.
  print(round(range(Cor_ByRBP_sig$cor, na.rm = TRUE),2))
  ## symmetric colour scale, padded by 0.2
  cor_limit <- max(abs(Cor_ByRBP_sig$cor), na.rm = TRUE) + 0.2
  
  ## colour = correlation, size = -log10(p); the open circles (shape 1) mark
  ## the individual tests that pass the BH threshold.
  ggplot(Cor_ByRBP_sig)+
    geom_point(aes(x = phenotype, y = anno, colour = cor, size = -log10(pvalue)) )+ 
    geom_point(data = Cor_ByRBP_sig[which(Cor_ByRBP_sig$p_BH < thresh_FDR), ], aes(x = phenotype, y = anno), size = 8,shape = 1 )+
    scale_color_gradient2(midpoint=0, limits = c(-cor_limit, cor_limit) , low="blue", mid="white",
                          high="red", space ="Lab" ) +
    theme_minimal() + # minimal theme
    theme(text = element_text(face = "bold", size = 14),panel.grid = element_blank(),
          axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text = element_text(size = 16, color = "black")) 
}

dot heat map for qvalue

## Dot heat map (qvalue): all phenotypes of every RBP that has at least one
## correlation with qvalue below thresh_FDR.
Cor_ByRBP_sig <- Cor_ByRBP[Cor_ByRBP$anno %in% Cor_ByRBP[ which(Cor_ByRBP$qvalue < thresh_FDR),"anno"] ,]
cat(length(unique(Cor_ByRBP_sig$anno)), "significant RBPs with qvalue <", thresh_FDR, "\n")
Cor_ByRBP_sig$phenotype <- factor(Cor_ByRBP_sig$phenotype, levels = phenotype_selected )

if(nrow(Cor_ByRBP_sig) > 0){
  # print(Cor_ByRBP_sig[which(Cor_ByRBP_sig$qvalue < thresh_FDR), ])
  cat("Range of correlations:")
  ## na.rm = TRUE: a phenotype with too few points has cor = NA, which would
  ## otherwise turn the printed range and the colour limits into NA.
  print(round(range(Cor_ByRBP_sig$cor, na.rm = TRUE),2))
  ## symmetric colour scale, padded by 0.2
  cor_limit <- max(abs(Cor_ByRBP_sig$cor), na.rm = TRUE) + 0.2
  ## colour = correlation, size = -log10(p); the open circles (shape 1) mark
  ## the individual tests that pass the qvalue threshold.
  ggplot(Cor_ByRBP_sig)+
    geom_point(aes(x = phenotype, y = anno, colour = cor , size = -log10(pvalue)) )+ 
    geom_point(data = Cor_ByRBP_sig[which(Cor_ByRBP_sig$qvalue < thresh_FDR), ], aes(x = phenotype, y = anno), size = 8,shape = 1)+
    scale_color_gradient2(midpoint=0, limits = c(-cor_limit, cor_limit) , low="blue", mid="white",
                          high="red", space ="Lab" ) +
    theme_minimal() + # minimal theme
    theme(text = element_text(face = "bold", size = 14),panel.grid = element_blank(),
          axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text = element_text(size = 16, color = "black")) 
}

dot heat map for Bonferroni adjusted pvalue

## Dot heat map (Bonferroni): all phenotypes of every RBP that has at least one
## correlation with Bonferroni-adjusted p-value below thresh_FDR.
Cor_ByRBP_sig <- Cor_ByRBP[Cor_ByRBP$anno %in% Cor_ByRBP[ which(Cor_ByRBP$p_bonferroni < thresh_FDR),"anno"],]
cat(length(unique(Cor_ByRBP_sig$anno)), "significant RBPs with Bonferroni adjusted pvalue <", thresh_FDR, "\n")

Cor_ByRBP_sig$phenotype <- factor(Cor_ByRBP_sig$phenotype, levels = phenotype_selected )

if(nrow(Cor_ByRBP_sig) > 0){
  # print(Cor_ByRBP_sig[which(Cor_ByRBP_sig$p_bonferroni < thresh_FDR), ])
  cat("Range of correlations:")
  ## na.rm = TRUE: a phenotype with too few points has cor = NA, which would
  ## otherwise turn the printed range and the colour limits into NA.
  print(round(range(Cor_ByRBP_sig$cor, na.rm = TRUE),2))
  ## symmetric colour scale, padded by 0.2
  cor_limit <- max(abs(Cor_ByRBP_sig$cor), na.rm = TRUE) + 0.2
  
  ## colour = correlation, size = -log10(p); the open circles (shape 1) mark
  ## the individual tests that pass the Bonferroni threshold.
  ggplot(Cor_ByRBP_sig)+
    geom_point(aes(x = phenotype, y = anno, colour = cor, size = -log10(pvalue)) )+
    geom_point(data = Cor_ByRBP_sig[which(Cor_ByRBP_sig$p_bonferroni < thresh_FDR), ], aes(x = phenotype, y = anno), size = 8,shape = 1 )+
    scale_color_gradient2(midpoint=0, limits = c(-cor_limit, cor_limit) , low="blue", mid="white",
                          high="red", space ="Lab" ) +
    theme_minimal() + # minimal theme
    theme(text = element_text(face = "bold", size = 14),panel.grid = element_blank(),
          axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text = element_text(size = 16, color = "black"))
}

## Print the R version, platform and the attached/loaded package versions used for this run.
sessionInfo()