Last updated: 2020-04-25
Checks: 7 0
Knit directory: m6AQTL_reproducibleDocument/
This reproducible R Markdown analysis was created with workflowr (version 1.6.0). The Checks tab describes the reproducibility checks that were applied when the results were created. The Past versions tab lists the development history.
Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.
Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.
The command set.seed(20200317)
was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.
Great job! Recording the operating system, R version, and package versions is critical for reproducibility.
Nice! There were no cached chunks for this analysis, so you can be confident that you successfully produced the results during this run.
Great job! Using relative paths to the files within your workflowr project makes it easier to run your code on other machines.
Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated.
Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use wflow_publish
or wflow_git_commit
). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:
Ignored files:
Ignored: .Rhistory
Ignored: .Rproj.user/
Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.
These are the previous versions of the R Markdown and HTML files. If you’ve configured a remote Git repository (see ?wflow_git_remote
), click on the hyperlinks in the table below to view them.
File | Version | Author | Date | Message |
---|---|---|---|---|
Rmd | 5c3bc54 | kevinlkx | 2020-04-25 | wflow_publish(“analysis/TWAS_analysis.Rmd”) |
html | 940a631 | kevinlkx | 2020-03-18 | Build site. |
Rmd | bb683c7 | kevinlkx | 2020-03-18 | wflow_publish(“analysis/TWAS_analysis.Rmd”) |
--bfile
genotypes to the set of markers in the LD reference data (1000G EUR) provided by FUSIONwindow_size
= 1e5.sbatch ~/projects/m6A/m6AQTL_workflowr/code/train_m6AQTL_TWAS_weights_using_fusion_PCsRemoved.sbatch /project2/xinhe/m6A/fusion_twas/results/TWAS_m6AQTL_full_100kp_PCsRemoved 1e5
m6AQTL_workflowr/code/train_m6AQTL_TWAS_weights_using_fusion_PCsRemoved.sbatch
#!/bin/bash
output_dir=$1
window_size=$2
num_cores=15
PLINK_prefix="/project2/xinhe/m6A/genotype_plink_bfiles/Imputed.sixty.hg19"
phenotype_file="/project2/xinhe/m6A/data_shared/m6A_QTLs/jointPeak_threshold5_MeRIP_HISAT2Map/phenotype_data/m6APeak_logOR_GC.IP.adjusted_qqnorm/plink/m6APeak_logOR_GC.IP.adjusted_qqnorm.phenotype.PCsRemoved.txt"
coordinate_file="/project2/xinhe/m6A/data_shared/m6A_QTLs/jointPeak_threshold5_MeRIP_HISAT2Map/phenotype_data/m6APeak_logOR_GC.IP.adjusted_qqnorm/plink/m6APeak_logOR_GC.IP.adjusted_qqnorm.coordinate.txt"
module load R/3.5.1
## Training Fusion weights
Rscript ~/projects/m6A/m6AQTL_workflowr/code/train_m6AQTL_TWAS_weights_using_fusion.R \
--PLINK_prefix ${PLINK_prefix} \
--phenotype_file ${phenotype_file} \
--coordinate_file ${coordinate_file} \
--ld_ref_dir "/project2/xinhe/m6A/fusion_twas/LDREF" \
--window_size ${window_size} \
--output_dir ${output_dir} \
--num_cores ${num_cores} \
--fusion_software "/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master" \
--plink "/project2/xinhe/kevinluo/software/plink_1.9/plink" \
--gcta "/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master/gcta_nr_robust" \
--gemma "/project2/xinhe/kevinluo/software/gemma-0.98.1/gemma"
## Packaging Fusion weights
Rscript ~/projects/m6A/m6AQTL_workflowr/code/packaging_fusion_weights.R \
--RDat_dir ${output_dir}/Output \
--coordinate_file ${coordinate_file} \
--output_name m6AQTL_TWAS_Weights \
--output_dir ${output_dir}/WEIGHTS
train_m6AQTL_TWAS_weights_using_fusion.R
: Train m6A-TWAS weights using Fusion’s FUSION.compute_weights.R
functionm6AQTL_workflowr/code/train_m6AQTL_TWAS_weights_using_fusion.R
#!/usr/bin/Rscript
## train TWAS weights
## Adapted from https://github.com/opain/Calculating-FUSION-TWAS-weights-pipeline
suppressMessages(library(dplyr))
suppressMessages(library(foreach))
suppressMessages(library(doParallel))
suppressMessages(library(data.table))
suppressMessages(library(optparse))
option_list = list(
make_option("--PLINK_prefix", action="store", default=NA, type='character',
help="Prefix of PLINK files for the genotype [required]"),
make_option("--phenotype_file", action="store", default=NA, type='character',
help="File name for normalised and adjusted expression data [required]"),
make_option("--coordinate_file", action="store", default=NA, type='character',
help="File name for coordinates of genes/transcripts [required]"),
make_option("--covariate_file", action="store", default=NA, type='character',
help="File name for covariates [optional]"),
make_option("--ld_ref_dir", action="store", default=NA, type='character',
help="FUSION LD reference directory [required]"),
make_option("--window_size", action="store", default=0.5e6, type='integer',
help="Window size (default = 500 kb) [optional]"),
make_option("--output_dir", action="store", default=NA, type='character',
help="Directory name for the output [required]"),
make_option("--num_cores", action="store", default=6, type='integer',
help="number of cores (default = 6) [optional]"),
make_option("--fusion_software", action="store", default=NA, type='character',
help="FUSION software directory [required]"),
make_option("--plink", action="store", default="plink", type='character',
help="Path to PLINK [required]"),
make_option("--gcta", action="store", default="gcta_nr_robust", type='character',
help="Path to gcta_nr_robust binary [required]"),
make_option("--gemma", action="store", default="gemma", type='character',
help="Path to gemma binary [required]")
)
opt = parse_args(OptionParser(option_list=option_list))
print(opt)
dir.create(paste0(opt$output_dir,'/Output'), showWarnings = F, recursive = T)
dir.create(paste0(opt$output_dir,'/log'), showWarnings = F, recursive = T)
dir.create(paste0(opt$output_dir,'/temp'), showWarnings = F, recursive = T)
setwd(paste0(opt$output_dir,'/temp'))
system(paste0('ln -sf ./ output'))
sink(file = paste0(opt$output_dir,'/log/train_m6AQTL_TWAS_weights_using_fusion.log'))
print(opt)
# Read in the gene phenotype data to extract the gene's chromosome and boundary coordinates
peak_coordinates <- read.table(opt$coordinate_file, header = T, stringsAsFactors = F)
colnames(peak_coordinates)[1:4] <- c("chr", "start", "end", "ID")
peak_coordinates <- peak_coordinates[peak_coordinates$chr %in% c(1:22), ]
peak_IDs <- peak_coordinates$ID
cat(length(peak_IDs), "peaks \n")
registerDoParallel(cores = opt$num_cores)
# cat(getDoParWorkers(), "DoPar workers \n")
status_peaks <- foreach(iPeak = peak_IDs, .combine = rbind) %dopar%{
CHR <- peak_coordinates$chr[peak_coordinates$ID == iPeak]
Start <- peak_coordinates$start[peak_coordinates$ID == iPeak] - opt$window_size
Stop <- peak_coordinates$end[peak_coordinates$ID == iPeak] + opt$window_size
if (Start < 0) {Start <- 0}
opt$PLINK_bfile <- paste0(opt$PLINK_prefix, ".chr", CHR)
# Extract peak from phenotype file
phenotype_gene <- fread(opt$phenotype_file, select = c("FID", "IID", iPeak))
fwrite(phenotype_gene, paste0(opt$output_dir,'/temp/temp_',iPeak,'.pheno'), sep = "\t")
# Using PLINK, extract variants within the specified peak +/- window_size from start and stop coordinates
cmd_plink <- paste0(opt$plink,' --allow-no-sex --silent', ' --bfile ',opt$PLINK_bfile,
' --make-bed --pheno ',opt$output_dir,'/temp/temp_',iPeak,'.pheno',
# ' --covar ', opt$covariate_file,
' --out ', opt$output_dir,'/temp/temp_',iPeak,
' --keep ', opt$output_dir,'/temp/temp_',iPeak,'.pheno',
' --chr ',CHR,' --from-bp ',Start,' --to-bp ',Stop,
' --extract ',opt$ld_ref_dir,'/1000G.EUR.',CHR,'.bim')
err_plink <- try(system(cmd_plink), silent = TRUE)
if (!file.exists(paste0(opt$output_dir,'/temp/temp_',iPeak, '.bim'))) {
cat(paste('No SNPs within peak +/-', opt$window_size, 'bp! \n'))
write.table(paste('No SNPs within peak +/-', opt$window_size, 'bp'), paste(opt$output_dir,'/Output/',iPeak,'.err',sep=''), col.names=F, row.names=F, quote=F)
peak_status <- c(iPeak, "No SNPs")
} else {
# Using FUSION, calculate the weights for the genes expression using subset of genotypic data.
cmd_fusion <- paste0('Rscript ',opt$fusion_software,'/FUSION.compute_weights.R',
' --bfile ', opt$output_dir,'/temp/temp_',iPeak,
' --tmp ', opt$output_dir,'/temp/temp_',iPeak,'.tmp',
' --out ', opt$output_dir,'/Output/',iPeak,
' --verbose 0 --save_hsq',
' --PATH_gcta ',opt$gcta,
' --PATH_gemma ',opt$gemma,
' --PATH_plink ',opt$plink,
' --models top1,lasso,enet')
if(!is.na(opt$covariate_file) & (opt$covariate_file != "NA") & file.exists(opt$covariate_file)){
# cat("load covariate file:", opt$covariate_file, "\n")
cmd_fusion <- paste0(cmd_fusion, ' --covar ', opt$covariate_file)
}
# cat('FUSION command: \n', cmd_fusion, '\n', sep = '')
system(cmd_fusion)
peak_status <- c(iPeak, "Done")
}
# Delete the temporary files
system(paste0('rm ',opt$output_dir,'/temp/temp_', iPeak,'*'))
return(peak_status)
}
cat('Results saved at:', paste0(opt$output_dir,'/Output/'), '\n')
write.table(status_peaks, paste0(opt$output_dir,'/log/status_peaks.txt'), col.names = F, row.names = F, quote = F, sep = '\t')
sink()
cat('Done. Results saved at:', paste0(opt$output_dir,'/Output/'), '\n')
packaging_fusion_weights.R
: Combine trained weights across peaks/genesm6AQTL_workflowr/code/packaging_fusion_weights.R
#!/usr/bin/Rscript
## Combine trained weights across peaks/genes
## Adapted from https://github.com/opain/Calculating-FUSION-TWAS-weights-pipeline
library("optparse")
option_list = list(
make_option("--RDat_dir", action="store", default=NA, type='character',
help="Directory where FUSION RDat files are stored [required]"),
make_option("--coordinate_file", action="store", default=NA, type='character',
help="Coordinate files for the target sample [required]"),
make_option("--output_name", action="store", default=NA, type='character',
help="File name for the output [required]"),
make_option("--output_dir", action="store", default=NA, type='character',
help="Directoryfor the output [required]")
)
opt = parse_args(OptionParser(option_list=option_list))
library(data.table)
# Create folder and insert .wgt.RDat files
system(paste('mkdir -p ',opt$output_dir,'/',opt$output_name,sep=''))
system(paste('cp ',opt$RDat_dir,'/*.RDat ',opt$output_dir,'/',opt$output_name,'/',sep=''))
# Create file containing a list of the .wgt.RDat files
# setwd(opt$RDat_dir)
temp = list.files(path = opt$RDat_dir, pattern="*.RDat")
temp_withPath<-paste(opt$output_name,'/', temp, sep='')
write.table(temp_withPath, paste(opt$output_dir,'/',opt$output_name,'.list',sep=''), col.names=F, row.names=F, quote=F)
# Create a file containing wgt.RDat file name, Gene ID, CHR, P0 and P1.
pos_temp<-data.frame( WGT=temp_withPath,
ID=gsub('.wgt.RDat','',gsub('Fetal_','',temp)))
Gene_coordinates_file<-read.table(opt$coordinate_file, header=T, stringsAsFactors=F)
pos_temp_2<-merge(pos_temp, Gene_coordinates_file[1:4], by='ID')
names(pos_temp_2)<-c('ID','WGT','CHR','P0','P1')
pos_temp_2<-pos_temp_2[c('WGT','ID','CHR','P0','P1')]
pos_temp_2_sort<-pos_temp_2[order(pos_temp_2$CHR,pos_temp_2$P0),]
write.table(pos_temp_2_sort, paste(opt$output_dir,'/',opt$output_name,'.pos',sep=''), col.names=T, row.names=F, quote=F)
# Create a file containing the Gene ID, nsnps, hsq, hsq.se, hsq.pv, top1.r2, blup.r2, enet.r2, bslmm.r2, lasso.r2, top1.pv, blup.pv, enet.pv, bslmm.pv and lasso.pv (.profile)
profile_temp<-data.frame( ID=gsub('.wgt.RDat','',temp),
nsnps=NA,
hsq=NA,
hsq.se=NA,
hsq.pv=NA,
top1.r2=NA,
# blup.r2=NA,
enet.r2=NA,
lasso.r2=NA,
top1.pv=NA,
# blup.pv=NA,
enet.pv=NA,
lasso.pv=NA)
for(i in 1:length(temp)){
load(paste(opt$RDat_dir,'/',temp[i], sep=''))
profile_temp$nsnps[i]<-dim(snps)[1]
profile_temp$hsq[i]<-hsq[1]
profile_temp$hsq.se[i]<-hsq[2]
profile_temp$hsq.pv[i]<-hsq.pv
cv.performance<-data.frame(cv.performance)
profile_temp$top1.r2[i]<-cv.performance$top1[1]
# profile_temp$blup.r2[i]<-cv.performance$blup[1]
profile_temp$enet.r2[i]<-cv.performance$enet[1]
profile_temp$lasso.r2[i]<-cv.performance$lasso[1]
profile_temp$top1.pv[i]<-cv.performance$top1[2]
# profile_temp$blup.pv[i]<-cv.performance$blup[2]
profile_temp$enet.pv[i]<-cv.performance$enet[2]
profile_temp$lasso.pv[i]<-cv.performance$lasso[2]
}
write.table(profile_temp,paste(opt$output_dir,'/',opt$output_name,'.profile',sep=''), col.names=T, row.names=F, quote=F)
# Create a file comparing the different models
se <- function(x) sd(x)/sqrt(length(x))
BEST_r2<-c( profile_temp$top1.r2[profile_temp$top1.r2 == max(profile_temp$top1.r2)],
# profile_temp$blup.r2[profile_temp$blup.r2 == max(profile_temp$blup.r2)],
profile_temp$enet.r2[profile_temp$enet.r2 == max(profile_temp$enet.r2)],
profile_temp$lasso.r2[profile_temp$lasso.r2 == max(profile_temp$lasso.r2)])
# profile_temp_r2<-profile_temp[c('top1.r2','blup.r2','enet.r2','lasso.r2')]
profile_temp_r2<-profile_temp[c('top1.r2','enet.r2','lasso.r2')]
for(k in 1:dim(profile_temp_r2)[1]){
profile_temp_r2[k,][profile_temp_r2[k,] != max(profile_temp_r2[k,])]<-NA
}
sink(file = paste(opt$output_dir,'/',opt$output_name,'.profile.err',sep=''))
cat('Average hsq: ',mean(profile_temp$hsq),' ( ',se(profile_temp$hsq),' )
Average crossvalidation R2:
R2\tSE
top1\t',round(mean(profile_temp$top1.r2),3),'\t',round(se(profile_temp$top1.r2),5),'
blup\t',NA,'\t',NA,'
enet\t',round(mean(profile_temp$enet.r2),3),'\t',round(se(profile_temp$enet.r2),5),'
bslmm\t',NA,'\t',NA,'
lasso\t',round(mean(profile_temp$lasso.r2),3),'\t',round(se(profile_temp$lasso.r2),5),'
BEST\t',round(max(BEST_r2),3),'
% Model is best:
top1:\t',round(sum(!is.na(profile_temp_r2$top1.r2))/length(profile_temp_r2$top1.r2)*100,1),'%
blup:\t',NaN,'%
enet:\t',round(sum(!is.na(profile_temp_r2$enet.r2))/length(profile_temp_r2$enet.r2)*100,1),'%
bslmm:\t',NaN,'%
lasso:\t',round(sum(!is.na(profile_temp_r2$lasso.r2))/length(profile_temp_r2$lasso.r2)*100,1),'%\n', sep='')
sink()
cat('Results saved at:', opt$output_dir, '\n')
m6AQTL_workflowr/code/TWAS_assoc_test.sbatch
#!/bin/bash
trait=$1
dir_GWAS=$2
dir_TWAS=$3
name_weights=$4
dir_FUSION="/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master"
dir_ld_ref="/project2/xinhe/m6A/fusion_twas/LDREF"
dir_assoc_test=${dir_TWAS}/FUSION_assoc_test/${trait}
module load R/3.5.1
mkdir -p ${dir_assoc_test}
cd ${dir_assoc_test}
for CHR in {1..22}
do
echo "run FUSION.assoc_test.R for ${trait}"
echo "chr${CHR}"
Rscript ${dir_FUSION}/FUSION.assoc_test.R \
--sumstats ${dir_GWAS}/${trait}.sumstats.gz \
--weights ${dir_TWAS}/WEIGHTS/${name_weights}.pos \
--weights_dir ${dir_TWAS}/WEIGHTS/ \
--ref_ld_chr ${dir_ld_ref}/1000G.EUR. \
--chr ${CHR} \
--out ${dir_assoc_test}/${trait}.${CHR}.dat
done
m6AQTL_workflowr/code/combine_TWAS_coloc_results_chrs.R
args <- commandArgs(trailingOnly = TRUE)
dir_TWAS <- args[1]
trait <- args[2]
library(qvalue)
library(BH)
combine_TWAS_table <- function(trait, dir_TWAS){
dir_TWAS_assoc_test <- paste0(dir_TWAS, "/FUSION_assoc_test/", trait)
result_TWAS_noMHC <- data.frame()
for(CHR in 1:22){
result_TWAS_chr <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".", CHR, ".coloc.dat"), header = T, stringsAsFactors = F)
result_TWAS_chr <- result_TWAS_chr[, -c(1,2)]
result_TWAS_noMHC <- rbind(result_TWAS_noMHC, result_TWAS_chr)
}
result_TWAS_MHC <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".6.coloc.dat.MHC"), header = T, stringsAsFactors = F)
result_TWAS_MHC <- result_TWAS_MHC[, -c(1,2)]
result_TWAS <- rbind(result_TWAS_noMHC, result_TWAS_MHC)
result_TWAS_noMHC$TWAS.P.Bonferroni <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "bonferroni")
result_TWAS_noMHC$TWAS.FDR <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "BH")
result_TWAS_noMHC$TWAS.qvalue <- qvalue(result_TWAS_noMHC$TWAS.P, lambda = seq(0.05, min(max(result_TWAS_noMHC$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
result_TWAS$TWAS.P.Bonferroni <- p.adjust(result_TWAS$TWAS.P, method = "bonferroni")
result_TWAS$TWAS.FDR <- p.adjust(result_TWAS$TWAS.P, method = "BH")
result_TWAS$TWAS.qvalue <- qvalue(result_TWAS$TWAS.P, lambda = seq(0.05, min(max(result_TWAS$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".coloc.noMHC.result")
write.table(result_TWAS_noMHC, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".coloc.result")
write.table(result_TWAS, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
}
combine_TWAS_table(trait, dir_TWAS)
--bfile
genotypes to the set of markers in the LD reference data (1000G EUR) provided by FUSIONwindow_size
= 1e5.sbatch ~/projects/m6A/m6AQTL_workflowr/code/train_QTL_TWAS_weights_YangGeno_using_fusion.sbatch /project2/xinhe/m6A/fusion_twas/results/TWAS_eQTL_Geuvadis_100kp_PCsRemoved eQTL_TWAS_Weights 1e5 PCsRemoved fastqtl_qqnorm_RNAseqGeuvadis_phase2 15
sbatch ~/projects/m6A/m6AQTL_workflowr/code/train_QTL_TWAS_weights_YangGeno_using_fusion.sbatch /project2/xinhe/m6A/fusion_twas/results/TWAS_sQTL_Geuvadis_100kp_PCsRemoved sQTL_TWAS_Weights 1e5 PCsRemoved fastqtl_qqnorm_ASintron_RNAseqGeuvadis_YangVCF 3
m6AQTL_workflowr/code/train_QTL_TWAS_weights_YangGeno_using_fusion.sbatch
#!/bin/bash
output_dir=$1
output_name=$2
window_size=$3
withPCs=$4
phenotype_name=$5
num_PCs=$6
num_cores=15
dir_phenotype_data=/project2/compbio/jointLCLs/phenotypes_plink_format/${phenotype_name}
if [ "${withPCs}" == "PCsRemoved" ]; then
echo "PCs regressed out"
phenotype_file="${dir_phenotype_data}/${phenotype_name}.phenotype.PCsRemoved.txt"
covariate_file=NA
else
echo "no PCs"
phenotype_file="${dir_phenotype_data}/${phenotype_name}.phenotype.txt"
covariate_file=NA
fi
coordinate_file="${dir_phenotype_data}/${phenotype_name}.coordinate.txt"
echo "TWAS for ${phenotype_name}, covariate file: ${covariate_file}"
PLINK_prefix="/project2/compbio/jointLCLs/genotype/hg19/YRI/genotype_plink_bfiles/YRI.120samples.hg19"
module load R/3.5.1
## Training Fusion weights
Rscript ~/projects/m6A/m6AQTL_workflowr/code/train_QTL_TWAS_weights_using_fusion.R \
--PLINK_prefix ${PLINK_prefix} \
--phenotype_file ${phenotype_file} \
--coordinate_file ${coordinate_file} \
--covariate_file ${covariate_file} \
--ld_ref_dir "/project2/xinhe/m6A/fusion_twas/LDREF" \
--window_size ${window_size} \
--output_dir ${output_dir} \
--num_cores ${num_cores} \
--fusion_software "/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master" \
--plink "/project2/xinhe/kevinluo/software/plink_1.9/plink" \
--gcta "/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master/gcta_nr_robust" \
--gemma "/project2/xinhe/kevinluo/software/gemma-0.98.1/gemma"
## Packaging Fusion weights and create
Rscript ~/projects/m6A/m6AQTL_workflowr/code/packaging_fusion_weights.R \
--RDat_dir ${output_dir}/Output \
--coordinate_file ${coordinate_file} \
--output_name ${output_name} \
--output_dir ${output_dir}/WEIGHTS
m6AQTL_workflowr/code/train_QTL_TWAS_weights_using_fusion.R
#!/usr/bin/Rscript
## train TWAS weight
## Adapted from https://github.com/opain/Calculating-FUSION-TWAS-weights-pipeline
suppressMessages(library(dplyr))
suppressMessages(library(foreach))
suppressMessages(library(doParallel))
suppressMessages(library(data.table))
suppressMessages(library(optparse))
option_list = list(
make_option("--PLINK_prefix", action="store", default=NA, type='character',
help="Prefix of PLINK files for the genotype [required]"),
make_option("--phenotype_file", action="store", default=NA, type='character',
help="File name for normalised and adjusted expression data [required]"),
make_option("--coordinate_file", action="store", default=NA, type='character',
help="File name for coordinates of genes/transcripts [required]"),
make_option("--covariate_file", action="store", default=NA, type='character',
help="File name for covariates [optional]"),
make_option("--ld_ref_dir", action="store", default=NA, type='character',
help="FUSION LD reference directory [required]"),
make_option("--window_size", action="store", default=0.5e6, type='integer',
help="Window size (default = 500 kb) [optional]"),
make_option("--output_dir", action="store", default=NA, type='character',
help="Directory name for the output [required]"),
make_option("--num_cores", action="store", default=6, type='integer',
help="number of cores (default = 6) [optional]"),
make_option("--fusion_software", action="store", default=NA, type='character',
help="FUSION software directory [required]"),
make_option("--plink", action="store", default="plink", type='character',
help="Path to PLINK [required]"),
make_option("--gcta", action="store", default="gcta_nr_robust", type='character',
help="Path to gcta_nr_robust binary [required]"),
make_option("--gemma", action="store", default="gemma", type='character',
help="Path to gemma binary [required]")
)
opt = parse_args(OptionParser(option_list=option_list))
print(opt)
dir.create(paste0(opt$output_dir,'/Output'), showWarnings = F, recursive = T)
dir.create(paste0(opt$output_dir,'/log'), showWarnings = F, recursive = T)
dir.create(paste0(opt$output_dir,'/temp'), showWarnings = F, recursive = T)
setwd(paste0(opt$output_dir,'/temp'))
system(paste0('ln -sf ./ output'))
cat("log saved at: ", paste0(opt$output_dir,'/log/train_TWAS_weights_using_fusion.log'), "\n")
sink(file = paste0(opt$output_dir,'/log/train_TWAS_weights_using_fusion.log'))
print(opt)
# Read in the gene phenotype data to extract the gene's chromosome and boundary coordinates
gene_coordinates <- read.table(opt$coordinate_file, header = T, stringsAsFactors = F)
colnames(gene_coordinates)[1:4] <- c("chr", "start", "end", "ID")
gene_coordinates <- gene_coordinates[gene_coordinates$chr %in% c(1:22), ]
gene_IDs <- gene_coordinates$ID
cat(length(gene_IDs), "genes \n")
registerDoParallel(cores = opt$num_cores)
# cat(getDoParWorkers(), "DoPar workers \n")
status_genes <- foreach(iGene = gene_IDs, .combine = rbind) %dopar%{
CHR <- gene_coordinates$chr[gene_coordinates$ID == iGene]
Start <- gene_coordinates$start[gene_coordinates$ID == iGene] - opt$window_size
Stop <- gene_coordinates$end[gene_coordinates$ID == iGene] + opt$window_size
if (Start < 0) {Start <- 0}
opt$PLINK_bfile <- paste0(opt$PLINK_prefix, ".chr", CHR)
# Extract gene from phenotype file
phenotype_gene <- fread(opt$phenotype_file, select = c("FID", "IID", iGene))
fwrite(phenotype_gene, paste0(opt$output_dir,'/temp/temp_',iGene,'.pheno'), sep = "\t")
# Using PLINK, extract variants within the specified gene +/- window_size from start and stop coordinates
cmd_plink <- paste0(opt$plink,' --allow-no-sex --silent', ' --bfile ',opt$PLINK_bfile,
' --make-bed --pheno ',opt$output_dir,'/temp/temp_',iGene,'.pheno',
# ' --covar ', opt$covariate_file,
' --out ', opt$output_dir,'/temp/temp_',iGene,
' --keep ', opt$output_dir,'/temp/temp_',iGene,'.pheno',
' --chr ',CHR,' --from-bp ',Start,' --to-bp ',Stop,
' --extract ',opt$ld_ref_dir,'/1000G.EUR.',CHR,'.bim')
err_plink <- try(system(cmd_plink), silent = TRUE)
if (!file.exists(paste0(opt$output_dir,'/temp/temp_',iGene, '.bim'))) {
cat(paste('No SNPs within gene +/-', opt$window_size, 'bp! \n'))
write.table(paste('No SNPs within gene +/-', opt$window_size, 'bp'), paste(opt$output_dir,'/Output/',iGene,'.err',sep=''), col.names=F, row.names=F, quote=F)
gene_status <- c(iGene, "No SNPs")
} else {
# Using FUSION, calculate the weights for the genes expression using subset of genotypic data.
cmd_fusion <- paste0('Rscript ',opt$fusion_software,'/FUSION.compute_weights.R',
' --bfile ', opt$output_dir,'/temp/temp_',iGene,
' --tmp ', opt$output_dir,'/temp/temp_',iGene,'.tmp',
' --out ', opt$output_dir,'/Output/',iGene,
' --verbose 0 --save_hsq',
' --PATH_gcta ',opt$gcta,
' --PATH_gemma ',opt$gemma,
' --PATH_plink ',opt$plink,
' --models top1,lasso,enet')
if(!is.na(opt$covariate_file) & (opt$covariate_file != "NA") & file.exists(as.character(opt$covariate_file))){
# cat("load covariate file:", opt$covariate_file, "\n")
cmd_fusion <- paste0(cmd_fusion, ' --covar ', opt$covariate_file)
}
# cat('FUSION command: \n', cmd_fusion, '\n', sep = '')
system(cmd_fusion)
gene_status <- c(iGene, "Done")
}
# Delete the temporary files
system(paste0('rm ',opt$output_dir,'/temp/temp_', iGene,'*'))
return(gene_status)
}
cat('Results saved at:', paste0(opt$output_dir,'/Output/'), '\n')
write.table(status_genes, paste0(opt$output_dir,'/log/status_genes.txt'), col.names = F, row.names = F, quote = F, sep = '\t')
sink()
cat('Done. Results saved at:', paste0(opt$output_dir,'/Output/'), '\n')
m6AQTL_workflowr/code/TWAS_conditional_test.R
args <- commandArgs(trailingOnly = TRUE)
trait <- args[1]
dir_GWAS <- args[2]
library(BH)
library(qvalue)
TWAS_conditional_test <- function(trait, sig_TWAS_ID, CHR, dir_TWAS, dir_GWAS, dir_post_process){
dir.create(dir_post_process, showWarnings = F, recursive = T)
dir_TWAS_assoc_test <- paste0(dir_TWAS, "/FUSION_assoc_test/", trait)
result_TWAS_chr <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".", CHR, ".dat"), header = T, stringsAsFactors = F)
top_TWAS_chr <- result_TWAS_chr[result_TWAS_chr$ID %in% sig_TWAS_ID, ]
peak_coordinates <- gsub("chr.+:", "", sapply(strsplit(top_TWAS_chr$ID, "_"), "[[", 1))
peak_start <- as.integer(sapply(strsplit(peak_coordinates, "-"), "[[", 1))
peak_end <- as.integer(sapply(strsplit(peak_coordinates, "-"), "[[", 2))
top_TWAS_chr$P0 <- peak_start
top_TWAS_chr$P1 <- peak_end
top_TWAS_gene <- sapply(strsplit(top_TWAS_chr$ID, "_"), "[[", 2)
if(nrow(top_TWAS_chr) == 0){
cat("No significant genes. \n")
}else{
write.table(top_TWAS_chr, paste0(dir_post_process, "/", trait, ".", CHR, ".dat"), col.names = T, row.names = F, quote = F)
## TWAS joint/conditional test
cat("TWAS joint/conditional test:", paste0(dir_post_process,'/', trait, '.', CHR, '.top.analysis'), "\n")
cmd_fusion <- paste0("Rscript ", dir_FUSION,'/FUSION.post_process.R',
' --sumstats ', dir_GWAS,'/', trait, '.sumstats.gz',
' --input ', dir_post_process,'/', trait, '.', CHR, '.dat',
' --out ', dir_post_process,'/', trait, '.', CHR, '.top.analysis',
' --ref_ld_chr ', dir_ld_ref, '/1000G.EUR.',
' --chr ', CHR,
' --plot --locus_win 100000')
system(cmd_fusion)
}
}
TWAS_conditional_test_locus <- function(trait, sig_TWAS_ID, sig_TWAS_result, dir_TWAS, dir_GWAS, dir_post_process, locus_win = 100000){
dir.create(dir_post_process, showWarnings = F, recursive = T)
dir_TWAS_assoc_test <- paste0(dir_TWAS, "/FUSION_assoc_test/", trait)
CHR <- sig_TWAS_result[sig_TWAS_result$ID == sig_TWAS_ID, "CHR"]
sig_TWAS_ID_chr <- sig_TWAS_result[sig_TWAS_result$CHR == CHR, "ID"]
result_TWAS_chr <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".", CHR, ".dat"), header = T, stringsAsFactors = F)
top_TWAS_chr <- result_TWAS_chr[result_TWAS_chr$ID %in% sig_TWAS_ID_chr, ]
peak_coordinates <- gsub("chr.+:", "", sapply(strsplit(top_TWAS_chr$ID, "_"), "[[", 1))
peak_start <- as.integer(sapply(strsplit(peak_coordinates, "-"), "[[", 1))
peak_end <- as.integer(sapply(strsplit(peak_coordinates, "-"), "[[", 2))
top_TWAS_chr$P0 <- peak_start
top_TWAS_chr$P1 <- peak_end
# top_TWAS_chr$GENE <- sapply(strsplit(top_TWAS_chr$ID, "_"), "[[", 2)
locus_start <- top_TWAS_chr[top_TWAS_chr$ID == sig_TWAS_ID, "P0"] - locus_win
locus_end <- top_TWAS_chr[top_TWAS_chr$ID == sig_TWAS_ID, "P1"] + locus_win
top_TWAS_locus <- top_TWAS_chr[top_TWAS_chr$P0 > locus_start & top_TWAS_chr$P1 < locus_end, ]
GENE <- sapply(strsplit(sig_TWAS_ID, "_"), "[[", 2)
locus_name <- paste0(GENE, "_", "chr", CHR, "_", locus_start, "_", locus_end)
if(nrow(top_TWAS_locus) == 0){
cat("No significant genes. \n")
}else{
write.table(top_TWAS_locus, paste0(dir_post_process, "/", trait, ".", locus_name, ".dat"), col.names = T, row.names = F, quote = F)
## TWAS joint/conditional test
cat("TWAS joint/conditional test:", "\n")
cmd_fusion <- paste0("Rscript ", dir_FUSION,'/FUSION.post_process.R',
' --sumstats ', dir_GWAS,'/', trait, '.sumstats.gz',
' --input ', dir_post_process,'/', trait, '.', locus_name, '.dat',
' --out ', dir_post_process,'/', trait, '.', locus_name,
' --ref_ld_chr ', dir_ld_ref, '/1000G.EUR.',
' --chr ', CHR,
' --plot --locus_win ', locus_win)
system(cmd_fusion)
}
}
combine_TWAS_table <- function(trait, dir_TWAS, withMHC = FALSE, write_output = TRUE){
dir_TWAS_assoc_test <- paste0(dir_TWAS, "/FUSION_assoc_test/", trait)
result_TWAS_noMHC <- data.frame()
for(CHR in 1:22){
result_TWAS_chr <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".", CHR, ".dat"), header = T, stringsAsFactors = F)
result_TWAS_chr <- result_TWAS_chr[, -c(1,2)]
result_TWAS_noMHC <- rbind(result_TWAS_noMHC, result_TWAS_chr)
}
result_TWAS_MHC <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".6.dat.MHC"), header = T, stringsAsFactors = F)
result_TWAS_MHC <- result_TWAS_MHC[, -c(1,2)]
result_TWAS <- rbind(result_TWAS_noMHC, result_TWAS_MHC)
result_TWAS_noMHC$TWAS.P.Bonferroni <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "bonferroni")
result_TWAS_noMHC$TWAS.FDR <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "BH")
result_TWAS_noMHC$TWAS.qvalue <- qvalue(result_TWAS_noMHC$TWAS.P, lambda = seq(0.05, min(max(result_TWAS_noMHC$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
result_TWAS$TWAS.P.Bonferroni <- p.adjust(result_TWAS$TWAS.P, method = "bonferroni")
result_TWAS$TWAS.FDR <- p.adjust(result_TWAS$TWAS.P, method = "BH")
result_TWAS$TWAS.qvalue <- qvalue(result_TWAS$TWAS.P, lambda = seq(0.05, min(max(result_TWAS$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
if(write_output){
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".noMHC.result")
write.table(result_TWAS_noMHC, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".result")
write.table(result_TWAS, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
}
if(withMHC){
return(result_TWAS)
}else{
return(result_TWAS_noMHC)
}
}
### combine TWAS coloc table for all chrs
combine_TWAS_coloc_table <- function(trait, dir_TWAS, withMHC = FALSE, write_output = TRUE){
dir_TWAS_assoc_test <- paste0(dir_TWAS, "/FUSION_assoc_test/", trait)
result_TWAS_noMHC <- data.frame()
for(CHR in 1:22){
result_TWAS_chr <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".", CHR, ".coloc.dat"), header = T, stringsAsFactors = F)
result_TWAS_chr <- result_TWAS_chr[, -c(1,2)]
result_TWAS_noMHC <- rbind(result_TWAS_noMHC, result_TWAS_chr)
}
result_TWAS_MHC <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".6.coloc.dat.MHC"), header = T, stringsAsFactors = F)
result_TWAS_MHC <- result_TWAS_MHC[, -c(1,2)]
result_TWAS <- rbind(result_TWAS_noMHC, result_TWAS_MHC)
result_TWAS_noMHC$TWAS.P.Bonferroni <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "bonferroni")
result_TWAS_noMHC$TWAS.FDR <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "BH")
result_TWAS_noMHC$TWAS.qvalue <- qvalue(result_TWAS_noMHC$TWAS.P, lambda = seq(0.05, min(max(result_TWAS_noMHC$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
result_TWAS$TWAS.P.Bonferroni <- p.adjust(result_TWAS$TWAS.P, method = "bonferroni")
result_TWAS$TWAS.FDR <- p.adjust(result_TWAS$TWAS.P, method = "BH")
result_TWAS$TWAS.qvalue <- qvalue(result_TWAS$TWAS.P, lambda = seq(0.05, min(max(result_TWAS$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
if(write_output){
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".coloc.noMHC.result")
write.table(result_TWAS_noMHC, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".coloc.result")
write.table(result_TWAS, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
}
if(withMHC){
return(result_TWAS)
}else{
return(result_TWAS_noMHC)
}
}
dir_FUSION="/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master"
dir_ld_ref="/project2/xinhe/m6A/fusion_twas/LDREF"
thresh_FDR = 0.05
dir_TWAS <- "/project2/xinhe/m6A/fusion_twas/results/TWAS_m6AQTL_full_100kp_PCsRemoved/"
cat(trait, "\n", sep = "")
cat("dir_GWAS:", dir_GWAS, "\n", sep = "")
cat("dir_TWAS:", dir_TWAS, "\n", sep = "")
cat("thresh_FDR:", thresh_FDR, "\n", sep = "")
## conditional test for all TWAS significant peaks
dir_output <- paste0(dir_TWAS, "/FUSION_post_process/TWAS.P.Bonferroni_", thresh_FDR, "/", trait)
dir.create(dir_output, showWarnings = F, recursive = T)
sink(file = paste0(dir_output,'/FUSION_post_process.', trait, '.log'))
result_TWAS_m6AQTL <- combine_TWAS_table(trait, dir_TWAS, withMHC = FALSE, write_output = FALSE)
result_TWAS_m6AQTL <- result_TWAS_m6AQTL[!is.na(result_TWAS_m6AQTL$TWAS.P), ]
sig_TWAS_m6AQTL <- result_TWAS_m6AQTL[(result_TWAS_m6AQTL$TWAS.P.Bonferroni < thresh_FDR), ]
cat(nrow(sig_TWAS_m6AQTL), "sig peaks \n")
if(nrow(sig_TWAS_m6AQTL)){
for(CHR in unique(sig_TWAS_m6AQTL$CHR)){
sig_TWAS_ID <- sig_TWAS_m6AQTL[sig_TWAS_m6AQTL$CHR == CHR, "ID"]
TWAS_conditional_test(trait, sig_TWAS_ID, CHR, dir_TWAS, dir_GWAS, dir_output)
}
}
## conditional test for coloc peaks
dir_output <- paste0(dir_TWAS, "/coloc_conditional_test/", trait)
dir.create(dir_output, showWarnings = F, recursive = T)
sink(file = paste0(dir_output,'/FUSION_post_process.', trait, '.log'))
result_TWAS_m6AQTL <- combine_TWAS_coloc_table(trait, dir_TWAS, withMHC = FALSE, write_output = FALSE)
result_TWAS_m6AQTL <- result_TWAS_m6AQTL[!is.na(result_TWAS_m6AQTL$TWAS.P), ]
sig_TWAS_m6AQTL <- result_TWAS_m6AQTL[(result_TWAS_m6AQTL$TWAS.P.Bonferroni < thresh_FDR) & (result_TWAS_m6AQTL$COLOC.PP4 > 0.5), ]
cat(length(which(sig_TWAS_m6AQTL$COLOC.PP4 > 0.5)), "significant peaks with COLOC.PP4 > 0.5 \n")
if(nrow(sig_TWAS_m6AQTL)){
for(sig_TWAS_ID in unique(sig_TWAS_m6AQTL$ID)){
TWAS_conditional_test_locus(trait, sig_TWAS_ID, sig_TWAS_m6AQTL, dir_TWAS, dir_GWAS, dir_output, locus_win = 100000)
}
}
sink()
cat("Done. \n")
m6AQTL_workflowr/code/TWAS_assoc_test_coloc.sbatch
#!/bin/bash
trait=$1
dir_GWAS=$2
dir_TWAS=$3
name_weights=$4
name_panel=$5
N_panel=$6
N_GWAS=$7
coloc_P=8
dir_FUSION="/project2/xinhe/kevinluo/software/fusion_twas/fusion_twas-master"
dir_ld_ref="/project2/xinhe/m6A/fusion_twas/LDREF"
dir_assoc_test=${dir_TWAS}/FUSION_assoc_test/${trait}
module load R/3.5.1
mkdir -p ${dir_assoc_test}
cd ${dir_assoc_test}
Rscript ~/projects/m6A/m6AQTL_workflowr/code/add_panel_N_fusion_weights_pos.R ${dir_TWAS}/WEIGHTS ${name_weights} ${name_panel} ${N_panel}
for CHR in {1..22}
do
echo "run FUSION.assoc_test.R with coloc for ${trait}"
echo "chr${CHR}"
echo "coloc: TWAS.P < ${coloc_P}"
echo "GWASN: ${N_GWAS}"
Rscript ${dir_FUSION}/FUSION.assoc_test.R \
--coloc_P ${coloc_P} \
--GWASN ${N_GWAS} \
--sumstats ${dir_GWAS}/${trait}.sumstats.gz \
--weights ${dir_TWAS}/WEIGHTS/${name_weights}.pos \
--weights_dir ${dir_TWAS}/WEIGHTS/ \
--ref_ld_chr ${dir_ld_ref}/1000G.EUR. \
--chr ${CHR} \
--out ${dir_assoc_test}/${trait}.${CHR}.coloc.dat
done
Rscript ~/projects/m6A/m6AQTL_workflowr/code/combine_TWAS_coloc_results_chrs.R ${dir_TWAS} ${trait}
Rscript ~/projects/m6A/m6AQTL_workflowr/code/prepare_coloc_input_LocusCompare.R ${trait} ${dir_GWAS} ${dir_TWAS}
m6AQTL_workflowr/code/add_panel_N_fusion_weights_pos.R
#!/usr/bin/Rscript
## add panel and N (sample size) columns to TWAS weights .pos input file
args <- commandArgs(trailingOnly = TRUE)
dir_weights <- args[1]
name_weights <- args[2]
name_panel <- args[3]
N_panel <- as.numeric(args[4])
weights_pos <- read.table(paste0(dir_weights, '/', name_weights,'.pos'), header=T)
weights_pos$PANEL <- name_panel
weights_pos$N <- N_panel
weights_pos <- weights_pos[, c('PANEL', 'WGT', 'ID', 'CHR', 'P0', 'P1', 'N')]
write.table(weights_pos, paste0(dir_weights, '/', name_weights,'.pos'), col.names=T, row.names=F, quote=F)
print(weights_pos[1:3,])
cat('Results saved at:', paste0(dir_weights, '/', name_weights,'.pos'), '\n')
m6AQTL_workflowr/code/prepare_coloc_input_LocusCompare.R
args <- commandArgs(trailingOnly = TRUE)
trait <- args[1]
dir_GWAS <- args[2]
dir_TWAS <- args[3]
library(data.table)
library(qvalue)
library(BH)
library(reshape2)
library(ggplot2)
library(locuscomparer)
options(scipen=999)
### combine TWAS coloc table for all chrs
combine_TWAS_coloc_table <- function(trait, dir_TWAS, withMHC = FALSE, write_output = TRUE){
dir_TWAS_assoc_test <- paste0(dir_TWAS, "/FUSION_assoc_test/", trait)
result_TWAS_noMHC <- data.frame()
for(CHR in 1:22){
result_TWAS_chr <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".", CHR, ".coloc.dat"), header = T, stringsAsFactors = F)
result_TWAS_chr <- result_TWAS_chr[, -c(1,2)]
result_TWAS_noMHC <- rbind(result_TWAS_noMHC, result_TWAS_chr)
}
result_TWAS_MHC <- read.table(paste0(dir_TWAS_assoc_test, "/",trait, ".6.coloc.dat.MHC"), header = T, stringsAsFactors = F)
result_TWAS_MHC <- result_TWAS_MHC[, -c(1,2)]
result_TWAS <- rbind(result_TWAS_noMHC, result_TWAS_MHC)
result_TWAS_noMHC$TWAS.P.Bonferroni <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "bonferroni")
result_TWAS_noMHC$TWAS.FDR <- p.adjust(result_TWAS_noMHC$TWAS.P, method = "BH")
result_TWAS_noMHC$TWAS.qvalue <- qvalue(result_TWAS_noMHC$TWAS.P, lambda = seq(0.05, min(max(result_TWAS_noMHC$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
result_TWAS$TWAS.P.Bonferroni <- p.adjust(result_TWAS$TWAS.P, method = "bonferroni")
result_TWAS$TWAS.FDR <- p.adjust(result_TWAS$TWAS.P, method = "BH")
result_TWAS$TWAS.qvalue <- qvalue(result_TWAS$TWAS.P, lambda = seq(0.05, min(max(result_TWAS$TWAS.P,na.rm=T), 0.95), 0.05))$qvalues
if(write_output){
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".coloc.noMHC.result")
write.table(result_TWAS_noMHC, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
filename_output <- paste0(dir_TWAS_assoc_test, "/", trait, ".coloc.result")
write.table(result_TWAS, filename_output, col.names = T, row.names = F, quote = F, sep = "\t")
}
if(withMHC){
return(result_TWAS)
}else{
return(result_TWAS_noMHC)
}
}
## load TWAS significant loci
thresh_FDR = 0.05
dir_TWAS_coloc <- paste0(dir_TWAS, "/coloc_LocusCompare/", trait)
dir.create(dir_TWAS_coloc, showWarnings = F, recursive = T)
cat(trait, "\n", sep = "")
cat("dir_GWAS:", dir_GWAS, "\n", sep = "")
cat("dir_TWAS:", dir_TWAS, "\n", sep = "")
cat("thresh_FDR:", thresh_FDR, "\n", sep = "")
cat("dir_TWAS_coloc:", dir_TWAS_coloc, "\n", sep = "")
result_TWAS_m6AQTL <- combine_TWAS_coloc_table(trait, dir_TWAS, withMHC = FALSE, write_output = FALSE)
result_TWAS_m6AQTL <- result_TWAS_m6AQTL[!is.na(result_TWAS_m6AQTL$TWAS.P), ]
sig_TWAS_m6AQTL <- result_TWAS_m6AQTL[(result_TWAS_m6AQTL$TWAS.P.Bonferroni < thresh_FDR), ]
cat(length(which(sig_TWAS_m6AQTL$COLOC.PP4 > 0.5)), "significant peaks with COLOC.PP4 > 0.5 \n")
## Prepare m6A-QTLs and GWAS input data for [LocusCompareR](https://github.com/boxiangliu/locuscomparer)
cat("Extract m6A-QTL and GWAS summary stats ... \n")
cat("Directory of m6A and GWAS summary data for coloc figures: ", dir_TWAS_coloc, "\n")
### load all SNPs in 1000 Genomes reference data
SNPs_ref <- fread("/project2/xinhe/m6A/fusion_twas/LDREF/1000G.EUR.allchrs.bim")
all_SNPID_ref <- as.character(SNPs_ref$V2)
### load m6A-QTLs
m6A_version <- "jointPeak_threshold5_MeRIP_HISAT2Map"
m6A_phenotype_name <- "m6APeak_logOR_GC.IP.adjusted_qqnorm"
dir_m6AQTL_output <- paste0("/project2/xinhe/m6A/data_shared/m6A_QTLs/", m6A_version, "/fastQTL_cis_100kb/", m6A_phenotype_name)
m6AQTL <- readRDS(paste0(dir_m6AQTL_output, "/m6AQTL.", m6A_phenotype_name, ".15PCs.fastQTL.nominals.rds"))
### load GWAS summary
dir_GWAS <- "/project2/xinhe/m6A/GWAS/GWAS_summary_stats/GWAS_collection/GWAS_RDS/"
GWAS_ss <- readRDS(paste0(dir_GWAS, "/", trait, "_GWAS_processed.RDS"))
GWAS_summary <- GWAS_ss[, c("SNPID", "P")]
colnames(GWAS_summary) <- c("rsid", "pval")
### Extract m6A-QTL and GWAS summary stats (rsID and p-values) for coloc significant genes (peaks)
### all intersect SNPs
sig_peaks_coloc <- sig_TWAS_m6AQTL[sig_TWAS_m6AQTL$COLOC.PP4 > 0.5, "ID"]
for(PEAK_coloc in sig_peaks_coloc){
m6AQTL_PEAK <- m6AQTL[m6AQTL$PEAK == PEAK_coloc, c("SNPID", "pvalue")]
colnames(m6AQTL_PEAK) <- c("rsid", "pval")
common_snpIDs <- intersect(m6AQTL_PEAK$rsid, GWAS_summary$rsid)
cat(length(common_snpIDs), "SNPs \n")
m6AQTL_PEAK <- m6AQTL_PEAK[match(common_snpIDs, m6AQTL_PEAK$rsid), ]
filename_m6AQTL_summary <- paste0(dir_TWAS_coloc, "/", "m6AQTL.", PEAK_coloc, ".summary.tsv")
fwrite(m6AQTL_PEAK, filename_m6AQTL_summary, sep = "\t")
GWAS_PEAK <- GWAS_summary[match(common_snpIDs, GWAS_summary$rsid), ]
filename_GWAS_summary <- paste0(dir_TWAS_coloc, "/", "GWAS.", trait, ".", PEAK_coloc, ".summary.tsv")
fwrite(GWAS_PEAK, filename_GWAS_summary, sep = "\t")
}
### SNPs in 1000G reference
GWAS_summary <- GWAS_summary[GWAS_summary$rsid %in% all_SNPID_ref, ]
sig_peaks_coloc <- sig_TWAS_m6AQTL[sig_TWAS_m6AQTL$COLOC.PP4 > 0.5, "ID"]
for(PEAK_coloc in sig_peaks_coloc){
m6AQTL_PEAK <- m6AQTL[m6AQTL$PEAK == PEAK_coloc, c("SNPID", "pvalue")]
colnames(m6AQTL_PEAK) <- c("rsid", "pval")
common_snpIDs <- intersect(m6AQTL_PEAK$rsid, GWAS_summary$rsid)
cat(length(common_snpIDs), "SNPs \n")
# load(paste0(dir_TWAS, "/WEIGHTS/m6AQTL_TWAS_Weights/", PEAK_coloc, ".wgt.RDat"))
# common_snpIDs <- as.character(snps$V2)
m6AQTL_PEAK <- m6AQTL_PEAK[match(common_snpIDs, m6AQTL_PEAK$rsid), ]
filename_m6AQTL_summary <- paste0(dir_TWAS_coloc, "/", "m6AQTL.", PEAK_coloc, ".LDREF.summary.tsv")
fwrite(m6AQTL_PEAK, filename_m6AQTL_summary, sep = "\t")
GWAS_PEAK <- GWAS_summary[match(common_snpIDs, GWAS_summary$rsid), ]
filename_GWAS_summary <- paste0(dir_TWAS_coloc, "/", "GWAS.", trait, ".", PEAK_coloc, ".LDREF.summary.tsv")
fwrite(GWAS_PEAK, filename_GWAS_summary, sep = "\t")
}
### plot coloc figures using [LocusCompareR](https://github.com/boxiangliu/locuscomparer)
cat("Plot LocusCompare figures ... \n")
for(PEAK_coloc in sig_peaks_coloc){
gene_coloc <- sapply(strsplit(PEAK_coloc, "_"), "[[", 2)
cat("Gene:", gene_coloc, ", m6A Peak:", PEAK_coloc, "\n")
m6AQTL_leadSNP <- sig_TWAS_m6AQTL[sig_TWAS_m6AQTL$ID == PEAK_coloc, "EQTL.ID"]
p <- locuscompare(in_fn1 = paste0(dir_TWAS_coloc, "/", "GWAS.", trait, ".", PEAK_coloc, ".summary.tsv"),
in_fn2 = paste0(dir_TWAS_coloc, "/", "m6AQTL.", PEAK_coloc, ".summary.tsv"),
title1 = 'GWAS', title2 = 'm6A-QTL',
snp = m6AQTL_leadSNP, population = "EUR", legend_position = "topleft")
pdf(paste0(dir_TWAS_coloc, "/", "LocusCompareR.", trait, ".", PEAK_coloc, ".pdf"))
p
dev.off()
}
cat("Done. \n")
sessionInfo()