Sample names must not contain underscores. Underscores can be introduced when imputed VCF files are merged: the name of the file used during merging may be incorporated into each sample name to keep the names unique across datasets.
# Strip the "_<suffix>" tail from the sample names of a VCF.
#
# Usage: bash <this-script> <input.vcf.gz>
#
# Files written to the current directory:
#   sample_name.txt                 sample names reported by `bcftools query -l`
#   new_name.txt                    "old_name<TAB>new_name" pairs, one line per
#                                   sample whose name contains an underscore
#   <basename>_correct_name.vcf.gz  reheadered copy, or a symlink to the input
#                                   when no sample needs renaming (plus index)
set -ue

VCF_FILES=$1
file_name=$(basename -- "$VCF_FILES" .vcf.gz)
# Both branches below produce this file so the final index step always has
# something to index.
out_vcf="${file_name}_correct_name.vcf.gz"

# Get the current sample names
bcftools query -l "$VCF_FILES" > sample_name.txt

# For every name of the form "<head>_<rest>", emit "<name>\t<head>".
# Names without an underscore (or with an empty head) are left out, which
# bcftools reheader treats as "keep unchanged". awk is used instead of
# sed's "\t" replacement, which is a GNU extension.
awk -F '_' 'NF > 1 && $1 != "" { printf "%s\t%s\n", $0, $1 }' \
  sample_name.txt > new_name.txt

# Check every sample, not just the first: rename if any pair was produced.
if [[ -s new_name.txt ]]; then
  # Replace old name by new one
  echo "Replace by new name"
  bcftools reheader -s new_name.txt -o "$out_vcf" "$VCF_FILES"
else
  echo "Create symlink"
  ln -s -- "$VCF_FILES" "$out_vcf"
fi

bcftools index "$out_vcf"
# Concatenate per-chromosome VCFs into a single bgzipped, indexed VCF.
#
# Usage: bash <this-script> <PREFIX> <VCF_FOLDER>
#   PREFIX      file-name prefix, e.g. "AMR-Axiom_JAPONICA"
#   VCF_FOLDER  directory holding <PREFIX>_chr*.vcf.gz, e.g. "/path/to/vcf_files"
#
# Output: <PREFIX>_concat.vcf.gz and its index in the current directory.
set -ue

PREFIX=$1
VCF_FOLDER=$2
out_vcf="${PREFIX}_concat.vcf.gz"

# List the input files with a glob rather than by parsing `ls`, so paths
# containing spaces survive intact. Files are passed in glob (lexical) order,
# the same order `ls` produced.
shopt -s nullglob
vcf_files=("${VCF_FOLDER}/${PREFIX}"_chr*.vcf.gz)
shopt -u nullglob

if (( ${#vcf_files[@]} == 0 )); then
  printf 'No files matching %s/%s_chr*.vcf.gz\n' "$VCF_FOLDER" "$PREFIX" >&2
  exit 1
fi

bcftools concat -Oz -o "$out_vcf" "${vcf_files[@]}"
bcftools index "$out_vcf"
# Annotate the ID column of a VCF from a reference VCF.
#
# Usage: bash <this-script> <VCF_FILES> <REF_VCF>
#   VCF_FILES  input VCF, e.g. "AMR-Axiom_UKB_WCSG_concat.vcf.gz"
#   REF_VCF    reference VCF supplying the IDs, e.g. "00-All.vcf.gz"
#
# Output: <basename>_anno.vcf.gz and its index in the current directory.
#
# pipefail: without it a failure of `bcftools view` would be masked by the
# exit status of bgzip and a truncated intermediate file would be annotated.
set -euo pipefail

VCF_FILES=$1
REF_VCF=$2
file_name=$(basename -- "$VCF_FILES" .vcf.gz)
out_vcf="${file_name}_anno.vcf.gz"

# Intermediate files live in a private directory (created next to the output,
# as before) that is removed on every exit path, including failures.
tmp_dir=$(mktemp -d ./tem.XXXXXX)
trap 'rm -rf -- "$tmp_dir"' EXIT
tmp_vcf="${tmp_dir}/tem.vcf.gz"

## Make sure the chromosome names are numeric.
## Only the CHROM column and the ##contig header IDs are rewritten; a global
## s/chr//g would also alter any other text that happens to contain "chr".
bcftools view "$VCF_FILES" \
  | sed -E -e 's/^chr//' -e 's/^(##contig=<ID=)chr/\1/' \
  | bgzip > "$tmp_vcf"
bcftools index "$tmp_vcf"

## Annotate the VCF with the reference VCF.
## -Oz is explicit so the .vcf.gz output is bgzipped (and therefore indexable)
## regardless of whether bcftools infers the type from the file name.
bcftools annotate -a "$REF_VCF" -c ID -Oz -o "$out_vcf" "$tmp_vcf"
bcftools index "$out_vcf"
# Run PLINK quality-control filters on a binary fileset.
#
# Usage: bash <this-script> <PREFIX>
#   PREFIX  PLINK bfile prefix, e.g. AMR-Axiom_UKB_WCSG
#
# Output: files written by PLINK under the prefix <PREFIX>.QC (the --out
# value); --write-snplist and --make-just-fam request the variant list and
# the .fam file respectively.
PREFIX=${1:?PLINK bfile prefix required as first argument}

plink \
  --bfile "${PREFIX}" \
  --maf 0.0001 \
  --hwe 1e-6 \
  --geno 0.01 \
  --mind 0.01 \
  --write-snplist \
  --make-just-fam \
  --memory 128000 \
  --out "${PREFIX}.QC"
# Drop duplicated variants and keep only QC-passing samples and variants.
#
# Usage: bash <this-script> <PREFIX>
#   PREFIX  PLINK bfile prefix, e.g. AMR-Axiom_UKB_WCSG
#
# Reads:  <PREFIX>.QC.snplist and <PREFIX>.QC.fam
# Writes: <PREFIX>.nodup (via LIST_NO_DUPLICATE.R) and the <PREFIX>.dedup
#         binary fileset.
set -ue

PREFIX=$1

## List duplicated records
# LIST_NO_DUPLICATE.R is looked up relative to the current directory; it is
# given the QC variant list and the path of the list it should write.
Rscript LIST_NO_DUPLICATE.R "${PREFIX}.QC.snplist" "${PREFIX}.nodup"

## Deduplicate
plink \
  --bfile "${PREFIX}" \
  --threads 2 \
  --make-bed \
  --keep "${PREFIX}.QC.fam" \
  --out "${PREFIX}.dedup" \
  --extract "${PREFIX}.nodup" \
  --memory 128000
# Compute polygenic scores with PRSice-2 at a fixed set of p-value thresholds.
#
# Usage: bash <this-script> <TARGET_FILE> <BASE_FILE> <OUT_PREFIX> <LD_TRUE>
#   TARGET_FILE  target genotype prefix,   e.g. AMR-Axiom_UKB_WCSG.dedup
#   BASE_FILE    base summary statistics,  e.g. GIANT_BMI.QCed.gz
#   OUT_PREFIX   output prefix,            e.g. AMR-Axiom_UKB_WCSG_bmi
#   LD_TRUE      genotype prefix for --ld, e.g. AMR-null.dedup
TARGET_FILE=${1:?target genotype prefix required as argument 1}
BASE_FILE=${2:?base summary statistics file required as argument 2}
OUT_PREFIX=${3:?output prefix required as argument 3}
LD_TRUE=${4:?LD reference prefix required as argument 4}

# NOTE(review): the binary path and "--score sum" are reconstructed from a
# whitespace-stripped command line ("--scoresum"); confirm against the
# installed PRSice wrapper.
PRSice \
  --prsice /src/PRSice-2_v2.3.3/PRSice_linux \
  --base "${BASE_FILE}" \
  --target "${TARGET_FILE}" \
  --ld "${LD_TRUE}" \
  --out "${OUT_PREFIX}" \
  --binary-target F \
  --bar-levels 0.00000005,0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1,0.2,0.3,0.5,1 \
  --fastscore \
  --a1 A1 \
  --a2 A2 \
  --beta \
  --bp BP \
  --chr CHR \
  --pvalue P \
  --snp SNP \
  --stat BETA \
  --clump-kb 250kb \
  --clump-p 1 \
  --clump-r2 0.1 \
  --ultra \
  --no-regress \
  --score sum \
  --thread 1
Shing Wan Choi, Timothy Shin-Heng Mak, and Paul F. O’Reilly. Tutorial: a guide to performing polygenic risk score analyses. Nature Protocols, 15(9):2759–2772, 2020.