After the imputation process, the data must be stratified by the five superpopulations (EUR, EAS, AMR, AFR, SAS) to enable population-specific evaluation.
# Subset the merged, imputed VCF to the samples of a single population.
#
# Arguments:
#   $1  POPULATION_META  tab-separated sample metadata with one header row;
#                        column 1 is used as the sample ID and column 6 is
#                        compared against POP_NAME
#   $2  POP_NAME         population label to keep
#   $3  MERGED_VCF       merged VCF to subset
#   $4  CHR              chromosome label (used only in the output file name)
#   $5  LPS_COV          coverage label (used only in the output file name)
#
# Outputs (written to the current directory):
#   ${POP_NAME}_sample_list.txt
#   chr${CHR}_${LPS_COV}_${POP_NAME}_imputed.vcf.gz
set -ue

POPULATION_META=${1:?missing argument 1: population metadata file}
POP_NAME=${2:?missing argument 2: population name}
MERGED_VCF=${3:?missing argument 3: merged VCF}
CHR=${4:?missing argument 4: chromosome}
LPS_COV=${5:?missing argument 5: coverage label}

## Get sample list for the specified population
# NR != 1 skips the header row; the population name is passed with -v so it
# is compared as data and never interpolated into the awk program text.
awk -F'\t' -v pop_name="${POP_NAME}" \
  'NR!=1 && $6==pop_name {print $1}' \
  "${POPULATION_META}" > "${POP_NAME}_sample_list.txt"

## Filter the merged VCF for the specified population
# bcftools writes the bgzip-compressed VCF itself (-O z) instead of piping
# into bgzip: in a pipeline only the last command's exit status is checked by
# `set -e`, so a bcftools failure would otherwise go unnoticed and leave a
# truncated output file behind.
bcftools view \
  -S "${POP_NAME}_sample_list.txt" \
  -O z \
  -o "chr${CHR}_${LPS_COV}_${POP_NAME}_imputed.vcf.gz" \
  "${MERGED_VCF}"
# Subset the merged, imputed VCF to the samples of a single population.
#
# Arguments:
#   $1  POPULATION_META  tab-separated sample metadata with one header row;
#                        column 1 is used as the sample ID and column 6 is
#                        compared against POP_NAME
#   $2  POP_NAME         population label to keep
#   $3  MERGED_VCF       merged VCF to subset
#   $4  CHR              chromosome label (used only in the output file name)
#   $5  LPS_COV          coverage label (used only in the output file name)
#
# Outputs (written to the current directory):
#   ${POP_NAME}_sample_list.txt
#   chr${CHR}_${LPS_COV}_${POP_NAME}_imputed.vcf.gz
set -ue

POPULATION_META=${1:?missing argument 1: population metadata file}
POP_NAME=${2:?missing argument 2: population name}
MERGED_VCF=${3:?missing argument 3: merged VCF}
CHR=${4:?missing argument 4: chromosome}
LPS_COV=${5:?missing argument 5: coverage label}

## Get sample list for the specified population
# NR != 1 skips the header row; the population name is passed with -v so it
# is compared as data and never interpolated into the awk program text.
awk -F'\t' -v pop_name="${POP_NAME}" \
  'NR!=1 && $6==pop_name {print $1}' \
  "${POPULATION_META}" > "${POP_NAME}_sample_list.txt"

## Filter the merged VCF for the specified population
# bcftools writes the bgzip-compressed VCF itself (-O z) instead of piping
# into bgzip: in a pipeline only the last command's exit status is checked by
# `set -e`, so a bcftools failure would otherwise go unnoticed and leave a
# truncated output file behind.
bcftools view \
  -S "${POP_NAME}_sample_list.txt" \
  -O z \
  -o "chr${CHR}_${LPS_COV}_${POP_NAME}_imputed.vcf.gz" \
  "${MERGED_VCF}"
# Subset the truth VCF to the samples of a single population.
#
# Arguments:
#   $1  POPULATION_META  tab-separated sample metadata with one header row;
#                        column 1 is used as the sample ID and column 6 is
#                        compared against POP_NAME
#   $2  POP_NAME         population label to keep
#   $3  TRUE_VCF         truth VCF to subset
#   $4  CHR              chromosome label (used only in the output file name)
#
# Outputs (written to the current directory):
#   ${POP_NAME}_sample_list.txt
#   chr${CHR}_${POP_NAME}_true.vcf.gz
set -ue

## Input
POPULATION_META=${1:?missing argument 1: population metadata file}
POP_NAME=${2:?missing argument 2: population name}
TRUE_VCF=${3:?missing argument 3: truth VCF}
CHR=${4:?missing argument 4: chromosome}

## Get sample list for the specified population
# NR != 1 skips the header row; the population name is passed with -v so it
# is compared as data and never interpolated into the awk program text.
awk -F'\t' -v pop_name="${POP_NAME}" \
  'NR!=1 && $6==pop_name {print $1}' \
  "${POPULATION_META}" > "${POP_NAME}_sample_list.txt"

## Split the truth VCF by population
# bcftools writes the bgzip-compressed VCF itself (-O z) instead of piping
# into bgzip: in a pipeline only the last command's exit status is checked by
# `set -e`, so a bcftools failure would otherwise go unnoticed and leave a
# truncated output file behind.
bcftools view \
  -S "${POP_NAME}_sample_list.txt" \
  -O z \
  -o "chr${CHR}_${POP_NAME}_true.vcf.gz" \
  "${TRUE_VCF}"