Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
results/Annotation/sampleID/VEP/HaplotypeCaller_sampleID_VEP.ann.vcf

Alternatively, if the file was annotated using snpEff:

Code Block
results/Annotation/sampleID/snpEff/HaplotypeCaller_sampleID_snpEff.ann.vcf

Example of a VCF file annotated using snpEff. Note that the metadata header information is not shown.

Code Block
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  FBS1-LNCAP-RNA
chr1    14542   rs1045833       A       G       71.84   .       AC=2;AF=1.00;AN=2;DB;DP=3;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=23.70;QD=23.95;SOR=2.833;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*133A>G|||||133|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2827T>C|||||2827|,G|intron_variant|MODIFIER|MIR68
chr1|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-41T>C||||||     14574   rs28503599GT:AD:DP:GQ:PL  1/1:0,3:3:9:85,9,0
chr1    14574   rs28503599      A       G       121.05  .       AC=2;AF=1.00;AN=2;DB;DP=10;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.52;QD=12.10;SOR=4.804;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*165A>G|||||165|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2795T>C|||||2795|,G|intron_variant|MODIFIER|MIR68
chr1|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-73T>C||||||     14599   rs531646671     TGT:AD:DP:GQ:PL  1/1:0,10:10:27:135,27,0
chr1    14599   rs531646671     T       A       301.02  .       AC=2;AF=1.00;AN=2;DB;DP=7;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.00;QD=25.36;SOR=4.174;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*190T>A|||||190|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2770A>T|||||2770|,A|intron_variant|MODIFIER|MIR685
chr1|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-98A>T||||||     14604   rs541940975      GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,7:7:21:1|1:14599_T_A:315,21,0:14599
chr1    14604   rs541940975     A       G       356.05  .       AC=2;AF=1.00;AN=2;DB;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.23;QD=28.73;SOR=3.056;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*195A>G|||||195|,G|downstream_gene_variant|MODIFIER|MIR685
chr1    14610   .       T       C       356.05|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2765T>C|||||2765|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-103T>C||||||     GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599
chr1    14610   .       T       C       356.05  .       AC=2;AF=1.00;AN=2;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.23;QD=30.97;SOR=3.056;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*201T>C|||||201|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2759A>G|||||2759|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|ENSG000
chr1|unprocessed_pseudogene|10/10|n.1254-109A>G||||||     14653   rs62635297      GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599
chr1    14653   rs62635297      C       T       1816.06 .       AC=2;AF=1.00;AN=2;DB;DP=67;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=34.03;QD=27.94;SOR=2.412;ANN=T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*244C>T|||||244|,T|downstream_gene_variant|MODIFIER|MIR68
chr1    14677   rs201327123     G       |MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2716G>A|||||2716|,T|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-152G>A||||||    GT:AD:DP:GQ:PL  1/1:0,65:65:99:1830,195,0
chr1    14677   rs201327123     G       A       926.64  .       AC=1;AF=0.500;AN=2;BaseQRankSum=2.83;DB;DP=113;ExcessHet=3.0103;FS=1.545;MLEAC=1;MLEAF=0.500;MQ=37.82;MQRankSum=-1.398e+00;QD=8.35;ReadPosRankSum=0.962;SOR=0.852;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcrip
chr1transcript||n.*268G>A|||||268|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2692C>T|||||2692|,A|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-176C>T||||||    16257   rs11489794  GT:AD:DP:GQ:PL  0/1:63,48:111:99:934,0,1266
chr1    16257   rs11489794      G       C       49.64   .       AC=1;AF=0.500;AN=2;BaseQRankSum=-1.136e+00;DB;DP=23;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=34.10;MQRankSum=0.381;QD=2.16;ReadPosRankSum=0.00;SOR=1.022;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript
chr1    16288   rs113141985     C       G       48.64   .       AC=1;AF=0.500;AN=2;BaseQRankSum=-1.150e+00;DB;||n.*1848G>C|||||1848|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1112C>G|||||1112|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1068-310C>G||||||      GT:AD:DP:GQ:PL  0/1:18,5:23:57:57,0,384
chr1    16288   rs113141985     C       G       48.64   .       AC=1;AF=0.500;AN=2;BaseQRankSum=-1.150e+00;DB;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=32.24;MQRankSum=-1.350e-01;QD=6.08;ReadPosRankSum=-1.029e+00;SOR=0.307;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_
chr1    16298   rs62636498      C       T       112.14  .       AC=2;AF=1.00;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.25;QD=28.04;SOR=3.258;ANN=T|transcript||n.*1879C>G|||||1879|,G|downstream_gene_variant|MODIFIER|DDX11L1MIR6859-1|ENSG00000223972ENSG00000278267|transcript|ENST00000456328ENST00000619216.21|processed_transcriptmiRNA||n.*1889C>T1081G>C|||||18891081|,TG|downstream_geneintron_variant|MODIFIER|MIR6|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+319G>C||||||    GT:AD:DP:GQ:PL  0/1:5,3:8:56:56,0,107
chr1    13604816298   rs62636498 rs371677125     C       T       125112.14  .       AC=2;AF=1.00;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=4022.0025;QD=3128.2904;SOR=03.693258;ANN=T|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903DDX11L1|ENSG00000223972|transcript|ENST00000494149ENST00000456328.2|processed_pseudogenetranscript||n.-153G>A*1889C>T|||||1531889|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1071G>A|||||1071|,T|intron_variant|MODIFIER|RP
chr1    136573  .   |WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+309G>A||||||    GT:AD:DP:GQ:PL  1/1:0,4:4:12:126,12,0
chr1    T136048  rs371677125     C       T       113125.9714  .       AC=2;AF=1.00;AN=2;DB;DP=74;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=4940.6200;QD=1931.0029;SOR=10.329693;ANN=CT|TFupstream_bindinggene_site_variant|LOWMODIFIER|RP11-34P13.15|ENSG00000268903|Egr1transcript|MA0162ENST00000494149.2|processed_pseudogene||n.136573A>G-153G>A|||||153|,CT|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.1514|ENSG00000268903ENSG00000239906|transcript|ENST00000494149ENST00000493797.21|processed_pseudogene||n
chr1    136962  rs373582709     C       T       118.68  .       AC=1;AF=0.500;AN=2;BaseQRankSum=-8.870e-01;DB;DP=8;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=50.36;MQRankSum=0.489;QD=16.95;ReadPosRankSum=0.489;SOR=0.330;ANN=T|TF_binding_site_variant|MODIFIER|||Egr1|MA0162.2|||n.136962G>A||||||,T|upstream_gene_variant|MODIFIER|RP1
chr1    137622  rs376555721     G       Aantisense||n.*3742G>A|||||3742|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1212C>T|||||1212|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*1634G>A|||||1634|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136048C>T||||||       GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,4:4:12:1|1:136048_C_T:139,12,0:136048
chr1    136573  .       54.66T   .    C   AC=1    113.97  .       AC=2;AF=01.50000;AN=2;BaseQRankSum=0.674;DB;DP=47;ExcessHet=3.0103;FS=60.021000;MLEAC=12;MLEAF=01.50000;MQ=2049.0062;MQRankSumQD=019.00;QD=13.67;ReadPosRankSum=-3.190e-01;SOR=21.788329;ANN=AC|upstreamTF_binding_genesite_variant|MODIFIERLOW|RP11-34P13.15|ENSG00000268903|transcriptEgr1|ENST00000494149MA0162.2|processed_pseudo
chr1    137825  rs147252685     G       A       622.06  .       AC=2;AF=1.00;AN=2;DB;DP=26;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=36.46;QD=24.88;SOR=0.770;ANN=A|upstream|||n.136573A>G||||||,C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-678A>G|||||678|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*3217A>G|||||3217|,C|downstream_gene_variant|MODIFIER|RP11-34P13.1513|ENSG00000268903ENSG00000241860|transcript|ENST00000494149ENST00000484859.21|processed_pseudogeneantisense||n.-1930C>T*4901A>G|||||19304901|,AC|downstream_gene_variant|MODIFIER
chr1    138156  rs370691115     G       T       197.04  .       AC=2;AF=1.00;AN=2;DB;DP=8;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.00;QD=24.63;SOR=0.693;ANN=T|upstream_gene_variant||CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1737T>C|||||1737|,C|downstream_gene_variant|MODIFIER|RP11-34P13.1516|ENSG00000268903ENSG00000269981|transcript|ENST00000494149ENST00000595919.21|processed_pseudogene||n.-2261C>A*1109A>G|||||22611109|,TC|upstream_gene_variant|MODIFIER|RP
intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136573T>C||||||   GT:AD:DP:GQ:PL  1/1:0,6:6:18:128,18,0
chr1    138817136962  rs556938922rs373582709     TC       CT       945118.0668  .       AC=21;AF=10.00500;AN=2;BaseQRankSum=-8.870e-01;DB;DP=458;ExcessHet=3.0103;FS=0.000;MLEAC=21;MLEAF=10.00500;MQ=35.16;QD=21.98=50.36;MQRankSum=0.489;QD=16.95;ReadPosRankSum=0.489;SOR=0.739330;ANN=CT|upstreamTF_binding_genesite_variant|MODIFIER|RP11-|||Egr1|MA0162.2|||n.136962G>A||||||,T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2922A>G1067G>A|||||29221067|,CT|upstreamdownstream_gene_variant|MODIFIER|R
chr1    184246  .       T       C       52.84   .       AC=2;AF=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=41.80;QD=17.61;SOR=1.179;ANN=CRP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*2828G>A|||||2828|,T|downstream_gene_variant|MODIFIER|FO538757RP11-34P13.213|ENSG00000279928ENSG00000241860|transcript|ENST00000624431ENST00000484859.1|protein_codingantisense||cn.*88T>C4512G>A|||||884512|,CT|downstream_gene_variant|MODIFIER|CICP27|FO538757.1|ENSG000002
chr1    185194  .       G       C       91.64   .       AC=1;AF=0.500;AN=2;BaseQRankSum=-1.345e+00;DP=10;ExcessHet=3.0103;FS=4.260;MLEAC=1;MLEAF=0.500;MQ=24.41;MQRankSum=-1.150e+00;QD=11.45;ReadPosRankSum=0.00;SOR=2.833;ANN=C|3_prime_UTR_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding|11/11|c.*23

Adding MAF information using bcftools

The following command line adds MAF information to either annotated or non-annotated VCF files.

Code Block
#non-annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID.vcf > HaplotypeCaller_sampleID_tags.vcf

#VEP annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_VEP.vcf > HaplotypeCaller_sampleID_VEP_tags.vcf

#snpEff annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_snpEff.vcf > HaplotypeCaller_sampleID_snpEff_tags.vcf

For the example shown above, the VCF annotated using snpEff looks as follows once the MAF information has been added.

Code Block
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  FBS1-LNCAP-RNA
chr1    14542   rs1045833       A       G       71.84   .       AC=2;AF=1;AN=2;DB;DP=3ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2126C>T|||||2126|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*720G>A|||||720|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136962C>T||||||    GT:AD:DP:GQ:PL  0/1:1,6:8:10:126,0,10
chr1    137622  rs376555721     G       A       54.66   .       AC=1;AF=0.500;AN=2;BaseQRankSum=0.674;DB;DP=4;ExcessHet=3.0103;FS=06.021;MLEAC=1;MLEAF=0.5500;MQ=2320.700;QDMQRankSum=230.9500;SOR=QD=13.67;ReadPosRankSum=-3.190e-01;SOR=2.833788;ANN=GA|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_pseudogene||n.-1727C>T|||||1727|,A|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*133A>G2168C>T|||||1332168|,GA|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG0
chr1    14574   rs28503599      A       G       121.05  .       AC=2;AF=1;AN=2;DB;DP=10;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.52;QD=12.1;SOR=4.804;ANN=GRP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3852C>T|||||3852|,A|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2786G>A|||||2786|,A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.16|ENSG00000269981|transcript|ENST00000456328ENST00000595919.21|processed_transcriptpseudogene||n.*165A>G60C>T|||||16560|,GA|downstreamintergenic_gene_variantregion|MODIFIER|MIR6859-1|ENSG00
chr1    14599   rs531646671|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.137622G>A||||||      GT:AD:DP:GQ:PL  0/1:1,3:4:13:62,0,13
chr1    137825  rs147252685     TG       A       301622.0206  .       AC=2;AF=1.00;AN=2;DB;DP=726;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=2236.46;QD=2524.3688;SOR=40.174770;ANN=A|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_transcriptpseudogene||n.*190T>A-1930C>T|||||1901930|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000
chr1    14604   rs541940975     A       G       356.05  .       AC=2;AF=1;AN=2;DB;DP=9;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.23;QD=28.73;SOR=3.056;ANN=GRP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1965C>T|||||1965|,A|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3649C>T|||||3649|,A|downstream_gene_variant|MODIFIER|DDX11L1CICP27|ENSG00000223972ENSG00000233750|transcript|ENST00000456328ENST00000442987.23|processed_transcriptpseudogene||n.*195A>G2989G>A|||||1952989|,G|downstream_geneA|non_coding_transcript_exon_variant|MODIFIER|MIR6859-1|ENSG00
chr1    14610   .  RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene|1/1|n.141C>T||||||     GT:AD:DP:GQ:PL  1/1:0,25:25:75:636,75,0
chr1    138156  rs370691115     TG       CT       356197.0504  .       AC=2;AF=1.00;AN=2;DB;DP=98;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.2300;QD=3024.9763;SOR=30.056693;ANN=CT|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_transcriptpseudogene||n.*201T>C-2261C>A|||||2012261|,CT|downstreamupstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|t
chr1    14653   rs62635297      C       T       1816.06 .       AC=2;AF=1;AN=2;DB;DP=67;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=34.03;QD=27.94;SOR=2.412;ANN=RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-191C>A|||||191|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1634C>A|||||1634|,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.13|ENSG00000241860|transcript|ENST00000456328ENST00000484859.21|processed_transcriptantisense||n.*244C>T3318C>A|||||2443318|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG0
chr1    14677   rs201327123     G       A  |CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3320G>T|||||3320|,T|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138156G>T||||||   GT:AD:DP:GQ:PL  1/1:0,8:8:24:211,24,0
chr1    138817  rs556938922     T     926.64  .C       AC=1945.06  .       AC=2;AF=01.500;AN=2;BaseQRankSum=2.83;DB;DP=11345;ExcessHet=3.0103;FS=10.545000;MLEAC=12;MLEAF=01.500;MQ=37.82;MQRankSum=-1.39835.16;QD=821.35;ReadPosRankSum=0.962;98;SOR=0.852739;ANN=AC|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_transcriptpseudogene||n.*26
chr1    16257   rs11489794      G       C       49.64   .       AC=1;AF=0.5;AN=2;BaseQRankSum=-1.136;DB;DP=23;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=34.1;MQRankSum=0.381;QD=2.16;ReadPosRankSum=0;SOR=1.022;ANN=-2922A>G|||||2922|,C|upstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-852A>G|||||852|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*973A>G|||||973|,C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.13|ENSG00000241860|transcript|ENST00000456328ENST00000484859.21|processed_transcriptantisense||n.*1848G>C2657A>G||||||
chr1    16288   rs113141985     C       G2657|,C|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3981T>C|||||3981|,C|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138817T>C||||||    GT:AD:DP:GQ:PL  1/1:0,43:43:99:959,126,0
chr1    184246  .       T       C       4852.6484   .       AC=12;AF=01.500;AN=2;BaseQRankSum=-1.15;DB;DP=94;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.5500;MQ=32.24;MQRankSum=-0.13541.80;QD=617.0861;ReadPosRankSumSOR=-1.029;SOR=0.307;179;ANN=GC|downstream_gene_variant|MODIFIER|DDX11L1FO538757.2|ENSG00000223972ENSG00000279928|transcript|ENST00000456328ENST00000624431.21|processedprotein_transcriptcoding||n.*1879C>
chr1    16298   rs62636498      C       T       112.14  .       AC=2;AF=1;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.25;QD=28.04;SOR=3.258;ANN=Tc.*88T>C|||||88|,C|downstream_gene_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding||c.*971A>G|||||679|,C|downstream_gene_variant|MODIFIER|DDX11L1MIR6859-2|ENSG00000223972ENSG00000273874|transcript|ENST00000456328ENST00000612080.21|processed_transcriptmiRNA||n.*1889C>T3645A>G|||||18893645|,TC|downstreamintergenic_gene_variantregion|MODIFIER|MIR6859FO538757.2-FO538757.1|ENSG
chr1    136048  rs371677125|ENSG00000279928-ENSG00000279457|intergenic_region|ENSG00000279928-ENSG00000279457|||n.184246T>C||||||     GT:AD:DP:GQ:PL  1/1:0,3:3:9:66,9,0
chr1    185194  .       CG       TC       12591.1464   .       AC=21;AF=10.500;AN=2;DBBaseQRankSum=-1.345e+00;DP=410;ExcessHet=3.0103;FS=04.260;MLEAC=21;MLEAF=10.500;MQ=40;QD=31.2924.41;MQRankSum=-1.150e+00;QD=11.45;ReadPosRankSum=0.00;SOR=02.693833;ANN=TC|upstream3_prime_geneUTR_variant|MODIFIER|RP11-34P13FO538757.151|ENSG00000268903ENSG00000279457|transcript|ENST00000494149ENST00000623083.23|processedprotein_pseudogenecoding|11/11|nc.-153G>A*23C>G|||||15323|,TC|downstream_gene_variant|MODIFIER|RP11-34P13.14|E
chr1    136573  .       T       C       113.97  .       AC=2;AF=1;AN=2;DP=7;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=49.62;QD=19;SOR=1.329;ANN=C|TF_binding_site_variant|LOW|||Egr1|MA0162.2|||n.136573A>G||||||,C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-678A>G|||||
chr1    136962  rs373582709     C       T       118.68  .       AC=1;AF=0.5;AN=2;BaseQRankSum=-0.887;DB;DP=8FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*1036G>C|||||1036|,C|downstream_gene_variant|MODIFIER|MIR6859-2|ENSG00000273874|transcript|ENST00000612080.1|miRNA||n.*2697C>G|||||2697|       GT:AD:DP:GQ:PL  0/1:3,5:8:65:99,0,65

Individual-level MAF: adding MAF information using bcftools

At the individual level, the expected allele frequency of an alternative (minor) allele can be 1.0 (homozygous), 0.5 (heterozygous), or 0.0 (the allele is absent in the patient when screening against a reference set of genetic variants).

The following command line adds MAF information to either annotated or non-annotated VCF files.

Code Block
#non-annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID.vcf > HaplotypeCaller_sampleID_tags.vcf

#VEP annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_VEP.ann.vcf > HaplotypeCaller_sampleID_VEP_tags.ann.vcf

#snpEff annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_snpEff.ann.vcf > HaplotypeCaller_sampleID_snpEff_tags.ann.vcf

For the example shown above, the VCF annotated using snpEff looks as follows once the MAF information has been added.

Code Block
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  FBS1-LNCAP-RNA
chr1    14542   rs1045833       A       G       71.84   .       AC=2;AF=1;AN=2;DB;DP=3;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=23.7;QD=23.95;SOR=2.833;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*133A>G|||||133|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2827T>C|||||2827|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-41T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL  1/1:0,3:3:9:85,9,0
chr1    14574   rs28503599      A       G       121.05  .       AC=2;AF=1;AN=2;DB;DP=10;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.52;QD=12.1;SOR=4.804;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*165A>G|||||165|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2795T>C|||||2795|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-73T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1  GT:AD:DP:GQ:PL  1/1:0,10:10:27:135,27,0
chr1    14599   rs531646671     T       A       301.02  .       AC=2;AF=1;AN=2;DB;DP=7;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22;QD=25.36;SOR=4.174;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*190T>A|||||190|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2770A>T|||||2770|,A|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-98A>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1     GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,7:7:21:1|1:14599_T_A:315,21,0:14599
chr1    14604   rs541940975     A       G       356.05  .       AC=2;AF=1;AN=2;DB;DP=9;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.23;QD=28.73;SOR=3.056;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*195A>G|||||195|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2765T>C|||||2765|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-103T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599
chr1    14610   .       T       C       356.05  .       AC=2;AF=1;AN=2;DP=9;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.23;QD=30.97;SOR=3.056;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*201T>C|||||201|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2759A>G|||||2759|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-109A>G||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1    GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599
chr1    14653   rs62635297      C       T       1816.06 .       AC=2;AF=1;AN=2;DB;DP=67;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=34.03;QD=27.94;SOR=2.412;ANN=T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*244C>T|||||244|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2716G>A|||||2716|,T|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-152G>A||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1        GT:AD:DP:GQ:PL  1/1:0,65:65:99:1830,195,0
chr1    14677   rs201327123     G       A       926.64  .       AC=1;AF=0.5;AN=2;BaseQRankSum=2.83;DB;DP=113;ExcessHet=3.0103;FS=1.545;MLEAC=1;MLEAF=0.5;MQ=37.82;MQRankSum=-1.398;QD=8.35;ReadPosRankSum=0.962;SOR=0.852;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*268G>A|||||268|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2692C>T|||||2692|,A|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-176C>T||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1      GT:AD:DP:GQ:PL  0/1:63,48:111:99:934,0,1266
chr1    16257   rs11489794      G       C       49.64   .       AC=1;AF=0.5;AN=2;BaseQRankSum=-1.136;DB;DP=23;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=34.1;MQRankSum=0.381;QD=2.16;ReadPosRankSum=0;SOR=1.022;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*1848G>C|||||1848|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1112C>G|||||1112|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1068-310C>G||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1      GT:AD:DP:GQ:PL  0/1:18,5:23:57:57,0,384
chr1    16288   rs113141985     C       G       48.64   .       AC=1;AF=0.5;AN=2;BaseQRankSum=-1.15;DB;DP=9;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=32.24;MQRankSum=-0.135;QD=6.08;ReadPosRankSum=-1.029;SOR=0.307;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*1879C>G|||||1879|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1081G>C|||||1081|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+319G>C||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL  0/1:5,3:8:56:56,0,107
chr1    16298   rs62636498      C       T       112.14  .       AC=2;AF=1;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.25;QD=28.04;SOR=3.258;ANN=T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*1889C>T|||||1889|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1071G>A|||||1071|,T|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+309G>A||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1        GT:AD:DP:GQ:PL  1/1:0,4:4:12:126,12,0
chr1    136048  rs371677125     C       T       125.14  .       AC=2;AF=1;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=40;QD=31.29;SOR=0.693;ANN=T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-153G>A|||||153|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*3742G>A|||||3742|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1212C>T|||||1212|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*1634G>A|||||1634|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136048C>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1      GT:AD:DP:GQ:PGT:PID:PL:PS       1|1:0,4:4:12:1|1:136048_C_T:139,12,0:136048
chr1    136573  .       T       C       113.97  .       AC=2;AF=1;AN=2;DP=7;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=49.62;QD=19;SOR=1.329;ANN=C|TF_binding_site_variant|LOW|||Egr1|MA0162.2|||n.136573A>G||||||,C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-678A>G|||||678|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*3217A>G|||||3217|,C|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*4901A>G|||||4901|,C|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1737T>C|||||1737|,C|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*1109A>G|||||1109|,C|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136573T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1  GT:AD:DP:GQ:PL  1/1:0,6:6:18:128,18,0
chr1    136962  rs373582709     C       T       118.68  .       AC=1;AF=0.5;AN=2;BaseQRankSum=-0.887;DB;DP=8;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=50.36;MQRankSum=0.489;QD=16.95;ReadPosRankSum=0.489;SOR=0.33;ANN=T|TF_binding_site_variant|MODIFIER|||Egr1|MA0162.2|||n.136962G>A||||||,T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1067G>A|||||1067|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*2828G>A|||||2828|,T|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*4512G>A|||||4512|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2126C>T|||||2126|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*720G>A|||||720|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136962C>T||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL  0/1:1,6:8:10:126,0,10
chr1    137622  rs376555721     G       A       54.66   .       AC=1;AF=0.5;AN=2;BaseQRankSum=0.674;DB;DP=4;ExcessHet=3.0103;FS=6.021;MLEAC=1;MLEAF=0.5;MQ=20;MQRankSum=0;QD=13.67;ReadPosRankSum=-0.319;SOR=2.788;ANN=A|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1727C>T|||||1727|,A|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*2168C>T|||||2168|,A|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3852C>T|||||3852|,A|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2786G>A|||||2786|,A|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*60C>T|||||60|,A|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.137622G>A||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1    GT:AD:DP:GQ:PL  0/1:1,3:4:13:62,0,13
chr1    137825  rs147252685     G       A       622.06  .       AC=2;AF=1;AN=2;DB;DP=26;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=36.46;QD=24.88;SOR=0.77;ANN=A|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1930C>T|||||1930|,A|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1965C>T|||||1965|,A|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3649C>T|||||3649|,A|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2989G>A|||||2989|,A|non_coding_transcript_exon_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene|1/1|n.141C>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1  GT:AD:DP:GQ:PL  1/1:0,25:25:75:636,75,0
chr1    138156  rs370691115     G       T       197.04  .       AC=2;AF=1;AN=2;DB;DP=8;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22;QD=24.63;SOR=0.693;ANN=T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2261C>A|||||2261|,T|upstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-191C>A|||||191|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1634C>A|||||1634|,T|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3318C>A|||||3318|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3320G>T|||||3320|,T|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138156G>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1   GT:AD:DP:GQ:PL  1/1:0,8:8:24:211,24,0
chr1    138817  rs556938922     T       C       945.06  .       AC=2;AF=1;AN=2;DB;DP=45;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=35.16;QD=21.98;SOR=0.739;ANN=C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2922A>G|||||2922|,C|upstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-852A>G|||||852|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*973A>G|||||973|,C|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*2657A>G|||||2657|,C|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3981T>C|||||3981|,C|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138817T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL  1/1:0,43:43:99:959,126,0
chr1    184246  .       T       C       52.84   .       AC=2;AF=1;AN=2;DP=4;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=41.8;QD=17.61;SOR=1.179;ANN=C|downstream_gene_variant|MODIFIER|FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*88T>C|||||88|,C|downstream_gene_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding||c.*971A>G|||||679|,C|downstream_gene_variant|MODIFIER|MIR6859-2|ENSG00000273874|transcript|ENST00000612080.1|miRNA||n.*3645A>G|||||3645|,C|intergenic_region|MODIFIER|FO538757.2-FO538757.1|ENSG00000279928-ENSG00000279457|intergenic_region|ENSG00000279928-ENSG00000279457|||n.184246T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL  1/1:0,3:3:9:66,9,0
chr1    185194  .       G       C       91.64   .       AC=1;AF=0.5;AN=2;BaseQRankSum=-1.345;DP=10;ExcessHet=3.0103;FS=4.26;MLEAC=1;MLEAF=0.5;MQ=24.41;MQRankSum=-1.15;QD=11.45;ReadPosRankSum=0;SOR=2.833;ANN=C|3_prime_UTR_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding|11/11|c.*23C>G|||||23|,C|downstream_gene_variant|MODIFIER|FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*1036G>C|||||1036|,C|downstream_gene_variant|MODIFIER|MIR6859-2|ENSG00000273874|transcript|ENST00000612080.1|miRNA||n.*2697C>G|||||2697|;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1        GT:AD:DP:GQ:PL  0/1:3,5:8:65:99,0,65

Adding population-level MAF

We will explore two methods to achieve this: one is an R-based approach (see below) and the other uses the vcf2maf tool (https://github.com/mskcc/vcf2maf) to assign allele frequencies from a population-level MAF reference, such as Maf.gnomAD or Maf.ExAC, when screened against a set of known variants in dbSNP.

Method 1: using R to collect population-level MAF information

Requirements:

Open an RStudio session and start by installing Bioconductor:

Code Block
# STEP 1: install Bioconductor
# requireNamespace() only checks whether BiocManager is installed; unlike
# require() it does not attach the package as a side effect, and it matches
# the guard used in STEP 2 and STEP 3.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# NOTE(review): Bioconductor 3.14 is paired with R 4.1 -- confirm this matches
# the R version in use, or adjust `version` accordingly.
BiocManager::install(version = "3.14")

Then install the MafDb.gnomAD package for the human GRCh38 genome assembly:

Code Block
# STEP 2: install and attach the gnomAD r2.1 MAF database (GRCh38)

# Bootstrap BiocManager first if this R library does not have it yet.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Fetch the annotation package through BiocManager ...
BiocManager::install("MafDb.gnomAD.r2.1.GRCh38")

# ... and attach it to the current session.
library("MafDb.gnomAD.r2.1.GRCh38")

Next install the SNPlocs.Hsapiens.dbSNP150.GRCh38 package. Note that the latest version (151) is approx. 3.8GB in size and can take a while to download. For this exercise use version 150 (total size 2GB).

Code Block
# STEP 3: install and attach the dbSNP150 SNP locations (GRCh38)

# Make sure BiocManager is available before asking it to install anything.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install the SNP location package through BiocManager.
BiocManager::install(pkgs = "SNPlocs.Hsapiens.dbSNP150.GRCh38")

# Attach the package so its data object is visible in this session.
library("SNPlocs.Hsapiens.dbSNP150.GRCh38")

Create a mafdb by loading the installed MafDb.gnomAD package:

Code Block
#STEP4: load MafDb information
# Bind the MafDb.gnomAD.r2.1.GRCh38 object (attached in STEP2) to a shorter name.
mafdb <- MafDb.gnomAD.r2.1.GRCh38
# Typing the object name at the console prints it.
mafdb
# NOTE(review): citation() and populations() are presumably the accessors that
# report how to cite the resource and which population MAF columns it offers --
# confirm against the GenomicScores documentation.
citation(mafdb)
populations(mafdb)

Looking for the MAF information for a known variant of interest, for example: rs1129038

Code Block
# STEP 5: look up the population-level MAF for a known SNP identifier

# `snpdb` is not created anywhere earlier on this page, so bind it here to the
# SNPlocs object attached in STEP 3; without this, snpsById() fails with
# "object 'snpdb' not found".
snpdb <- SNPlocs.Hsapiens.dbSNP150.GRCh38

# Resolve the rsID through the SNPlocs object and print the result.
rng <- snpsById(snpdb, ids = "rs1129038")
rng

# Query the MafDb object from STEP 4 at that position; prints at the console.
gscores(mafdb, rng)

To look up the population MAF information for several variants of interest, create an R vector, for example:

Code Block
# rsIDs of the variants whose population-level MAF will be looked up
snpvec <- c(
  "rs10875231",
  "rs6678176",
  "rs78286437",
  "rs144406489"
)

then re-run STEP5 above as follows:

Code Block
# Same lookup as STEP5, but with the whole vector of rsIDs passed as `ids`.
# `snpdb`, `snpvec` and `mafdb` are not defined in this snippet and must
# already exist in the session.
rng <- snpsById(snpdb, ids=snpvec)
# Query `mafdb` for these ranges; the result prints at the console.
gscores(mafdb, rng)

Export the MAF information to a data frame, rename the header “AF” (Allelic Frequency) to “MAF”, and save it to a file:

Code Block
# STEP 6: export the MAF information into a data frame and save it to a file

# Score the ranges held in `rng` against `mafdb` and inspect the result.
info <- gscores(mafdb, rng)
info

str(info)

# Flatten the scored ranges into a plain data frame.
infoDF <- data.frame(info)
head(infoDF)

# Rename the "AF" column to "MAF". The column is located by name rather than
# by a hard-coded position (previously column 6), so the right column is
# renamed even if the number of leading coordinate/metadata columns differs.
colnames(infoDF)
stopifnot("AF" %in% colnames(infoDF))
colnames(infoDF)[colnames(infoDF) == "AF"] <- "MAF"
colnames(infoDF)
head(infoDF)

# Write a tab-separated table, with header and row names, to "demo_MAF.txt"
# (a relative path, so it lands in the current working directory); any
# existing file is overwritten because append = FALSE.
write.table(infoDF, file = "demo_MAF.txt", append = FALSE, sep = "\t",
            row.names = TRUE, col.names = TRUE)