...
Code Block |
---|
results/Annotation/sampleID/VEP/HaplotypeCaller_sampleID_VEP.ann.vcf |
Alternatively, if the VCF was annotated using snpEff:
Code Block |
---|
results/Annotation/sampleID/snpEff/HaplotypeCaller_sampleID_snpEff.ann.vcf |
Example of a VCF file annotated using snpEff. Note that the metadata header lines are not shown.
Code Block |
---|
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT FBS1-LNCAP-RNA chr1 14542 rs1045833 A G 71.84 . AC=2;AF=1.00;AN=2;DB;DP=3;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=23.70;QD=23.95;SOR=2.833;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*133A>G|||||133|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2827T>C|||||2827|,G|intron_variant|MODIFIER|MIR68 chr1|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-41T>C|||||| 14574 rs28503599GT:AD:DP:GQ:PL 1/1:0,3:3:9:85,9,0 chr1 14574 rs28503599 A G 121.05 . AC=2;AF=1.00;AN=2;DB;DP=10;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.52;QD=12.10;SOR=4.804;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*165A>G|||||165|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2795T>C|||||2795|,G|intron_variant|MODIFIER|MIR68 chr1|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-73T>C|||||| 14599 rs531646671 TGT:AD:DP:GQ:PL 1/1:0,10:10:27:135,27,0 chr1 14599 rs531646671 T A 301.02 . AC=2;AF=1.00;AN=2;DB;DP=7;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.00;QD=25.36;SOR=4.174;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*190T>A|||||190|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2770A>T|||||2770|,A|intron_variant|MODIFIER|MIR685 chr1|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-98A>T|||||| 14604 rs541940975 GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,7:7:21:1|1:14599_T_A:315,21,0:14599 chr1 14604 rs541940975 A G 356.05 . 
AC=2;AF=1.00;AN=2;DB;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.23;QD=28.73;SOR=3.056;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*195A>G|||||195|,G|downstream_gene_variant|MODIFIER|MIR685 chr1 14610 . T C 356.05|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2765T>C|||||2765|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-103T>C|||||| GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599 chr1 14610 . T C 356.05 . AC=2;AF=1.00;AN=2;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.23;QD=30.97;SOR=3.056;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*201T>C|||||201|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2759A>G|||||2759|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|ENSG000 chr1|unprocessed_pseudogene|10/10|n.1254-109A>G|||||| 14653 rs62635297 GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599 chr1 14653 rs62635297 C T 1816.06 . AC=2;AF=1.00;AN=2;DB;DP=67;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=34.03;QD=27.94;SOR=2.412;ANN=T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*244C>T|||||244|,T|downstream_gene_variant|MODIFIER|MIR68 chr1 14677 rs201327123 G |MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2716G>A|||||2716|,T|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-152G>A|||||| GT:AD:DP:GQ:PL 1/1:0,65:65:99:1830,195,0 chr1 14677 rs201327123 G A 926.64 . 
AC=1;AF=0.500;AN=2;BaseQRankSum=2.83;DB;DP=113;ExcessHet=3.0103;FS=1.545;MLEAC=1;MLEAF=0.500;MQ=37.82;MQRankSum=-1.398e+00;QD=8.35;ReadPosRankSum=0.962;SOR=0.852;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcrip chr1transcript||n.*268G>A|||||268|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2692C>T|||||2692|,A|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-176C>T|||||| 16257 rs11489794 GT:AD:DP:GQ:PL 0/1:63,48:111:99:934,0,1266 chr1 16257 rs11489794 G C 49.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.136e+00;DB;DP=23;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=34.10;MQRankSum=0.381;QD=2.16;ReadPosRankSum=0.00;SOR=1.022;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript chr1 16288 rs113141985 C G 48.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.150e+00;DB;||n.*1848G>C|||||1848|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1112C>G|||||1112|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1068-310C>G|||||| GT:AD:DP:GQ:PL 0/1:18,5:23:57:57,0,384 chr1 16288 rs113141985 C G 48.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.150e+00;DB;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=32.24;MQRankSum=-1.350e-01;QD=6.08;ReadPosRankSum=-1.029e+00;SOR=0.307;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_ chr1 16298 rs62636498 C T 112.14 . 
AC=2;AF=1.00;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.25;QD=28.04;SOR=3.258;ANN=T|transcript||n.*1879C>G|||||1879|,G|downstream_gene_variant|MODIFIER|DDX11L1MIR6859-1|ENSG00000223972ENSG00000278267|transcript|ENST00000456328ENST00000619216.21|processed_transcriptmiRNA||n.*1889C>T1081G>C|||||18891081|,TG|downstream_geneintron_variant|MODIFIER|MIR6|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+319G>C|||||| GT:AD:DP:GQ:PL 0/1:5,3:8:56:56,0,107 chr1 13604816298 rs62636498 rs371677125 C T 125112.14 . AC=2;AF=1.00;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=4022.0025;QD=3128.2904;SOR=03.693258;ANN=T|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903DDX11L1|ENSG00000223972|transcript|ENST00000494149ENST00000456328.2|processed_pseudogenetranscript||n.-153G>A*1889C>T|||||1531889|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1071G>A|||||1071|,T|intron_variant|MODIFIER|RP chr1 136573 . |WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+309G>A|||||| GT:AD:DP:GQ:PL 1/1:0,4:4:12:126,12,0 chr1 T136048 rs371677125 C T 113125.9714 . AC=2;AF=1.00;AN=2;DB;DP=74;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=4940.6200;QD=1931.0029;SOR=10.329693;ANN=CT|TFupstream_bindinggene_site_variant|LOWMODIFIER|RP11-34P13.15|ENSG00000268903|Egr1transcript|MA0162ENST00000494149.2|processed_pseudogene||n.136573A>G-153G>A|||||153|,CT|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.1514|ENSG00000268903ENSG00000239906|transcript|ENST00000494149ENST00000493797.21|processed_pseudogene||n chr1 136962 rs373582709 C T 118.68 . 
AC=1;AF=0.500;AN=2;BaseQRankSum=-8.870e-01;DB;DP=8;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=50.36;MQRankSum=0.489;QD=16.95;ReadPosRankSum=0.489;SOR=0.330;ANN=T|TF_binding_site_variant|MODIFIER|||Egr1|MA0162.2|||n.136962G>A||||||,T|upstream_gene_variant|MODIFIER|RP1 chr1 137622 rs376555721 G Aantisense||n.*3742G>A|||||3742|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1212C>T|||||1212|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*1634G>A|||||1634|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136048C>T|||||| GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,4:4:12:1|1:136048_C_T:139,12,0:136048 chr1 136573 . 54.66T . C AC=1 113.97 . AC=2;AF=01.50000;AN=2;BaseQRankSum=0.674;DB;DP=47;ExcessHet=3.0103;FS=60.021000;MLEAC=12;MLEAF=01.50000;MQ=2049.0062;MQRankSumQD=019.00;QD=13.67;ReadPosRankSum=-3.190e-01;SOR=21.788329;ANN=AC|upstreamTF_binding_genesite_variant|MODIFIERLOW|RP11-34P13.15|ENSG00000268903|transcriptEgr1|ENST00000494149MA0162.2|processed_pseudo chr1 137825 rs147252685 G A 622.06 . AC=2;AF=1.00;AN=2;DB;DP=26;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=36.46;QD=24.88;SOR=0.770;ANN=A|upstream|||n.136573A>G||||||,C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-678A>G|||||678|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*3217A>G|||||3217|,C|downstream_gene_variant|MODIFIER|RP11-34P13.1513|ENSG00000268903ENSG00000241860|transcript|ENST00000494149ENST00000484859.21|processed_pseudogeneantisense||n.-1930C>T*4901A>G|||||19304901|,AC|downstream_gene_variant|MODIFIER chr1 138156 rs370691115 G T 197.04 . 
AC=2;AF=1.00;AN=2;DB;DP=8;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.00;QD=24.63;SOR=0.693;ANN=T|upstream_gene_variant||CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1737T>C|||||1737|,C|downstream_gene_variant|MODIFIER|RP11-34P13.1516|ENSG00000268903ENSG00000269981|transcript|ENST00000494149ENST00000595919.21|processed_pseudogene||n.-2261C>A*1109A>G|||||22611109|,TC|upstream_gene_variant|MODIFIER|RP intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136573T>C|||||| GT:AD:DP:GQ:PL 1/1:0,6:6:18:128,18,0 chr1 138817136962 rs556938922rs373582709 TC CT 945118.0668 . AC=21;AF=10.00500;AN=2;BaseQRankSum=-8.870e-01;DB;DP=458;ExcessHet=3.0103;FS=0.000;MLEAC=21;MLEAF=10.00500;MQ=35.16;QD=21.98=50.36;MQRankSum=0.489;QD=16.95;ReadPosRankSum=0.489;SOR=0.739330;ANN=CT|upstreamTF_binding_genesite_variant|MODIFIER|RP11-|||Egr1|MA0162.2|||n.136962G>A||||||,T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2922A>G1067G>A|||||29221067|,CT|upstreamdownstream_gene_variant|MODIFIER|R chr1 184246 . T C 52.84 . AC=2;AF=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=41.80;QD=17.61;SOR=1.179;ANN=CRP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*2828G>A|||||2828|,T|downstream_gene_variant|MODIFIER|FO538757RP11-34P13.213|ENSG00000279928ENSG00000241860|transcript|ENST00000624431ENST00000484859.1|protein_codingantisense||cn.*88T>C4512G>A|||||884512|,CT|downstream_gene_variant|MODIFIER|CICP27|FO538757.1|ENSG000002 chr1 185194 . G C 91.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.345e+00;DP=10;ExcessHet=3.0103;FS=4.260;MLEAC=1;MLEAF=0.500;MQ=24.41;MQRankSum=-1.150e+00;QD=11.45;ReadPosRankSum=0.00;SOR=2.833;ANN=C|3_prime_UTR_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding|11/11|c.*23 |
Adding MAF information using bcftools
The following command line adds MAF information to either annotated or non-annotated VCF files.
Code Block |
---|
#non-annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID.vcf > HaplotypeCaller_sampleID_tags.vcf
#VEP annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_VEP.ann.vcf > HaplotypeCaller_sampleID_VEP_tags.ann.vcf
#snpEff annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_snpEff.ann.vcf > HaplotypeCaller_sampleID_snpEff_tags.ann.vcf |
For the example shown above, the block below shows the MAF information added to the VCF annotated using snpEff.
Code Block |
---|
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT FBS1-LNCAP-RNA chr1 14542 rs1045833 A G 71.84 . AC=2;AF=1;AN=2;DB;DP=3ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2126C>T|||||2126|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*720G>A|||||720|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136962C>T|||||| GT:AD:DP:GQ:PL 0/1:1,6:8:10:126,0,10 chr1 137622 rs376555721 G A 54.66 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.674;DB;DP=4;ExcessHet=3.0103;FS=06.021;MLEAC=1;MLEAF=0.5500;MQ=2320.700;QDMQRankSum=230.9500;SOR=QD=13.67;ReadPosRankSum=-3.190e-01;SOR=2.833788;ANN=GA|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_pseudogene||n.-1727C>T|||||1727|,A|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*133A>G2168C>T|||||1332168|,GA|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG0 chr1 14574 rs28503599 A G 121.05 . AC=2;AF=1;AN=2;DB;DP=10;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.52;QD=12.1;SOR=4.804;ANN=GRP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3852C>T|||||3852|,A|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2786G>A|||||2786|,A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.16|ENSG00000269981|transcript|ENST00000456328ENST00000595919.21|processed_transcriptpseudogene||n.*165A>G60C>T|||||16560|,GA|downstreamintergenic_gene_variantregion|MODIFIER|MIR6859-1|ENSG00 chr1 14599 rs531646671|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.137622G>A|||||| GT:AD:DP:GQ:PL 0/1:1,3:4:13:62,0,13 chr1 137825 rs147252685 TG A 301622.0206 . 
AC=2;AF=1.00;AN=2;DB;DP=726;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=2236.46;QD=2524.3688;SOR=40.174770;ANN=A|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_transcriptpseudogene||n.*190T>A-1930C>T|||||1901930|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000 chr1 14604 rs541940975 A G 356.05 . AC=2;AF=1;AN=2;DB;DP=9;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.23;QD=28.73;SOR=3.056;ANN=GRP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1965C>T|||||1965|,A|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3649C>T|||||3649|,A|downstream_gene_variant|MODIFIER|DDX11L1CICP27|ENSG00000223972ENSG00000233750|transcript|ENST00000456328ENST00000442987.23|processed_transcriptpseudogene||n.*195A>G2989G>A|||||1952989|,G|downstream_geneA|non_coding_transcript_exon_variant|MODIFIER|MIR6859-1|ENSG00 chr1 14610 . RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene|1/1|n.141C>T|||||| GT:AD:DP:GQ:PL 1/1:0,25:25:75:636,75,0 chr1 138156 rs370691115 TG CT 356197.0504 . AC=2;AF=1.00;AN=2;DB;DP=98;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=22.2300;QD=3024.9763;SOR=30.056693;ANN=CT|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_transcriptpseudogene||n.*201T>C-2261C>A|||||2012261|,CT|downstreamupstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|t chr1 14653 rs62635297 C T 1816.06 . 
AC=2;AF=1;AN=2;DB;DP=67;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=34.03;QD=27.94;SOR=2.412;ANN=RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-191C>A|||||191|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1634C>A|||||1634|,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.13|ENSG00000241860|transcript|ENST00000456328ENST00000484859.21|processed_transcriptantisense||n.*244C>T3318C>A|||||2443318|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG0 chr1 14677 rs201327123 G A |CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3320G>T|||||3320|,T|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138156G>T|||||| GT:AD:DP:GQ:PL 1/1:0,8:8:24:211,24,0 chr1 138817 rs556938922 T 926.64 .C AC=1945.06 . AC=2;AF=01.500;AN=2;BaseQRankSum=2.83;DB;DP=11345;ExcessHet=3.0103;FS=10.545000;MLEAC=12;MLEAF=01.500;MQ=37.82;MQRankSum=-1.39835.16;QD=821.35;ReadPosRankSum=0.962;98;SOR=0.852739;ANN=AC|downstreamupstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.15|ENSG00000268903|transcript|ENST00000456328ENST00000494149.2|processed_transcriptpseudogene||n.*26 chr1 16257 rs11489794 G C 49.64 . 
AC=1;AF=0.5;AN=2;BaseQRankSum=-1.136;DB;DP=23;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=34.1;MQRankSum=0.381;QD=2.16;ReadPosRankSum=0;SOR=1.022;ANN=-2922A>G|||||2922|,C|upstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-852A>G|||||852|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*973A>G|||||973|,C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972RP11-34P13.13|ENSG00000241860|transcript|ENST00000456328ENST00000484859.21|processed_transcriptantisense||n.*1848G>C2657A>G|||||| chr1 16288 rs113141985 C G2657|,C|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3981T>C|||||3981|,C|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138817T>C|||||| GT:AD:DP:GQ:PL 1/1:0,43:43:99:959,126,0 chr1 184246 . T C 4852.6484 . AC=12;AF=01.500;AN=2;BaseQRankSum=-1.15;DB;DP=94;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.5500;MQ=32.24;MQRankSum=-0.13541.80;QD=617.0861;ReadPosRankSumSOR=-1.029;SOR=0.307;179;ANN=GC|downstream_gene_variant|MODIFIER|DDX11L1FO538757.2|ENSG00000223972ENSG00000279928|transcript|ENST00000456328ENST00000624431.21|processedprotein_transcriptcoding||n.*1879C> chr1 16298 rs62636498 C T 112.14 . 
AC=2;AF=1;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.25;QD=28.04;SOR=3.258;ANN=Tc.*88T>C|||||88|,C|downstream_gene_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding||c.*971A>G|||||679|,C|downstream_gene_variant|MODIFIER|DDX11L1MIR6859-2|ENSG00000223972ENSG00000273874|transcript|ENST00000456328ENST00000612080.21|processed_transcriptmiRNA||n.*1889C>T3645A>G|||||18893645|,TC|downstreamintergenic_gene_variantregion|MODIFIER|MIR6859FO538757.2-FO538757.1|ENSG chr1 136048 rs371677125|ENSG00000279928-ENSG00000279457|intergenic_region|ENSG00000279928-ENSG00000279457|||n.184246T>C|||||| GT:AD:DP:GQ:PL 1/1:0,3:3:9:66,9,0 chr1 185194 . CG TC 12591.1464 . AC=21;AF=10.500;AN=2;DBBaseQRankSum=-1.345e+00;DP=410;ExcessHet=3.0103;FS=04.260;MLEAC=21;MLEAF=10.500;MQ=40;QD=31.2924.41;MQRankSum=-1.150e+00;QD=11.45;ReadPosRankSum=0.00;SOR=02.693833;ANN=TC|upstream3_prime_geneUTR_variant|MODIFIER|RP11-34P13FO538757.151|ENSG00000268903ENSG00000279457|transcript|ENST00000494149ENST00000623083.23|processedprotein_pseudogenecoding|11/11|nc.-153G>A*23C>G|||||15323|,TC|downstream_gene_variant|MODIFIER|RP11-34P13.14|E chr1 136573 . T C 113.97 . AC=2;AF=1;AN=2;DP=7;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=49.62;QD=19;SOR=1.329;ANN=C|TF_binding_site_variant|LOW|||Egr1|MA0162.2|||n.136573A>G||||||,C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-678A>G||||| chr1 136962 rs373582709 C T 118.68 . AC=1;AF=0.5;AN=2;BaseQRankSum=-0.887;DB;DP=8FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*1036G>C|||||1036|,C|downstream_gene_variant|MODIFIER|MIR6859-2|ENSG00000273874|transcript|ENST00000612080.1|miRNA||n.*2697C>G|||||2697| GT:AD:DP:GQ:PL 0/1:3,5:8:65:99,0,65 |
Individual-level MAF: adding MAF information using bcftools
At the individual level, the expected allele frequency of an alternative (minor) allele can be 1.0 (homozygous), 0.5 (heterozygous), or 0.0 (the allele is absent in the patient when screening against a reference set of genetic variants).
The following command line adds MAF information to either annotated or non-annotated VCF files.
Code Block |
---|
#non-annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID.vcf > HaplotypeCaller_sampleID_tags.vcf
#VEP annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_VEP.ann.vcf > HaplotypeCaller_sampleID_VEP_tags.ann.vcf
#snpEff annotated file:
bcftools +fill-tags HaplotypeCaller_sampleID_snpEff.ann.vcf > HaplotypeCaller_sampleID_snpEff_tags.ann.vcf |
For the example shown above, the block below shows the MAF information added to the VCF annotated using snpEff.
Code Block |
---|
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT FBS1-LNCAP-RNA chr1 14542 rs1045833 A G 71.84 . AC=2;AF=1;AN=2;DB;DP=3;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=5023.36;MQRankSum=0.489;7;QD=1623.95;ReadPosRankSum=0.489;SOR=02.33833;ANN=TG|TFdownstream_binding_sitegene_variant|MODIFIER|DDX11L1|ENSG00000223972|Egr1transcript|MA0162ENST00000456328.2|processed_transcript||n.136962G>A*133A>G|||||133|,TG|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.15|EN chr1 137622 rs376555721 G A 54.66 . AC=1;AF=0.5;AN=2;BaseQRankSum=0.674;DB;DP=4;ExcessHet=3.0103;FS=6.021;MLEAC=1;MLEAF=0.5;MQ=20;MQRankSum=0;QD=13.67;ReadPosRankSum=-0.319;SOR=2.788;ANN=A|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1727C chr1 137825 rs147252685 GMIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2827T>C|||||2827|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-41T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,3:3:9:85,9,0 chr1 14574 rs28503599 A AG 622121.0605 . AC=2;AF=1;AN=2;DB;DP=2610;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=3622.4652;QD=2412.881;SOR=04.77804;ANN=AG|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903DDX11L1|ENSG00000223972|transcript|ENST00000494149ENST00000456328.2|processed_pseudogenetranscript||n.-1930C>T*165A>G|||||1930165|,AG|downstream_gene_variant|MODIFIER|RP11-34P13 chr1 138156 rs370691115 G T 197.04 . 
AC=2;AF=1;AN=2;DB;DP=8;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22;QD=24.63;SOR=0.693;ANN=T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2261C>A|||||2261|,T|upstream_gene_variant|MODIFIER|RP11-34P13.16|E chr1 138817 rs556938922MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2795T>C|||||2795|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-73T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,10:10:27:135,27,0 chr1 14599 rs531646671 T CA 945301.0602 . AC=2;AF=1;AN=2;DB;DP=457;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=35.1622;QD=2125.9836;SOR=04.739174;ANN=CA|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903DDX11L1|ENSG00000223972|transcript|ENST00000494149ENST00000456328.2|processed_pseudogenetranscript||n.-2922A>G*190T>A|||||2922190|,CA|upstreamdownstream_gene_variant|MODIFIER|RP11-34P13. chr1 184246 . T C 52.84 . AC=2;AF=1;AN=2;DP=4;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=41.8;QD=17.61;SOR=1.179;ANN=C|downstream_gene_variant|MODIFIER|FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*88T>C|||||88|,C|downstream_gene_variant|MODIFIER|FO538757.1|ENSG00000279457|tran chr1 185194 . G C 91.64 . AC=1;AF=0.5;AN=2;BaseQRankSum=-1.345;DP=10;ExcessHet=3.0103;FS=4.26;MLEAC=1;MLEAF=0.5;MQ=24.41;MQRankSum=-1.15;QD=11.45;ReadPosRankSum=0;SOR=2.833;ANN=C|3_prime_UTR_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding|11/11|c.*23C>G|||||23|,C|dowMIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2770A>T|||||2770|,A|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-98A>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,7:7:21:1|1:14599_T_A:315,21,0:14599 chr1 14604 rs541940975 A G 356.05 . 
AC=2;AF=1;AN=2;DB;DP=9;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.23;QD=28.73;SOR=3.056;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*195A>G|||||195|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2765T>C|||||2765|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-103T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599 chr1 14610 . T C 356.05 . AC=2;AF=1;AN=2;DP=9;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.23;QD=30.97;SOR=3.056;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*201T>C|||||201|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2759A>G|||||2759|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-109A>G||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,9:9:27:1|1:14599_T_A:370,27,0:14599 chr1 14653 rs62635297 C T 1816.06 . AC=2;AF=1;AN=2;DB;DP=67;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=34.03;QD=27.94;SOR=2.412;ANN=T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*244C>T|||||244|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2716G>A|||||2716|,T|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-152G>A||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,65:65:99:1830,195,0 chr1 14677 rs201327123 G A 926.64 . 
AC=1;AF=0.5;AN=2;BaseQRankSum=2.83;DB;DP=113;ExcessHet=3.0103;FS=1.545;MLEAC=1;MLEAF=0.5;MQ=37.82;MQRankSum=-1.398;QD=8.35;ReadPosRankSum=0.962;SOR=0.852;ANN=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*268G>A|||||268|,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*2692C>T|||||2692|,A|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|10/10|n.1254-176C>T||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 0/1:63,48:111:99:934,0,1266 chr1 16257 rs11489794 G C 49.64 . AC=1;AF=0.5;AN=2;BaseQRankSum=-1.136;DB;DP=23;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=34.1;MQRankSum=0.381;QD=2.16;ReadPosRankSum=0;SOR=1.022;ANN=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*1848G>C|||||1848|,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1112C>G|||||1112|,C|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1068-310C>G||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 0/1:18,5:23:57:57,0,384 chr1 16288 rs113141985 C G 48.64 . AC=1;AF=0.5;AN=2;BaseQRankSum=-1.15;DB;DP=9;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=32.24;MQRankSum=-0.135;QD=6.08;ReadPosRankSum=-1.029;SOR=0.307;ANN=G|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*1879C>G|||||1879|,G|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1081G>C|||||1081|,G|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+319G>C||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 0/1:5,3:8:56:56,0,107 chr1 16298 rs62636498 C T 112.14 . 
AC=2;AF=1;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.25;QD=28.04;SOR=3.258;ANN=T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|transcript|ENST00000456328.2|processed_transcript||n.*1889C>T|||||1889|,T|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|transcript|ENST00000619216.1|miRNA||n.*1071G>A|||||1071|,T|intron_variant|MODIFIER|WASH7P|ENSG00000227232|transcript|ENST00000488147.1|unprocessed_pseudogene|8/10|n.1067+309G>A||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,4:4:12:126,12,0 chr1 136048 rs371677125 C T 125.14 . AC=2;AF=1;AN=2;DB;DP=4;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=40;QD=31.29;SOR=0.693;ANN=T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-153G>A|||||153|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*3742G>A|||||3742|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1212C>T|||||1212|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*1634G>A|||||1634|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136048C>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PGT:PID:PL:PS 1|1:0,4:4:12:1|1:136048_C_T:139,12,0:136048 chr1 136573 . T C 113.97 . 
AC=2;AF=1;AN=2;DP=7;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=49.62;QD=19;SOR=1.329;ANN=C|TF_binding_site_variant|LOW|||Egr1|MA0162.2|||n.136573A>G||||||,C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-678A>G|||||678|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*3217A>G|||||3217|,C|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*4901A>G|||||4901|,C|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*1737T>C|||||1737|,C|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*1109A>G|||||1109|,C|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136573T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,6:6:18:128,18,0 chr1 136962 rs373582709 C T 118.68 . 
AC=1;AF=0.5;AN=2;BaseQRankSum=-0.887;DB;DP=8;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=50.36;MQRankSum=0.489;QD=16.95;ReadPosRankSum=0.489;SOR=0.33;ANN=T|TF_binding_site_variant|MODIFIER|||Egr1|MA0162.2|||n.136962G>A||||||,T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1067G>A|||||1067|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*2828G>A|||||2828|,T|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*4512G>A|||||4512|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2126C>T|||||2126|,T|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*720G>A|||||720|,T|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.136962C>T||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 0/1:1,6:8:10:126,0,10 chr1 137622 rs376555721 G A 54.66 . 
AC=1;AF=0.5;AN=2;BaseQRankSum=0.674;DB;DP=4;ExcessHet=3.0103;FS=6.021;MLEAC=1;MLEAF=0.5;MQ=20;MQRankSum=0;QD=13.67;ReadPosRankSum=-0.319;SOR=2.788;ANN=A|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1727C>T|||||1727|,A|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*2168C>T|||||2168|,A|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3852C>T|||||3852|,A|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2786G>A|||||2786|,A|downstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.*60C>T|||||60|,A|intergenic_region|MODIFIER|RP11-34P13.15-RP11-34P13.16|ENSG00000268903-ENSG00000269981|intergenic_region|ENSG00000268903-ENSG00000269981|||n.137622G>A||||||;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 0/1:1,3:4:13:62,0,13 chr1 137825 rs147252685 G A 622.06 . AC=2;AF=1;AN=2;DB;DP=26;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=36.46;QD=24.88;SOR=0.77;ANN=A|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-1930C>T|||||1930|,A|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1965C>T|||||1965|,A|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3649C>T|||||3649|,A|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*2989G>A|||||2989|,A|non_coding_transcript_exon_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene|1/1|n.141C>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,25:25:75:636,75,0 chr1 138156 rs370691115 G T 197.04 . 
AC=2;AF=1;AN=2;DB;DP=8;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22;QD=24.63;SOR=0.693;ANN=T|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2261C>A|||||2261|,T|upstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-191C>A|||||191|,T|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*1634C>A|||||1634|,T|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*3318C>A|||||3318|,T|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3320G>T|||||3320|,T|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138156G>T||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,8:8:24:211,24,0 chr1 138817 rs556938922 T C 945.06 . 
AC=2;AF=1;AN=2;DB;DP=45;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=35.16;QD=21.98;SOR=0.739;ANN=C|upstream_gene_variant|MODIFIER|RP11-34P13.15|ENSG00000268903|transcript|ENST00000494149.2|processed_pseudogene||n.-2922A>G|||||2922|,C|upstream_gene_variant|MODIFIER|RP11-34P13.16|ENSG00000269981|transcript|ENST00000595919.1|processed_pseudogene||n.-852A>G|||||852|,C|downstream_gene_variant|MODIFIER|RP11-34P13.14|ENSG00000239906|transcript|ENST00000493797.1|antisense||n.*973A>G|||||973|,C|downstream_gene_variant|MODIFIER|RP11-34P13.13|ENSG00000241860|transcript|ENST00000484859.1|antisense||n.*2657A>G|||||2657|,C|downstream_gene_variant|MODIFIER|CICP27|ENSG00000233750|transcript|ENST00000442987.3|processed_pseudogene||n.*3981T>C|||||3981|,C|intergenic_region|MODIFIER|RP11-34P13.16-RP11-34P13.14|ENSG00000269981-ENSG00000239906|intergenic_region|ENSG00000269981-ENSG00000239906|||n.138817T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,43:43:99:959,126,0 chr1 184246 . T C 52.84 . AC=2;AF=1;AN=2;DP=4;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=41.8;QD=17.61;SOR=1.179;ANN=C|downstream_gene_variant|MODIFIER|FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*88T>C|||||88|,C|downstream_gene_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding||c.*971A>G|||||679|,C|downstream_gene_variant|MODIFIER|MIR6859-2|ENSG00000273874|transcript|ENST00000612080.1|miRNA||n.*3645A>G|||||3645|,C|intergenic_region|MODIFIER|FO538757.2-FO538757.1|ENSG00000279928-ENSG00000279457|intergenic_region|ENSG00000279928-ENSG00000279457|||n.184246T>C||||||;NS=1;MAF=0;AC_Het=0;AC_Hom=2;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 1/1:0,3:3:9:66,9,0 chr1 185194 . G C 91.64 . 
AC=1;AF=0.5;AN=2;BaseQRankSum=-1.345;DP=10;ExcessHet=3.0103;FS=4.26;MLEAC=1;MLEAF=0.5;MQ=24.41;MQRankSum=-1.15;QD=11.45;ReadPosRankSum=0;SOR=2.833;ANN=C|3_prime_UTR_variant|MODIFIER|FO538757.1|ENSG00000279457|transcript|ENST00000623083.3|protein_coding|11/11|c.*23C>G|||||23|,C|downstream_gene_variant|MODIFIER|FO538757.2|ENSG00000279928|transcript|ENST00000624431.1|protein_coding||c.*1036G>C|||||1036|,C|downstream_gene_variant|MODIFIER|MIR6859-2|ENSG00000273874|transcript|ENST00000612080.1|miRNA||n.*2697C>G|||||2697|;NS=1;MAF=0.5;AC_Het=1;AC_Hom=0;AC_Hemi=0;HWE=1;ExcHet=1 GT:AD:DP:GQ:PL 0/1:3,5:8:65:99,0,65 |
Adding population-level MAF
We will be exploring two methods to achieve this: one is an R-based approach (see below) and the other uses the vcf2maf tool (https://github.com/mskcc/vcf2maf) to assign allelic frequencies using reference population-level MAF such as Maf.gnomAD or Maf.ExAC when screened against a set of known variants in dbSNP.
Method 1: using R to collect population-level MAF information
Requirements:
Install Rstudio https://www.rstudio.com/products/rstudio/download/
R version 4.1 (Mac: https://www.youtube.com/watch?v=Vy-lEkJB3cA ; Windows: https://www.youtube.com/watch?v=0jlMXPMoiOg )
Open an Rstudio session and let’s initially install bioconductor:
Code Block |
---|
#STEP1: install Bioconductor
# Install the BiocManager installer only when it is not already available.
# requireNamespace() checks availability without attaching the package,
# matching the guard used in STEP2 and STEP3.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Install/select Bioconductor release 3.14 (used with R 4.1, see Requirements).
BiocManager::install(version = "3.14") |
Then install the MafDb.gnomAD package for the human GRCh38 genome assembly:
Code Block |
---|
#STEP2: install MafDb.gnomAD
# Make sure the BiocManager installer is available before using it.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Fetch the gnomAD r2.1 allele-frequency package for the GRCh38 assembly.
BiocManager::install("MafDb.gnomAD.r2.1.GRCh38")

# Attach the package so its annotation object can be used in STEP4.
library(MafDb.gnomAD.r2.1.GRCh38) |
Next install the SNPlocs.Hsapiens.dbSNP150.GRCh38 package. Note the latest version 151 is approx. 3.8GB in size and can take a while to download. For the exercise use version 150 (total size 2GB).
Code Block |
---|
#STEP3: install and load SNP information from the dbSNP150 release
# Make sure the BiocManager installer is available before using it.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Fetch the dbSNP build 150 SNP locations package for the GRCh38 assembly.
BiocManager::install("SNPlocs.Hsapiens.dbSNP150.GRCh38")

# Attach the package so rsIDs can be looked up in STEP5.
library(SNPlocs.Hsapiens.dbSNP150.GRCh38) |
Create a mafdb by loading the installed MafDb.gnomAD package:
Code Block |
---|
#STEP4: load MafDb information
# Bind the annotation object from the package attached in STEP2 to a short name.
mafdb <- MafDb.gnomAD.r2.1.GRCh38
# Typing the name at the console prints a summary of the object.
mafdb
# Show how to cite this data resource.
citation(mafdb)
# Show the population names stored in mafdb (presumably the gnomAD populations
# for which allele frequencies can be queried -- see the GenomicScores docs).
populations(mafdb) |
Looking for the MAF information for a known variant of interest, for example: rs1129038
Code Block |
---|
#STEP5: look for population-level MAF information for a known SNP identifier
# snpsById() needs a SNPlocs object; bind the one attached in STEP3 to `snpdb`
# (without this line `snpdb` is undefined and the lookup fails).
snpdb <- SNPlocs.Hsapiens.dbSNP150.GRCh38
# Look up the genomic position of the rsID.
rng <- snpsById(snpdb, ids = "rs1129038")
rng
# Query mafdb (created in STEP4) for the allele frequency at that position.
gscores(mafdb, rng) |
To look up the population MAF information for several variants of interest, create an R vector, for example:
Code Block |
---|
# rsIDs of the variants whose population-level MAF will be looked up.
snpvec <- c(
  "rs10875231",
  "rs6678176",
  "rs78286437",
  "rs144406489"
) |
then re-run STEP5 above as follows:
Code Block |
---|
# Look up the positions of every rsID in snpvec in one call.
rng <- snpsById(snpdb, ids = snpvec)
# Query mafdb for the allele frequencies at those positions.
gscores(mafdb, rng) |
Export the MAF information to a data frame, rename the column header “AF” (Allelic Frequency) to MAF and save it to a file:
Code Block |
---|
#STEP 6: export the MAF information to a data frame and save it to a file
# Keep the gscores() result for the positions in rng and inspect it.
info <- gscores(mafdb, rng)
info
str(info)
# Flatten the result into a plain data frame (one row per variant).
infoDF <- data.frame(info)
head(infoDF)
colnames(infoDF)
# Rename the "AF" column to "MAF" by name rather than by position: the column
# index depends on how many coordinate/metadata columns rng carries, so a
# hard-coded index can silently rename the wrong column.
colnames(infoDF)[colnames(infoDF) == "AF"] <- "MAF"
colnames(infoDF)
head(infoDF)
# Write a tab-delimited file with a header line and row names.
write.table(infoDF, file = "demo_MAF.txt", append = FALSE, sep = "\t",
            row.names = TRUE, col.names = TRUE) |