ONT variant analysis

Source:

Create a conda environment

Prepare an “environment.yml” file that contains the following tool specifications:

# Conda environment providing the tools used in this ONT variant analysis
# (Python libraries, samtools, the minimap2/graphmap aligners and emboss).
name: nanoQ
channels:
 - defaults
 - anaconda
 - bioconda
 - conda-forge
dependencies:
 # The pinned numpy 1.23 / scipy 1.9 / matplotlib 3.6 releases require
 # Python >= 3.8, so the environment cannot be solved with python=3.7.
 - python=3.8
 - pysam=0.19.1
 - numpy=1.23.3
 - scipy=1.9.1
 - biopython=1.79
 - matplotlib=3.6.0
 - samtools
 - minimap2
 - graphmap
 # emboss provides 'infoseq', used to build the ref_list.txt file
 - emboss

Run the following command to generate the ‘nanoQ’ conda environment:

conda env create -f environment.yml

Activate the environment to access the installed tools:

conda activate nanoQ

Alternatively, create the environment manually (it is named ‘nanoQ2’ here) by running each of the following commands:

#create an interactive session (10 h walltime, 2 CPUs, 4 GB memory)
qsub -I -S /bin/bash -l walltime=10:00:00 -l select=1:ncpus=2:mem=4gb

#create conda environment
conda create --name nanoQ2 python=3.7
conda activate nanoQ2

#install tools
conda install -c bioconda pysam
conda install -c conda-forge numpy
conda install -c conda-forge scipy
conda install -c conda-forge matplotlib
conda install -c conda-forge biopython
conda install -c bioconda samtools
#emboss provides 'infoseq', which is used to create the ref_list.txt file
conda install -c bioconda emboss

#aligners
conda install -c bioconda minimap2
conda install -c bioconda graphmap

#fetch nano-Q
git clone https://github.com/PrestonLeung/Nano-Q.git

#change directory to Nano-Q
cd Nano-Q

#make all python scripts executable
chmod +x ./*.py

#copy all python scripts to your personal bin directory (~/bin)
#create the directory first: if ~/bin does not exist, 'cp' would either fail
#or write a single file named 'bin' instead of copying into a directory
mkdir -p ~/bin
cp -- ./*.py ~/bin/

Map long ONT reads onto a reference genome

Input:

- ref_genome.fasta
- ref_list.txt  # tab-delimited list of the sequences in the above file, one per line: Accession<TAB>Length (e.g., NC001477<TAB>10735); see "creating a ref_list.txt file" below
- sample FASTQ file(s)

Prepare a script, for example, called ‘launch_variant_analysis.pbs’ that contains the following information:

#!/bin/bash -l
#PBS -N nano-Q
#PBS -l walltime=24:00:00
#PBS -l mem=16gb
#PBS -l ncpus=8

# Maps the ONT reads in $ONT onto the reference $REF with minimap2, converts
# the alignment to a sorted and indexed BAM file with samtools, and runs
# nano-q.py on the result. Output files are written to the directory the job
# was submitted from and are prefixed with $SAMPLEID.

# Work in the submission directory; stop if it cannot be entered so that no
# output is written to an unexpected location.
cd "$PBS_O_WORKDIR" || exit 1

################################################################################################################################
# USER DEFINE VARIABLES
################################################################################################################################
SAMPLEID=NC483
REFNAME=NC001477_RefGenome
LIST='/work/phylo/OxfordNanopore/nextflow/assembly/data/NC483/NC483_NC001477_ref_list.txt'
REF='/work/phylo/OxfordNanopore/nextflow/assembly/data/NC483/NC483_NC001477_reference_sequence.fasta'
ONT='/work/phylo/OxfordNanopore/nextflow/assembly/data/NC483/NC483_FAU10290_pass_barcode96_0cf303ee.fastq'
# conda environment containing the tools (use 'nanoQ' if it was created from environment.yml)
CONDA_ENV=nanoQ2
# location of the nano-q.py script from the Nano-Q repository
NANOQ='/work/phylo/OxfordNanopore/nextflow/tools/git/Nano-Q/nano-q.py'
################################################################################################################################

#activate conda environment containing tools for analysis
conda activate "$CONDA_ENV" || exit 1

# Stop at the first failing step instead of running later steps on missing or
# partial files. Enabled only after activation because conda's activation
# scripts may reference unset variables.
set -euo pipefail

#generic mapping reads
minimap2 -a "$REF" "$ONT" > "${SAMPLEID}_aln.sam"

#mapping noisy reads (-x requires a preset name; map-ont is the Oxford Nanopore preset)
#minimap2 -ax map-ont "$REF" "$ONT" > "${SAMPLEID}_aln2.sam"

#Samtools
samtools view -bt "$LIST" -o "${SAMPLEID}_aln.bam" "${SAMPLEID}_aln.sam"

# Use a private temporary directory for the sort: a fixed prefix such as
# /tmp/aln.sorted would be shared by every job running on the same node.
sort_tmp=$(mktemp -d "${TMPDIR:-/tmp}/${SAMPLEID}_sort.XXXXXX")
trap 'rm -rf -- "$sort_tmp"' EXIT

samtools sort -T "${sort_tmp}/aln.sorted" -o "${SAMPLEID}_aln.sorted.bam" "${SAMPLEID}_aln.bam"

samtools index "${SAMPLEID}_aln.sorted.bam"

#run nano-Q (see the Nano-Q documentation for the meaning of the -c/-l/-nr/-q/-j settings)
python "$NANOQ" -b "${SAMPLEID}_aln.sorted.bam" -c 1 -l 9000 -nr 1 -q 5 -j 10

Creating a ref_list.txt file (do this before submitting the job, and set LIST in the script to the resulting file)

#use 'infoseq' (a tool provided by emboss) to report the name and length of
#every sequence in the reference FASTA file; REF must hold the path to that file.
#infoseq prints a heading line followed by space-aligned columns, so awk skips
#the heading (NR > 1) and writes the two fields separated by a tab, which is
#the "Accession <TAB> Length" layout expected for the ref_list file.
infoseq "$REF" -only -name -length | awk 'NR > 1 { print $1 "\t" $2 }' > "${REF}_list.txt"

Submit the job to the scheduler

qsub launch_variant_analysis.pbs

Monitor progress

qjobs

working example:

/work/phylo/OxfordNanopore/nextflow/variants/nano-Q/run2

Additional information

#generic mapping reads
minimap2 -a ref.fa ont-reads.fq > aln.sam

#mapping noisy reads
minimap2 -ax map-ont ref.fa ont-reads.fq > aln.sam      # for Oxford Nanopore reads

Full genome/assembly alignment

minimap2 -ax asm5 ref.fa asm.fa > aln.sam       # assembly to assembly/ref alignment