-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAMR pipeline using Abricate
More file actions
47 lines (45 loc) · 2.41 KB
/
AMR pipeline using Abricate
File metadata and controls
47 lines (45 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env bash
#Pipeline for bacterial analysis using Conda; trimming, assembly, species identification, and AMR genes
#Make sure you have FastQC and Quast installed on your computer; make sure you have a conda environment with trimmomatic, spades, mlst, and abricate
#Run in directory with a sub-directory for each set of fastq.gz files (i.e. just one R1 file and one R2 file in each directory); the name of each directory should contain the LIMS ID of the sample
#Output will be in output.tsv
#Edit paths as necessary
#First, activate Conda environment: conda activate bacterial
for dir in */
do
cd $dir
printf "${dir}" | cut -d / -f 1 >> output.tsv
#FastQC (quality report)
fastqc *1.fastq *2.fastq -o . --threads 10 --extract
#Pre-processing (trimming and filtering)
##ILLUMINACLIP: Cut adapter and other illumina-specific sequences from the read.
##LEADING: Cut bases off the start of a read, if below a threshold quality
##TRAILING: Cut bases off the end of a read, if below a threshold quality
##MINLEN: Drop the read if it is below a specified length
trimmomatic PE -threads 10 *1.fastq *2.fastq p1.fq.gz up1.fq.gz p2.fq.gz up2.fq.gz ILLUMINACLIP:TruSeq3-PE.fa:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 MINLEN:36
#FastQC (quality report)
fastqc -1 p1.fq.gz -2 p2.fq.gz -o . --threads 10 --extract
printf "\n\nFastQC Quality Report for Reads: Pre- and Post-Processing\n" >> output.tsv
cat ./*/summary.txt >> output.tsv
#Assembly
spades -1 p1.fq.gz -2 p2.fq.gz -o .
#Quast (quality report)
printf "\n\nQuast Quality Report for Consensus Genome\n" >> output.tsv
python /home/mdungsmain/anaconda3/bin/quast.py -o . --threads 8 contigs.fasta
cat report.tsv >> output.tsv
#Species identification (mlst)
printf "\n\nSpecies Identification (mlst)" >> output.tsv
mlst contigs.fasta >> output.tsv
#Identification of AMR genes
printf "\n\n\nAMR Genes Identified\n\n"
printf "\nNCBI Database\n" >> output.tsv
abricate contigs.fasta --db ncbi >> output.tsv
printf "\nCARD Database\n" >> output.tsv
abricate contigs.fasta --db card >> output.tsv
printf "\nResfinder Database\n" >> output.tsv
abricate contigs.fasta --db resfinder >> output.tsv
#Summary
printf "\nSummary of AMR Genes of Interest\n" >> output.tsv
awk -F"\t" '$0 ~ "KPC" || $0 ~ "kpc" || $0 ~ "NDM" || $0 ~ "ndm" || $0 ~ "OXA" || $0 ~ "oxa" || $0 ~ "VIM" || $0 ~ "vim" || $0 ~ "IMP" || $0 ~ "imp" {printf $0 "\n"}' output.tsv >> output.tsv
cd ..
done