Bash/BLAST_16S at main · janis0616/Bash · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/usr/bin/env bash
#This script decompresses each .fna.gz file in the current working directory
#and searches it with BLAST+ (blastn) against the "16S ribosomal RNA" database
#(RefSeq type strains), then writes the top 10 matches for each file to a
#single concatenated txt file.
#
#Step 1: decompress each .fna.gz file and run it through BLAST+ (blastn).
#
# blast_each_genome
#   For every ./*.fna.gz in the current directory:
#     1. decompress it to a temporary query file  <name>.fna.gz.fna
#     2. run blastn on it, writing the report to  <name>.fna.gz.txt
#     3. delete ONLY that temporary query file (other .fna files are kept)
#   Environment (all optional; defaults are the original hard-coded values):
#     BLASTN_BIN     path to the blastn executable
#     BLAST_DB       path/name of the BLAST database
#     BLAST_THREADS  value passed to -num_threads
#   Returns 0 when every file was processed, 1 if any gzip/blastn call failed.
#   Failures are reported on stderr and the loop continues with the next file.
blast_each_genome() {
	local blastn_bin=${BLASTN_BIN:-/home/mdungs/Programs/BLAST/bin/blastn}
	local blast_db=${BLAST_DB:-/home/mdungs/Programs/BLAST/16S_ribosomal_RNA}
	local threads=${BLAST_THREADS:-16}
	local gz_file query_file report_file
	local failures=0

	# The ./ prefix keeps names that start with '-' from being read as options.
	for gz_file in ./*.fna.gz; do
		# An unmatched glob stays literal; skip it instead of processing "*.fna.gz".
		if [[ ! -f "$gz_file" ]]; then
			continue
		fi

		query_file="${gz_file}.fna"
		report_file="${gz_file}.txt"

		if ! gzip -dc -- "$gz_file" > "$query_file"; then
			printf 'warning: could not decompress %s; skipping\n' "$gz_file" >&2
			rm -f -- "$query_file"
			failures=$((failures + 1))
			continue
		fi

		if ! "$blastn_bin" -db "$blast_db" -query "$query_file" -out "$report_file" \
			-num_threads "$threads" -outfmt '7 sseqid stitle evalue bitscore pident'; then
			printf 'warning: blastn failed for %s\n' "$gz_file" >&2
			failures=$((failures + 1))
		fi

		# Remove only the temporary file created above, never a blanket *.fna.
		rm -f -- "$query_file"
	done

	(( failures == 0 ))
}
blast_each_genome
#
#Step 2: take the top 10 results from each txt file and concatenate them
#
# concat_top_hits
#   For every ./*.fna.gz.txt report in the current directory, appends to
#   BLAST_output_all.txt: three blank lines, the report's file name, and the
#   first 10 lines of the report that start with "gi". The processed reports
#   (and only those) are deleted afterwards.
#   When no report exists, nothing is written and 0 is returned.
#   NOTE(review): hit lines are recognised by a leading "gi" in the sseqid
#   column; a database whose ids are not GI-prefixed would yield no lines --
#   confirm against the database in use.
concat_top_hits() {
	local out_file=BLAST_output_all.txt
	local -a reports=()
	local report

	# Collect real files only; an unmatched glob would otherwise stay literal.
	for report in *.fna.gz.txt; do
		if [[ -f "$report" ]]; then
			reports+=("$report")
		fi
	done

	if (( ${#reports[@]} == 0 )); then
		printf 'warning: no *.fna.gz.txt reports found; nothing to concatenate\n' >&2
		return 0
	fi

	for report in "${reports[@]}"; do
		{
			printf '\n\n\n'
			# The file name is data, never the printf format string.
			printf '%s\n' "$report"
			grep -- '^gi' "./$report" | head -n 10
		} >> "$out_file"
	done

	# Delete exactly the reports that were concatenated, not every *.gz.txt.
	rm -f -- "${reports[@]}"
}
concat_top_hits
#Your resulting file is called "BLAST_output_all.txt" and will have the name of each
#sequence, followed by the BLAST results