-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstatistics_by_month
More file actions
14 lines (14 loc) · 1.27 KB
/
statistics_by_month
File metadata and controls
14 lines (14 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/usr/bin/env bash
#
# statistics_by_month - summarise one month of GISAID metadata.
#
# Run this script in a folder with one TSV file of DC sequences named
# gisaid*.tsv (downloaded from GISAID; download --> input for the Augur
# pipeline).
#
# Usage: statistics_by_month [MONTH] [DATE_COLUMN] [DATE_LABEL]
#   MONTH        month to report, written YYYY-MM (default: 2021-07)
#   DATE_COLUMN  tab-separated column holding the date to filter on
#                (default: 5, the collection date; use 27 for the
#                submission date)
#   DATE_LABEL   word used in the report heading (default: "collected",
#                or "submitted" when DATE_COLUMN is 27)
#
# Side effects: writes the selected rows to input_by_month.tsv in the current
# folder. The report itself goes to stdout, diagnostics go to stderr.
# Exit status: 0 on success, 1 if no input file is found, 2 on a bad argument.

#######################################
# Print a count/percentage table for one tab-separated column of a file.
# Arguments: $1 - column number
#            $2 - TSV file to read
# Outputs:   a "Total" row followed by one row per distinct value of the
#            column, most frequent first
#######################################
tabulate_column() {
  local column=$1 file=$2

  awk -F'\t' -v column="$column" '{ print $column }' "$file" \
    | sort \
    | uniq -c \
    | sort -nr \
    | awk '
        function row(label, count) {
          # An empty selection has total 0: report 0.00% rather than
          # letting awk abort on a division by zero.
          printf "%-10s \t %10d \t %7.2f%%\n", label, count, (total > 0 ? count / total * 100 : 0)
        }
        {
          counts[NR] = $1
          total += $1
          $1 = ""            # drop the count; what is left is " <value>"
          labels[NR] = $0
        }
        END {
          row(" Total", total)
          for (i = 1; i <= NR; i++) {
            row(labels[i], counts[i])
          }
        }'
}

#######################################
# Extract one month from gisaid*.tsv and print the per-laboratory and
# per-variant tables for it.
# Arguments: $1 - month (optional), $2 - date column (optional),
#            $3 - heading word (optional); see the usage text above
# Returns:   0 on success, 1 if no gisaid*.tsv exists, 2 on a bad column
#######################################
main() {
  local month=${1:-2021-07}
  local date_column=${2:-5}
  local date_label=${3:-}
  local -r month_file=input_by_month.tsv
  local -a inputs=()

  if [[ ! $date_column =~ ^[1-9][0-9]*$ ]]; then
    printf 'error: DATE_COLUMN must be a positive integer, got "%s"\n' \
      "$date_column" >&2
    return 2
  fi

  if [[ -z $date_label ]]; then
    case "$date_column" in
      27) date_label=submitted ;;
      *) date_label=collected ;;
    esac
  fi

  # Without nullglob an unmatched pattern stays literal, so test for existence.
  inputs=(gisaid*.tsv)
  if [[ ! -e ${inputs[0]} ]]; then
    printf 'error: no gisaid*.tsv file found in %s\n' "$PWD" >&2
    return 1
  fi

  # Extract the requested month. Fields are split on tabs (the same way the
  # tables below read them) so that column numbers stay valid even when
  # earlier fields contain spaces. A row is kept when its date field starts
  # with MONTH.
  awk -F'\t' -v column="$date_column" -v month="$month" \
    'index($column, month) == 1' "${inputs[@]}" > "$month_file" || return

  # Print the month.
  printf 'SARS-CoV-2 samples from DC - %s %s\n\n' "$date_label" "$month"

  # Number/percentage of samples from that month for each facility.
  printf '\nNumber and percentage of samples by submitting laboratory:\tNumber:\tPercentage:\n'
  tabulate_column 22 "$month_file"

  # Number/percentage of samples from that month for each variant.
  printf '\nNumber and percentage of variants:\tNumber:\tPercentage:\n'
  tabulate_column 19 "$month_file"
}

main "$@"