-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWeekly_Stats
More file actions
28 lines (18 loc) · 1.36 KB
/
Weekly_Stats
File metadata and controls
28 lines (18 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/usr/bin/env bash
#
# Weekly statistics for DC sequences exported from GISAID.
#
# Usage: run this script in a folder with one tsv file of DC sequences
# (downloaded from GISAID; download --> Sequencing Technology Metadata; rename
# files so they start with "gisaid" and end with ".tsv").
#
# Files written to the current directory:
#   DC.tsv                          rows whose first column matches "hCoV-19"
#   PHL.tsv                         rows whose first column matches "DC-DFS-PHL"
#   DC_counts_and_percentages.tsv   variant counts/percentages for DC.tsv
#   PHL_counts_and_percentages.tsv  variant counts/percentages for PHL.tsv
#
# The four sequence counts are printed to stdout.
#
# -e is deliberately not enabled: `grep -c` exits 1 when the count is zero,
# which is a valid result here; failures that matter are checked explicitly.
set -uo pipefail

#######################################
# Count the lines of a file that contain a fixed string.
# Arguments: $1 - literal text to look for
#            $2 - file to search
# Outputs:   the number of matching lines on stdout
# Returns:   0 on success (including a count of 0), grep's status on error
#######################################
count_matches() {
  local needle=$1 file=$2 n status=0
  n=$(grep -c -F -- "$needle" "$file") || status=$?
  # grep exits 1 when nothing matched but has still printed "0"; only a
  # status above 1 signals a real failure (e.g. unreadable file).
  if ((status > 1)); then
    return "$status"
  fi
  printf '%s\n' "$n"
}

#######################################
# Tabulate how often each value of column 14 (the variant column, per the
# original script) occurs in a tab-separated file. Each value is truncated
# from the character preceding its first "(" onward before counting.
# Arguments: $1 - tab-separated input file
# Outputs:   a " Total" row followed by one row per distinct value, each as
#            "<name> \t <count> \t <percent of total>%"
# Returns:   non-zero if any stage of the pipeline fails
#######################################
summarize_variants() {
  local file=$1
  awk -F'\t' '{ print $14 }' "$file" \
    | sed 's/.[(].*//' \
    | sort \
    | uniq -c \
    | awk '
        {
          count[NR] = $1
          total += $1
          $1 = ""            # rebuilding $0 leaves " <name>" (leading space)
          name[NR] = $0
        }
        END {
          fmt = "%-10s \t %10d \t %7.2f%%\n"
          # With no input rows the total is 0; report 0% instead of dividing
          # by zero.
          printf fmt, " Total", total + 0, (total > 0 ? 100 : 0)
          for (i = 1; i <= NR; i++)
            printf fmt, name[i], count[i], (total > 0 ? count[i] / total * 100 : 0)
        }'
}

#######################################
# Build the intermediate files, print the sequence counts and write the two
# variant tables.
# Outputs:   four "# ..." count lines on stdout; diagnostics on stderr
# Returns:   0 on success, non-zero if no input file exists or a step fails
#######################################
main() {
  local -a inputs=(gisaid*.tsv)
  # An unmatched glob is left as the literal pattern, so test for a real file.
  if [[ ! -e "${inputs[0]}" ]]; then
    printf 'error: no gisaid*.tsv file found in %s\n' "$PWD" >&2
    return 1
  fi

  # Make specific files to be used later.
  awk -F'\t' '$1 ~ "DC-DFS-PHL"' "${inputs[@]}" > PHL.tsv || return
  awk -F'\t' '$1 ~ "hCoV-19"' "${inputs[@]}" > DC.tsv || return

  # Number of sequences for DC and DC-DFS-PHL, then the number of Omicron
  # sequences (based on clade GRA).
  # NOTE(review): the GRA counts match "GRA" anywhere in a row, not only in a
  # clade column -- confirm that no other field can contain it.
  local dc_total phl_total dc_gra phl_gra
  dc_total=$(count_matches 'hCoV-19' DC.tsv) || return
  phl_total=$(count_matches 'DC-DFS-PHL' PHL.tsv) || return
  dc_gra=$(count_matches 'GRA' DC.tsv) || return
  phl_gra=$(count_matches 'GRA' PHL.tsv) || return

  printf '# sequences from DC: %s\n' "$dc_total"
  printf '# sequences from DFS-PHL: %s\n' "$phl_total"
  printf '# Omicron sequences from DC (# clade GRA): %s\n' "$dc_gra"
  printf '# Omicron sequences from DFS-PHL (# clade GRA): %s\n' "$phl_gra"

  # Spreadsheets of the number/percentage of variants for DC and DC-DFS-PHL.
  summarize_variants DC.tsv > DC_counts_and_percentages.tsv || return
  summarize_variants PHL.tsv > PHL_counts_and_percentages.tsv || return
}

main "$@"