forked from StaPH-B/docker-builds
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathest_coverage.py
More file actions
executable file
·70 lines (58 loc) · 2.2 KB
/
est_coverage.py
File metadata and controls
executable file
·70 lines (58 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
"""
Coverage calculating script for C-BIRD workflow
Created on Thu Aug 4 13:43:46 2022
@author: Kutluhan Incekara
email: kutluhan.incekara@ct.gov
"""
import argparse
import pandas as pd
def main(args):
total_bases = args.total_bases
top_taxid = args.top_taxid
alt_gs = args.alt_gs
genome_length = args.genome_length
# read assembly stats file
gs_df = pd.read_csv("/usr/local/lib/species_genome_size.txt", sep = '\t')
## Coverage ##
# get total base number
with open(total_bases, "r") as tb:
lines = tb.readlines()
basenum = lines[0]
basenum_trim = lines[1]
# find top hit's expected genome size
x = gs_df.loc[gs_df ["#species_taxid"]==int(top_taxid), "expected_ungapped_length"]
if (len(x) != 0):
exp_gs = str(x.values[0])
else:
with open(alt_gs, "r") as f:
exp_gs = f.readline()
# calculate sequencing depth & genome ratio
coverage = 0.0
coverage_trim = 0.0
ratio = 0.0
try:
coverage = round((int(basenum) / int(exp_gs)),2)
coverage_trim = round((int(basenum_trim) / int(exp_gs)),2)
except ValueError:
print("Someting wrong in coverage calculation")
finally:
with open("COVERAGE", "w") as cov:
cov.write(str(coverage))
with open("COVERAGE_TRIM", "w") as cov2:
cov2.write(str(coverage_trim))
try:
ratio = round((int(genome_length) / int(exp_gs)),2)
except ValueError:
print("Someting wrong in genome ratio calculation")
finally:
with open("GENOME_RATIO", "w") as r:
r.write(str(ratio))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Coverage calculating script for C-BIRD workflow')
parser.add_argument('total_bases', type=str, help='file containing the total number of bases')
parser.add_argument('top_taxid', type=int, help='The tax ID of the top hit')
parser.add_argument('alt_gs', type=str, help='file containing the expected genome size (alternative)')
parser.add_argument('genome_length', type=int, help='The length of the genome being analyzed')
args = parser.parse_args()
main(args)