Skip to content

Commit ed59585

Browse files
authored
Merge pull request #101 from zwets/update-rgi-parser
Update RGI parser
2 parents 7521fb3 + 6140f96 commit ed59585

File tree

10 files changed

+83
-84
lines changed

10 files changed

+83
-84
lines changed

.github/workflows/test_package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
python-version: [3.10.15, 3.12.8]
18+
python-version: [3.10.16, 3.12.8]
1919

2020
steps:
2121
- uses: actions/checkout@v2

hAMRonization/RgiIO.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ def __init__(self, source, metadata):
3636
"Reference Model Type": "genetic_variation_type",
3737
"Reference DB": "reference_database_name",
3838
"Alleles with Mapped Reads": None,
39-
"Reference Allele(s) Identity "
40-
"to CARD Reference Protein (%)": "sequence_identity",
39+
"Reference Allele(s) Identity to CARD Reference Protein (%)": "sequence_identity",
4140
"Resistomes & Variants: Observed in Genome(s)": None,
4241
"Resistomes & Variants: Observed in Plasmid(s)": None,
4342
"Resistomes & Variants: Observed Pathogen(s)": None,
@@ -50,11 +49,9 @@ def __init__(self, source, metadata):
5049
"Number of Mapped Baits": None,
5150
"Number of Mapped Baits with Reads": None,
5251
"Average Number of reads per Bait": None,
53-
"Number of reads per Bait " "Coefficient of Variation (%)": None,
54-
"Number of reads mapping to baits "
55-
"and mapping to complete gene": None,
56-
"Number of reads mapping to baits and "
57-
"mapping to complete gene (%)": None,
52+
"Number of reads per Bait Coefficient of Variation (%)": None,
53+
"Number of reads mapping to baits and mapping to complete gene": None,
54+
"Number of reads mapping to baits and mapping to complete gene (%)": None,
5855
"Mate Pair Linkage (# reads)": None,
5956
"Reference Length": "reference_gene_length",
6057
"AMR Gene Family": "gene_name",
@@ -91,6 +88,9 @@ def __init__(self, source, metadata):
9188
"Model_ID": None,
9289
"Nudged": None,
9390
"Note": None,
91+
"Hit_Start": "reference_gene_start",
92+
"Hit_End": "reference_gene_stop",
93+
"Antibiotic": "antimicrobial_agent",
9494
}
9595
# if RGI is run on ORFs then Contig should be None
9696
# and input_sequence_id should be the ORF_ID i.e., reverse of
@@ -114,16 +114,14 @@ def parse(self, handle):
114114
# skip any manually specified fields for later
115115
reader = csv.DictReader(handle, delimiter="\t")
116116
for result in reader:
117+
result["_nucleotide_mutation"] = None
118+
result["_amino_acid_mutation"] = None
119+
117120
# rgi-bwt mode doesn't support variant mutations
118121
if "Model_type" not in result:
119-
result["_nucleotide_mutation"] = None
120-
result["_amino_acid_mutation"] = None
121122
result["Reference Model Type"] = GENE_PRESENCE
122123
# normal RGI model
123124
else:
124-
result["_nucleotide_mutation"] = None
125-
result["_amino_acid_mutation"] = None
126-
127125
if result["SNPs_in_Best_Hit_ARO"] == "n/a":
128126
result["SNPs_in_Best_Hit_ARO"] = None
129127

schema/csv2json.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77
from datetime import datetime
88
from ast import literal_eval
99

10-
FIELDNAMES = ['Interface Label','Required/Optional','Definition','Ontology','Value Type','Example','Guidance','Values']
10+
FIELDNAMES = ['Interface Label', 'Required/Optional', 'Definition', 'Ontology', 'Value Type', 'Example', 'Guidance', 'Values']
1111
SEPARATOR = ','
1212
QUOTE = '"'
1313

14+
1415
def string_list_to_list(string):
1516
to_list = literal_eval(string)
1617
to_list = [n.strip() for n in to_list]
1718
return to_list
1819

20+
1921
def interface_label_to_property_key(interface_label):
2022
property_key = re.sub(r'[^\w {}]', '_', interface_label).replace(' ', '_').replace('__', '_').lower()
2123
property_key = re.sub(r'_$', '', property_key)
@@ -35,7 +37,7 @@ def parse_properties_table(path_to_properties_table):
3537
"SRA_ID": "string",
3638
"Genbank_ID": "string",
3739
"GISAID_ID": "string",
38-
"Enums":{
40+
"Enums": {
3941
"type": "string",
4042
"enum": "",
4143
},
@@ -74,8 +76,8 @@ def parse_properties_table(path_to_properties_table):
7476
"Bioproject_ID": "^PRJ(N|E|D)([a-zA-Z]?)[0-9]+*",
7577
"Biosample_ID": "^SAM(D|N|E([AG]?))[0-9]+",
7678
"SRA_ID": "^(SRR|ERR|DRR)[0-9]+",
77-
"Genbank_ID": "^([a-zA-Z]{2})\d*.\d{1}",
78-
"GISAID_ID": "^EPI_ISL_\d*",
79+
"Genbank_ID": "^([a-zA-Z]{2})\\d*.\\d{1}",
80+
"GISAID_ID": "^EPI_ISL_\\d*",
7981
"Integer_or_Range": None,
8082
"Enums": None
8183
}
@@ -112,23 +114,22 @@ def parse_properties_table(path_to_properties_table):
112114
for i in range(len(examples)):
113115
if '-' not in examples[i]:
114116
examples[i] = int(examples[i])
115-
117+
116118
# Special case: enumns
117119
if row['Value Type'] == "Enums":
118120
type = datatype_map[row['Value Type']]
119121
properties[property_key]['type'] = "string"
120122
properties[property_key]['Enums'] = string_list_to_list(row['Values'])
121-
122-
properties[property_key]['examples'] = examples
123123

124+
properties[property_key]['examples'] = examples
124125

125126
return properties
126127

127128

128129
def get_required_fields(path_to_properties_table):
129130
required_fields = set()
130131
with open(path_to_properties_table) as f:
131-
reader = csv.DictReader(f, delimiter=SEPARATOR,quotechar=QUOTE)
132+
reader = csv.DictReader(f, delimiter=SEPARATOR, quotechar=QUOTE)
132133
for row in reader:
133134
property_key = interface_label_to_property_key(row['Interface Label'])
134135
if row['Required/Optional'] == 'Required':
@@ -138,19 +139,18 @@ def get_required_fields(path_to_properties_table):
138139

139140

140141
def main(args):
141-
142+
142143
schema = {
143144
"$schema": "http://json-schema.org/draft/2019-09/schema#",
144145
"version": datetime.now().isoformat(),
145146
"type": "object",
146147
"properties": {},
147148
"required": [],
148149
}
149-
150+
150151
schema["properties"] = parse_properties_table(args.input)
151152
schema["required"] = get_required_fields(args.input)
152-
153-
#print(json.dumps(schema))
153+
154154
with open(args.input.replace("csv", "json"), "w") as fh:
155155
fh.write(json.dumps(schema))
156156

@@ -160,4 +160,4 @@ def main(args):
160160
parser.add_argument('input', help='Input schema attributes table')
161161

162162
args = parser.parse_args()
163-
main(args)
163+
main(args)

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import setuptools
21
import re
32
from distutils.core import setup
43

@@ -7,7 +6,7 @@
76

87
with open('hAMRonization/__init__.py') as fh:
98
info = fh.read()
10-
version = re.search('^__version__\s*=\s*"(.*)"',
9+
version = re.search('^__version__\\s*=\\s*"(.*)"',
1110
info, re.M).group(1)
1211

1312
setup(

test/data/dummy/rgi/rgi.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note
2-
NZ_LR792628.1_1289 # 1333608 # 1334783 # -1 # ID=1_1289;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.599 NZ_LR792628.1_1289 1333608 1334783 - Strict 670 792.727 oqxA 99.49 3003922 protein homolog model n/a n/a fluoroquinolone antibiotic; glycylcycline; tetracycline antibiotic; diaminopyrimidine antibiotic; nitrofuran antibiotic antibiotic efflux resistance-nodulation-cell division (RND) antibiotic efflux pump ATGAGCCTGCAAAAAACCTGGGGAAACATTCACCTGACCGCGCTCGGCGCGATGATGCTCTCCTTTCTGCTCGTCGACTGCGACGACAGCGTCGCGCAGAATGCTGCGCCTCCCGCCCCGACAGTCAGCGCCGCTAAGGTGCTGGTGAAGTCGATCAGTCAGTGGGATAGTTTTAACGGTCGCATTGAAGCGGTGGAGAGCGTTCAGCTTCGCCCTCGCGTCTCGGGATACATTGATAAAGTGAATTACACCGACGGCCAGGAGGTGAAAAAGGGCCAGGTGCTGTTCACGATAGATGACAGAACCTATCGCGCCGCGCTGGAGCAGGCGCAGGCGGCGTTGGCAAGAGCCAAAACGCAGGCCAGCCTCGCGCAAAGCGAGGCGAACCGCACCGATAAATTAGTCCATACCAACCTCGTCTCCCGTGAAGAGTGGGAGCAGCGCCGGTCAGCCGCGGTTCAGGCGCAGGCCGACATTCGCGCCGCGCAGGCGGCGGTGGATGCCGCGCAGCTTAACCTCGACCTCACCAAAGTGACCGCCCCTATTGACGGCCGCGCCAGCCGGGCGCTGATCACCAGCGGTAACCTGGTCACCGCGGGCGACACCGCCAGCGTGCTCACCACCCTGGTCTCGCAAAAGACGGTGTACGTCTACTTTGACGTCGACGAGTCAACCTACCTCCACTATCAAAACCTCGCCCGCCGCGGGCAAGGCGCGTCCAGCGATAATCAGGCGCTCCCGGTGGAGATTGGCCTGGTGGGCGAGGAGGGTTACCCCCACCAGGGCAAAGTGGATTTTCTCGATAATCAGTTAACGCCGAGTACCGGCACCATCCGCATGCGTGCGCTGCTGGATAACTCGCAGCGTCTGTTCACGCCGGGGCTGTTTGCCCGCGTGCGTCTGCCGGGCAGCGCAGAGTTCAAAGCCACGCTGATCGACGACAAAGCGGTACTGACCGATCAGGATCGTAAATACGTCTATATCGTTGATAAAGATGGTAAAGCACAGCGCCGCGACATTACCCCAGGGCGGCTGGCAGACGGTTTACGCATCGTTCAGAAGGGGTTGAATCCTGGGGATAGCGTCATCGTCGACGGCTTACAAAAAGTGTTTATGCCGGGTATGCCGGTTAACGCCAAAACCGTTGCCATGACCTCCAGCGCCACCCTTAACTGA 
MSLQKTWGNIHLTALGAMMLSFLLVDCDDSVAQNAAPPAPTVSAAKVLVKSISQWDSFNGRIEAVESVQLRPRVSGYIDKVNYTDGQEVKKGQVLFTIDDRTYRAALEQAQAALARAKTQASLAQSEANRTDKLVHTNLVSREEWEQRRSAAVQAQADIRAAQAAVDAAQLNLDLTKVTAPIDGRASRALITSGNLVTAGDTASVLTTLVSQKTVYVYFDVDESTYLHYQNLARRGQGASSDNQALPVEIGLVGEEGYPHQGKVDFLDNQLTPSTGTIRMRALLDNSQRLFTPGLFARVRLPGSAEFKATLIDDKAVLTDQDRKYVYIVDKDGKAQRRDITPGRLADGLRIVQKGLNPGDSVIVDGLQKVFMPGMPVNAKTVAMTSSATLN MSLQKTWGNIHLTALGAMMLSFLLVGCDDSVAQNAAPPAPTVSAAKVLVKSISQWDSFNGRIEAVESVQLRPRVSGYIDKVNYTDGQEVKKGQVLFTIDDRTYRAALEQAQAALARAKTQASLAQSEANRTDKLVHTNLVSREEWEQRRSAAVQAQADIRAAQAAVDAAQLNLDFTKVTAPIDGRASRALITSGNLVTAGDTASVLTTLVSQKTVYVYFDVDESTYLHYQNLARRGQGASSDNQALPVEIGLVGEEGYPHQGKVDFLDNQLTPSTGTIRMRALLDNSQRLFTPGLFARVRLPGSAEFKATLIDDKAVLTDQDRKYVYIVDKDGKAQRRDITPGRLADGLRIVQKGLNPGDSVIVDGLQKVFMPGMPVNAKTVAMTSSATLN 100.00 gnl|BL_ORD_ID|2216|hsp_num:0 2399
1+
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note Hit_Start Hit_End Antibiotic
2+
NZ_LR792628.1_1289 # 1333608 # 1334783 # -1 # ID=1_1289;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.599 NZ_LR792628.1_1289 1333608 1334783 - Strict 670 792.727 oqxA 99.49 3003922 protein homolog model n/a n/a fluoroquinolone antibiotic; glycylcycline; tetracycline antibiotic; diaminopyrimidine antibiotic; nitrofuran antibiotic antibiotic efflux resistance-nodulation-cell division (RND) antibiotic efflux pump ATGAGCCTGCAAAAAACCTGGGGAAACATTCACCTGACCGCGCTCGGCGCGATGATGCTCTCCTTTCTGCTCGTCGACTGCGACGACAGCGTCGCGCAGAATGCTGCGCCTCCCGCCCCGACAGTCAGCGCCGCTAAGGTGCTGGTGAAGTCGATCAGTCAGTGGGATAGTTTTAACGGTCGCATTGAAGCGGTGGAGAGCGTTCAGCTTCGCCCTCGCGTCTCGGGATACATTGATAAAGTGAATTACACCGACGGCCAGGAGGTGAAAAAGGGCCAGGTGCTGTTCACGATAGATGACAGAACCTATCGCGCCGCGCTGGAGCAGGCGCAGGCGGCGTTGGCAAGAGCCAAAACGCAGGCCAGCCTCGCGCAAAGCGAGGCGAACCGCACCGATAAATTAGTCCATACCAACCTCGTCTCCCGTGAAGAGTGGGAGCAGCGCCGGTCAGCCGCGGTTCAGGCGCAGGCCGACATTCGCGCCGCGCAGGCGGCGGTGGATGCCGCGCAGCTTAACCTCGACCTCACCAAAGTGACCGCCCCTATTGACGGCCGCGCCAGCCGGGCGCTGATCACCAGCGGTAACCTGGTCACCGCGGGCGACACCGCCAGCGTGCTCACCACCCTGGTCTCGCAAAAGACGGTGTACGTCTACTTTGACGTCGACGAGTCAACCTACCTCCACTATCAAAACCTCGCCCGCCGCGGGCAAGGCGCGTCCAGCGATAATCAGGCGCTCCCGGTGGAGATTGGCCTGGTGGGCGAGGAGGGTTACCCCCACCAGGGCAAAGTGGATTTTCTCGATAATCAGTTAACGCCGAGTACCGGCACCATCCGCATGCGTGCGCTGCTGGATAACTCGCAGCGTCTGTTCACGCCGGGGCTGTTTGCCCGCGTGCGTCTGCCGGGCAGCGCAGAGTTCAAAGCCACGCTGATCGACGACAAAGCGGTACTGACCGATCAGGATCGTAAATACGTCTATATCGTTGATAAAGATGGTAAAGCACAGCGCCGCGACATTACCCCAGGGCGGCTGGCAGACGGTTTACGCATCGTTCAGAAGGGGTTGAATCCTGGGGATAGCGTCATCGTCGACGGCTTACAAAAAGTGTTTATGCCGGGTATGCCGGTTAACGCCAAAACCGTTGCCATGACCTCCAGCGCCACCCTTAACTGA 
MSLQKTWGNIHLTALGAMMLSFLLVDCDDSVAQNAAPPAPTVSAAKVLVKSISQWDSFNGRIEAVESVQLRPRVSGYIDKVNYTDGQEVKKGQVLFTIDDRTYRAALEQAQAALARAKTQASLAQSEANRTDKLVHTNLVSREEWEQRRSAAVQAQADIRAAQAAVDAAQLNLDLTKVTAPIDGRASRALITSGNLVTAGDTASVLTTLVSQKTVYVYFDVDESTYLHYQNLARRGQGASSDNQALPVEIGLVGEEGYPHQGKVDFLDNQLTPSTGTIRMRALLDNSQRLFTPGLFARVRLPGSAEFKATLIDDKAVLTDQDRKYVYIVDKDGKAQRRDITPGRLADGLRIVQKGLNPGDSVIVDGLQKVFMPGMPVNAKTVAMTSSATLN MSLQKTWGNIHLTALGAMMLSFLLVGCDDSVAQNAAPPAPTVSAAKVLVKSISQWDSFNGRIEAVESVQLRPRVSGYIDKVNYTDGQEVKKGQVLFTIDDRTYRAALEQAQAALARAKTQASLAQSEANRTDKLVHTNLVSREEWEQRRSAAVQAQADIRAAQAAVDAAQLNLDFTKVTAPIDGRASRALITSGNLVTAGDTASVLTTLVSQKTVYVYFDVDESTYLHYQNLARRGQGASSDNQALPVEIGLVGEEGYPHQGKVDFLDNQLTPSTGTIRMRALLDNSQRLFTPGLFARVRLPGSAEFKATLIDDKAVLTDQDRKYVYIVDKDGKAQRRDITPGRLADGLRIVQKGLNPGDSVIVDGLQKVFMPGMPVNAKTVAMTSSATLN 100.00 gnl|BL_ORD_ID|2147|hsp_num:0 2399 0 1173 tigecycline; ciprofloxacin; nitrofurantoin; trimethoprim

test/data/dummy/rgi/rgi_orf.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note
2-
gb|AEN03071.1|+|NDM-5 [Escherichia coli] Perfect 500 555.444 NDM-5 100.0 3000467 protein homolog model n/a n/a carbapenem; cephalosporin; cephamycin; penam antibiotic inactivation NDM beta-lactamase MELPNIMHPVAKLSTALAAALMLSGCMPGEIRPTIGQQMETGDQRFGDLVFRQLAPNVWQHTSYLDMPGFGAVASNGLIVRDGGRVLLVDTAWTDDQTAQILNWIKQEINLPVALAVVTHAHQDKMGGMDALHAAGIATYANALSNQLAPQEGLVAAQHSLTFAANGWVEPATAPNFGPLKVFYPGPGHTSDNITVGIDGTDIAFGGCLIKDSKAKSLGNLGDADTEHYAASARAFGAAFPKASMIVMSHSAPDSRAAITHTARMADKLR MELPNIMHPVAKLSTALAAALMLSGCMPGEIRPTIGQQMETGDQRFGDLVFRQLAPNVWQHTSYLDMPGFGAVASNGLIVRDGGRVLLVDTAWTDDQTAQILNWIKQEINLPVALAVVTHAHQDKMGGMDALHAAGIATYANALSNQLAPQEGLVAAQHSLTFAANGWVEPATAPNFGPLKVFYPGPGHTSDNITVGIDGTDIAFGGCLIKDSKAKSLGNLGDADTEHYAASARAFGAAFPKASMIVMSHSAPDSRAAITHTARMADKLR 100.00 gnl|BL_ORD_ID|1015|hsp_num:0 1106
1+
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note Hit_Start Hit_End Antibiotic
2+
ndm-1_1 # 1 # 813 # 1 # ID=1_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.615 ndm-1_1 1 813 + Perfect 500 555.829 NDM-1 100.0 3000589 protein homolog model n/a n/a carbapenem; cephalosporin; penicillin beta-lactam antibiotic inactivation NDM beta-lactamase ATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTGAGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTGGCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTTCGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGGACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGGTGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTATGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTCGCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCCCCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGATCAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCGCGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCGCCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGA MELPNIMHPVAKLSTALAAALMLSGCMPGEIRPTIGQQMETGDQRFGDLVFRQLAPNVWQHTSYLDMPGFGAVASNGLIVRDGGRVLVVDTAWTDDQTAQILNWIKQEINLPVALAVVTHAHQDKMGGMDALHAAGIATYANALSNQLAPQEGMVAAQHSLTFAANGWVEPATAPNFGPLKVFYPGPGHTSDNITVGIDGTDIAFGGCLIKDSKAKSLGNLGDADTEHYAASARAFGAAFPKASMIVMSHSAPDSRAAITHTARMADKLR MELPNIMHPVAKLSTALAAALMLSGCMPGEIRPTIGQQMETGDQRFGDLVFRQLAPNVWQHTSYLDMPGFGAVASNGLIVRDGGRVLVVDTAWTDDQTAQILNWIKQEINLPVALAVVTHAHQDKMGGMDALHAAGIATYANALSNQLAPQEGMVAAQHSLTFAANGWVEPATAPNFGPLKVFYPGPGHTSDNITVGIDGTDIAFGGCLIKDSKAKSLGNLGDADTEHYAASARAFGAAFPKASMIVMSHSAPDSRAAITHTARMADKLR 100.00 gnl|BL_ORD_ID|711|hsp_num:0 783 0 810 ertapenem; meropenem; imipenem

test/data/dummy/rgi/rgi_var.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note
2-
BAE77595.1 DNA gyrase, subunit B [Escherichia coli str. K-12 substr. W3110] Strict 1600 1664.05 Escherichia coli gyrB conferring resistance to aminocoumarin 99.88 3003303 protein variant model R136L aminocoumarin antibiotic antibiotic target alteration aminocoumarin resistant gyrB MSNSYDSSSIKVLKGLDAVRKRPGMYIGDTDDGTGLHHMVFEVVDNAIDEALAGHCKEIIVTIHADNSVSVQDDGRGIPTGIHPEEGVSAAEVIMTVLHAGGKFDDNSYKVSGGLHGVGVSVVNALSQKLELVIQLEGKIHRQIYEHGVPQAPLAVTGETEKTGTMVRFWPSLETFTNVTEFEYEILAKRLRELSFLNSGVSIRLRDKRDGKEDHFHYEGGIKAFVEYLNKNKTPIHPNIFYFSTEKDGIGVEVALQWNDGFQENIYCFTNNIPQRDGGTHLAGFRAAMTRTLNAYMDKEGYSKKAKVSATGDDAREGLIAVVSVKVPDPKFSSQTKDKLVSSEVKSAVEQQMNELLAEYLLENPTDAKIVVGKIIDAARAREAARRAREMTRRKGALDLAGLPGKLADCQERDPALSELYLVEGDSAGGSAKQGRNRKNQAILPLKGKILNVEKARFDKMLSSQEVATLITALGCGIGRDEYNPDKLRYHSIIIMTDADVDGSHIRTLLLTFFYRQMPEIVERGHVYIAQPPLYKVKKGKQEQYIKDDEAMDQYQISIALDGATLHTNASAPALAGEALEKLVSEYNATQKMINRMERRYPKAMLKELIYQPTLTEADLSDEQTVTRWVNALVSELNDKEQHGSQWKFDVHTNAEQNLFEPIVRVRTHGVDTDYPLDHEFITGGEYRRICTLGEKLRGLLEEDAFIERGERRQPVASFEQALDWLVKESRRGLSIQRYKGLGEMNPEQLWETTMDPESRRMLRVTVKDAIAADQLFTTLMGDAVEPRRAFIEENALKAANIDI MSNSYDSSSIKVLKGLDAVRKRPGMYIGDTDDGTGLHHMVFEVVDNAIDEALAGHCKEIIVTIHADNSVSVQDDGRGIPTGIHPEEGVSAAEVIMTVLHAGGKFDDNSYKVSGGLHGVGVSVVNALSQKLELVIQREGKIHRQIYEHGVPQAPLAVTGETEKTGTMVRFWPSLETFTNVTEFEYEILAKRLRELSFLNSGVSIRLRDKRDGKEDHFHYEGGIKAFVEYLNKNKTPIHPNIFYFSTEKDGIGVEVALQWNDGFQENIYCFTNNIPQRDGGTHLAGFRAAMTRTLNAYMDKEGYSKKAKVSATGDDAREGLIAVVSVKVPDPKFSSQTKDKLVSSEVKSAVEQQMNELLAEYLLENPTDAKIVVGKIIDAARAREAARRAREMTRRKGALDLAGLPGKLADCQERDPALSELYLVEGDSAGGSAKQGRNRKNQAILPLKGKILNVEKARFDKMLSSQEVATLITALGCGIGRDEYNPDKLRYHSIIIMTDADVDGSHIRTLLLTFFYRQMPEIVERGHVYIAQPPLYKVKKGKQEQYIKDDEAMDQYQISIALDGATLHTNASAPALAGEALEKLVSEYNATQKMINRMERRYPKAMLKELIYQPTLTEADLSDEQTVTRWVNALVSELNDKEQHGSQWKFDVHTNAEQNLFEPIVRVRTHGVDTDYPLDHEFITGGEYRRICTLGEKLRGLLEEDAFIERGERRQPVASFEQALDWLVKESRRGLSIQRYKGLGEMNPEQLWETTMDPESRRMLRVTVKDAIAADQLFTTLMGDAVEPRRAFIEENALKAANIDI 100.00 gnl|BL_ORD_ID|1933|hsp_num:0 111
1+
ORF_ID Contig Start Stop Orientation Cut_Off Pass_Bitscore Best_Hit_Bitscore Best_Hit_ARO Best_Identities ARO Model_type SNPs_in_Best_Hit_ARO Other_SNPs Drug Class Resistance Mechanism AMR Gene Family Predicted_DNA Predicted_Protein CARD_Protein_Sequence Percentage Length of Reference Sequence ID Model_ID Nudged Note Hit_Start Hit_End Antibiotic
2+
BAE77595.1 DNA gyrase, subunit B [Escherichia coli str. K-12 substr. W3110] Strict 1600 1664.05 Escherichia coli gyrB conferring resistance to aminocoumarin 99.88 3003303 protein variant model R136L aminocoumarin antibiotic antibiotic target alteration aminocoumarin resistant gyrB MSNSYDSSSIKVLKGLDAVRKRPGMYIGDTDDGTGLHHMVFEVVDNAIDEALAGHCKEIIVTIHADNSVSVQDDGRGIPTGIHPEEGVSAAEVIMTVLHAGGKFDDNSYKVSGGLHGVGVSVVNALSQKLELVIQLEGKIHRQIYEHGVPQAPLAVTGETEKTGTMVRFWPSLETFTNVTEFEYEILAKRLRELSFLNSGVSIRLRDKRDGKEDHFHYEGGIKAFVEYLNKNKTPIHPNIFYFSTEKDGIGVEVALQWNDGFQENIYCFTNNIPQRDGGTHLAGFRAAMTRTLNAYMDKEGYSKKAKVSATGDDAREGLIAVVSVKVPDPKFSSQTKDKLVSSEVKSAVEQQMNELLAEYLLENPTDAKIVVGKIIDAARAREAARRAREMTRRKGALDLAGLPGKLADCQERDPALSELYLVEGDSAGGSAKQGRNRKNQAILPLKGKILNVEKARFDKMLSSQEVATLITALGCGIGRDEYNPDKLRYHSIIIMTDADVDGSHIRTLLLTFFYRQMPEIVERGHVYIAQPPLYKVKKGKQEQYIKDDEAMDQYQISIALDGATLHTNASAPALAGEALEKLVSEYNATQKMINRMERRYPKAMLKELIYQPTLTEADLSDEQTVTRWVNALVSELNDKEQHGSQWKFDVHTNAEQNLFEPIVRVRTHGVDTDYPLDHEFITGGEYRRICTLGEKLRGLLEEDAFIERGERRQPVASFEQALDWLVKESRRGLSIQRYKGLGEMNPEQLWETTMDPESRRMLRVTVKDAIAADQLFTTLMGDAVEPRRAFIEENALKAANIDI MSNSYDSSSIKVLKGLDAVRKRPGMYIGDTDDGTGLHHMVFEVVDNAIDEALAGHCKEIIVTIHADNSVSVQDDGRGIPTGIHPEEGVSAAEVIMTVLHAGGKFDDNSYKVSGGLHGVGVSVVNALSQKLELVIQREGKIHRQIYEHGVPQAPLAVTGETEKTGTMVRFWPSLETFTNVTEFEYEILAKRLRELSFLNSGVSIRLRDKRDGKEDHFHYEGGIKAFVEYLNKNKTPIHPNIFYFSTEKDGIGVEVALQWNDGFQENIYCFTNNIPQRDGGTHLAGFRAAMTRTLNAYMDKEGYSKKAKVSATGDDAREGLIAVVSVKVPDPKFSSQTKDKLVSSEVKSAVEQQMNELLAEYLLENPTDAKIVVGKIIDAARAREAARRAREMTRRKGALDLAGLPGKLADCQERDPALSELYLVEGDSAGGSAKQGRNRKNQAILPLKGKILNVEKARFDKMLSSQEVATLITALGCGIGRDEYNPDKLRYHSIIIMTDADVDGSHIRTLLLTFFYRQMPEIVERGHVYIAQPPLYKVKKGKQEQYIKDDEAMDQYQISIALDGATLHTNASAPALAGEALEKLVSEYNATQKMINRMERRYPKAMLKELIYQPTLTEADLSDEQTVTRWVNALVSELNDKEQHGSQWKFDVHTNAEQNLFEPIVRVRTHGVDTDYPLDHEFITGGEYRRICTLGEKLRGLLEEDAFIERGERRQPVASFEQALDWLVKESRRGLSIQRYKGLGEMNPEQLWETTMDPESRRMLRVTVKDAIAADQLFTTLMGDAVEPRRAFIEENALKAANIDI 100.00 gnl|BL_ORD_ID|1933|hsp_num:0 111

0 commit comments

Comments
 (0)