Skip to content

Commit 5629c7d

Browse files
authored
Merge pull request #29 from Merck/develop
Add --prodigal-meta-mode option
2 parents f6cfe60 + 653be84 commit 5629c7d

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

deepbgc/command/pipeline.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ def add_arguments(self, parser):
5959
parser.add_argument('--limit-to-record', action='append', help="Process only specific record ID. Can be provided multiple times")
6060
parser.add_argument('--minimal-output', dest='is_minimal_output', action='store_true', default=False,
6161
help="Produce minimal output with just the GenBank sequence file")
62+
parser.add_argument('--prodigal-meta-mode', action='store_true', default=False, help="Run Prodigal in '-p meta' mode to enable detecting genes in short contigs")
6263
group = parser.add_argument_group('BGC detection options', '')
6364
no_models_message = 'run "deepbgc download" to download models'
6465
detector_names = util.get_available_models('detector')
@@ -88,7 +89,7 @@ def add_arguments(self, parser):
8889

8990
def run(self, inputs, output, detectors, no_detector, labels, classifiers, no_classifier,
9091
is_minimal_output, limit_to_record, score, classifier_score, merge_max_protein_gap, merge_max_nucl_gap, min_nucl,
91-
min_proteins, min_domains, min_bio_domains):
92+
min_proteins, min_domains, min_bio_domains, prodigal_meta_mode):
9293
if not detectors:
9394
detectors = ['deepbgc']
9495
if not classifiers:
@@ -110,7 +111,7 @@ def run(self, inputs, output, detectors, no_detector, labels, classifiers, no_cl
110111
output_file_name = os.path.basename(os.path.normpath(output))
111112

112113
steps = []
113-
steps.append(DeepBGCAnnotator(tmp_dir_path=tmp_path))
114+
steps.append(DeepBGCAnnotator(tmp_dir_path=tmp_path, prodigal_meta_mode=prodigal_meta_mode))
114115
if not no_detector:
115116
if not labels:
116117
labels = [None] * len(detectors)

deepbgc/command/prepare.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,13 @@ class PrepareCommand(BaseCommand):
3232

3333
def add_arguments(self, parser):
3434
parser.add_argument(dest='inputs', nargs='+', help="Input sequence file path(s) (FASTA/GenBank)")
35-
group = parser.add_argument_group('required arguments', '')
3635
parser.add_argument('--limit-to-record', action='append', help="Process only specific record ID. Can be provided multiple times")
36+
group = parser.add_argument_group('required arguments', '')
37+
group.add_argument('--prodigal-meta-mode', action='store_true', default=False, help="Run Prodigal in '-p meta' mode to enable detecting genes in short contigs")
3738
group.add_argument('--output-gbk', required=False, help="Output GenBank file path")
3839
group.add_argument('--output-tsv', required=False, help="Output TSV file path")
3940

40-
def run(self, inputs, limit_to_record, output_gbk, output_tsv):
41+
def run(self, inputs, limit_to_record, output_gbk, output_tsv, prodigal_meta_mode):
4142
first_output = output_gbk or output_tsv
4243
if not first_output:
4344
raise ValueError('Specify at least one of --output-gbk or --output-tsv')
@@ -47,7 +48,7 @@ def run(self, inputs, limit_to_record, output_gbk, output_tsv):
4748
if not os.path.exists(tmp_dir_path):
4849
os.mkdir(tmp_dir_path)
4950

50-
prepare_step = DeepBGCAnnotator(tmp_dir_path=tmp_dir_path)
51+
prepare_step = DeepBGCAnnotator(tmp_dir_path=tmp_dir_path, prodigal_meta_mode=prodigal_meta_mode)
5152

5253
writers = []
5354
if output_gbk:

deepbgc/pipeline/annotator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88

99
class DeepBGCAnnotator(PipelineStep):
1010

11-
def __init__(self, tmp_dir_path):
11+
def __init__(self, tmp_dir_path, prodigal_meta_mode=False):
1212
self.tmp_dir_path = tmp_dir_path
13+
self.prodigal_meta_mode = prodigal_meta_mode
1314

1415
def run(self, record):
1516
logging.info('Preparing record %s', record.id)
@@ -25,7 +26,7 @@ def run(self, record):
2526
if num_proteins:
2627
logging.info('Sequence already contains %s CDS features, skipping CDS detection', num_proteins)
2728
else:
28-
protein_annotator = ProdigalProteinRecordAnnotator(record=record, tmp_path_prefix=record_tmp_path)
29+
protein_annotator = ProdigalProteinRecordAnnotator(record=record, tmp_path_prefix=record_tmp_path, meta_mode=self.prodigal_meta_mode)
2930
protein_annotator.annotate()
3031

3132
num_pfams = len(util.get_pfam_features(record))

deepbgc/pipeline/protein.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212

1313

1414
class ProdigalProteinRecordAnnotator(object):
15-
def __init__(self, record, tmp_path_prefix):
15+
def __init__(self, record, tmp_path_prefix, meta_mode=False):
1616
self.record = record
1717
self.tmp_path_prefix = tmp_path_prefix
18+
self.meta_mode = meta_mode
1819

1920
def annotate(self):
2021
logging.info('Finding genes in record: %s', self.record.id)
@@ -29,8 +30,10 @@ def annotate(self):
2930

3031
logging.debug('Detecting genes using Prodigal...')
3132

33+
34+
3235
p = subprocess.Popen(
33-
['prodigal', '-i', nucl_path, '-a', protein_path],
36+
['prodigal', '-i', nucl_path, '-a', protein_path] + (['-p','meta'] if self.meta_mode else []),
3437
stdout=subprocess.PIPE,
3538
stderr=subprocess.PIPE,
3639
universal_newlines=True
@@ -42,7 +45,7 @@ def annotate(self):
4245
logging.warning('== End Prodigal Error. ============')
4346

4447
if 'Sequence must be' in err:
45-
logging.warning('No proteins detected in short sequence, moving on.')
48+
logging.warning('No proteins detected in short sequence, use --prodigal-meta-mode to run Prodigal in "-p meta" mode.')
4649
elif os.stat(protein_path).st_size == 0:
4750
raise ValueError("Prodigal produced empty output, make sure to use a DNA sequence.")
4851
else:

0 commit comments

Comments
 (0)