Skip to content

Commit 8cedee9

Browse files
committed
Merge branch 'main' of github.com:Adamtaranto/tSplit
2 parents ad8e2e5 + be80d3e commit 8cedee9

File tree

12 files changed

+643
-33
lines changed

12 files changed

+643
-33
lines changed

.github/workflows/ruff.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
contents: write # Allows reading and writing repository contents (e.g., commits)
1010
pull-requests: write # Allows reading and writing pull requests
1111
steps:
12-
- uses: actions/checkout@v5
12+
- uses: actions/checkout@v6
1313
with:
1414
ref: ${{ github.sha }}
1515
token: ${{ secrets.GITHUB_TOKEN }}

.pre-commit-config.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ repos:
2020
args: [--pytest-test-first]
2121
- id: trailing-whitespace
2222
- repo: https://github.com/astral-sh/ruff-pre-commit
23-
rev: v0.14.3
23+
rev: v0.14.11
2424
hooks:
2525
- id: ruff-check
2626
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
2727
- id: ruff-format
2828
- repo: https://github.com/numpy/numpydoc
29-
rev: v1.9.0
29+
rev: v1.10.0
3030
hooks:
3131
- id: numpydoc-validation
3232
exclude: (tests/|docs/|scripts/).*
@@ -38,7 +38,7 @@ repos:
3838
rev: v0.20.0
3939
hooks:
4040
- id: yamlfmt
41-
- repo: https://github.com/pre-commit/mirrors-prettier
42-
rev: v4.0.0-alpha.8
43-
hooks:
44-
- id: prettier
41+
# - repo: https://github.com/pre-commit/mirrors-prettier
42+
# rev: v4.0.0-alpha.8
43+
# hooks:
44+
# - id: prettier
File renamed without changes.

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,13 @@ Split segments will be written to _TIR_split_tsplit_output.fasta_ with suffix "\
8080
TIRs must be at least 10bp in length and share 80%
8181
identity and occur within 10bp of each end of the input element.
8282

83-
Additionally, synthetic MITEs will be constructed by concatenation of left and right TIRs, with internal segments excised.
8483

8584
```bash
86-
tsplit TIR -i tests/data/TIR_element.fa -p TIR_split --makemites --keeptemp
85+
tsplit TIR -i tests/data/TIR_element.fa -p TIR_split
8786

8887
# Equivalent to defaults
89-
tsplit TIR -i tests/data/TIR_element.fa -p TIR_split --maxdist 10 --minid 80.0 --minterm 10 --method blastn --splitmode split --makemites --keeptemp
88+
tsplit TIR -i tests/data/TIR_element.fa -p TIR_split --maxdist 10 --minid 80.0 --minterm 10 --method blastn --splitmode split
89+
# Use '--both' if you want to report both left and right TIRs
9090
```
9191

9292
Output: `TIR_split_tsplit_output.fasta`

environment.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,4 @@ dependencies:
1010
- pip:
1111
- "biopython>=1.70"
1212
- "pymummer>=0.12.0"
13-
- pytest
14-
- hatch
1513
- rich

pyproject.toml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ name = "tsplit"
99
description = "Extract terminal repeats from retrotransposons (LTRs) or DNA transposons (TIRs). Compose synthetic MITES from complete DNA transposons."
1010
readme = "README.md"
1111
requires-python = ">=3.8"
12-
license = { text = "MIT" }
12+
license = { text = "GPL-3.0-or-later" }
1313
authors = [{ name = "Adam Taranto" }]
1414

1515
# Classifiers for project categorization
@@ -35,9 +35,11 @@ dev = [
3535
"mkdocs",
3636
"mkdocstrings-python",
3737
"mkdocstrings",
38+
"mypy",
3839
"notebook",
3940
"numpydoc-validation",
4041
"pre-commit",
42+
"pydocstyle",
4143
"pymdown-extensions",
4244
"pytest-cov",
4345
"pytest",
@@ -101,6 +103,9 @@ ignore = [
101103
"B905", # `zip()` without an explicit `strict=` parameter
102104
]
103105

106+
# Don't auto-fix docstring issues - they're too fragile
107+
unfixable = ["D"]
108+
104109
[tool.ruff.lint.per-file-ignores]
105110
"tests/*" = ["D"] # Ignore all pydocstyle rules in tests
106111

@@ -137,6 +142,11 @@ override_SS05 = [ # allow docstrings to start with these words
137142
'^Access ',
138143
]
139144

145+
# Don't process filepaths that match these regex patterns
146+
exclude_files = [
147+
'^_version\\.py$',
148+
]
149+
140150
[tool.mypy]
141151
python_version = "3.10"
142152
warn_return_any = true
@@ -145,5 +155,5 @@ disallow_untyped_defs = true
145155

146156
[tool.pydocstyle]
147157
convention = "numpy"
148-
# Ignore files in directories named "tests"
149-
match-dir = "((?!tests).)*"
158+
match-dir = "[^\\.].*" # matches all dirs that don't start with a dot
159+
match = "(?!test_).*\\.py" # matches files that don't start with 'test_' but end with '.py'

src/tsplit/app_tsplit.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,12 @@ def parse_args() -> Namespace:
130130
choices=['blastn', 'nucmer'],
131131
help='Select alignment method: "blastn" or "nucmer".(Default: blastn)',
132132
)
133+
tir_parser.add_argument(
134+
'--both',
135+
action='store_true',
136+
default=False,
137+
help='Report both left and right terminal repeats when splitmode is one of {all, split, external}. Suffixes will be "_L_TIR" and "_R_TIR". Right TIR will be reverse complemented for alignment with left TIR.',
138+
)
133139

134140
# Set up parser for LTR subcommand
135141
ltr_parser = subparsers.add_parser(
@@ -212,6 +218,12 @@ def parse_args() -> Namespace:
212218
choices=['blastn', 'nucmer'],
213219
help='Select alignment method: "blastn" or "nucmer".(Default: blastn)',
214220
)
221+
ltr_parser.add_argument(
222+
'--both',
223+
action='store_true',
224+
default=False,
225+
help='Report both left and right terminal repeats when splitmode is one of {all, split, external}. Suffixes will be "_L_LTR" and "_R_LTR". Right LTR will be in the same orientation as left LTR.',
226+
)
215227

216228
# Parse and return the command-line arguments
217229
return parser.parse_args()

src/tsplit/cmd_LTR.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def main(args: Optional[Namespace] = None) -> None:
7979
temp=args.outdir, # Directory for temporary files
8080
alignTool=args.method, # Alignment tool (blastn or nucmer)
8181
keeptemp=args.keeptemp, # Whether to keep temporary files
82+
both=args.both, # Whether to report both terminal repeats
8283
)
8384

8485
# Write the identified segments to output file

src/tsplit/cmd_TIR.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def main(args: Optional[Namespace] = None) -> None:
8383
alignTool=args.method, # Alignment tool (blastn or nucmer)
8484
temp=args.outdir, # Directory for temporary files
8585
keeptemp=args.keeptemp, # Whether to keep temporary files
86+
both=args.both, # Whether to report both terminal repeats
8687
)
8788

8889
# Write the identified segments to output file

src/tsplit/parseAlign.py

Lines changed: 86 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def getTIRs(
3737
keeptemp: bool = False,
3838
alignTool: str = 'nucmer',
3939
verbose: bool = True,
40+
both: bool = False,
4041
) -> Generator[SeqRecord, None, None]:
4142
"""
4243
Align elements to self and attempt to identify TIRs.
@@ -71,6 +72,8 @@ def getTIRs(
7172
Alignment tool to use ('nucmer' or 'blastn'), by default 'nucmer'.
7273
verbose : bool, optional
7374
Whether to print verbose output, by default True.
75+
both : bool, optional
76+
Whether to report both left and right terminal repeats, by default False.
7477
7578
Yields
7679
------
@@ -85,6 +88,10 @@ def getTIRs(
8588
-----
8689
When mites=True, the function will also yield synthetic MITEs constructed by
8790
joining the identified TIRs.
91+
92+
When both=True and report is in ['split', 'external', 'all'], both left and right
93+
TIRs will be yielded with suffixes '_L_TIR' and '_R_TIR'. The right TIR will be
94+
reverse complemented so it can be aligned with the left TIR.
8895
"""
8996
# Set temp directory to cwd if none is provided
9097
if not temp:
@@ -212,15 +219,42 @@ def getTIRs(
212219
print(pairwise_alignments[0])
213220

214221
if report in ['split', 'external', 'all']:
215-
# yield TIR slice - append "_TIR"
216-
extSeg = rec[ref_start : ref_end + 1] # +1 to include end base
217-
extSeg.id = f'{extSeg.id}_TIR'
218-
extSeg.name = extSeg.id
219-
extSeg.description = f'[{rec.id} TIR segment]'
220-
logging.info(
221-
f'Yielding TIR segment: {extSeg.id}, len: {len(extSeg)}bp'
222-
)
223-
yield extSeg
222+
if both:
223+
# Yield both left and right TIRs
224+
# Left TIR
225+
leftSeg = rec[ref_start : ref_end + 1]
226+
leftSeg.id = f'{leftSeg.id}_L_TIR'
227+
leftSeg.name = leftSeg.id
228+
leftSeg.description = f'[{rec.id} left TIR segment]'
229+
logging.info(
230+
f'Yielding left TIR segment: {leftSeg.id}, len: {len(leftSeg)}bp'
231+
)
232+
yield leftSeg
233+
234+
# Right TIR - reverse complement so it aligns with left TIR
235+
rightSeg = rec[qry_start : qry_end + 1]
236+
# Reverse complement the right TIR
237+
rightSeg = rightSeg.reverse_complement(
238+
id=f'{rec.id}_R_TIR',
239+
name=f'{rec.id}_R_TIR',
240+
description=f'[{rec.id} right TIR segment, reverse complemented]',
241+
)
242+
logging.info(
243+
f'Yielding right TIR segment (reverse complemented): {rightSeg.id}, len: {len(rightSeg)}bp'
244+
)
245+
yield rightSeg
246+
else:
247+
# yield TIR slice - append "_TIR"
248+
extSeg = rec[
249+
ref_start : ref_end + 1
250+
] # +1 to include end base
251+
extSeg.id = f'{extSeg.id}_TIR'
252+
extSeg.name = extSeg.id
253+
extSeg.description = f'[{rec.id} TIR segment]'
254+
logging.info(
255+
f'Yielding TIR segment: {extSeg.id}, len: {len(extSeg)}bp'
256+
)
257+
yield extSeg
224258

225259
if report in ['split', 'internal', 'all']:
226260
# yield internal slice - append "_I"
@@ -371,6 +405,7 @@ def getLTRs(
371405
keeptemp: bool = False,
372406
alignTool: str = 'nucmer',
373407
verbose: bool = True,
408+
both: bool = False,
374409
) -> Generator[SeqRecord, None, None]:
375410
"""
376411
Align elements to self and attempt to identify LTRs.
@@ -402,6 +437,8 @@ def getLTRs(
402437
Alignment tool to use ('nucmer' or 'blastn'), by default 'nucmer'.
403438
verbose : bool, optional
404439
Whether to print verbose output, by default True.
440+
both : bool, optional
441+
Whether to report both left and right terminal repeats, by default False.
405442
406443
Yields
407444
------
@@ -411,6 +448,13 @@ def getLTRs(
411448
- 'external': Only LTRs
412449
- 'internal': Only internal regions
413450
- 'all': Original sequences plus all segments
451+
452+
Notes
453+
-----
454+
When both=True and report is in ['split', 'external', 'all'], both left and right
455+
LTRs will be yielded with suffixes '_L_LTR' and '_R_LTR'. Unlike TIRs, the right
456+
LTR will NOT be reverse complemented as it is already in the same orientation as
457+
the left LTR.
414458
"""
415459
# Set temp directory to cwd if none is provided
416460
if not temp:
@@ -520,15 +564,39 @@ def getLTRs(
520564
print(pairwise_alignments[0])
521565

522566
if report in ['split', 'external', 'all']:
523-
# yield LTR slice - append "_LTR"
524-
extSeg = rec[ref_start : ref_end + 1] # +1 to include end base
525-
extSeg.id = f'{extSeg.id}_LTR'
526-
extSeg.name = extSeg.id
527-
extSeg.description = f'[{rec.id} LTR segment]'
528-
logging.info(
529-
f'Yielding LTR segment: {extSeg.id}, len: {len(extSeg)}bp'
530-
)
531-
yield extSeg
567+
if both:
568+
# Yield both left and right LTRs
569+
# Left LTR
570+
leftSeg = rec[ref_start : ref_end + 1]
571+
leftSeg.id = f'{leftSeg.id}_L_LTR'
572+
leftSeg.name = leftSeg.id
573+
leftSeg.description = f'[{rec.id} left LTR segment]'
574+
logging.info(
575+
f'Yielding left LTR segment: {leftSeg.id}, len: {len(leftSeg)}bp'
576+
)
577+
yield leftSeg
578+
579+
# Right LTR - keep in same orientation (do NOT reverse complement)
580+
rightSeg = rec[qry_start : qry_end + 1]
581+
rightSeg.id = f'{rightSeg.id}_R_LTR'
582+
rightSeg.name = rightSeg.id
583+
rightSeg.description = f'[{rec.id} right LTR segment]'
584+
logging.info(
585+
f'Yielding right LTR segment: {rightSeg.id}, len: {len(rightSeg)}bp'
586+
)
587+
yield rightSeg
588+
else:
589+
# yield LTR slice - append "_LTR"
590+
extSeg = rec[
591+
ref_start : ref_end + 1
592+
] # +1 to include end base
593+
extSeg.id = f'{extSeg.id}_LTR'
594+
extSeg.name = extSeg.id
595+
extSeg.description = f'[{rec.id} LTR segment]'
596+
logging.info(
597+
f'Yielding LTR segment: {extSeg.id}, len: {len(extSeg)}bp'
598+
)
599+
yield extSeg
532600

533601
if report in ['split', 'internal', 'all']:
534602
# yield internal slice - append "_I"

0 commit comments

Comments
 (0)