
Commit 2f8e693

fix: improve robustness and formatting in snakefmt for complex cases (#259)
A big thank you to @Hocnonsense for this PR. This commit addresses multiple edge cases and improves the reliability and output of snakefmt, resolving several long-standing issues:

- Safer handling of multi-piece strings and f-strings, resulting in more robust formatting.
- More compact and stable formatting of inline parameters.
- Improved consistency of indentation and alignment for complex string and parameter scenarios.
- Enhanced parsing error messages for clearer diagnostics.
- Updated supported Python targets to 3.11–3.13 and adjusted the CI matrix accordingly.
- Switched configuration parsing to Python's standard-library TOML reader (tomllib), reducing dependencies.
- CLI tests now assert stdout, and additional regression tests were added for complex parameter handling.
- The "one-line" format now gracefully falls back to the normal style where appropriate.
- Delegated line merging and comment handling to Black for improved output.

Fixes #190, #208, #240, #242; closes #255.
1 parent 059e4ae commit 2f8e693

File tree

10 files changed: +255 -143 lines changed


.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [ 3.9, "3.10", "3.11", "3.12" ]
+        python-version: [ "3.11", "3.12", "3.13" ]
         os: [ ubuntu-latest, macos-latest ]

     steps:

pyproject.toml

Lines changed: 0 additions & 2 deletions
@@ -17,8 +17,6 @@ snakefmt = 'snakefmt.snakefmt:main'
 python = "^3.11"
 click = "^8.0.0"
 black = "^24.3.0"
-toml = "^0.10.2"
-importlib_metadata = {version = ">=1.7.0,<5.0", python = "<3.8"}

 [tool.poetry.dev-dependencies]
 pytest = "^7.4.4"

snakefmt/__init__.py

Lines changed: 3 additions & 8 deletions
@@ -6,12 +6,9 @@
 (in dist-info or egg-info dirs).
 From Python 3.8, importlib_metadata is in standard library as importlib.metadata.
 """
-from black import TargetVersion
+from importlib import metadata

-if sys.version_info >= (3, 8):
-    from importlib import metadata
-else:
-    import importlib_metadata as metadata
+from black.mode import TargetVersion

 __version__ = metadata.version("snakefmt")

@@ -20,9 +17,7 @@

 DEFAULT_LINE_LENGTH = 88
 DEFAULT_TARGET_VERSIONS = {
-    TargetVersion.PY38,
-    TargetVersion.PY39,
-    TargetVersion.PY310,
     TargetVersion.PY311,
     TargetVersion.PY312,
+    TargetVersion.PY313,
 }
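
Reviewer note: the version lookup now reduces to a single standard-library call. A minimal sketch is below; the PackageNotFoundError fallback is illustrative only and is not part of this commit.

from importlib import metadata  # standard library since Python 3.8

try:
    __version__ = metadata.version("snakefmt")
except metadata.PackageNotFoundError:
    # Illustrative fallback: the package is not installed, e.g. a plain source checkout.
    __version__ = "unknown"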

snakefmt/config.py

Lines changed: 7 additions & 5 deletions
@@ -2,13 +2,13 @@
 Code for searching for and parsing snakefmt configuration files
 """

+import tomllib
 from dataclasses import fields
 from functools import lru_cache
 from pathlib import Path
 from typing import Dict, Optional, Sequence, Tuple, Union

 import click
-import toml
 from black import Mode

 from snakefmt import DEFAULT_LINE_LENGTH, DEFAULT_TARGET_VERSIONS

@@ -79,11 +79,12 @@ def read_snakefmt_config(path: Optional[str]) -> Dict[str, str]:
     if path is None:
         return dict()
     try:
-        config_toml = toml.load(path)
+        with open(path, "rb") as f:
+            config_toml = tomllib.load(f)
         config = config_toml.get("tool", {}).get("snakefmt", {})
         config = {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
         return config
-    except (toml.TomlDecodeError, OSError) as error:
+    except (tomllib.TOMLDecodeError, OSError) as error:
         raise click.FileError(
             filename=path, hint=f"Error reading configuration file: {error}"
         )

@@ -118,9 +119,10 @@ def read_black_config(path: Optional[PathLike]) -> Mode:
         raise FileNotFoundError(f"{path} is not a file.")

     try:
-        pyproject_toml = toml.load(path)
+        with open(path, "rb") as f:
+            pyproject_toml = tomllib.load(f)
         config = pyproject_toml.get("tool", {}).get("black", {})
-    except toml.TomlDecodeError as error:
+    except tomllib.TOMLDecodeError as error:
         raise MalformattedToml(error)

     valid_black_filemode_params = sorted([field.name for field in fields(Mode)])
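
Reviewer note: tomllib has shipped with the standard library since Python 3.11 and, unlike the third-party toml package, only accepts binary file objects, which is why both call sites gain an open(path, "rb"). A minimal usage sketch (the file name is assumed for illustration):

import tomllib  # standard library since Python 3.11

with open("pyproject.toml", "rb") as f:  # tomllib requires a binary-mode file object
    data = tomllib.load(f)

snakefmt_config = data.get("tool", {}).get("snakefmt", {})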

snakefmt/formatter.py

Lines changed: 45 additions & 76 deletions
@@ -4,12 +4,12 @@
 from copy import copy
 from typing import Optional

-import black
+import black.parsing

 from snakefmt.config import PathLike, read_black_config
 from snakefmt.exceptions import InvalidParameterSyntax, InvalidPython
 from snakefmt.logging import Warnings
-from snakefmt.parser.parser import Parser, comment_start
+from snakefmt.parser.parser import Parser, Snakefile, comment_start
 from snakefmt.parser.syntax import (
     COMMENT_SPACING,
     InlineSingleParam,

@@ -18,14 +18,11 @@
     ParamList,
     SingleParam,
     Syntax,
+    split_code_string,
 )
-from snakefmt.types import TAB, TokenIterator
+from snakefmt.types import TAB

 TAB_SIZE = len(TAB)
-# This regex matches any number of consecutive strings; each can span multiple lines.
-full_string_matcher = re.compile(
-    r"^\s*(\w?([\"']{3}.*?[\"']{3})|([\"']{1}.*?[\"']{1}))$", re.DOTALL | re.MULTILINE
-)
 # this regex matches any docstring; can span multiple lines
 docstring_matcher = re.compile(
     r"\s*([rR]?[\"']{3}.*?[\"']{3})", re.DOTALL | re.MULTILINE

@@ -59,7 +56,7 @@ def index_of_first_docstring(s: str) -> Optional[int]:
 class Formatter(Parser):
     def __init__(
         self,
-        snakefile: TokenIterator,
+        snakefile: Snakefile,
         line_length: Optional[int] = None,
         black_config_file: Optional[PathLike] = None,
     ):

@@ -193,7 +190,7 @@ def run_black_format_str(
         )
         try:
             fmted = black.format_str(string, mode=black_mode)
-        except black.InvalidInput as e:
+        except black.parsing.InvalidInput as e:
             err_msg = ""
             # Not clear whether all Black errors start with 'Cannot parse' - it seems to
             # in the tests I ran

@@ -228,61 +225,25 @@ def align_strings(self, string: str, target_indent: int) -> str:
         """
         Takes an ensemble of strings and indents/reindents it
         """
-        pos = 0
         used_indent = TAB * target_indent
-        indented = ""
-        for match in re.finditer(full_string_matcher, string):
-            indented += textwrap.indent(string[pos : match.start(1)], used_indent)
-            lagging_spaces = len(indented) - len(indented.rstrip(" "))
-            lagging_indent_lvl = lagging_spaces // TAB_SIZE
-            match_slice = string[match.start(1) : match.end(1)].replace("\t", TAB)
-            all_lines = match_slice.splitlines(keepends=True)
-            first = textwrap.indent(textwrap.dedent(all_lines[0]), used_indent)
-            indented += first
-
-            is_multiline_string = re.match(
-                r"[bfru]?\"\"\"|'''", first.lstrip(), flags=re.IGNORECASE
-            )
-            if not is_multiline_string:
-                # this check if string is a single-quoted multiline string
-                # e.g. https://github.com/snakemake/snakefmt/issues/121
-                is_multiline_string = "\\\n" in first
-
-            if len(all_lines) > 2:
-                if is_multiline_string:
-                    middle = "".join(all_lines[1:-1])
-                else:
-                    mid = "".join(all_lines[1:-1])
-                    dedent_mid = textwrap.dedent(mid)
-
-                    if lagging_indent_lvl == 0:
-                        required_indent_lvl = target_indent
-                    else:
-                        current_indent_lvl = (len(mid) - len(mid.lstrip())) // TAB_SIZE
-                        required_indent_lvl = current_indent_lvl + target_indent
-
-                    required_indent = TAB * required_indent_lvl
-                    middle = textwrap.indent(
-                        dedent_mid,
-                        required_indent,
-                    )
-                indented += middle
-
-            if len(all_lines) > 1:
-                if is_multiline_string:
-                    last = all_lines[-1]
-                else:
-                    leading_spaces = len(all_lines[-1]) - len(
-                        textwrap.dedent(all_lines[-1])
-                    )
-                    leading_indent = leading_spaces // TAB_SIZE * TAB
-                    last = textwrap.indent(
-                        textwrap.dedent(all_lines[-1]), used_indent + leading_indent
-                    )
-                indented += last
-            pos = match.end()
-        indented += textwrap.indent(string[pos:], used_indent)
-
+        split_string = split_code_string(string)
+        if len(split_string) == 1:
+            return textwrap.indent(split_string[0], used_indent)
+        # First, masks all multi-line strings
+        mask_string = "`~!@#$%^&*|?"
+        while mask_string in string:
+            mask_string += mask_string
+        mask_string = f'"""{mask_string}"""'
+        fakewrap = textwrap.indent(
+            "".join(mask_string if i % 2 else s for i, s in enumerate(split_string)),
+            used_indent,
+        )
+        split_code = fakewrap.split(mask_string)
+        # After indenting, we put those strings back
+        indented = "".join(
+            s.replace("\t", TAB) if i % 2 else split_code[i // 2]
+            for i, s in enumerate(split_string)
+        )
         return indented

     def format_param(

@@ -304,12 +265,10 @@ def format_param(
             raise InvalidParameterSyntax(f"{parameter.line_nb}{val}") from None

         if inline_formatting or param_list:
-            val = " ".join(
-                val.rstrip().split("\n")
-            )  # collapse strings on multiple lines
+            val = val.rstrip()
         extra_spacing = 0
         if param_list:
-            val = f"f({val})"
+            val = f"f({val}\n)"
             extra_spacing = 3

         # get the index of the last character of the first docstring, if any

@@ -367,26 +326,36 @@ def format_params(self, parameters: ParameterSyntax) -> str:

         p_class = parameters.__class__
         param_list = issubclass(p_class, ParamList)
-        inline_fmting = False
-        if p_class is InlineSingleParam:
-            inline_fmting = True
+        inline_fmting = p_class is InlineSingleParam

         result = f"{used_indent}{parameters.keyword_line}:"
         if inline_fmting:
-            result += " "
+            # here, check if the value is too large to put in one line
+            params_iter = iter(parameters.all_params)
+            try:
+                param = next(params_iter)
+            except StopIteration:
+                # No params; render just the keyword line and its comment.
+                return f"{result}{parameters.comment}\n"
+            param_result = self.format_param(
+                param, target_indent, inline_fmting, param_list
+            )
+            inline_fmting = param_result.count("\n") == 1
+        if inline_fmting:
             prepended_comments = ""
             if parameters.comment != "":
                 prepended_comments += f"{used_indent}{parameters.comment.lstrip()}\n"
-            param = next(iter(parameters.all_params))
             for comment in param.pre_comments:
                 prepended_comments += f"{used_indent}{comment}\n"
             if prepended_comments != "":
                 Warnings.comment_relocation(parameters.keyword_name, param.line_nb)
-            result = f"{prepended_comments}{result}"
+            result = f"{prepended_comments}{result} {param_result}"
         else:
-            result += f"{parameters.comment}\n"
-        for param in parameters.all_params:
-            result += self.format_param(param, target_indent, inline_fmting, param_list)
+            result = f"{result}{parameters.comment}\n"
+            for param in parameters.all_params:
+                result += self.format_param(
+                    param, target_indent, inline_fmting, param_list
+                )
             num_c = len(param.post_comments)
             if num_c > 1 or (not param._has_inline_comment and num_c == 1):
                 Warnings.block_comment_below(parameters.keyword_name, param.line_nb)
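
Reviewer note: the rewritten align_strings delegates string detection to split_code_string and then applies a mask-and-restore trick: multi-line strings are swapped for a placeholder, the surrounding code is indented in one pass, and the original strings are spliced back in untouched. A minimal, self-contained sketch of that idea (the segment list and placeholder below are illustrative, not the commit's exact values):

import textwrap

# Alternating [code, string, code, ...] segments, as split_code_string would return.
segments = ['a = 1\nb = ', 'f"""\nnot\nindented\n"""', '\nc = 2\n']

# Choose a placeholder that cannot occur in the text being formatted.
mask = '"""@@MASK@@"""'
while any(mask in seg for seg in segments):
    mask += mask

# Mask the string segments (odd indices), indent the code as one block,
# then splice the untouched strings back in.
masked = "".join(mask if i % 2 else seg for i, seg in enumerate(segments))
code_pieces = textwrap.indent(masked, "    ").split(mask)
result = "".join(
    seg if i % 2 else code_pieces[i // 2] for i, seg in enumerate(segments)
)
print(result)  # code lines are indented; the triple-quoted string is untouched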

snakefmt/parser/syntax.py

Lines changed: 53 additions & 1 deletion
@@ -59,6 +59,56 @@
 }


+def split_code_string(string: str) -> list[str]:
+    """Splits a code string into individual lines, preserving leading whitespace.
+    >>> string = '''a = 1\nb = f\"\"\"\n{a}\n1\n2\n\"\"\"\nc=2'''
+    >>> split_code_string(string)
+    ['a = 1\nb = ', 'f\"\"\"\n{a}\n1\n2\n\"\"\"', '\nc=2']
+    """
+    lines = string.splitlines(keepends=True)
+    lineiter = iter(lines)
+    tokens = list(tokenize.generate_tokens(lambda: next(lineiter)))
+    string_areas = []
+    tokeniter = iter(tokens)
+    for token in tokeniter:
+        if token.type == tokenize.STRING:
+            if token.start[0] != token.end[0]:
+                string_areas.append((token.start, token.end))
+        if fstring_tokeniser_in_use and token.type == tokenize.FSTRING_START:
+            isin_fstring = 1
+            for t1 in tokeniter:
+                if t1.type == tokenize.FSTRING_START:
+                    isin_fstring += 1
+                elif t1.type == tokenize.FSTRING_END:
+                    isin_fstring -= 1
+                    if isin_fstring == 0:
+                        break
+            if token.start[0] != t1.end[0]:
+                string_areas.append((token.start, t1.end))
+    code_str = [""]
+    last_area = (1, 0), (1, 0)
+    for area in string_areas:
+        code_str[-1] += _extract_line_mid(lines, last_area[-1], area[0])
+        code_str.append(_extract_line_mid(lines, area[0], area[1]))
+        code_str.append("")
+        last_area = area
+    code_str[-1] += _extract_line_mid(
+        lines, last_area[-1], (len(lines), len(lines[-1]))
+    )
+    return code_str
+
+
+def _extract_line_mid(
+    lines: list[str], start: tuple[int, int], end: tuple[int, int]
+) -> str:
+    s = "".join(lines[i] for i in range(start[0] - 1, end[0]))
+    t = s[start[1] :]
+    end_trim = end[1] - len(lines[end[0] - 1])
+    if end_trim != 0:
+        t = t[:end_trim]
+    return t
+
+
 def re_add_curly_bracket_if_needed(token: Token) -> str:
     result = ""
     if (

@@ -107,7 +157,9 @@ def operator_skip_spacing(prev_token: Token, token: Token) -> bool:
     return False


-def add_token_space(prev_token: Token, token: Token, in_fstring: bool = False) -> bool:
+def add_token_space(
+    prev_token: Optional[Token], token: Token, in_fstring: bool = False
+) -> bool:
     result = False
     if prev_token is not None:
         if not operator_skip_spacing(prev_token, token):
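
Reviewer note: split_code_string relies on the tokenize module to locate string literals that span multiple lines; those spans are exactly what align_strings later shields from re-indentation. On Python 3.12+ f-strings are emitted as FSTRING_START/FSTRING_END token pairs rather than a single STRING token, hence the extra branch guarded by fstring_tokeniser_in_use. A short, self-contained illustration of the underlying mechanism (not part of the commit):

import io
import tokenize

code = 'a = 1\nb = """\nmulti\nline\n"""\nc = 2\n'

# Report every string token that starts and ends on different lines,
# i.e. the regions split_code_string treats as opaque.
for tok in tokenize.generate_tokens(io.StringIO(code).readline):
    if tok.type == tokenize.STRING and tok.start[0] != tok.end[0]:
        print("multi-line string spans", tok.start, "to", tok.end)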

tests/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@

 @pytest.fixture
 def cli_runner() -> CliRunner:
-    return CliRunner(mix_stderr=False)
+    return CliRunner()


 pytest_plugins = "pytester"

0 commit comments
