44from copy import copy
55from typing import Optional
66
7- import black
7+ import black . parsing
88
99from snakefmt .config import PathLike , read_black_config
1010from snakefmt .exceptions import InvalidParameterSyntax , InvalidPython
1111from snakefmt .logging import Warnings
12- from snakefmt .parser .parser import Parser , comment_start
12+ from snakefmt .parser .parser import Parser , Snakefile , comment_start
1313from snakefmt .parser .syntax import (
1414 COMMENT_SPACING ,
1515 InlineSingleParam ,
1818 ParamList ,
1919 SingleParam ,
2020 Syntax ,
21+ split_code_string ,
2122)
22- from snakefmt .types import TAB , TokenIterator
23+ from snakefmt .types import TAB
2324
2425TAB_SIZE = len (TAB )
25- # This regex matches any number of consecutive strings; each can span multiple lines.
26- full_string_matcher = re .compile (
27- r"^\s*(\w?([\"']{3}.*?[\"']{3})|([\"']{1}.*?[\"']{1}))$" , re .DOTALL | re .MULTILINE
28- )
2926# this regex matches any docstring; can span multiple lines
3027docstring_matcher = re .compile (
3128 r"\s*([rR]?[\"']{3}.*?[\"']{3})" , re .DOTALL | re .MULTILINE
@@ -59,7 +56,7 @@ def index_of_first_docstring(s: str) -> Optional[int]:
5956class Formatter (Parser ):
6057 def __init__ (
6158 self ,
62- snakefile : TokenIterator ,
59+ snakefile : Snakefile ,
6360 line_length : Optional [int ] = None ,
6461 black_config_file : Optional [PathLike ] = None ,
6562 ):
@@ -193,7 +190,7 @@ def run_black_format_str(
193190 )
194191 try :
195192 fmted = black .format_str (string , mode = black_mode )
196- except black .InvalidInput as e :
193+ except black .parsing . InvalidInput as e :
197194 err_msg = ""
198195 # Not clear whether all Black errors start with 'Cannot parse' - it seems to
199196 # in the tests I ran
@@ -228,61 +225,25 @@ def align_strings(self, string: str, target_indent: int) -> str:
228225 """
229226 Takes an ensemble of strings and indents/reindents it
230227 """
231- pos = 0
232228 used_indent = TAB * target_indent
233- indented = ""
234- for match in re .finditer (full_string_matcher , string ):
235- indented += textwrap .indent (string [pos : match .start (1 )], used_indent )
236- lagging_spaces = len (indented ) - len (indented .rstrip (" " ))
237- lagging_indent_lvl = lagging_spaces // TAB_SIZE
238- match_slice = string [match .start (1 ) : match .end (1 )].replace ("\t " , TAB )
239- all_lines = match_slice .splitlines (keepends = True )
240- first = textwrap .indent (textwrap .dedent (all_lines [0 ]), used_indent )
241- indented += first
242-
243- is_multiline_string = re .match (
244- r"[bfru]?\"\"\"|'''" , first .lstrip (), flags = re .IGNORECASE
245- )
246- if not is_multiline_string :
247- # this check if string is a single-quoted multiline string
248- # e.g. https://github.com/snakemake/snakefmt/issues/121
249- is_multiline_string = "\\ \n " in first
250-
251- if len (all_lines ) > 2 :
252- if is_multiline_string :
253- middle = "" .join (all_lines [1 :- 1 ])
254- else :
255- mid = "" .join (all_lines [1 :- 1 ])
256- dedent_mid = textwrap .dedent (mid )
257-
258- if lagging_indent_lvl == 0 :
259- required_indent_lvl = target_indent
260- else :
261- current_indent_lvl = (len (mid ) - len (mid .lstrip ())) // TAB_SIZE
262- required_indent_lvl = current_indent_lvl + target_indent
263-
264- required_indent = TAB * required_indent_lvl
265- middle = textwrap .indent (
266- dedent_mid ,
267- required_indent ,
268- )
269- indented += middle
270-
271- if len (all_lines ) > 1 :
272- if is_multiline_string :
273- last = all_lines [- 1 ]
274- else :
275- leading_spaces = len (all_lines [- 1 ]) - len (
276- textwrap .dedent (all_lines [- 1 ])
277- )
278- leading_indent = leading_spaces // TAB_SIZE * TAB
279- last = textwrap .indent (
280- textwrap .dedent (all_lines [- 1 ]), used_indent + leading_indent
281- )
282- indented += last
283- pos = match .end ()
284- indented += textwrap .indent (string [pos :], used_indent )
285-
229+ split_string = split_code_string (string )
230+ if len (split_string ) == 1 :
231+ return textwrap .indent (split_string [0 ], used_indent )
232+ # First, mask all multi-line strings
233+ mask_string = "`~!@#$%^&*|?"
234+ while mask_string in string :
235+ mask_string += mask_string
236+ mask_string = f'"""{ mask_string } """'
237+ fakewrap = textwrap .indent (
238+ "" .join (mask_string if i % 2 else s for i , s in enumerate (split_string )),
239+ used_indent ,
240+ )
241+ split_code = fakewrap .split (mask_string )
242+ # After indenting, we put those strings back
243+ indented = "" .join (
244+ s .replace ("\t " , TAB ) if i % 2 else split_code [i // 2 ]
245+ for i , s in enumerate (split_string )
246+ )
286247 return indented
287248
288249 def format_param (
@@ -304,12 +265,10 @@ def format_param(
304265 raise InvalidParameterSyntax (f"{ parameter .line_nb } { val } " ) from None
305266
306267 if inline_formatting or param_list :
307- val = " " .join (
308- val .rstrip ().split ("\n " )
309- ) # collapse strings on multiple lines
268+ val = val .rstrip ()
310269 extra_spacing = 0
311270 if param_list :
312- val = f"f({ val } )"
271+ val = f"f({ val } \n )"
313272 extra_spacing = 3
314273
315274 # get the index of the last character of the first docstring, if any
@@ -367,26 +326,36 @@ def format_params(self, parameters: ParameterSyntax) -> str:
367326
368327 p_class = parameters .__class__
369328 param_list = issubclass (p_class , ParamList )
370- inline_fmting = False
371- if p_class is InlineSingleParam :
372- inline_fmting = True
329+ inline_fmting = p_class is InlineSingleParam
373330
374331 result = f"{ used_indent } { parameters .keyword_line } :"
375332 if inline_fmting :
376- result += " "
333+ # here, check if the value is too large to put in one line
334+ params_iter = iter (parameters .all_params )
335+ try :
336+ param = next (params_iter )
337+ except StopIteration :
338+ # No params; render just the keyword line and its comment.
339+ return f"{ result } { parameters .comment } \n "
340+ param_result = self .format_param (
341+ param , target_indent , inline_fmting , param_list
342+ )
343+ inline_fmting = param_result .count ("\n " ) == 1
344+ if inline_fmting :
377345 prepended_comments = ""
378346 if parameters .comment != "" :
379347 prepended_comments += f"{ used_indent } { parameters .comment .lstrip ()} \n "
380- param = next (iter (parameters .all_params ))
381348 for comment in param .pre_comments :
382349 prepended_comments += f"{ used_indent } { comment } \n "
383350 if prepended_comments != "" :
384351 Warnings .comment_relocation (parameters .keyword_name , param .line_nb )
385- result = f"{ prepended_comments } { result } "
352+ result = f"{ prepended_comments } { result } { param_result } "
386353 else :
387- result += f"{ parameters .comment } \n "
388- for param in parameters .all_params :
389- result += self .format_param (param , target_indent , inline_fmting , param_list )
354+ result = f"{ result } { parameters .comment } \n "
355+ for param in parameters .all_params :
356+ result += self .format_param (
357+ param , target_indent , inline_fmting , param_list
358+ )
390359 num_c = len (param .post_comments )
391360 if num_c > 1 or (not param ._has_inline_comment and num_c == 1 ):
392361 Warnings .block_comment_below (parameters .keyword_name , param .line_nb )
0 commit comments