Skip to content

Commit c31d931

Browse files
asukaminato0721meta-codesync[bot]
authored andcommitted
fix Markdown format error applied in code doc block #1383 (#2331)
Summary: Fixes #1383. Updates docstring cleaning to wrap doctest prompts and reST literal blocks in fenced code, preventing Markdown from applying emphasis (e.g. rendering `__name__` in bold). Pull Request resolved: #2331. Test Plan: Added/updated docstring tests to cover fenced doctest and literal-block behavior. Reviewed By: stroxler. Differential Revision: D92719067. Pulled By: jvansch1. fbshipit-source-id: 2a397a7acc685cdeff5c46a33218155620ce60ff
1 parent 7790f15 commit c31d931

File tree

1 file changed

+280
-40
lines changed

1 file changed

+280
-40
lines changed

crates/pyrefly_python/src/docstring.rs

Lines changed: 280 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -54,47 +54,16 @@ impl Docstring {
5454
/// Clean a string literal ("""...""") and turn it into a docstring.
5555
pub fn clean(docstring: &str) -> String {
5656
let result = normalize_literal(docstring);
57+
let lines: Vec<&str> = result.lines().collect();
58+
59+
if lines.is_empty() {
60+
return String::new();
61+
}
5762

5863
// Remove the shortest amount of whitespace from the beginning of each line
59-
let min_indent = minimal_indentation(result.lines().skip(1));
60-
61-
result
62-
.lines()
63-
.enumerate()
64-
.map(|(i, line)| {
65-
if i == 0 {
66-
line.to_owned()
67-
} else {
68-
let trimmed = &line[min_indent.min(line.len())..];
69-
let mut content = trimmed;
70-
71-
// Handle potential leading blockquote (`> `) for non-doctest lines
72-
let is_doctest_prompt = {
73-
let t = trimmed.trim_start();
74-
t.starts_with(">>>") && t.as_bytes().get(3).is_none_or(|b| *b != b'>')
75-
};
76-
if !is_doctest_prompt {
77-
while let Some(rest) = content.strip_prefix('>') {
78-
content = rest.strip_prefix(' ').unwrap_or(rest);
79-
}
80-
}
81-
82-
// Replace remaining leading spaces with &nbsp; or they might be ignored in markdown parsers
83-
let leading_spaces = content.bytes().take_while(|&c| c == b' ').count();
84-
if leading_spaces > 0 {
85-
format!(
86-
"{}{}",
87-
"&nbsp;".repeat(leading_spaces),
88-
&content[leading_spaces..]
89-
)
90-
} else {
91-
content.to_owned()
92-
}
93-
}
94-
})
95-
.collect::<Vec<_>>()
96-
// Note: markdown doesn't break on just `\n`
97-
.join(" \n")
64+
let min_indent = minimal_indentation(lines.iter().skip(1).copied());
65+
66+
format_docstring_lines(&lines, min_indent)
9867
}
9968

10069
/// Resolve the docstring to a string. This involves parsing the file to get the contents of the docstring and then cleaning it.
@@ -103,6 +72,266 @@ impl Docstring {
10372
}
10473
}
10574

75+
/// Render cleaned docstring lines into markdown, adding fences for code blocks.
76+
fn format_docstring_lines(lines: &[&str], min_indent: usize) -> String {
77+
let mut state = DocstringRenderState::new();
78+
for (i, line) in lines.iter().enumerate() {
79+
state.handle_line(line, i == 0, min_indent);
80+
}
81+
state.finish()
82+
}
83+
84+
struct DocstringRenderState {
85+
output: Vec<String>,
86+
pending_literal_block: bool,
87+
pending_literal_block_indent: usize,
88+
code_block: Option<CodeBlockKind>,
89+
code_block_indent: usize,
90+
literal_block_marker_indent: usize,
91+
}
92+
93+
impl DocstringRenderState {
94+
/// Start a new rendering state for cleaned docstrings.
95+
fn new() -> Self {
96+
Self {
97+
output: Vec::new(),
98+
pending_literal_block: false,
99+
pending_literal_block_indent: 0,
100+
code_block: None,
101+
code_block_indent: 0,
102+
literal_block_marker_indent: 0,
103+
}
104+
}
105+
106+
/// Process one normalized docstring line, updating fence state and output.
107+
fn handle_line(&mut self, line: &str, is_first: bool, min_indent: usize) {
108+
let raw_leading_spaces = leading_space_count(line);
109+
let base_line = dedent_docstring_line(line, min_indent, is_first);
110+
let mut current = base_line.to_owned();
111+
112+
let saw_literal_marker = self.apply_literal_block_marker(&mut current, raw_leading_spaces);
113+
114+
let trimmed_start = current.trim_start();
115+
let is_blank = trimmed_start.is_empty();
116+
let is_doctest_prompt = is_doctest_prompt(trimmed_start);
117+
let leading_spaces = leading_space_count(&current);
118+
119+
if self.handle_active_code_block(is_blank, is_doctest_prompt, raw_leading_spaces, &current)
120+
{
121+
return;
122+
}
123+
124+
if self.maybe_start_code_block(
125+
is_blank,
126+
is_doctest_prompt,
127+
raw_leading_spaces,
128+
leading_spaces,
129+
&current,
130+
saw_literal_marker,
131+
) {
132+
return;
133+
}
134+
135+
self.output
136+
.push(format_non_code_line(&current, is_doctest_prompt));
137+
}
138+
139+
/// Record a literal-block marker and normalize the line if needed.
140+
fn apply_literal_block_marker(
141+
&mut self,
142+
current: &mut String,
143+
raw_leading_spaces: usize,
144+
) -> bool {
145+
if let Some(updated) = strip_literal_block_marker(current) {
146+
*current = updated;
147+
self.pending_literal_block = true;
148+
self.pending_literal_block_indent = raw_leading_spaces;
149+
return true;
150+
}
151+
false
152+
}
153+
154+
/// Consume a line while inside a fenced code block, if applicable.
155+
fn handle_active_code_block(
156+
&mut self,
157+
is_blank: bool,
158+
is_doctest_prompt: bool,
159+
raw_leading_spaces: usize,
160+
current: &str,
161+
) -> bool {
162+
let Some(kind) = self.code_block else {
163+
return false;
164+
};
165+
166+
match kind {
167+
CodeBlockKind::Doctest => {
168+
if !is_blank && !is_doctest_prompt {
169+
self.output.push("```".to_owned());
170+
self.code_block = None;
171+
false
172+
} else {
173+
self.output
174+
.push(strip_code_indent(current, self.code_block_indent));
175+
true
176+
}
177+
}
178+
CodeBlockKind::Literal => {
179+
if !is_blank && raw_leading_spaces <= self.literal_block_marker_indent {
180+
self.output.push("```".to_owned());
181+
self.code_block = None;
182+
false
183+
} else {
184+
self.output
185+
.push(strip_code_indent(current, self.code_block_indent));
186+
true
187+
}
188+
}
189+
}
190+
}
191+
192+
/// Open a doctest or literal-block fence when the line starts one.
193+
fn maybe_start_code_block(
194+
&mut self,
195+
is_blank: bool,
196+
is_doctest_prompt: bool,
197+
raw_leading_spaces: usize,
198+
leading_spaces: usize,
199+
current: &str,
200+
saw_literal_marker: bool,
201+
) -> bool {
202+
if is_doctest_prompt {
203+
self.code_block = Some(CodeBlockKind::Doctest);
204+
self.code_block_indent = leading_spaces;
205+
self.output.push("```python".to_owned());
206+
self.output
207+
.push(strip_code_indent(current, self.code_block_indent));
208+
self.pending_literal_block = false;
209+
return true;
210+
}
211+
212+
if self.pending_literal_block
213+
&& !is_blank
214+
&& raw_leading_spaces > self.pending_literal_block_indent
215+
{
216+
self.code_block = Some(CodeBlockKind::Literal);
217+
self.code_block_indent = leading_spaces;
218+
self.literal_block_marker_indent = self.pending_literal_block_indent;
219+
self.output.push("```".to_owned());
220+
self.output
221+
.push(strip_code_indent(current, self.code_block_indent));
222+
self.pending_literal_block = false;
223+
return true;
224+
}
225+
226+
if self.pending_literal_block
227+
&& !is_blank
228+
&& raw_leading_spaces <= self.pending_literal_block_indent
229+
&& !saw_literal_marker
230+
{
231+
self.pending_literal_block = false;
232+
}
233+
234+
false
235+
}
236+
237+
/// Close any open fences and join output lines with markdown line breaks.
238+
fn finish(mut self) -> String {
239+
if self.code_block.is_some() {
240+
self.output.push("```".to_owned());
241+
}
242+
243+
// Note: markdown doesn't break on just `\n`
244+
self.output.join(" \n")
245+
}
246+
}
247+
248+
/// The kind of fenced code block the renderer currently has open.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum CodeBlockKind {
    /// A run of doctest prompt lines, fenced as Python.
    Doctest,
    /// An indented block introduced by a reStructuredText `::` marker.
    Literal,
}
253+
254+
/// Report whether `line` begins with a doctest prompt.
///
/// A prompt is exactly three `>` characters or exactly three `.` characters at
/// the very start of the line; a fourth identical character (`>>>>`, `....`)
/// disqualifies it. Leading whitespace is not skipped here.
fn is_doctest_prompt(line: &str) -> bool {
    if let Some(rest) = line.strip_prefix(">>>") {
        !rest.starts_with('>')
    } else if let Some(rest) = line.strip_prefix("...") {
        !rest.starts_with('.')
    } else {
        false
    }
}
264+
265+
/// Strip a reStructuredText literal-block marker (`::`) and return the updated line.
///
/// Returns `None` when the line (ignoring trailing whitespace) does not end
/// with `::`. Otherwise the three reST forms are handled as follows:
///
/// * expanded form, `::` alone on the line (indented or not): the line
///   becomes empty;
/// * partially minimized form, `text ::`: the marker and the whitespace in
///   front of it are dropped, leaving `text`;
/// * fully minimized form, `text::`: a single colon is kept, giving `text:`.
///
/// Whitespace that followed the marker is preserved in the last two forms.
fn strip_literal_block_marker(line: &str) -> Option<String> {
    let content = line.trim_end();
    let before_marker = content.strip_suffix("::")?;

    // Expanded form: nothing but the marker on this line.
    if before_marker.trim().is_empty() {
        return Some(String::new());
    }

    // Partially minimized form: whitespace separates the text from the marker.
    if before_marker.ends_with(char::is_whitespace) {
        let trailing = &line[content.len()..];
        let text = before_marker.trim_end();
        return Some(format!("{text}{trailing}"));
    }

    // Fully minimized form: `text::` is shown as `text:`.
    Some(strip_one_trailing_colon(line))
}

/// Remove a single trailing colon while preserving trailing whitespace.
///
/// Lines that are blank or whose last non-whitespace character is not `:` are
/// returned unchanged.
fn strip_one_trailing_colon(line: &str) -> String {
    let content = line.trim_end();
    match content.strip_suffix(':') {
        Some(before_colon) => {
            let trailing = &line[content.len()..];
            format!("{before_colon}{trailing}")
        }
        None => line.to_owned(),
    }
}
294+
295+
/// Drop up to `indent` leading spaces from a line inside a code fence.
///
/// Whitespace-only lines become empty. Only space characters are removed: a
/// line indented by fewer than `indent` spaces loses just the spaces it has,
/// so its content is never truncated and the cut can never land inside a
/// multi-byte character.
fn strip_code_indent(line: &str, indent: usize) -> String {
    if line.trim().is_empty() {
        return String::new();
    }
    let leading_spaces = line.bytes().take_while(|&b| b == b' ').count();
    // `leading_spaces` counts ASCII bytes only, so this index is a char boundary.
    line[indent.min(leading_spaces)..].to_owned()
}
303+
304+
/// Remove the shared indentation from a docstring line.
///
/// The first line is returned untouched. Any other line has its first
/// `min_indent` bytes cut off, or the whole line when it is shorter than that.
fn dedent_docstring_line<'a>(line: &'a str, min_indent: usize, is_first: bool) -> &'a str {
    let cut = if is_first {
        0
    } else {
        min_indent.min(line.len())
    };
    &line[cut..]
}
311+
312+
/// Render a line that sits outside any code fence.
///
/// Unless `is_doctest_prompt` is set, every leading `>` (each optionally
/// followed by one space) is removed so the line is not shown as a markdown
/// blockquote. Leading spaces that remain are then emitted as `&nbsp;`
/// entities, one per space, because markdown renderers may otherwise drop them.
fn format_non_code_line(line: &str, is_doctest_prompt: bool) -> String {
    let mut unquoted = line;
    if !is_doctest_prompt {
        while let Some(after_marker) = unquoted.strip_prefix('>') {
            unquoted = after_marker.strip_prefix(' ').unwrap_or(after_marker);
        }
    }

    let text = unquoted.trim_start_matches(' ');
    let indent_width = unquoted.len() - text.len();

    let mut rendered = "&nbsp;".repeat(indent_width);
    rendered.push_str(text);
    rendered
}
334+
106335
fn normalize_literal(docstring: &str) -> String {
107336
let normalized = docstring.replace("\r", "").replace("\t", " ");
108337
let stripped = strip_literal_quotes(&normalized);
@@ -464,7 +693,18 @@ mod tests {
464693
fn test_docstring_preserves_doctest_prompt() {
465694
assert_eq!(
466695
Docstring::clean("\"\"\"Example\n>>> foo()\"\"\"").as_str(),
467-
"Example \n>>> foo()"
696+
"Example \n```python \n>>> foo() \n```"
697+
);
698+
}
699+
700+
#[test]
701+
fn test_docstring_literal_block_uses_code_fence() {
702+
assert_eq!(
703+
Docstring::clean(
704+
"\"\"\"Example::\n\n >>> app = Flask(__name__)\n >>> api = Api()\"\"\""
705+
)
706+
.as_str(),
707+
"Example: \n \n```python \n>>> app = Flask(__name__) \n>>> api = Api() \n```"
468708
);
469709
}
470710

0 commit comments

Comments
 (0)