Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
* the `A5` value that could be specified as page `format` to the `FPDF` constructor was slightly incorrect, and the corresponding page dimensions have been fixed. This could lead to a minor change in your documents dimensions if you used this `A5` page format. - _cf._ [issue #1699](https://github.com/py-pdf/fpdf2/issues/1699)
* a bug when rendering empty tables with `INTERNAL` layout, that caused an extra border to be rendered due to an erroneous use of `list.index()` - _cf._ [issue #1669](https://github.com/py-pdf/fpdf2/issues/1669)
* edge cases where tiny floating-point drift could trigger incorrect behaviour like [issue #1393](https://github.com/py-pdf/fpdf2/issues/1393)
* the bug “Escape character escaping multiple consecutive markdown characters” (https://github.com/py-pdf/fpdf2/issues/1236) has been fixed.
* parentheses are now escaped in more places. This fixes embedding a file with a single parenthesis in the filename, but might also have impacts on e.g. section names.
### Changed
* improved performance when rendering paths, SVGs, and opaque raster images with an alpha channel - _cf._ [PR #1675](https://github.com/py-pdf/fpdf2/pull/1675)
Expand Down
61 changes: 37 additions & 24 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4464,9 +4464,32 @@ def frag() -> Fragment:
font_glyphs = self.current_font.cmap # type: ignore[union-attr]
else:
font_glyphs = []
num_escape_chars = 0

while text:
tlt = text[:3] ## get triples to check for escape character
if self.MARKDOWN_ESCAPE_CHARACTER == tlt[0] and tlt[1:] in [
"**",
"__",
"~~",
"--",
]:
Comment on lines +4470 to +4475
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if self.MARKDOWN_ESCAPE_CHARACTER == tlt[0] and tlt[1:] in [
"**",
"__",
"~~",
"--",
]:
if tlt.startswith(self.MARKDOWN_ESCAPE_CHARACTER) and tlt[1:] in (
self.MARKDOWN_BOLD_MARKER,
self.MARKDOWN_ITALICS_MARKER,
self.MARKDOWN_STRIKETHROUGH_MARKER,
self.MARKDOWN_UNDERLINE_MARKER,
):

This change would make it easier to read - less 'magic values' across the code.

## remove the escape character
txt_frag.append(text[1])
txt_frag.append(text[2])
yield frag()
text = text[3:]
continue

if self.MARKDOWN_ESCAPE_CHARACTER == tlt[0:1] and \
self.MARKDOWN_ESCAPE_CHARACTER == tlt[1:2]:
# double-escape, juste produce it
txt_frag.append(text[0])
txt_frag.append(text[1])
yield frag()
text = text[2:]
continue


Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are removing characters from the input text even when markdown=False. This portion of code should be inside an if markdown: block

is_marker = text[:2] in (
self.MARKDOWN_BOLD_MARKER,
self.MARKDOWN_ITALICS_MARKER,
Expand Down Expand Up @@ -4509,29 +4532,19 @@ def frag() -> Fragment:
and (not txt_frag or txt_frag[-1] != half_marker)
and (len(text) < 3 or text[2] != half_marker)
):
txt_frag = (
txt_frag[: -((num_escape_chars + 1) // 2)]
if num_escape_chars > 0
else txt_frag
)
if num_escape_chars % 2 == 0:
if txt_frag:
yield frag()
if text[:2] == self.MARKDOWN_BOLD_MARKER:
in_bold = not in_bold
if text[:2] == self.MARKDOWN_ITALICS_MARKER:
in_italics = not in_italics
if text[:2] == self.MARKDOWN_STRIKETHROUGH_MARKER:
in_strikethrough = not in_strikethrough
if text[:2] == self.MARKDOWN_UNDERLINE_MARKER:
in_underline = not in_underline
text = text[2:]
continue
num_escape_chars = (
num_escape_chars + 1
if text[0] == self.MARKDOWN_ESCAPE_CHARACTER
else 0
)
if txt_frag:
yield frag()
if text[:2] == self.MARKDOWN_BOLD_MARKER:
in_bold = not in_bold
if text[:2] == self.MARKDOWN_ITALICS_MARKER:
in_italics = not in_italics
if text[:2] == self.MARKDOWN_STRIKETHROUGH_MARKER:
in_strikethrough = not in_strikethrough
if text[:2] == self.MARKDOWN_UNDERLINE_MARKER:
in_underline = not in_underline
text = text[2:]
continue

is_link = self.MARKDOWN_LINK_REGEX.match(text)
if is_link:
link_text, link_dest, text = is_link.groups()
Expand Down
Binary file modified test/text/cell_markdown_bold_italic_escaped.pdf
Binary file not shown.
Binary file modified test/text/cell_markdown_escaped.pdf
Binary file not shown.
Binary file modified test/text/cell_markdown_with_ttf_fonts_escaped.pdf
Binary file not shown.
Binary file modified test/text/multi_cell_markdown_escaped.pdf
Binary file not shown.
Binary file modified test/text/multi_cell_markdown_with_ttf_fonts_escaped.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions test/text/test_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ def test_cell_markdown_escaped(tmp_path):
pdf.add_page()
pdf.set_font("Times", size=40)
pdf.cell(text="**Lo\\rem** \\__Ipsum\\__ \\\\--dolor\\\\--", markdown=True)
pdf.write(text="\n")
pdf.cell(text="\\****BOLD**\\**", markdown=True)
assert_pdf_equal(pdf, HERE / "cell_markdown_escaped.pdf", tmp_path)


Expand Down
Binary file added test/text/test_cell_markdown_right_aligned
Binary file not shown.
124 changes: 90 additions & 34 deletions test/text/test_markdown_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,40 @@
GSTATE_BI = GSTATE.copy()
GSTATE_BI.font_style = "BI"

def merge_fragments(fragments):
"""
Helper function for testing the escaping chracters
Will merge fragments that have different characters but same fragment.graphics_state
and same fragment.k and same fragment.link.
Example Input:
(
Fragment(characters=['a'], graphics_state={000}, k=1, link=None),
Fragment(characters=['b'], graphics_state={000}, k=1, link=None)
)
Example Output:
(Fragment(characters=['a', 'b'], graphics_state={000}, k=1, link=None))
"""
if not fragments:
return []

merged_fragments = []
current_fragment = fragments[0]

for fragment in fragments[1:]:
if (
fragment.graphics_state == current_fragment.graphics_state
and fragment.k == current_fragment.k
and fragment.link == current_fragment.link
):
current_fragment.characters.extend(fragment.characters)
else:
merged_fragments.append(current_fragment)
current_fragment = fragment

merged_fragments.append(current_fragment)

return tuple(merged_fragments)


def test_markdown_parse_simple_ok():
frags = tuple(
Expand All @@ -35,63 +69,75 @@ def test_markdown_parse_simple_ok():


def test_markdown_parse_simple_ok_escaped():
frags = tuple(
FPDF()._parse_chars(
"\\**bold\\**, \\__italics\\__ and \\--underlined\\-- escaped", True
frags = merge_fragments(
tuple(
FPDF()._parse_chars(
"\\**bold\\**, \\__italics\\__ and \\--underlined\\-- escaped", True)
)
)
expected = (
Fragment("**bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k),
Fragment("**bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k),
)
assert frags == expected
frags = tuple(
frags = merge_fragments (tuple(
FPDF()._parse_chars(
r"raw \**bold\**, \__italics\__ and \--underlined\-- escaped", True
)
)
))
expected = (
Fragment(
"raw **bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k
),
)
assert frags == expected
frags = tuple(FPDF()._parse_chars("escape *\\*between marker*\\*", True))
frags =(
tuple(FPDF()._parse_chars("escape *\\*between marker*\\*", True))
)
expected = (Fragment("escape *\\*between marker*\\*", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(FPDF()._parse_chars("escape **\\after marker**\\", True))
frags = merge_fragments(
tuple(FPDF()._parse_chars("escape **\\after marker**\\", True))
)
expected = (
Fragment("escape ", GSTATE, k=PDF.k),
Fragment("\\after marker", GSTATE_B, k=PDF.k),
Fragment("\\", GSTATE, k=PDF.k),
)


def test_markdown_unrelated_escape():
frags = tuple(FPDF()._parse_chars("unrelated \\ escape \\**bold\\**", True))
frags = merge_fragments(
tuple(FPDF()._parse_chars("unrelated \\ escape \\**bold\\**", True))
)
expected = (Fragment("unrelated \\ escape **bold**", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(
FPDF()._parse_chars("unrelated \\\\ double escape \\**bold\\**", True)
frags = merge_fragments(
tuple(FPDF()._parse_chars("unrelated \\\\ double escape \\**bold\\**", True))
)
expected = (Fragment("unrelated \\\\ double escape **bold**", GSTATE, k=PDF.k),)
assert frags == expected


def test_markdown_parse_multiple_escape():
frags = tuple(FPDF()._parse_chars("\\\\**bold\\\\** double escaped", True))
frags = merge_fragments(
tuple(FPDF()._parse_chars("\\\\**bold\\\\** double escaped", True))
)
expected = (
Fragment("\\", GSTATE, k=PDF.k),
Fragment("bold\\", GSTATE_B, k=PDF.k),
Fragment("\\\\", GSTATE, k=PDF.k),
Fragment("bold\\\\", GSTATE_B, k=PDF.k),
Fragment(" double escaped", GSTATE, k=PDF.k),
)

assert frags == expected
frags = tuple(FPDF()._parse_chars("\\\\\\**triple bold\\\\\\** escaped", True))
expected = (Fragment("\\**triple bold\\** escaped", GSTATE, k=PDF.k),)
frags = merge_fragments(
tuple(FPDF()._parse_chars("\\\\\\**triple bold\\\\\\** escaped", True))
)
expected = (Fragment("\\\\**triple bold\\\\** escaped", GSTATE, k=PDF.k),)
assert frags == expected


def test_markdown_parse_overlapping():
frags = tuple(FPDF()._parse_chars("**bold __italics__**", True))
frags = merge_fragments(
tuple(FPDF()._parse_chars("**bold __italics__**", True))
)
expected = (
Fragment("bold ", GSTATE_B, k=PDF.k),
Fragment("italics", GSTATE_BI, k=PDF.k),
Expand All @@ -100,13 +146,14 @@ def test_markdown_parse_overlapping():


def test_markdown_parse_overlapping_escaped():
frags = tuple(FPDF()._parse_chars("**bold \\__italics\\__**", True))
frags = merge_fragments(
tuple(FPDF()._parse_chars("**bold \\__italics\\__**", True))
)
expected = (Fragment("bold __italics__", GSTATE_B, k=PDF.k),)
assert frags == expected


def test_markdown_parse_crossing_markers():
frags = tuple(FPDF()._parse_chars("**bold __and** italics__", True))
frags = merge_fragments (tuple(FPDF()._parse_chars("**bold __and** italics__", True)))
expected = (
Fragment("bold ", GSTATE_B, k=PDF.k),
Fragment("and", GSTATE_BI, k=PDF.k),
Expand All @@ -116,7 +163,7 @@ def test_markdown_parse_crossing_markers():


def test_markdown_parse_crossing_markers_escaped():
frags = tuple(FPDF()._parse_chars("**bold __and\\** italics__", True))
frags =merge_fragments (tuple(FPDF()._parse_chars("**bold __and\\** italics__", True)))
expected = (
Fragment("bold ", GSTATE_B, k=PDF.k),
Fragment("and** italics", GSTATE_BI, k=PDF.k),
Expand All @@ -125,7 +172,7 @@ def test_markdown_parse_crossing_markers_escaped():


def test_markdown_parse_unterminated():
frags = tuple(FPDF()._parse_chars("**bold __italics__", True))
frags = merge_fragments (tuple(FPDF()._parse_chars("**bold __italics__", True)))
expected = (
Fragment("bold ", GSTATE_B, k=PDF.k),
Fragment("italics", GSTATE_BI, k=PDF.k),
Expand All @@ -134,7 +181,7 @@ def test_markdown_parse_unterminated():


def test_markdown_parse_unterminated_escaped():
frags = tuple(FPDF()._parse_chars("**bold\\** __italics__", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("**bold\\** __italics__", True)))
expected = (
Fragment("bold** ", GSTATE_B, k=PDF.k),
Fragment("italics", GSTATE_BI, k=PDF.k),
Expand All @@ -143,16 +190,22 @@ def test_markdown_parse_unterminated_escaped():


def test_markdown_parse_line_of_markers():
frags = tuple(FPDF()._parse_chars("*** woops", True))
frags = merge_fragments (tuple(FPDF()._parse_chars("*** woops", True)))
expected = (Fragment("*** woops", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(FPDF()._parse_chars("----------", True))
frags = merge_fragments (tuple(FPDF()._parse_chars("----------", True)))
expected = (Fragment("----------", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(FPDF()._parse_chars("****BOLD**", True))
expected = (Fragment("****BOLD", GSTATE, k=PDF.k),)
expected = (Fragment("**BOLD", GSTATE_B, k=PDF.k),)
frags = merge_fragments(tuple(FPDF()._parse_chars("\\****BOLD**\\**", True)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you missing an assert after line 200

expected = (
Fragment("**", GSTATE, k=PDF.k),
Fragment("BOLD", GSTATE_B, k=PDF.k),
Fragment("**", GSTATE, k=PDF.k),)

assert frags == expected
frags = tuple(FPDF()._parse_chars("* **BOLD**", True))
frags = merge_fragments (tuple(FPDF()._parse_chars("* **BOLD**", True)))
expected = (
Fragment("* ", GSTATE, k=PDF.k),
Fragment("BOLD", GSTATE_B, k=PDF.k),
Expand All @@ -161,17 +214,20 @@ def test_markdown_parse_line_of_markers():


def test_markdown_parse_line_of_markers_escaped():
frags = tuple(FPDF()._parse_chars("\\****BOLD**", True))
expected = (Fragment("\\****BOLD", GSTATE, k=PDF.k),)
frags = merge_fragments(tuple(FPDF()._parse_chars("\\****BOLD**", True)))
expected = (
Fragment("**", GSTATE, k=PDF.k),
Fragment("BOLD", GSTATE_B, k=PDF.k),
)
assert frags == expected
frags = tuple(FPDF()._parse_chars("*\\***BOLD**", True))
expected = (Fragment("*\\***BOLD", GSTATE, k=PDF.k),)
frags = merge_fragments(tuple(FPDF()._parse_chars("*\\***BOLD**", True)))
expected = (Fragment("****BOLD", GSTATE, k=PDF.k),)
assert frags == expected


def test_markdown_parse_newline_after_markdown_link(): # issue 916
text = "[fpdf2](https://py-pdf.github.io/fpdf2/)\nGo visit it!"
frags = tuple(FPDF()._parse_chars(text, True))
frags = merge_fragments(tuple(FPDF()._parse_chars(text, True)))
gstate_link = GSTATE.copy()
gstate_link.underline = True
expected = (
Expand Down
Binary file added test/text/test_multi_cell_j_paragraphs
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added test/text/test_write_font_stretching
Binary file not shown.
Binary file added test/text/test_write_font_stretching.pdf
Binary file not shown.