Skip to content

Commit fb50906

Browse files
committed
Do not allow empty documents for the fragment parser. Fixes #210
1 parent 65d5096 commit fb50906

File tree

2 files changed

+7
-9
lines changed

2 files changed

+7
-9
lines changed

selectolax/lexbor.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ cdef class LexborHTMLParser:
113113
if self.document == NULL:
114114
return -1
115115

116+
if self._is_fragment and html_len == 0:
117+
PyErr_SetObject(SelectolaxError, "Can't parse empty HTML as HTML fragment.")
118+
return -1
119+
116120
with nogil:
117121
if self._is_fragment:
118122
status = self._parse_html_fragment(html, html_len)

tests/test_lexbor_fragment.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from inspect import cleandoc
22
import pytest
3-
from selectolax.lexbor import LexborHTMLParser
3+
from selectolax.lexbor import LexborHTMLParser, SelectolaxError
44

55

66
def clean_doc(text: str) -> str:
@@ -70,8 +70,8 @@ def test_fragment_parser_whole_doc():
7070

7171
def test_fragment_parser_empty_doc():
7272
html = ""
73-
parser = LexborHTMLParser(html, is_fragment=True)
74-
assert parser.html is None
73+
with pytest.raises(SelectolaxError):
74+
LexborHTMLParser(html, is_fragment=True)
7575

7676

7777
@pytest.mark.parametrize(
@@ -377,12 +377,6 @@ def test_fragment_parser_malformed_html():
377377
assert "content" in html_result
378378

379379

380-
def test_fragment_parser_empty_input():
381-
parser = LexborHTMLParser("", is_fragment=True)
382-
assert parser.root is None
383-
assert parser.html is None
384-
385-
386380
def test_attributes_access_on_non_element():
387381
html = "<!-- comment --><div>text</div>"
388382
parser = LexborHTMLParser(html, is_fragment=True)

0 commit comments

Comments
 (0)