Do not allow empty documents for the fragment parser. Fixes #210

rushter · rushter · commit fb509066dbc9 · 2026-01-24T11:39:49.000+04:00
diff --git a/selectolax/lexbor.pyx b/selectolax/lexbor.pyx
@@ -113,6 +113,10 @@ cdef class LexborHTMLParser:
         if self.document == NULL:
             return -1
 
+        if self._is_fragment and html_len == 0:
+            PyErr_SetObject(SelectolaxError, "Can't parse empty HTML as HTML fragment.")
+            return -1
+
         with nogil:
             if self._is_fragment:
                 status = self._parse_html_fragment(html, html_len)
diff --git a/tests/test_lexbor_fragment.py b/tests/test_lexbor_fragment.py
@@ -1,6 +1,6 @@
 from inspect import cleandoc
 import pytest
-from selectolax.lexbor import LexborHTMLParser
+from selectolax.lexbor import LexborHTMLParser, SelectolaxError
 
 
 def clean_doc(text: str) -> str:
@@ -70,8 +70,8 @@ def test_fragment_parser_whole_doc():
 
 def test_fragment_parser_empty_doc():
     html = ""
-    parser = LexborHTMLParser(html, is_fragment=True)
-    assert parser.html is None
+    with pytest.raises(SelectolaxError):
+        LexborHTMLParser(html, is_fragment=True)
 
 
 @pytest.mark.parametrize(
@@ -377,12 +377,6 @@ def test_fragment_parser_malformed_html():
     assert "content" in html_result
 
 
-def test_fragment_parser_empty_input():
-    parser = LexborHTMLParser("", is_fragment=True)
-    assert parser.root is None
-    assert parser.html is None
-
-
 def test_attributes_access_on_non_element():
     html = "<!-- comment --><div>text</div>"
     parser = LexborHTMLParser(html, is_fragment=True)