Skip to content

Commit 56c6eac

Browse files
committed
Merge pull request #31 from miso-belica/fix-invalid-tag-names
Don't throw an exception for invalid tag names
2 parents 1f2861b + 4401688 commit 56c6eac

File tree

3 files changed

+97
-2
lines changed

3 files changed

+97
-2
lines changed

src/HTML5/Parser/DOMTreeBuilder.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,14 @@ public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
223223
$lname = Elements::normalizeSvgElement($lname);
224224
}
225225

226+
try {
227+
$ele = $this->doc->createElement($lname);
228+
}
229+
catch(\DOMException $e) {
230+
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
231+
$ele = $this->doc->createElement('invalid');
232+
}
226233

227-
$ele = $this->doc->createElement($lname);
228234
foreach ($attributes as $aName => $aVal) {
229235

230236
if ($this->insertMode == static::IM_IN_SVG) {

src/HTML5/Parser/Tokenizer.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,9 @@ protected function tagName() {
322322
}
323323

324324
// We know this is at least one char.
325-
$name = strtolower($this->scanner->charsUntil("/> \n\f\t"));
325+
$name = strtolower($this->scanner->charsWhile(
326+
":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
327+
));
326328
$attributes = array();
327329
$selfClose = FALSE;
328330

test/HTML5/Parser/TokenizerTest.php

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,93 @@ public function testSimpleTags() {
320320
}
321321
}
322322

323+
/**
 * Tags that open directly with attribute-like text instead of a tag name.
 *
 * For every sample the event stream is expected to contain three parse
 * errors, then a start tag named after the text preceding the "=", and
 * finally the eof event.
 */
public function testTagsWithAttributeAndMissingName() {
  // Maps the raw markup to the tag name the start-tag event should carry.
  $samples = array(
    '<id="top_featured">' => 'id',
    '<color="white">' => 'color',
    "<class='neaktivni_stranka'>" => 'class',
    '<bgcolor="white">' => 'bgcolor',
    '<class="nom">' => 'class',
  );

  foreach ($samples as $markup => $tagName) {
    $stream = $this->parse($markup);

    // Events 0, 1 and 2 must each be a parse error.
    for ($position = 0; $position < 3; ++$position) {
      $this->assertEventError($stream->get($position));
    }

    $this->assertEventEquals('startTag', $tagName, $stream->get(3));
    $this->assertEventEquals('eof', NULL, $stream->get(4));
  }
}
341+
342+
/**
 * Tag names that run straight into other markup without a closing ">".
 *
 * Each fixture maps an HTML snippet to the ordered list of events expected
 * from parsing it. An entry of array('error') stands for a parse-error
 * event; every other entry is array(event type, expected payload).
 * Fixtures are parsed, and their events checked, in the order listed.
 */
public function testTagNotClosedAfterTagName() {
  $parseError = array('error');
  $eof = array('eof', NULL);

  $fixtures = array(
    "<noscript<img>" => array(
      $parseError,
      array('startTag', 'noscript'),
      array('startTag', 'img'),
      $eof,
    ),
    '<center<a>' => array(
      $parseError,
      array('startTag', 'center'),
      array('startTag', 'a'),
      $eof,
    ),
    '<br<br>' => array(
      $parseError,
      array('startTag', 'br'),
      array('startTag', 'br'),
      $eof,
    ),
    '<span<>02</span>' => array(
      $parseError,
      array('startTag', 'span'),
      $parseError,
      array('text', '>02'),
      array('endTag', 'span'),
      $eof,
    ),
    '<p</p>' => array(
      $parseError,
      array('startTag', 'p'),
      array('endTag', 'p'),
      $eof,
    ),
    '<strong><WordPress</strong>' => array(
      array('startTag', 'strong'),
      $parseError,
      array('startTag', 'wordpress'),
      array('endTag', 'strong'),
      $eof,
    ),
    '<src=<a>' => array(
      $parseError,
      $parseError,
      $parseError,
      array('startTag', 'src'),
      array('startTag', 'a'),
      $eof,
    ),
    '<br...<a>' => array(
      $parseError,
      array('startTag', 'br'),
      $eof,
    ),
  );

  foreach ($fixtures as $markup => $expectedEvents) {
    $stream = $this->parse($markup);

    foreach ($expectedEvents as $position => $expectation) {
      $event = $stream->get($position);

      if ($expectation[0] === 'error') {
        $this->assertEventError($event);
      }
      else {
        $this->assertEventEquals($expectation[0], $expectation[1], $event);
      }
    }
  }
}
391+
392+
/**
 * Tag names followed by a stray character such as '"', '&', '*' or '-'.
 *
 * Only the first two events are checked: a parse error, then a start tag
 * whose name is the part of the tag name before the stray character.
 */
public function testIllegalTagNames() {
  // Maps the raw markup to the tag name the start-tag event should carry.
  $samples = array(
    '<li">' => 'li',
    '<p">' => 'p',
    '<b&nbsp; >' => 'b',
    '<static*all>' => 'static',
    '<h*0720/>' => 'h',
    '<st*ATTRIBUTE />' => 'st',
    '<a-href="http://url.com/">' => 'a',
  );

  foreach ($samples as $markup => $tagName) {
    $stream = $this->parse($markup);

    $firstEvent = $stream->get(0);
    $this->assertEventError($firstEvent);

    $secondEvent = $stream->get(1);
    $this->assertEventEquals('startTag', $tagName, $secondEvent);
  }
}
409+
323410
/**
324411
* @depends testCharacterReference
325412
*/

0 commit comments

Comments
 (0)