@@ -308,17 +308,6 @@ static void tokenizer_add_parse_error(GumboParser* parser, GumboErrorType type)
308308 }
309309}
310310
311- static bool is_alpha (int c ) {
312- // Returns true only for ASCII letters ('A'-'Z', 'a'-'z'). We don't use the ISO C
313- // isupper/islower functions here because they depend upon the program's locale,
314- // while the behavior of the HTML5 spec is independent of which locale the program is run in.
315- return (c >= 'A' && c <= 'Z' ) || (c >= 'a' && c <= 'z' );
316- }
317-
318- static int ensure_lowercase (int c ) { // Maps 'A'-'Z' to 'a'-'z'; any other value is returned unchanged.
319- return c >= 'A' && c <= 'Z' ? c + 0x20 : c ; // 0x20 is the ASCII offset from an uppercase letter to its lowercase form.
320- }
321-
322311static GumboTokenType get_char_token_type (bool is_in_cdata , int c ) {
323312 if (is_in_cdata && c > 0 ) {
324313 return GUMBO_TOKEN_CDATA ;
@@ -689,9 +678,9 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
689678 GumboTokenizerState * tokenizer = parser -> _tokenizer_state ;
690679 GumboTagState * tag_state = & tokenizer -> _tag_state ;
691680 int c = utf8iterator_current (& tokenizer -> _input );
692- assert (is_alpha (c ));
693- c = ensure_lowercase (c );
694- assert (is_alpha (c ));
681+ assert (gumbo_isalpha (c ));
682+ c = gumbo_tolower (c );
683+ assert (gumbo_isalpha (c ));
695684
696685 initialize_tag_buffer (parser );
697686 gumbo_string_buffer_append_codepoint (c , & tag_state -> _buffer );
@@ -1027,7 +1016,7 @@ static StateResult handle_tag_open_state(
10271016 tokenizer_add_parse_error (parser , GUMBO_ERR_TAG_STARTS_WITH_QUESTION );
10281017 return NEXT_CHAR ;
10291018 default :
1030- if (is_alpha (c )) {
1019+ if (gumbo_isalpha (c )) {
10311020 gumbo_tokenizer_set_state (parser , GUMBO_LEX_TAG_NAME );
10321021 start_new_tag (parser , true);
10331022 return NEXT_CHAR ;
@@ -1055,7 +1044,7 @@ static StateResult handle_end_tag_open_state(
10551044 gumbo_tokenizer_set_state (parser , GUMBO_LEX_DATA );
10561045 return emit_temporary_buffer (parser , output );
10571046 default :
1058- if (is_alpha (c )) {
1047+ if (gumbo_isalpha (c )) {
10591048 gumbo_tokenizer_set_state (parser , GUMBO_LEX_TAG_NAME );
10601049 start_new_tag (parser , false);
10611050 } else {
@@ -1098,7 +1087,7 @@ static StateResult handle_tag_name_state(
10981087 gumbo_tokenizer_set_state (parser , GUMBO_LEX_DATA );
10991088 return NEXT_CHAR ;
11001089 default :
1101- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1090+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
11021091 return NEXT_CHAR ;
11031092 }
11041093}
@@ -1124,7 +1113,7 @@ static StateResult handle_rcdata_end_tag_open_state(
11241113 GumboParser * parser , GumboTokenizerState * tokenizer ,
11251114 int c , GumboToken * output ) {
11261115 assert (temporary_buffer_equals (parser , "</" ));
1127- if (is_alpha (c )) {
1116+ if (gumbo_isalpha (c )) {
11281117 gumbo_tokenizer_set_state (parser , GUMBO_LEX_RCDATA_END_TAG_NAME );
11291118 start_new_tag (parser , false);
11301119 append_char_to_temporary_buffer (parser , c );
@@ -1141,8 +1130,8 @@ static StateResult handle_rcdata_end_tag_name_state(
11411130 GumboParser * parser , GumboTokenizerState * tokenizer ,
11421131 int c , GumboToken * output ) {
11431132 assert (tokenizer -> _temporary_buffer .length >= 2 );
1144- if (is_alpha (c )) {
1145- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1133+ if (gumbo_isalpha (c )) {
1134+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
11461135 append_char_to_temporary_buffer (parser , c );
11471136 return NEXT_CHAR ;
11481137 } else if (is_appropriate_end_tag (parser )) {
@@ -1190,7 +1179,7 @@ static StateResult handle_rawtext_end_tag_open_state(
11901179 GumboParser * parser , GumboTokenizerState * tokenizer ,
11911180 int c , GumboToken * output ) {
11921181 assert (temporary_buffer_equals (parser , "</" ));
1193- if (is_alpha (c )) {
1182+ if (gumbo_isalpha (c )) {
11941183 gumbo_tokenizer_set_state (parser , GUMBO_LEX_RAWTEXT_END_TAG_NAME );
11951184 start_new_tag (parser , false);
11961185 append_char_to_temporary_buffer (parser , c );
@@ -1208,8 +1197,8 @@ static StateResult handle_rawtext_end_tag_name_state(
12081197 assert (tokenizer -> _temporary_buffer .length >= 2 );
12091198 gumbo_debug ("Last end tag: %*s\n" , (int ) tokenizer -> _tag_state ._buffer .length ,
12101199 tokenizer -> _tag_state ._buffer .data );
1211- if (is_alpha (c )) {
1212- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1200+ if (gumbo_isalpha (c )) {
1201+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
12131202 append_char_to_temporary_buffer (parser , c );
12141203 return NEXT_CHAR ;
12151204 } else if (is_appropriate_end_tag (parser )) {
@@ -1262,7 +1251,7 @@ static StateResult handle_script_end_tag_open_state(
12621251 GumboParser * parser , GumboTokenizerState * tokenizer ,
12631252 int c , GumboToken * output ) {
12641253 assert (temporary_buffer_equals (parser , "</" ));
1265- if (is_alpha (c )) {
1254+ if (gumbo_isalpha (c )) {
12661255 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_END_TAG_NAME );
12671256 start_new_tag (parser , false);
12681257 append_char_to_temporary_buffer (parser , c );
@@ -1278,8 +1267,8 @@ static StateResult handle_script_end_tag_name_state(
12781267 GumboParser * parser , GumboTokenizerState * tokenizer ,
12791268 int c , GumboToken * output ) {
12801269 assert (tokenizer -> _temporary_buffer .length >= 2 );
1281- if (is_alpha (c )) {
1282- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1270+ if (gumbo_isalpha (c )) {
1271+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
12831272 append_char_to_temporary_buffer (parser , c );
12841273 return NEXT_CHAR ;
12851274 } else if (is_appropriate_end_tag (parser )) {
@@ -1421,11 +1410,11 @@ static StateResult handle_script_escaped_lt_state(
14211410 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_OPEN );
14221411 append_char_to_temporary_buffer (parser , c );
14231412 return NEXT_CHAR ;
1424- } else if (is_alpha (c )) {
1413+ } else if (gumbo_isalpha (c )) {
14251414 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START );
14261415 append_char_to_temporary_buffer (parser , c );
14271416 gumbo_string_buffer_append_codepoint (
1428- ensure_lowercase (c ), & tokenizer -> _script_data_buffer );
1417+ gumbo_tolower (c ), & tokenizer -> _script_data_buffer );
14291418 return emit_temporary_buffer (parser , output );
14301419 } else {
14311420 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_ESCAPED );
@@ -1438,7 +1427,7 @@ static StateResult handle_script_escaped_end_tag_open_state(
14381427 GumboParser * parser , GumboTokenizerState * tokenizer ,
14391428 int c , GumboToken * output ) {
14401429 assert (temporary_buffer_equals (parser , "</" ));
1441- if (is_alpha (c )) {
1430+ if (gumbo_isalpha (c )) {
14421431 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME );
14431432 start_new_tag (parser , false);
14441433 append_char_to_temporary_buffer (parser , c );
@@ -1454,8 +1443,8 @@ static StateResult handle_script_escaped_end_tag_name_state(
14541443 GumboParser * parser , GumboTokenizerState * tokenizer ,
14551444 int c , GumboToken * output ) {
14561445 assert (tokenizer -> _temporary_buffer .length >= 2 );
1457- if (is_alpha (c )) {
1458- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1446+ if (gumbo_isalpha (c )) {
1447+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
14591448 append_char_to_temporary_buffer (parser , c );
14601449 return NEXT_CHAR ;
14611450 } else if (is_appropriate_end_tag (parser )) {
@@ -1498,9 +1487,9 @@ static StateResult handle_script_double_escaped_start_state(
14981487 ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED : GUMBO_LEX_SCRIPT_ESCAPED );
14991488 return emit_current_char (parser , output );
15001489 default :
1501- if (is_alpha (c )) {
1490+ if (gumbo_isalpha (c )) {
15021491 gumbo_string_buffer_append_codepoint (
1503- ensure_lowercase (c ), & tokenizer -> _script_data_buffer );
1492+ gumbo_tolower (c ), & tokenizer -> _script_data_buffer );
15041493 return emit_current_char (parser , output );
15051494 } else {
15061495 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_ESCAPED );
@@ -1616,9 +1605,9 @@ static StateResult handle_script_double_escaped_end_state(
16161605 ? GUMBO_LEX_SCRIPT_ESCAPED : GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED );
16171606 return emit_current_char (parser , output );
16181607 default :
1619- if (is_alpha (c )) {
1608+ if (gumbo_isalpha (c )) {
16201609 gumbo_string_buffer_append_codepoint (
1621- ensure_lowercase (c ), & tokenizer -> _script_data_buffer );
1610+ gumbo_tolower (c ), & tokenizer -> _script_data_buffer );
16221611 return emit_current_char (parser , output );
16231612 } else {
16241613 gumbo_tokenizer_set_state (parser , GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED );
@@ -1662,7 +1651,7 @@ static StateResult handle_before_attr_name_state(
16621651 // Fall through.
16631652 default :
16641653 gumbo_tokenizer_set_state (parser , GUMBO_LEX_ATTR_NAME );
1665- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1654+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
16661655 return NEXT_CHAR ;
16671656 }
16681657}
@@ -1706,7 +1695,7 @@ static StateResult handle_attr_name_state(
17061695 tokenizer_add_parse_error (parser , GUMBO_ERR_ATTR_NAME_INVALID );
17071696 // Fall through.
17081697 default :
1709- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1698+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
17101699 return NEXT_CHAR ;
17111700 }
17121701}
@@ -1747,7 +1736,7 @@ static StateResult handle_after_attr_name_state(
17471736 // Fall through.
17481737 default :
17491738 gumbo_tokenizer_set_state (parser , GUMBO_LEX_ATTR_NAME );
1750- append_char_to_tag_buffer (parser , ensure_lowercase (c ), true);
1739+ append_char_to_tag_buffer (parser , gumbo_tolower (c ), true);
17511740 return NEXT_CHAR ;
17521741 }
17531742}
@@ -2298,7 +2287,7 @@ static StateResult handle_before_doctype_name_state(
22982287 default :
22992288 gumbo_tokenizer_set_state (parser , GUMBO_LEX_DOCTYPE_NAME );
23002289 tokenizer -> _doc_type_state .force_quirks = false;
2301- append_char_to_temporary_buffer (parser , ensure_lowercase (c ));
2290+ append_char_to_temporary_buffer (parser , gumbo_tolower (c ));
23022291 return NEXT_CHAR ;
23032292 }
23042293}
@@ -2340,7 +2329,7 @@ static StateResult handle_doctype_name_state(
23402329 default :
23412330 gumbo_tokenizer_set_state (parser , GUMBO_LEX_DOCTYPE_NAME );
23422331 tokenizer -> _doc_type_state .force_quirks = false;
2343- append_char_to_temporary_buffer (parser , ensure_lowercase (c ));
2332+ append_char_to_temporary_buffer (parser , gumbo_tolower (c ));
23442333 return NEXT_CHAR ;
23452334 }
23462335}
0 commit comments