Skip to content

Commit 2347178

Browse files
committed
Speed up both encode() and decode() methods.
1 parent 50b4afa commit 2347178

File tree

4 files changed

+110
-53
lines changed

4 files changed

+110
-53
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2.1.1
2+
-----
3+
4+
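* Speed up both the `encode()` and `decode()` functions by replacing the `String.prototype.replace()` callback with a manual `RegExp.prototype.exec()` loop that returns the input unchanged when nothing matches.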
5+
16
2.1.0
27
-----
38

README.md

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -97,68 +97,68 @@ Common
9797
9898
Initialization / Load speed
9999
100-
* #1: html-entities x 2,992,640 ops/sec ±1.76% (82 runs sampled)
101-
#2: entities x 2,093,859 ops/sec ±1.17% (82 runs sampled)
102-
#3: he x 1,815,445 ops/sec ±1.30% (87 runs sampled)
100+
* #1: html-entities x 2,941,745 ops/sec ±1.87% (81 runs sampled)
101+
#2: entities x 2,061,661 ops/sec ±1.16% (82 runs sampled)
102+
#3: he x 1,861,758 ops/sec ±1.15% (86 runs sampled)
103103
104104
HTML5
105105
106106
Encode test
107107
108-
* #1: html-entities.encode - html5, nonAsciiPrintable x 427,051 ops/sec ±0.25% (96 runs sampled)
109-
* #2: html-entities.encode - html5, nonAscii x 427,332 ops/sec ±0.68% (96 runs sampled)
110-
#3: entities.encodeNonAsciiHTML x 333,348 ops/sec ±1.08% (93 runs sampled)
111-
* #4: html-entities.encode - html5, extensive x 269,630 ops/sec ±0.26% (98 runs sampled)
112-
#5: entities.encodeHTML x 126,117 ops/sec ±0.27% (93 runs sampled)
113-
#6: he.encode x 114,119 ops/sec ±0.20% (96 runs sampled)
108+
* #1: html-entities.encode - html5, nonAscii x 439,350 ops/sec ±0.21% (96 runs sampled)
109+
* #2: html-entities.encode - html5, nonAsciiPrintable x 410,462 ops/sec ±0.22% (93 runs sampled)
110+
#3: entities.encodeNonAsciiHTML x 332,966 ops/sec ±0.54% (92 runs sampled)
111+
* #4: html-entities.encode - html5, extensive x 280,865 ops/sec ±0.22% (95 runs sampled)
112+
#5: entities.encodeHTML x 125,338 ops/sec ±0.30% (92 runs sampled)
113+
#6: he.encode x 112,572 ops/sec ±0.25% (97 runs sampled)
114114
115115
Decode test
116116
117-
* #1: html-entities.decode - html5, strict x 347,055 ops/sec ±0.27% (94 runs sampled)
118-
* #2: html-entities.decode - html5, attribute x 340,751 ops/sec ±0.22% (97 runs sampled)
119-
* #3: html-entities.decode - html5, body x 333,538 ops/sec ±0.28% (94 runs sampled)
120-
#4: entities.decodeHTMLStrict x 329,206 ops/sec ±1.64% (92 runs sampled)
121-
#5: entities.decodeHTML x 278,862 ops/sec ±0.24% (97 runs sampled)
122-
#6: he.decode x 185,834 ops/sec ±0.23% (96 runs sampled)
117+
* #1: html-entities.decode - html5, body x 428,051 ops/sec ±0.22% (98 runs sampled)
118+
* #2: html-entities.decode - html5, strict x 402,821 ops/sec ±0.22% (91 runs sampled)
119+
* #3: html-entities.decode - html5, attribute x 391,007 ops/sec ±0.33% (90 runs sampled)
120+
#4: entities.decodeHTMLStrict x 332,909 ops/sec ±0.56% (95 runs sampled)
121+
#5: entities.decodeHTML x 274,700 ops/sec ±0.29% (97 runs sampled)
122+
#6: he.decode x 184,440 ops/sec ±0.27% (95 runs sampled)
123123
124124
HTML4
125125
126126
Encode test
127127
128-
* #1: html-entities.encode - html4, nonAscii x 413,667 ops/sec ±0.51% (94 runs sampled)
129-
* #2: html-entities.encode - html4, nonAsciiPrintable x 390,540 ops/sec ±0.39% (95 runs sampled)
130-
* #3: html-entities.encode - html4, extensive x 199,258 ops/sec ±0.20% (97 runs sampled)
128+
* #1: html-entities.encode - html4, nonAscii x 419,600 ops/sec ±0.65% (94 runs sampled)
129+
* #2: html-entities.encode - html4, nonAsciiPrintable x 413,954 ops/sec ±0.83% (91 runs sampled)
130+
* #3: html-entities.encode - html4, extensive x 216,838 ops/sec ±0.22% (96 runs sampled)
131131
132132
Decode test
133133
134-
* #1: html-entities.decode - html4, strict x 369,977 ops/sec ±1.13% (93 runs sampled)
135-
* #2: html-entities.decode - html4, body x 366,084 ops/sec ±0.30% (94 runs sampled)
136-
* #3: html-entities.decode - html4, attribute x 363,317 ops/sec ±0.33% (94 runs sampled)
134+
* #1: html-entities.decode - html4, strict x 420,850 ops/sec ±0.23% (92 runs sampled)
135+
* #2: html-entities.decode - html4, body x 413,042 ops/sec ±0.49% (94 runs sampled)
136+
* #3: html-entities.decode - html4, attribute x 408,538 ops/sec ±2.59% (92 runs sampled)
137137
138138
XML
139139
140140
Encode test
141141
142-
* #1: html-entities.encode - xml, nonAscii x 478,394 ops/sec ±2.54% (92 runs sampled)
143-
* #2: html-entities.encode - xml, nonAsciiPrintable x 459,013 ops/sec ±0.20% (97 runs sampled)
144-
#3: entities.encodeXML x 352,570 ops/sec ±1.05% (93 runs sampled)
145-
* #4: html-entities.encode - xml, extensive x 269,313 ops/sec ±0.24% (92 runs sampled)
142+
* #1: html-entities.encode - xml, nonAscii x 511,788 ops/sec ±0.21% (97 runs sampled)
143+
* #2: html-entities.encode - xml, nonAsciiPrintable x 482,136 ops/sec ±0.40% (93 runs sampled)
144+
#3: entities.encodeXML x 353,189 ops/sec ±0.57% (95 runs sampled)
145+
* #4: html-entities.encode - xml, extensive x 291,091 ops/sec ±0.23% (96 runs sampled)
146146
147147
Decode test
148148
149-
* #1: html-entities.decode - xml, body x 429,601 ops/sec ±0.20% (96 runs sampled)
150-
* #2: html-entities.decode - xml, strict x 428,820 ops/sec ±0.22% (96 runs sampled)
151-
#3: entities.decodeXML x 423,011 ops/sec ±0.28% (94 runs sampled)
152-
* #4: html-entities.decode - xml, attribute x 419,337 ops/sec ±0.66% (94 runs sampled)
149+
* #1: html-entities.decode - xml, body x 543,327 ops/sec ±0.25% (89 runs sampled)
150+
* #2: html-entities.decode - xml, attribute x 533,470 ops/sec ±0.22% (94 runs sampled)
151+
* #3: html-entities.decode - xml, strict x 528,014 ops/sec ±2.27% (95 runs sampled)
152+
#4: entities.decodeXML x 421,154 ops/sec ±0.32% (96 runs sampled)
153153
154154
Escaping
155155
156156
Escape test
157157
158-
#1: he.escape x 1,126,149 ops/sec ±0.23% (98 runs sampled)
159-
* #2: html-entities.encode - xml, specialChars x 1,077,095 ops/sec ±1.09% (94 runs sampled)
160-
#3: entities.escapeUTF8 x 724,973 ops/sec ±0.25% (98 runs sampled)
161-
#4: entities.escape x 316,363 ops/sec ±0.20% (97 runs sampled)
158+
* #1: html-entities.encode - xml, specialChars x 1,583,074 ops/sec ±0.24% (95 runs sampled)
159+
#2: he.escape x 1,131,879 ops/sec ±1.65% (94 runs sampled)
160+
#3: entities.escapeUTF8 x 736,205 ops/sec ±0.28% (94 runs sampled)
161+
#4: entities.escape x 314,225 ops/sec ±0.24% (93 runs sampled)
162162
```
163163

164164
License

src/index.ts

Lines changed: 69 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,42 @@ export function encode(
4646
if (!text) {
4747
return '';
4848
}
49+
50+
const encodeRegExp = encodeRegExps[mode];
51+
encodeRegExp.lastIndex = 0;
52+
53+
let match = encodeRegExp.exec(text);
54+
55+
if (!match) {
56+
return text;
57+
}
58+
4959
const references = allNamedReferences[level].characters;
5060
const isHex = numeric === 'hexadecimal';
5161

52-
return text.replace(encodeRegExps[mode], function (input) {
62+
let lastIndex = 0;
63+
let result = '';
64+
65+
do {
66+
if (lastIndex !== match.index) {
67+
result += text.substring(lastIndex, match.index);
68+
}
69+
const input = match[0];
5370
const entity = references[input];
5471
if (entity) {
55-
return entity;
72+
result += entity;
73+
} else {
74+
const code = input.length > 1 ? getCodePoint(input, 0)! : input.charCodeAt(0);
75+
result += (isHex ? '&#x' + code.toString(16) : '&#' + code) + ';';
5676
}
57-
const code = input.length > 1 ? getCodePoint(input, 0)! : input.charCodeAt(0);
58-
return (isHex ? '&#x' + code.toString(16) : '&#' + code) + ';';
59-
});
77+
lastIndex = match.index + input.length;
78+
} while ((match = encodeRegExp.exec(text)));
79+
80+
if (lastIndex !== text.length) {
81+
result += text.substring(lastIndex, text.length);
82+
}
83+
84+
return result;
6085
}
6186

6287
const defaultDecodeOptions: DecodeOptions = {
@@ -100,24 +125,48 @@ export function decode(
100125
if (!text) {
101126
return '';
102127
}
128+
const decodeRegExp = decodeRegExps[level][scope];
129+
130+
let match = decodeRegExp.exec(text);
131+
132+
if (!match) {
133+
return text;
134+
}
135+
103136
const references = allNamedReferences[level].entities;
104137
const isAttribute = scope === 'attribute';
105138

106-
return text.replace(decodeRegExps[level][scope], function (entity) {
107-
if (isAttribute && entity[entity.length - 1] === '=') {
108-
return entity;
139+
let lastIndex = 0;
140+
let result = '';
141+
142+
do {
143+
const entity = match[0];
144+
if (lastIndex !== match.index) {
145+
result += text.substring(lastIndex, match.index);
109146
}
110-
if (entity[1] != '#') {
111-
return references[entity] || entity;
147+
if (isAttribute && entity[entity.length - 1] === '=') {
148+
result += entity;
149+
} else if (entity[1] != '#') {
150+
result += references[entity] || entity;
151+
} else {
152+
const secondChar = entity[2];
153+
const code =
154+
secondChar == 'x' || secondChar == 'X' ? parseInt(entity.substr(3), 16) : parseInt(entity.substr(2));
155+
156+
result +=
157+
code >= 0x10ffff
158+
? outOfBoundsChar
159+
: code > 65535
160+
? fromCodePoint(code)
161+
: fromCharCode(numericUnicodeMap[code] || code);
112162
}
113-
const secondChar = entity[2];
114-
const code =
115-
secondChar == 'x' || secondChar == 'X' ? parseInt(entity.substr(3), 16) : parseInt(entity.substr(2));
116-
117-
return code >= 0x10ffff
118-
? outOfBoundsChar
119-
: code > 65535
120-
? fromCodePoint(code)
121-
: fromCharCode(numericUnicodeMap[code] || code);
122-
});
163+
164+
lastIndex = match.index + entity.length;
165+
} while ((match = decodeRegExp.exec(text)));
166+
167+
if (lastIndex !== text.length) {
168+
result += text.substring(lastIndex, text.length);
169+
}
170+
171+
return result;
123172
}

test/index.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ describe('encode()', () => {
1919
expect(encode('a\n<>"\'&©∆℞😂\0\x01', {mode: 'specialChars'})).to.equal(
2020
'a\n&lt;&gt;&quot;&apos;&amp;©∆℞😂\0\x01'
2121
);
22+
expect(encode('a\n<>"\'&©∆℞😂\0\x01END', {mode: 'specialChars'})).to.equal(
23+
'a\n&lt;&gt;&quot;&apos;&amp;©∆℞😂\0\x01END'
24+
);
2225
expect(encode('a\n<>"\'&©∆℞😂\0\x01', {mode: 'nonAscii'})).to.equal(
2326
'a\n&lt;&gt;&quot;&apos;&amp;&copy;&#8710;&rx;&#128514;\0\x01'
2427
);

0 commit comments

Comments
 (0)