Skip to content

Commit 052feb9

Browse files
committed
perf: avoid push() in utf8toString fallback
1 parent f60874c commit 052feb9

File tree

1 file changed

+32
-50
lines changed

1 file changed

+32
-50
lines changed

fallback/utf8.js

Lines changed: 32 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -9,68 +9,48 @@ export function decode(arr, loose, start = 0) {
99
start |= 0
1010
const end = arr.length
1111
let out = ''
12-
const tmp = []
12+
const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
13+
const tmp = new Array(chunkSize + 1).fill(0) // need 1 extra slot for last codepoint, which can be 2 charcodes
14+
let ti = 0
1315

1416
for (let i = start; i < end; i++) {
15-
if (tmp.length > 0x2_00) {
16-
// far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
17-
// length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
17+
if (ti >= chunkSize) {
18+
tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
1819
out += String.fromCharCode.apply(String, tmp)
19-
tmp.length = 0
20+
if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
21+
ti = 0
2022
}
2123

2224
const byte = arr[i]
2325
if (byte < 0x80) {
24-
// Fast path ascii
25-
tmp.push(byte)
26-
// Unroll the loop a bit for faster ops, overshoot by 20 chars
27-
for (let j = 0; j < 5; j++) {
28-
if (i + 1 >= end) break
29-
const byte1 = arr[i + 1]
30-
if (byte1 >= 0x80) break
31-
tmp.push(byte1)
32-
i++
33-
if (i + 1 >= end) break
34-
const byte2 = arr[i + 1]
35-
if (byte2 >= 0x80) break
36-
tmp.push(byte2)
37-
i++
38-
if (i + 1 >= end) break
39-
const byte3 = arr[i + 1]
40-
if (byte3 >= 0x80) break
41-
tmp.push(byte3)
42-
i++
43-
if (i + 1 >= end) break
44-
const byte4 = arr[i + 1]
45-
if (byte4 >= 0x80) break
46-
tmp.push(byte4)
47-
i++
48-
}
26+
tmp[ti++] = byte
27+
// ascii fast path is in ../utf8.js, this is called only on non-ascii input
28+
// so we don't unroll this anymore
4929
} else if (byte < 0xc2) {
5030
if (!loose) throw new TypeError(E_STRICT)
51-
tmp.push(replacementPoint)
31+
tmp[ti++] = replacementPoint
5232
} else if (byte < 0xe0) {
5333
// need 1 more
5434
if (i + 1 >= end) {
5535
if (!loose) throw new TypeError(E_STRICT)
56-
tmp.push(replacementPoint)
36+
tmp[ti++] = replacementPoint
5737
break
5838
}
5939

6040
const byte1 = arr[i + 1]
6141
if (byte1 < 0x80 || byte1 > 0xbf) {
6242
if (!loose) throw new TypeError(E_STRICT)
63-
tmp.push(replacementPoint)
43+
tmp[ti++] = replacementPoint
6444
continue
6545
}
6646

6747
i++
68-
tmp.push(((byte & 0x1f) << 6) | (byte1 & 0x3f))
48+
tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
6949
} else if (byte < 0xf0) {
7050
// need 2 more
7151
if (i + 1 >= end) {
7252
if (!loose) throw new TypeError(E_STRICT)
73-
tmp.push(replacementPoint)
53+
tmp[ti++] = replacementPoint
7454
break
7555
}
7656

@@ -79,31 +59,31 @@ export function decode(arr, loose, start = 0) {
7959
const byte1 = arr[i + 1]
8060
if (byte1 < lower || byte1 > upper) {
8161
if (!loose) throw new TypeError(E_STRICT)
82-
tmp.push(replacementPoint)
62+
tmp[ti++] = replacementPoint
8363
continue
8464
}
8565

8666
i++
8767
if (i + 1 >= end) {
8868
if (!loose) throw new TypeError(E_STRICT)
89-
tmp.push(replacementPoint)
69+
tmp[ti++] = replacementPoint
9070
break
9171
}
9272

9373
const byte2 = arr[i + 1]
9474
if (byte2 < 0x80 || byte2 > 0xbf) {
9575
if (!loose) throw new TypeError(E_STRICT)
96-
tmp.push(replacementPoint)
76+
tmp[ti++] = replacementPoint
9777
continue
9878
}
9979

10080
i++
101-
tmp.push(((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f))
81+
tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
10282
} else if (byte <= 0xf4) {
10383
// need 3 more
10484
if (i + 1 >= end) {
10585
if (!loose) throw new TypeError(E_STRICT)
106-
tmp.push(replacementPoint)
86+
tmp[ti++] = replacementPoint
10787
break
10888
}
10989

@@ -112,35 +92,35 @@ export function decode(arr, loose, start = 0) {
11292
const byte1 = arr[i + 1]
11393
if (byte1 < lower || byte1 > upper) {
11494
if (!loose) throw new TypeError(E_STRICT)
115-
tmp.push(replacementPoint)
95+
tmp[ti++] = replacementPoint
11696
continue
11797
}
11898

11999
i++
120100
if (i + 1 >= end) {
121101
if (!loose) throw new TypeError(E_STRICT)
122-
tmp.push(replacementPoint)
102+
tmp[ti++] = replacementPoint
123103
break
124104
}
125105

126106
const byte2 = arr[i + 1]
127107
if (byte2 < 0x80 || byte2 > 0xbf) {
128108
if (!loose) throw new TypeError(E_STRICT)
129-
tmp.push(replacementPoint)
109+
tmp[ti++] = replacementPoint
130110
continue
131111
}
132112

133113
i++
134114
if (i + 1 >= end) {
135115
if (!loose) throw new TypeError(E_STRICT)
136-
tmp.push(replacementPoint)
116+
tmp[ti++] = replacementPoint
137117
break
138118
}
139119

140120
const byte3 = arr[i + 1]
141121
if (byte3 < 0x80 || byte3 > 0xbf) {
142122
if (!loose) throw new TypeError(E_STRICT)
143-
tmp.push(replacementPoint)
123+
tmp[ti++] = replacementPoint
144124
continue
145125
}
146126

@@ -150,19 +130,21 @@ export function decode(arr, loose, start = 0) {
150130
if (codePoint > 0xff_ff) {
151131
// split into char codes as String.fromCharCode is faster than String.fromCodePoint
152132
const u = codePoint - 0x1_00_00
153-
tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
133+
tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
134+
tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
154135
} else {
155-
tmp.push(codePoint)
136+
tmp[ti++] = codePoint
156137
}
157138
// eslint-disable-next-line sonarjs/no-duplicated-branches
158139
} else {
159140
if (!loose) throw new TypeError(E_STRICT)
160-
tmp.push(replacementPoint)
141+
tmp[ti++] = replacementPoint
161142
}
162143
}
163144

164-
if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
165-
return out
145+
if (ti === 0) return out
146+
tmp.length = ti
147+
return out + String.fromCharCode.apply(String, tmp)
166148
}
167149

168150
export function encode(string, loose) {

0 commit comments

Comments
 (0)