@@ -9,68 +9,48 @@ export function decode(arr, loose, start = 0) {
99 start |= 0
1010 const end = arr . length
1111 let out = ''
12- const tmp = [ ]
12+ const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
13+ const tmp = new Array ( chunkSize + 1 ) . fill ( 0 ) // need 1 extra slot for last codepoint, which can be 2 charcodes
14+ let ti = 0
1315
1416 for ( let i = start ; i < end ; i ++ ) {
15- if ( tmp . length > 0x2_00 ) {
16- // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
17- // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
17+ if ( ti >= chunkSize ) {
18+ tmp . length = ti // can be larger by 1 if last codepoint is two charcodes
1819 out += String . fromCharCode . apply ( String , tmp )
19- tmp . length = 0
20+ if ( tmp . length <= chunkSize ) tmp . push ( 0 ) // restore 1 extra slot for last codepoint
21+ ti = 0
2022 }
2123
2224 const byte = arr [ i ]
2325 if ( byte < 0x80 ) {
24- // Fast path ascii
25- tmp . push ( byte )
26- // Unroll the loop a bit for faster ops, overshoot by 20 chars
27- for ( let j = 0 ; j < 5 ; j ++ ) {
28- if ( i + 1 >= end ) break
29- const byte1 = arr [ i + 1 ]
30- if ( byte1 >= 0x80 ) break
31- tmp . push ( byte1 )
32- i ++
33- if ( i + 1 >= end ) break
34- const byte2 = arr [ i + 1 ]
35- if ( byte2 >= 0x80 ) break
36- tmp . push ( byte2 )
37- i ++
38- if ( i + 1 >= end ) break
39- const byte3 = arr [ i + 1 ]
40- if ( byte3 >= 0x80 ) break
41- tmp . push ( byte3 )
42- i ++
43- if ( i + 1 >= end ) break
44- const byte4 = arr [ i + 1 ]
45- if ( byte4 >= 0x80 ) break
46- tmp . push ( byte4 )
47- i ++
48- }
26+ tmp [ ti ++ ] = byte
27+ // ascii fast path is in ../utf8.js, this is called only on non-ascii input
28+ // so we don't unroll this anymore
4929 } else if ( byte < 0xc2 ) {
5030 if ( ! loose ) throw new TypeError ( E_STRICT )
51- tmp . push ( replacementPoint )
31+ tmp [ ti ++ ] = replacementPoint
5232 } else if ( byte < 0xe0 ) {
5333 // need 1 more
5434 if ( i + 1 >= end ) {
5535 if ( ! loose ) throw new TypeError ( E_STRICT )
56- tmp . push ( replacementPoint )
36+ tmp [ ti ++ ] = replacementPoint
5737 break
5838 }
5939
6040 const byte1 = arr [ i + 1 ]
6141 if ( byte1 < 0x80 || byte1 > 0xbf ) {
6242 if ( ! loose ) throw new TypeError ( E_STRICT )
63- tmp . push ( replacementPoint )
43+ tmp [ ti ++ ] = replacementPoint
6444 continue
6545 }
6646
6747 i ++
68- tmp . push ( ( ( byte & 0x1f ) << 6 ) | ( byte1 & 0x3f ) )
48+ tmp [ ti ++ ] = ( ( byte & 0x1f ) << 6 ) | ( byte1 & 0x3f )
6949 } else if ( byte < 0xf0 ) {
7050 // need 2 more
7151 if ( i + 1 >= end ) {
7252 if ( ! loose ) throw new TypeError ( E_STRICT )
73- tmp . push ( replacementPoint )
53+ tmp [ ti ++ ] = replacementPoint
7454 break
7555 }
7656
@@ -79,31 +59,31 @@ export function decode(arr, loose, start = 0) {
7959 const byte1 = arr [ i + 1 ]
8060 if ( byte1 < lower || byte1 > upper ) {
8161 if ( ! loose ) throw new TypeError ( E_STRICT )
82- tmp . push ( replacementPoint )
62+ tmp [ ti ++ ] = replacementPoint
8363 continue
8464 }
8565
8666 i ++
8767 if ( i + 1 >= end ) {
8868 if ( ! loose ) throw new TypeError ( E_STRICT )
89- tmp . push ( replacementPoint )
69+ tmp [ ti ++ ] = replacementPoint
9070 break
9171 }
9272
9373 const byte2 = arr [ i + 1 ]
9474 if ( byte2 < 0x80 || byte2 > 0xbf ) {
9575 if ( ! loose ) throw new TypeError ( E_STRICT )
96- tmp . push ( replacementPoint )
76+ tmp [ ti ++ ] = replacementPoint
9777 continue
9878 }
9979
10080 i ++
101- tmp . push ( ( ( byte & 0xf ) << 12 ) | ( ( byte1 & 0x3f ) << 6 ) | ( byte2 & 0x3f ) )
81+ tmp [ ti ++ ] = ( ( byte & 0xf ) << 12 ) | ( ( byte1 & 0x3f ) << 6 ) | ( byte2 & 0x3f )
10282 } else if ( byte <= 0xf4 ) {
10383 // need 3 more
10484 if ( i + 1 >= end ) {
10585 if ( ! loose ) throw new TypeError ( E_STRICT )
106- tmp . push ( replacementPoint )
86+ tmp [ ti ++ ] = replacementPoint
10787 break
10888 }
10989
@@ -112,35 +92,35 @@ export function decode(arr, loose, start = 0) {
11292 const byte1 = arr [ i + 1 ]
11393 if ( byte1 < lower || byte1 > upper ) {
11494 if ( ! loose ) throw new TypeError ( E_STRICT )
115- tmp . push ( replacementPoint )
95+ tmp [ ti ++ ] = replacementPoint
11696 continue
11797 }
11898
11999 i ++
120100 if ( i + 1 >= end ) {
121101 if ( ! loose ) throw new TypeError ( E_STRICT )
122- tmp . push ( replacementPoint )
102+ tmp [ ti ++ ] = replacementPoint
123103 break
124104 }
125105
126106 const byte2 = arr [ i + 1 ]
127107 if ( byte2 < 0x80 || byte2 > 0xbf ) {
128108 if ( ! loose ) throw new TypeError ( E_STRICT )
129- tmp . push ( replacementPoint )
109+ tmp [ ti ++ ] = replacementPoint
130110 continue
131111 }
132112
133113 i ++
134114 if ( i + 1 >= end ) {
135115 if ( ! loose ) throw new TypeError ( E_STRICT )
136- tmp . push ( replacementPoint )
116+ tmp [ ti ++ ] = replacementPoint
137117 break
138118 }
139119
140120 const byte3 = arr [ i + 1 ]
141121 if ( byte3 < 0x80 || byte3 > 0xbf ) {
142122 if ( ! loose ) throw new TypeError ( E_STRICT )
143- tmp . push ( replacementPoint )
123+ tmp [ ti ++ ] = replacementPoint
144124 continue
145125 }
146126
@@ -150,19 +130,21 @@ export function decode(arr, loose, start = 0) {
150130 if ( codePoint > 0xff_ff ) {
151131 // split into char codes as String.fromCharCode is faster than String.fromCodePoint
152132 const u = codePoint - 0x1_00_00
153- tmp . push ( 0xd8_00 + ( ( u >> 10 ) & 0x3_ff ) , 0xdc_00 + ( u & 0x3_ff ) )
133+ tmp [ ti ++ ] = 0xd8_00 + ( ( u >> 10 ) & 0x3_ff )
134+ tmp [ ti ++ ] = 0xdc_00 + ( u & 0x3_ff )
154135 } else {
155- tmp . push ( codePoint )
136+ tmp [ ti ++ ] = codePoint
156137 }
157138 // eslint-disable-next-line sonarjs/no-duplicated-branches
158139 } else {
159140 if ( ! loose ) throw new TypeError ( E_STRICT )
160- tmp . push ( replacementPoint )
141+ tmp [ ti ++ ] = replacementPoint
161142 }
162143 }
163144
164- if ( tmp . length > 0 ) out += String . fromCharCode . apply ( String , tmp )
165- return out
145+ if ( ti === 0 ) return out
146+ tmp . length = ti
147+ return out + String . fromCharCode . apply ( String , tmp )
166148}
167149
168150export function encode ( string , loose ) {
0 commit comments