Skip to content

Commit d79b602

Browse files
authored
perf(encode): bypass token creation for primitives and arrays (#163)
Implement a direct encoding path that writes CBOR bytes without intermediate Token allocation for most value types:
- Primitives: null, boolean, number, bigint, string, Uint8Array
- Arrays: header + recursive direct encode of elements
- Maps: fall back to token-based encoding (required for key sorting)
- Custom typeEncoders: checked per-value, fall back to tokens as needed

Benchmark results (dag-cbor mode):
- Integers: +25-66% encode throughput
- Bytes: +17-20%
- Strings: +15-17%
- Overall w/ current benchmark: ~3% average improvement
1 parent e0748b0 commit d79b602

File tree

6 files changed

+168
-10
lines changed

6 files changed

+168
-10
lines changed

lib/7float.js

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,10 @@ import { encodeUint } from './0uint.js'
1111
* @typedef {import('../interface').EncodeOptions} EncodeOptions
1212
*/
1313

14-
const MINOR_FALSE = 20
15-
const MINOR_TRUE = 21
16-
const MINOR_NULL = 22
17-
const MINOR_UNDEFINED = 23
14+
export const MINOR_FALSE = 20
15+
export const MINOR_TRUE = 21
16+
export const MINOR_NULL = 22
17+
export const MINOR_UNDEFINED = 23
1818

1919
/**
2020
* @param {Uint8Array} _data

lib/encode.js

Lines changed: 152 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,16 @@ import { Token, Type } from './token.js'
33
import { Bl, U8Bl } from './bl.js'
44
import { encodeErrPrefix } from './common.js'
55
import { quickEncodeToken } from './jump.js'
6-
import { asU8A, compare } from './byte-utils.js'
6+
import { asU8A, compare, fromString } from './byte-utils.js'
77

8-
import { encodeUint } from './0uint.js'
8+
import { encodeUint, encodeUintValue } from './0uint.js'
99
import { encodeNegint } from './1negint.js'
1010
import { encodeBytes } from './2bytes.js'
1111
import { encodeString } from './3string.js'
1212
import { encodeArray } from './4array.js'
1313
import { encodeMap } from './5map.js'
1414
import { encodeTag } from './6tag.js'
15-
import { encodeFloat } from './7float.js'
15+
import { encodeFloat, MINOR_FALSE, MINOR_TRUE, MINOR_NULL, MINOR_UNDEFINED } from './7float.js'
1616

1717
/**
1818
* @typedef {import('../interface').EncodeOptions} EncodeOptions
@@ -468,6 +468,139 @@ function tokensToEncoded (writer, tokens, encoders, options) {
468468
}
469469
}
470470

471+
// CBOR major type prefixes, cached from Type for hot path performance
472+
const MAJOR_UINT = Type.uint.majorEncoded
473+
const MAJOR_NEGINT = Type.negint.majorEncoded
474+
const MAJOR_BYTES = Type.bytes.majorEncoded
475+
const MAJOR_STRING = Type.string.majorEncoded
476+
const MAJOR_ARRAY = Type.array.majorEncoded
477+
478+
// Simple value bytes (CBOR major type 7 + minor value)
479+
const SIMPLE_FALSE = Type.float.majorEncoded | MINOR_FALSE
480+
const SIMPLE_TRUE = Type.float.majorEncoded | MINOR_TRUE
481+
const SIMPLE_NULL = Type.float.majorEncoded | MINOR_NULL
482+
const SIMPLE_UNDEFINED = Type.float.majorEncoded | MINOR_UNDEFINED
483+
484+
const neg1b = BigInt(-1)
485+
const pos1b = BigInt(1)
486+
487+
/**
488+
* Check if direct encoding can be used for the given options.
489+
* Direct encoding bypasses token creation for most values.
490+
* @param {EncodeOptions} options
491+
* @returns {boolean}
492+
*/
493+
function canDirectEncode (options) {
494+
// Cannot use direct encode with addBreakTokens (needs special break token handling).
495+
// Direct encode checks typeEncoders per-value, falling back to tokens as needed.
496+
// Maps fall back to token-based encoding for efficient key sorting.
497+
return options.addBreakTokens !== true
498+
}
499+
500+
/**
501+
* Direct encode a value to the writer, bypassing token creation for most types.
502+
* Falls back to token-based encoding when a custom type encoder returns tokens, for Object/Map values, and for any other type without a direct path.
503+
* @param {ByteWriter} writer
504+
* @param {any} data
505+
* @param {EncodeOptions} options
506+
* @param {Reference|undefined} refStack
507+
*/
508+
function directEncode (writer, data, options, refStack) {
509+
const typ = is(data)
510+
511+
// Check for custom encoder for THIS specific type
512+
const customEncoder = options.typeEncoders && options.typeEncoders[typ]
513+
if (customEncoder) {
514+
const tokens = customEncoder(data, typ, options, refStack)
515+
if (tokens != null) {
516+
// Custom encoder returned tokens, serialize immediately
517+
tokensToEncoded(writer, tokens, cborEncoders, options)
518+
return
519+
}
520+
// Custom encoder returned null, fall through to default handling
521+
}
522+
523+
// Direct encode based on type
524+
switch (typ) {
525+
case 'null':
526+
writer.push([SIMPLE_NULL])
527+
return
528+
529+
case 'undefined':
530+
writer.push([SIMPLE_UNDEFINED])
531+
return
532+
533+
case 'boolean':
534+
writer.push([data ? SIMPLE_TRUE : SIMPLE_FALSE])
535+
return
536+
537+
case 'number':
538+
if (!Number.isInteger(data) || !Number.isSafeInteger(data)) {
539+
// Float (or an integer outside the safe integer range): use the token-based float encoder
540+
encodeFloat(writer, new Token(Type.float, data), options)
541+
} else if (data >= 0) {
542+
encodeUintValue(writer, MAJOR_UINT, data)
543+
} else {
544+
// Negative integer
545+
encodeUintValue(writer, MAJOR_NEGINT, data * -1 - 1)
546+
}
547+
return
548+
549+
case 'bigint':
550+
if (data >= BigInt(0)) {
551+
encodeUintValue(writer, MAJOR_UINT, data)
552+
} else {
553+
encodeUintValue(writer, MAJOR_NEGINT, data * neg1b - pos1b)
554+
}
555+
return
556+
557+
case 'string': {
558+
const bytes = fromString(data)
559+
encodeUintValue(writer, MAJOR_STRING, bytes.length)
560+
writer.push(bytes)
561+
return
562+
}
563+
564+
case 'Uint8Array':
565+
encodeUintValue(writer, MAJOR_BYTES, data.length)
566+
writer.push(data)
567+
return
568+
569+
case 'Array':
570+
if (!data.length) {
571+
writer.push([MAJOR_ARRAY]) // Empty array: 0x80
572+
return
573+
}
574+
refStack = Ref.createCheck(refStack, data)
575+
encodeUintValue(writer, MAJOR_ARRAY, data.length)
576+
for (const elem of data) {
577+
directEncode(writer, elem, options, refStack)
578+
}
579+
return
580+
581+
case 'Object':
582+
case 'Map':
583+
// Maps require key sorting, use token-based encoding for efficiency
584+
// (pre-encoding all keys for sorting is expensive)
585+
{
586+
const tokens = typeEncoders.Object(data, typ, options, refStack)
587+
tokensToEncoded(writer, tokens, cborEncoders, options)
588+
}
589+
return
590+
591+
default:
592+
// Fall back to token-based encoding for other types (DataView, TypedArrays, etc.)
593+
{
594+
const typeEncoder = typeEncoders[typ]
595+
if (!typeEncoder) {
596+
throw new Error(`${encodeErrPrefix} unsupported type: ${typ}`)
597+
}
598+
const tokens = typeEncoder(data, typ, options, refStack)
599+
tokensToEncoded(writer, tokens, cborEncoders, options)
600+
}
601+
}
602+
}
603+
471604
/**
472605
* @param {any} data
473606
* @param {TokenTypeEncoder[]} encoders
@@ -518,6 +651,14 @@ function encodeCustom (data, encoders, options, destination) {
518651
*/
519652
function encode (data, options) {
520653
options = Object.assign({}, defaultEncodeOptions, options)
654+
655+
// Use direct encode path when possible
656+
if (canDirectEncode(options)) {
657+
defaultWriter.reset()
658+
directEncode(defaultWriter, data, options, undefined)
659+
return defaultWriter.toBytes(true)
660+
}
661+
521662
return encodeCustom(data, cborEncoders, options)
522663
}
523664

@@ -529,6 +670,14 @@ function encode (data, options) {
529670
*/
530671
function encodeInto (data, destination, options) {
531672
options = Object.assign({}, defaultEncodeOptions, options)
673+
674+
// Use direct encode path when possible
675+
if (canDirectEncode(options)) {
676+
const writer = new U8Bl(destination)
677+
directEncode(writer, data, options, undefined)
678+
return { written: writer.toBytes().length }
679+
}
680+
532681
const result = encodeCustom(data, cborEncoders, options, destination)
533682
return { written: result.length }
534683
}

types/lib/7float.d.ts

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,15 @@ export namespace encodeFloat {
5353
function encodedSize(token: Token, options: EncodeOptions): number;
5454
let compareTokens: (tok1: Token, tok2: Token) => number;
5555
}
56+
/**
57+
* @typedef {import('../interface').ByteWriter} ByteWriter
58+
* @typedef {import('../interface').DecodeOptions} DecodeOptions
59+
* @typedef {import('../interface').EncodeOptions} EncodeOptions
60+
*/
61+
export const MINOR_FALSE: 20;
62+
export const MINOR_TRUE: 21;
63+
export const MINOR_NULL: 22;
64+
export const MINOR_UNDEFINED: 23;
5665
export type ByteWriter = import("../interface").ByteWriter;
5766
export type DecodeOptions = import("../interface").DecodeOptions;
5867
export type EncodeOptions = import("../interface").EncodeOptions;

types/lib/7float.d.ts.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

types/lib/byte-utils.d.ts.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

types/lib/encode.d.ts.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)