Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"changes": [
{
"comment": "feat: support `countField` option of `bin` and export `percentage`\n\n",
"type": "none",
"packageName": "@visactor/vdataset"
}
],
"packageName": "@visactor/vdataset",
"email": "dingling112@gmail.com"
}
36 changes: 36 additions & 0 deletions packages/vdataset/__tests__/bin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,40 @@ describe('bin transform', () => {
const total = out.reduce((s: number, b: any) => s + b.cnt, 0);
expect(total).toBe(3);
});

test('countField is used as weights and percentage calculated correctly', () => {
  // Each row carries its weight in `w`; v=1 and v=2 fall below the split at 5, v=8 falls above it.
  const rows = [
    { v: 1, w: 2 },
    { v: 2, w: 3 },
    { v: 8, w: 5 }
  ];
  // Thresholds 0/5/10 produce exactly two bins: [0,5) and [5,10].
  const binOptions = { field: 'v', thresholds: [0, 5, 10], countField: 'w' };
  const result: any = bin(rows, binOptions);

  expect(result.length).toBe(2);

  const lowerBin = result[0];
  const upperBin = result[1];

  // Weighted counts: 2 + 3 = 5 in the lower bin, 5 in the upper bin.
  expect(lowerBin.count).toBe(5);
  expect(upperBin.count).toBe(5);

  // Both bins hold half of the total weight (5 / 10).
  expect(lowerBin.percentage).toBeCloseTo(0.5, 12);
  expect(upperBin.percentage).toBeCloseTo(0.5, 12);
});

test('renamed percentage field via outputNames is present and correct', () => {
  // Weights live in `w`; v=1 and v=2 land in [0,5), v=9 lands in [5,10].
  const data = [
    { v: 1, w: 1 },
    { v: 2, w: 1 },
    { v: 9, w: 2 }
  ];
  const out: any = bin(data, {
    field: 'v',
    thresholds: [0, 5, 10],
    countField: 'w',
    outputNames: { percentage: 'pct' }
  });
  expect(out.length).toBe(2);

  // Only `percentage` was renamed, so the count stays under its default key `count`
  // and no stray `cnt` key appears.
  expect(out[0].cnt).toBeUndefined();
  // Weighted counts: first bin 1 + 1 = 2, second bin 2.
  expect(out[0].count).toBe(2);
  expect(out[1].count).toBe(2);

  // The renamed key carries the share of the total weight (2 / 4 each)...
  expect(out[0].pct).toBeCloseTo(0.5, 12);
  expect(out[1].pct).toBeCloseTo(0.5, 12);
  // ...and replaces the default key rather than being written next to it.
  expect(out[0].percentage).toBeUndefined();
  expect(out[1].percentage).toBeUndefined();
});
});
28 changes: 20 additions & 8 deletions packages/vdataset/src/transform/bin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ export interface IBinOptions {
* numeric field to bin
*/
field: string;
/**
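* name of the field holding each datum's count (weight) that is added to its bin; a datum counts as 1 when this field is null or undefined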
*/
countField?: string;
/**
* number of bins (default 10)
*/
Expand All @@ -29,7 +33,7 @@ export interface IBinOptions {
/**
* the field name of output data
*/
outputNames?: { x0?: string; x1?: string; count?: string; values?: string };
outputNames?: { x0?: string; x1?: string; count?: string; values?: string; percentage?: string };
}

/**
Expand All @@ -41,13 +45,13 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
if (!field) {
return [];
}

const countField = options.countField;
const n = data.length;
// compute data-driven extent
let min = Infinity;
let max = -Infinity;

if (options?.extent) {
if (options.extent) {
min = options.extent[0];
max = options.extent[1];
} else {
Expand All @@ -74,11 +78,11 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {

// build thresholds
let thresholds: number[] | undefined;
if (options && options.thresholds && options.thresholds.length) {
if (options.thresholds && options.thresholds.length) {
// explicit thresholds provided by user
thresholds = options.thresholds.slice();
thresholds.sort((a, b) => a - b);
} else if (options && typeof options.step === 'number' && options.step > 0) {
} else if (typeof options.step === 'number' && options.step > 0) {
// fixed bin width (step) provided: compute number of bins to cover [min, max]
const stepSize = options.step;
let startMin = min;
Expand All @@ -94,7 +98,7 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
}
} else {
// fallback to bins count (default 10)
const bins = options?.bins && options.bins > 0 ? Math.floor(options.bins) : 10;
const bins = options.bins && options.bins > 0 ? Math.floor(options.bins) : 10;
const stepSize = (max - min) / bins;
thresholds = new Array(bins + 1);
for (let i = 0; i <= bins; i++) {
Expand All @@ -111,14 +115,16 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
const x1Name = options.outputNames?.x1 ?? 'x1';
const countName = options.outputNames?.count ?? 'count';
const valuesName = options.outputNames?.values ?? 'values';
const percentageName = options.outputNames?.percentage ?? 'percentage';
const out: any[] = new Array(numBins);
for (let i = 0; i < numBins; i++) {
out[i] = { [x0Name]: thresholds[i], [x1Name]: thresholds[i + 1], [countName]: 0 };
if (options?.includeValues) {
if (options.includeValues) {
out[i][valuesName] = [] as object[];
}
}

let totalCount = 0;
// assign each datum to a bin (left-inclusive, right-exclusive except last bin includes max)
for (let i = 0; i < n; i++) {
const v: any = (data[i] as any)[field];
Expand All @@ -136,7 +142,9 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
const right = out[j][x1Name];
const isLast = j === numBins - 1;
if ((num >= left && num < right) || (isLast && num <= right)) {
out[j][countName]++;
const count = (data[i] as any)[countField] ?? 1;
out[j][countName] += count;
totalCount += count;
if (options && options.includeValues) {
out[j][valuesName].push(data[i]);
}
Expand All @@ -145,6 +153,10 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
}
}

for (let i = 0, len = out.length; i < len; i++) {
out[i][percentageName] = totalCount > 0 ? out[i][countName] / totalCount : 0;
}

return out;
};

Expand Down
Loading