@@ -178210,27 +178210,37 @@ async function readHeaderFooter(tuple) {
178210178210 });
178211178211}
178212178212
/** @summary Read value of a single field for one entry of the currently loaded cluster
  * @desc Uses pages stored in `rntuple._clusterData`, which is indexed first by column index
  * and then by page number. Page of the entry is located via `numElements` of the pages
  * listed in `builder.pageLocations` for the first column of the field.
  * For `std::string` fields first column is used as offsets and second as characters payload.
  * NOTE(review): offsets are assumed to be cumulative over the whole cluster and payload pages
  * are assumed to be stored in reading order - confirm against deserializePage implementation.
  * @param {object} rntuple - object providing `builder`, `fieldToColumns` and `_clusterData`
  * @param {string} fieldName - name of the field to read
  * @param {number} clusterIndex - index of the cluster inside `builder.pageLocations`
  * @param {number} entryIndex - entry number counted from the beginning of the cluster
  * @return entry value, for `std::string` fields the decoded string
  * @throws {Error} if field descriptor, columns or pages are not found
  * @private */
function readEntry(rntuple, fieldName, clusterIndex, entryIndex) {
   const builder = rntuple.builder,
         field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
         columns = rntuple.fieldToColumns[fieldName];

   if (!field)
      throw new Error(`No descriptor for field ${fieldName}`);
   // empty list is treated like missing one, otherwise columns[0].index fails with obscure TypeError
   if (!columns || (columns.length === 0))
      throw new Error(`No columns field ${fieldName}`);

   const pages = builder.pageLocations[clusterIndex]?.[columns[0].index]?.pages;
   if (!pages)
      throw new Error(`No pages found ${fieldName}`);

   // find page of the first column which holds the entry, keep argument untouched
   let pageid = 0, index = entryIndex;
   while ((pageid < pages.length - 1) && (index >= Number(pages[pageid].numElements))) {
      index -= Number(pages[pageid].numElements);
      pageid++;
   }

   const firstColumn = rntuple._clusterData[columns[0].index];

   if (field.typeName !== 'std::string')
      return firstColumn[pageid][index];

   // string extracted from two columns
   if (columns.length < 2)
      throw new Error(`String field '${fieldName}' must have 2 columns`);

   const end = Number(firstColumn[pageid][index]);
   let start = 0;
   if (index > 0)
      start = Number(firstColumn[pageid][index - 1]);
   else {
      // first element of the page - string starts where last entry of previous pages ended
      for (let p = pageid - 1; p >= 0; --p) {
         const prev = firstColumn[p];
         if (prev?.length) {
            start = Number(prev[prev.length - 1]);
            break;
         }
      }
   }

   // payload column has its own page boundaries, therefore collect range [start, end)
   // from all payload pages as if they were one continuous array
   let res = '', pos = 0;
   for (const chunk of rntuple._clusterData[columns[1].index]) {
      const next = pos + chunk.length;
      if (next > start)
         res += chunk.slice(Math.max(start - pos, 0), Math.min(end - pos, chunk.length)).join(''); // Convert to string
      pos = next;
      if (pos >= end)
         break;
   }
   return res;
}
178235178245
178236178246/** @summary Return field name for specified branch index
@@ -178241,11 +178251,11 @@ function getSelectorFieldName(selector, i) {
178241178251}
178242178252
178243178253// Read and process the next data cluster from the RNTuple
178244- function readNextCluster(rntuple, selector) {
178254+ async function readNextCluster(rntuple, selector) {
178245178255 const builder = rntuple.builder;
178246178256
178247178257 // Add validation
178248- if (!builder.clusterSummaries || builder.clusterSummaries.length === 0 )
178258+ if (!builder.clusterSummaries)
178249178259 throw new Error('No cluster summaries available - possibly incomplete file reading');
178250178260
178251178261 const clusterIndex = selector.currentCluster,
@@ -178255,6 +178265,11 @@ function readNextCluster(rntuple, selector) {
178255178265 // Collect only selected field names from selector
178256178266 selectedFields = [];
178257178267
178268+ if (!clusterSummary) {
178269+ selector.Terminate(clusterIndex > 0);
178270+ return false;
178271+ }
178272+
178258178273 for (let i = 0; i < selector.numBranches(); ++i)
178259178274 selectedFields.push(getSelectorFieldName(selector, i));
178260178275
@@ -178281,7 +178296,7 @@ function readNextCluster(rntuple, selector) {
178281178296 // Early exit if no pages to read (i.e., no selected fields matched)
178282178297 if (pages.length === 0) {
178283178298 selector.Terminate(false);
178284- return Promise.resolve() ;
178299+ return false ;
178285178300 }
178286178301
178287178302 // Build flat array of [offset, size, offset, size, ...] to read pages
@@ -178328,68 +178343,35 @@ function readNextCluster(rntuple, selector) {
178328178343 });
178329178344
178330178345 return Promise.all(unzipPromises).then(unzipBlobs => {
178331- rntuple._clusterData = {}; // store deserialized data per field
178346+ rntuple._clusterData = {}; // store deserialized data per column index
178332178347
178333178348 for (let i = 0; i < unzipBlobs.length; ++i) {
178334178349 const blob = unzipBlobs[i];
178335178350 // Ensure blob is a DataView
178336178351 if (!(blob instanceof DataView))
178337178352 throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(blob)}`);
178338- const {
178339- page,
178340- colDesc
178341- } = pages[i],
178342- field = builder.fieldDescriptors[colDesc.fieldId],
178343- values = builder.deserializePage(blob, colDesc, page);
178353+ const colDesc = pages[i].colDesc,
178354+ values = builder.deserializePage(blob, colDesc, pages[i].page);
178344178355
178345178356 // Support multiple representations (e.g., string fields with offsets + payload)
178346- if (!rntuple._clusterData[field.fieldName])
178347- rntuple._clusterData[field.fieldName] = [];
178348-
178349- // splitting string fields into offset and payload components
178350- if (field.typeName === 'std::string') {
178351- if (
178352- colDesc.coltype === ENTupleColumnType.kIndex64 ||
178353- colDesc.coltype === ENTupleColumnType.kIndex32 ||
178354- colDesc.coltype === ENTupleColumnType.kSplitIndex64 ||
178355- colDesc.coltype === ENTupleColumnType.kSplitIndex32
178356- ) // Index64/Index32
178357- rntuple._clusterData[field.fieldName][0] = values; // Offsets
178358- else if (colDesc.coltype === ENTupleColumnType.kChar)
178359- rntuple._clusterData[field.fieldName][1] = values; // Payload
178360- else
178361- throw new Error(`Unsupported column type for string field: ${colDesc.coltype}`);
178362- } else
178363- rntuple._clusterData[field.fieldName][0] = values;
178364- }
178357+ if (!rntuple._clusterData[colDesc.index])
178358+ rntuple._clusterData[colDesc.index] = [];
178365178359
178366- // Ensure string fields have ending offset for proper reconstruction of the last entry
178367- for (const fieldName of selectedFields) {
178368- const field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
178369- colData = rntuple._clusterData[fieldName];
178370- if (field.typeName === 'std::string') {
178371- if (!Array.isArray(colData) || colData.length !== 2)
178372- throw new Error(`String field '${fieldName}' must have 2 columns`);
178373- if (colData[0].length !== builder.clusterSummaries[clusterIndex].numEntries)
178374- throw new Error(`Malformed string field '${fieldName}': missing final offset`);
178375- }
178360+ rntuple._clusterData[colDesc.index].push(values);
178376178361 }
178377178362
178378178363 const numEntries = clusterSummary.numEntries;
178379178364 for (let i = 0; i < numEntries; ++i) {
178380178365 for (let b = 0; b < selector.numBranches(); ++b) {
178381178366 const fieldName = getSelectorFieldName(selector, b),
178382- tgtName = selector.nameOfBranch(b),
178383- values = rntuple._clusterData[fieldName];
178367+ tgtName = selector.nameOfBranch(b);
178384178368
178385- if (!values)
178386- throw new Error(`Missing values for selected field: ${fieldName}`);
178387- selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, i);
178369+ selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, clusterIndex, i);
178388178370 }
178389- selector.Process();
178371+ selector.Process(selector.currentEntry++ );
178390178372 }
178391178373
178392- selector.Terminate(true );
178374+ return readNextCluster(rntuple, selector );
178393178375 });
178394178376 });
178395178377}
@@ -178400,6 +178382,7 @@ function rntupleProcess(rntuple, selector, args) {
178400178382 return readHeaderFooter(rntuple).then(() => {
178401178383 selector.Begin();
178402178384 selector.currentCluster = 0;
178385+ selector.currentEntry = 0;
178403178386 return readNextCluster(rntuple, selector);
178404178387 }).then(() => selector);
178405178388}
0 commit comments