Skip to content

Commit e28fbfb

Browse files
authored
Merge pull request #13 from hugr-lab/013-batch-table-signature
feat: simplify batch table interfaces and add RemoveField
2 parents 0957854 + d33db59 commit e28fbfb

File tree

20 files changed

+1159
-113
lines changed

20 files changed

+1159
-113
lines changed

CLAUDE.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Auto-generated from all feature plans. Last updated: 2025-11-29
1212
- N/A (storage-agnostic library) (001-batch-table-interfaces)
1313
- Go 1.25+ (matching existing project) + Standard library only (encoding/json for parsing, strings/fmt for encoding). No new external dependencies required. (012-filter-pushdown)
1414
- N/A (pure transformation library) (012-filter-pushdown)
15+
- Go 1.25+ + Apache Arrow Go v18 (`github.com/apache/arrow-go/v18`), gRPC (013-batch-table-signature)
1516

1617
## Project Structure
1718

@@ -76,9 +77,9 @@ go work sync
7677
- No silent failures - errors must be handled explicitly
7778

7879
## Recent Changes
80+
- 013-batch-table-signature: Added Go 1.25+ + Apache Arrow Go v18 (`github.com/apache/arrow-go/v18`), gRPC
7981
- 012-filter-pushdown: Added Go 1.25+ (matching existing project) + Standard library only (encoding/json for parsing, strings/fmt for encoding). No new external dependencies required.
8082
- 001-batch-table-interfaces: Added Go 1.25+ + Apache Arrow Go v18, gRPC, msgpack-go
81-
- 006-returning-optimization: Added Go 1.25+ + Arrow-Go v18, gRPC, msgpack-go
8283

8384
<!-- MANUAL ADDITIONS START -->
8485
<!-- MANUAL ADDITIONS END -->

catalog/dynamic.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,12 @@ type RenameFieldOptions struct {
145145
IgnoreNotFound bool
146146
}
147147

148+
// RemoveFieldOptions configures field removal from struct columns.
149+
type RemoveFieldOptions struct {
150+
// IgnoreNotFound suppresses error if table or column doesn't exist.
151+
IgnoreNotFound bool
152+
}
153+
148154
// CatalogVersion contains the version information for a catalog.
149155
type CatalogVersion struct {
150156
// Version is the current version number of the catalog.
@@ -251,4 +257,9 @@ type DynamicTable interface {
251257
// The columnPath is the path to the field (e.g., ["col", "nested", "field"]).
252258
// Returns ErrNotFound if column path doesn't exist and IgnoreNotFound is false.
253259
RenameField(ctx context.Context, columnPath []string, newName string, opts RenameFieldOptions) error
260+
261+
// RemoveField removes a field from a struct-typed column.
262+
// The columnPath is the path to the field (e.g., ["col", "nested", "field"]).
263+
// Returns ErrNotFound if column path doesn't exist and IfFieldExists is false.
264+
RemoveField(ctx context.Context, columnPath []string, opts RemoveFieldOptions) error
254265
}

catalog/table.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -102,47 +102,49 @@ type DeletableTable interface {
102102
}
103103

104104
// UpdatableBatchTable extends Table with batch-oriented UPDATE capability.
105-
// The Update method receives the complete input RecordReader including the rowid column.
106-
// Implementations extract rowid values from the rowid column in the RecordReader.
105+
// The Update method receives the complete input RecordBatch including the rowid column.
106+
// Implementations extract rowid values from the rowid column in the RecordBatch.
107107
// This interface is preferred over UpdatableTable when both are implemented.
108108
// Implementations MUST be goroutine-safe.
109109
type UpdatableBatchTable interface {
110110
Table
111111

112-
// Update modifies existing rows using data from the RecordReader.
113-
// The rows RecordReader contains both the rowid column (identifying rows to update)
112+
// Update modifies existing rows using data from the RecordBatch.
113+
// The rows RecordBatch contains both the rowid column (identifying rows to update)
114114
// and the new column values. Implementations MUST extract rowid values from
115115
// the rowid column (identified by name "rowid" or metadata key "is_rowid").
116116
// Use FindRowIDColumn(rows.Schema()) to locate the rowid column.
117-
// Row order in RecordReader determines update order.
117+
// Implementations MUST return ErrNullRowID if any rowid value is null.
118+
// Row order in RecordBatch determines update order.
118119
// The opts parameter provides options including RETURNING clause information:
119120
// - opts.Returning: true if RETURNING clause was specified
120121
// - opts.ReturningColumns: column names to include in RETURNING results
121122
// Returns DMLResult with affected row count and optional returning data.
122123
// Context may contain transaction ID for coordinated operations.
123124
// Caller MUST call rows.Release() after Update returns.
124-
Update(ctx context.Context, rows array.RecordReader, opts *DMLOptions) (*DMLResult, error)
125+
Update(ctx context.Context, rows arrow.RecordBatch, opts *DMLOptions) (*DMLResult, error)
125126
}
126127

127128
// DeletableBatchTable extends Table with batch-oriented DELETE capability.
128-
// The Delete method receives a RecordReader containing the rowid column.
129-
// Implementations extract rowid values from the rowid column in the RecordReader.
129+
// The Delete method receives a RecordBatch containing the rowid column.
130+
// Implementations extract rowid values from the rowid column in the RecordBatch.
130131
// This interface is preferred over DeletableTable when both are implemented.
131132
// Implementations MUST be goroutine-safe.
132133
type DeletableBatchTable interface {
133134
Table
134135

135-
// Delete removes rows identified by rowid values in the RecordReader.
136-
// The rows RecordReader contains the rowid column (identified by name "rowid"
136+
// Delete removes rows identified by rowid values in the RecordBatch.
137+
// The rows RecordBatch contains the rowid column (identified by name "rowid"
137138
// or metadata key "is_rowid") that identifies rows to delete.
138139
// Use FindRowIDColumn(rows.Schema()) to locate the rowid column.
140+
// Implementations MUST return ErrNullRowID if any rowid value is null.
139141
// The opts parameter provides options including RETURNING clause information:
140142
// - opts.Returning: true if RETURNING clause was specified
141143
// - opts.ReturningColumns: column names to include in RETURNING results
142144
// Returns DMLResult with affected row count and optional returning data.
143145
// Context may contain transaction ID for coordinated operations.
144146
// Caller MUST call rows.Release() after Delete returns.
145-
Delete(ctx context.Context, rows array.RecordReader, opts *DMLOptions) (*DMLResult, error)
147+
Delete(ctx context.Context, rows arrow.RecordBatch, opts *DMLOptions) (*DMLResult, error)
146148
}
147149

148150
// ColumnStats contains statistics for a single table column.

catalog/types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,17 @@ package catalog
22

33
import (
44
"context"
5+
"errors"
56

67
"github.com/apache/arrow-go/v18/arrow"
78
"github.com/apache/arrow-go/v18/arrow/array"
89
)
910

11+
// ErrNullRowID is returned when a null rowid value is encountered in UPDATE or DELETE operations.
12+
// Implementations of UpdatableBatchTable and DeletableBatchTable MUST return this error
13+
// when they encounter null values in the rowid column of the input Record.
14+
var ErrNullRowID = errors.New("null rowid value not allowed")
15+
1016
// ScanOptions provides options for table scans.
1117
type ScanOptions struct {
1218
// Columns to return. If nil/empty, all columns are projected.

docs/api-guide.md

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -231,46 +231,57 @@ type DeletableTable interface {
231231

232232
### catalog.UpdatableBatchTable
233233

234-
Alternative UPDATE interface where the rowid column is embedded in the RecordReader.
234+
Alternative UPDATE interface where the rowid column is embedded in the RecordBatch.
235235
This interface is preferred over `UpdatableTable` when both are implemented.
236236

237237
```go
238238
type UpdatableBatchTable interface {
239239
Table
240240

241-
// Update modifies existing rows using data from the RecordReader.
242-
// The rows RecordReader contains both the rowid column (identifying rows to update)
241+
// Update modifies existing rows using data from the RecordBatch.
242+
// The rows RecordBatch contains both the rowid column (identifying rows to update)
243243
// and the new column values. Implementations MUST extract rowid values from
244244
// the rowid column (identified by name "rowid" or metadata key "is_rowid").
245245
// Use FindRowIDColumn(rows.Schema()) to locate the rowid column.
246-
// Row order in RecordReader determines update order.
246+
// Implementations MUST return ErrNullRowID if any rowid value is null.
247+
// Row order in RecordBatch determines update order.
247248
// opts contains RETURNING clause information.
248249
// Returns DMLResult with affected row count and optional returning data.
249250
// Caller MUST call rows.Release() after Update returns.
250-
Update(ctx context.Context, rows array.RecordReader, opts *DMLOptions) (*DMLResult, error)
251+
Update(ctx context.Context, rows arrow.RecordBatch, opts *DMLOptions) (*DMLResult, error)
251252
}
252253
```
253254

254255
### catalog.DeletableBatchTable
255256

256-
Alternative DELETE interface where the rowid column is embedded in the RecordReader.
257+
Alternative DELETE interface where the rowid column is embedded in the RecordBatch.
257258
This interface is preferred over `DeletableTable` when both are implemented.
258259

259260
```go
260261
type DeletableBatchTable interface {
261262
Table
262263

263-
// Delete removes rows identified by rowid values in the RecordReader.
264-
// The rows RecordReader contains the rowid column (identified by name "rowid"
264+
// Delete removes rows identified by rowid values in the RecordBatch.
265+
// The rows RecordBatch contains the rowid column (identified by name "rowid"
265266
// or metadata key "is_rowid") that identifies rows to delete.
266267
// Use FindRowIDColumn(rows.Schema()) to locate the rowid column.
268+
// Implementations MUST return ErrNullRowID if any rowid value is null.
267269
// opts contains RETURNING clause information.
268270
// Returns DMLResult with affected row count and optional returning data.
269271
// Caller MUST call rows.Release() after Delete returns.
270-
Delete(ctx context.Context, rows array.RecordReader, opts *DMLOptions) (*DMLResult, error)
272+
Delete(ctx context.Context, rows arrow.RecordBatch, opts *DMLOptions) (*DMLResult, error)
271273
}
272274
```
273275

276+
### catalog.ErrNullRowID
277+
278+
Sentinel error for null rowid values:
279+
280+
```go
281+
// ErrNullRowID is returned when a null rowid value is encountered in UPDATE or DELETE operations.
282+
var ErrNullRowID = errors.New("null rowid value not allowed")
283+
```
284+
274285
### catalog.FindRowIDColumn
275286

276287
Helper function to locate the rowid column in a schema:
@@ -288,21 +299,26 @@ func FindRowIDColumn(schema *arrow.Schema) int
288299
**Example usage:**
289300

290301
```go
291-
func (t *MyTable) Update(ctx context.Context, rows array.RecordReader, opts *catalog.DMLOptions) (*catalog.DMLResult, error) {
302+
func (t *MyTable) Update(ctx context.Context, rows arrow.RecordBatch, opts *catalog.DMLOptions) (*catalog.DMLResult, error) {
292303
rowidIdx := catalog.FindRowIDColumn(rows.Schema())
293304
if rowidIdx == -1 {
294305
return nil, errors.New("rowid column required")
295306
}
296307

297-
var affected int64
298-
for rows.Next() {
299-
batch := rows.RecordBatch()
300-
rowidCol := batch.Column(rowidIdx).(*array.Int64)
301-
// Process updates using rowidCol values...
302-
affected += batch.NumRows()
308+
// Check for null rowids
309+
rowidCol := rows.Column(rowidIdx)
310+
if rowidCol.NullN() > 0 {
311+
return nil, catalog.ErrNullRowID
303312
}
304313

305-
return &catalog.DMLResult{AffectedRows: affected}, nil
314+
// Direct access to RecordBatch - no iterator needed
315+
rowidArr := rowidCol.(*array.Int64)
316+
for i := 0; i < int(rows.NumRows()); i++ {
317+
rowID := rowidArr.Value(i)
318+
// Process update for rowID...
319+
}
320+
321+
return &catalog.DMLResult{AffectedRows: rows.NumRows()}, nil
306322
}
307323
```
308324

@@ -500,6 +516,9 @@ type DynamicTable interface {
500516

501517
// RenameField renames a field in a struct-typed column.
502518
RenameField(ctx context.Context, columnPath []string, newName string, opts RenameFieldOptions) error
519+
520+
// RemoveField removes a field from a struct-typed column.
521+
RemoveField(ctx context.Context, columnPath []string, opts RemoveFieldOptions) error
503522
}
504523

505524
type AddColumnOptions struct {
@@ -541,6 +560,10 @@ type AddFieldOptions struct {
541560
type RenameFieldOptions struct {
542561
IgnoreNotFound bool
543562
}
563+
564+
type RemoveFieldOptions struct {
565+
IgnoreNotFound bool // Don't error if table/column doesn't exist
566+
}
544567
```
545568

546569
## Statistics Interface

examples/ddl/main.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,3 +628,16 @@ func (t *DDLTable) RenameField(_ context.Context, columnPath []string, newName s
628628
fmt.Printf("[DDLTable:%s] RenameField called: %v -> %s (simplified)\n", t.name, columnPath, newName)
629629
return nil
630630
}
631+
632+
// RemoveField implements catalog.DynamicTable.
633+
func (t *DDLTable) RemoveField(_ context.Context, columnPath []string, opts catalog.RemoveFieldOptions) error {
634+
// Simplified implementation - real implementation would handle nested struct fields
635+
if len(columnPath) == 0 {
636+
if opts.IgnoreNotFound {
637+
return nil
638+
}
639+
return catalog.ErrNotFound
640+
}
641+
fmt.Printf("[DDLTable:%s] RemoveField called: %v (simplified)\n", t.name, columnPath)
642+
return nil
643+
}

examples/dml/main.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,10 @@ func (m *InMemoryTransactionManager) GetTransactionStatus(_ context.Context, txI
216216
// and catalog.DeletableTable interfaces.
217217
//
218218
// Alternative: You can implement catalog.UpdatableBatchTable and catalog.DeletableBatchTable
219-
// instead (or in addition). The batch interfaces receive rowid embedded in the RecordReader
220-
// rather than as a separate []int64 slice. Use catalog.FindRowIDColumn(rows.Schema())
221-
// to locate the rowid column. Batch interfaces are preferred when both are implemented.
219+
// instead (or in addition). The batch interfaces receive a single arrow.RecordBatch containing
220+
// all data including the rowid column, rather than a separate []int64 slice for rowids.
221+
// Use catalog.FindRowIDColumn(rows.Schema()) to locate the rowid column.
222+
// Batch interfaces are preferred when both are implemented.
222223
type UsersTable struct {
223224
schema *arrow.Schema
224225
alloc memory.Allocator

flight/doaction.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ func (s *Server) DoAction(action *flight.Action, stream flight.FlightService_DoA
6969
return s.handleAddFieldAction(ctx, action, stream)
7070
case "rename_field":
7171
return s.handleRenameFieldAction(ctx, action, stream)
72+
case "remove_field":
73+
return s.handleRemoveFieldAction(ctx, action, stream)
7274

7375
// Catalog version action
7476
case "catalog_version":

0 commit comments

Comments
 (0)