From 051070e72cc68c13ad1087baca3198265ce20a21 Mon Sep 17 00:00:00 2001
From: constshift
Date: Wed, 28 Jan 2026 08:41:29 +0100
Subject: [PATCH 1/3] Add in-memory data support for all formats

- Introduced OpenBytes function for in-memory data handling in CSV, TSV, XLS, and XLSX formats.
- Updated registration functions to support in-memory operations.
- Added example demonstrating in-memory file processing.
- Created tests for opening files from byte slices.
---
 examples/example_inmemory.go | 56 ++++++++++++++++++++++++++++
 grate.go                     | 40 +++++++++++++++++---
 inmemory_test.go             | 73 ++++++++++++++++++++++++++++++++++++
 simple/csv.go                | 15 +++++++-
 simple/tsv.go                | 23 +++++++++---
 xls/cfb/interface.go         | 12 ++++++
 xls/xls.go                   | 20 ++++++++--
 xlsx/xlsx.go                 | 23 +++++++++++-
 8 files changed, 244 insertions(+), 18 deletions(-)
 create mode 100644 examples/example_inmemory.go
 create mode 100644 inmemory_test.go

diff --git a/examples/example_inmemory.go b/examples/example_inmemory.go
new file mode 100644
index 0000000..99254cb
--- /dev/null
+++ b/examples/example_inmemory.go
@@ -0,0 +1,56 @@
+// Example demonstrating in-memory file processing with grate
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/pbnjay/grate"
+	_ "github.com/pbnjay/grate/simple"
+	_ "github.com/pbnjay/grate/xls"
+	_ "github.com/pbnjay/grate/xlsx"
+)
+
+// fromBytes reads a workbook fully into memory, opens it with
+// grate.OpenBytes, and prints every row of its first sheet.
+func fromBytes() error {
+	// Read file into memory
+	data, err := os.ReadFile("../testdata/basic.xlsx")
+	if err != nil {
+		return err
+	}
+
+	// Open from memory
+	wb, err := grate.OpenBytes(data)
+	if err != nil {
+		return err
+	}
+	defer wb.Close()
+
+	sheets, err := wb.List()
+	if err != nil {
+		return err
+	}
+	if len(sheets) == 0 {
+		// Guard the sheets[0] access below.
+		return fmt.Errorf("no sheets found")
+	}
+	sheet, err := wb.Get(sheets[0])
+	if err != nil {
+		return err
+	}
+
+	fmt.Println("Example 1: From Bytes")
+	for sheet.Next() {
+		fmt.Println(sheet.Strings())
+	}
+	// Report any error recorded while iterating the sheet.
+	return sheet.Err()
+}
+
+// main runs the in-memory example and reports any failure on stderr.
+func main() {
+	if err := fromBytes(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error in fromBytes: %v\n", err)
+	}
+}
diff --git a/grate.go b/grate.go
index b06f111..93a21e8 100644
--- a/grate.go
+++ b/grate.go
@@ -50,10 +50,14 @@ type
Collection interface { Err() error } -// OpenFunc defines a Source's instantiation function. +// OpenFunc defines a Source's instantiation function from a filename. // It should return ErrNotInFormat immediately if filename is not of the correct file type. type OpenFunc func(filename string) (Source, error) +// OpenBytesFunc defines a Source's instantiation function from in-memory data. +// It should return ErrNotInFormat immediately if data is not of the correct file type. +type OpenBytesFunc func(data []byte) (Source, error) + // Open a tabular data file and return a Source for accessing it's contents. func Open(filename string) (Source, error) { for _, o := range srcTable { @@ -71,20 +75,46 @@ func Open(filename string) (Source, error) { return nil, ErrUnknownFormat } +// OpenBytes opens tabular data from an in-memory byte slice and returns a Source for accessing its contents. +func OpenBytes(data []byte) (Source, error) { + for _, o := range srcTable { + if o.opBytes == nil { + continue + } + src, err := o.opBytes(data) + if err == nil { + return src, nil + } + if !errors.Is(err, ErrNotInFormat) { + return nil, err + } + if Debug { + log.Println(" data is not in", o.name, "format") + } + } + return nil, ErrUnknownFormat +} + type srcOpenTab struct { - name string - pri int - op OpenFunc + name string + pri int + op OpenFunc + opBytes OpenBytesFunc } var srcTable = make([]*srcOpenTab, 0, 20) // Register the named source as a grate datasource implementation. func Register(name string, priority int, opener OpenFunc) error { + return RegisterWithBytes(name, priority, opener, nil) +} + +// RegisterWithBytes registers the named source as a grate datasource implementation with in-memory support. 
+func RegisterWithBytes(name string, priority int, opener OpenFunc, openerBytes OpenBytesFunc) error { if Debug { log.Println("Registering the", name, "format at priority", priority) } - srcTable = append(srcTable, &srcOpenTab{name: name, pri: priority, op: opener}) + srcTable = append(srcTable, &srcOpenTab{name: name, pri: priority, op: opener, opBytes: openerBytes}) sort.Slice(srcTable, func(i, j int) bool { return srcTable[i].pri < srcTable[j].pri }) diff --git a/inmemory_test.go b/inmemory_test.go new file mode 100644 index 0000000..abd3932 --- /dev/null +++ b/inmemory_test.go @@ -0,0 +1,73 @@ +package grate_test + +import ( + "os" + "testing" + + "github.com/pbnjay/grate" + _ "github.com/pbnjay/grate/simple" + _ "github.com/pbnjay/grate/xls" + _ "github.com/pbnjay/grate/xlsx" +) + +func TestOpenBytes(t *testing.T) { + testFiles := []string{ + "testdata/basic.tsv", + "testdata/basic.xls", + "testdata/basic.xlsx", + } + + for _, filename := range testFiles { + t.Run(filename, func(t *testing.T) { + // Read file into memory + data, err := os.ReadFile(filename) + if err != nil { + if os.IsNotExist(err) { + t.Skipf("Test file %s does not exist", filename) + return + } + t.Fatalf("Failed to read test file %s: %v", filename, err) + } + + // Open from memory + wb, err := grate.OpenBytes(data) + if err != nil { + t.Fatalf("Failed to open %s from bytes: %v", filename, err) + } + defer wb.Close() + + // Get sheet list + sheets, err := wb.List() + if err != nil { + t.Fatalf("Failed to list sheets from %s: %v", filename, err) + } + + if len(sheets) == 0 { + t.Fatalf("Expected at least one sheet in %s", filename) + } + + // Open first sheet and read some data + sheet, err := wb.Get(sheets[0]) + if err != nil { + t.Fatalf("Failed to get sheet from %s: %v", filename, err) + } + + rowCount := 0 + for sheet.Next() { + row := sheet.Strings() + if len(row) == 0 { + t.Errorf("Expected non-empty row in %s", filename) + } + rowCount++ + } + + if rowCount == 0 { + t.Errorf("Expected 
at least one row in %s", filename) + } + + if sheet.Err() != nil { + t.Errorf("Error iterating sheet from %s: %v", filename, sheet.Err()) + } + }) + } +} diff --git a/simple/csv.go b/simple/csv.go index 8df92e5..d761d58 100644 --- a/simple/csv.go +++ b/simple/csv.go @@ -1,13 +1,14 @@ package simple import ( + "bytes" "encoding/csv" "os" "github.com/pbnjay/grate" ) -var _ = grate.Register("csv", 15, OpenCSV) +var _ = grate.RegisterWithBytes("csv", 15, OpenCSV, OpenCSVBytes) // OpenCSV defines a Source's instantiation function. // It should return ErrNotInFormat immediately if filename is not of the correct file type. @@ -17,12 +18,22 @@ func OpenCSV(filename string) (grate.Source, error) { return nil, err } defer f.Close() + return parseCSV(csv.NewReader(f), filename) +} + +// OpenCSVBytes opens CSV data from an in-memory byte slice. +func OpenCSVBytes(data []byte) (grate.Source, error) { + r := bytes.NewReader(data) + return parseCSV(csv.NewReader(r), "") +} + +// parseCSV is a helper function that parses CSV data from a csv.Reader. +func parseCSV(s *csv.Reader, filename string) (grate.Source, error) { t := &simpleFile{ filename: filename, iterRow: -1, } - s := csv.NewReader(f) s.FieldsPerRecord = -1 total := 0 diff --git a/simple/tsv.go b/simple/tsv.go index ae51707..d56860b 100644 --- a/simple/tsv.go +++ b/simple/tsv.go @@ -2,13 +2,15 @@ package simple import ( "bufio" + "bytes" + "io" "os" "strings" "github.com/pbnjay/grate" ) -var _ = grate.Register("tsv", 10, OpenTSV) +var _ = grate.RegisterWithBytes("tsv", 10, OpenTSV, OpenTSVBytes) // OpenTSV defines a Source's instantiation function. // It should return ErrNotInFormat immediately if filename is not of the correct file type. @@ -18,19 +20,30 @@ func OpenTSV(filename string) (grate.Source, error) { return nil, err } defer f.Close() + return parseTSV(f, filename) +} + +// OpenTSVBytes opens TSV data from an in-memory byte slice. 
+func OpenTSVBytes(data []byte) (grate.Source, error) { + r := bytes.NewReader(data) + return parseTSV(r, "") +} + +// parseTSV is a helper function that parses TSV data from an io.Reader. +func parseTSV(r io.Reader, filename string) (grate.Source, error) { t := &simpleFile{ filename: filename, iterRow: -1, } - s := bufio.NewScanner(f) + s := bufio.NewScanner(r) total := 0 ncols := make(map[int]int) for s.Scan() { - r := strings.Split(s.Text(), "\t") - ncols[len(r)]++ + row := strings.Split(s.Text(), "\t") + ncols[len(row)]++ total++ - t.rows = append(t.rows, r) + t.rows = append(t.rows, row) } if s.Err() != nil { // this can only be read errors, not format diff --git a/xls/cfb/interface.go b/xls/cfb/interface.go index ec5a6a9..be0e59e 100644 --- a/xls/cfb/interface.go +++ b/xls/cfb/interface.go @@ -1,6 +1,7 @@ package cfb import ( + "bytes" "fmt" "io" "os" @@ -20,6 +21,17 @@ func Open(filename string) (*Document, error) { return d, nil } +// OpenBytes opens a Compound File Binary Format document from in-memory data. +func OpenBytes(data []byte) (*Document, error) { + d := &Document{} + r := bytes.NewReader(data) + err := d.load(r) + if err != nil { + return nil, err + } + return d, nil +} + // List the streams contained in the document. func (d *Document) List() ([]string, error) { var res []string diff --git a/xls/xls.go b/xls/xls.go index 15ff55c..96ae4bf 100644 --- a/xls/xls.go +++ b/xls/xls.go @@ -20,7 +20,7 @@ import ( "github.com/pbnjay/grate/xls/crypto" ) -var _ = grate.Register("xls", 1, Open) +var _ = grate.RegisterWithBytes("xls", 1, Open, OpenBytes) // WorkBook represents an Excel workbook containing 1 or more sheets. type WorkBook struct { @@ -54,11 +54,23 @@ func Open(filename string) (grate.Source, error) { if err != nil { return nil, err } + return openWorkbook(doc, filename) +} - b := &WorkBook{ - filename: filename, - doc: doc, +// OpenBytes opens an XLS workbook from in-memory data. 
+func OpenBytes(data []byte) (grate.Source, error) { + doc, err := cfb.OpenBytes(data) + if err != nil { + return nil, err + } + return openWorkbook(doc, "") +} +// openWorkbook is a helper function that creates a WorkBook from a CFB document. +func openWorkbook(doc *cfb.Document, filename string) (grate.Source, error) { + b := &WorkBook{ + filename: filename, + doc: doc, pos2substream: make(map[int64]int, 16), xfs: make([]uint16, 0, 128), } diff --git a/xlsx/xlsx.go b/xlsx/xlsx.go index 3d5eaad..e525f81 100644 --- a/xlsx/xlsx.go +++ b/xlsx/xlsx.go @@ -2,6 +2,7 @@ package xlsx import ( "archive/zip" + "bytes" "encoding/xml" "errors" "io" @@ -14,7 +15,7 @@ import ( "github.com/pbnjay/grate/commonxl" ) -var _ = grate.Register("xlsx", 5, Open) +var _ = grate.RegisterWithBytes("xlsx", 5, Open, OpenBytes) // Document contains an Office Open XML document. type Document struct { @@ -38,7 +39,10 @@ func (d *Document) Close() error { d.strings = nil d.sheets = d.sheets[:0] d.sheets = nil - return d.f.Close() + if d.f != nil { + return d.f.Close() + } + return nil } func Open(filename string) (grate.Source, error) { @@ -54,6 +58,21 @@ func Open(filename string) (grate.Source, error) { if err != nil { return nil, grate.WrapErr(err, grate.ErrNotInFormat) } + return parseDocument(z, filename, f) +} + +// OpenBytes opens an XLSX workbook from in-memory data. +func OpenBytes(data []byte) (grate.Source, error) { + r := bytes.NewReader(data) + z, err := zip.NewReader(r, int64(len(data))) + if err != nil { + return nil, grate.WrapErr(err, grate.ErrNotInFormat) + } + return parseDocument(z, "", nil) +} + +// parseDocument is a helper function that parses an XLSX document from a zip reader. 
+func parseDocument(z *zip.Reader, filename string, f *os.File) (grate.Source, error) { d := &Document{ filename: filename, f: f, From 61e39402078d44f0b6f12515b44ea5a637371570 Mon Sep 17 00:00:00 2001 From: constshift Date: Wed, 28 Jan 2026 09:15:28 +0100 Subject: [PATCH 2/3] Add example for opening files from memory in README --- README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/README.md b/README.md index fdc1463..50eb8c8 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ There are certainly still some bugs and edge cases, but we have run it successfu Grate provides a simple standard interface for all supported filetypes, allowing access to both named worksheets in spreadsheets and single tables in plaintext formats. +## Opening from File + ```go package main @@ -40,6 +42,41 @@ func main() { } ``` +## Opening from Memory + +Grate supports opening files directly from memory, which is useful for processing data from HTTP requests, embedded files, or other sources without writing to disk. + +```go +package main + +import ( + "fmt" + "io" + "os" + + "github.com/pbnjay/grate" + _ "github.com/pbnjay/grate/simple" + _ "github.com/pbnjay/grate/xls" + _ "github.com/pbnjay/grate/xlsx" +) + +func main() { + // Read file into memory + data, _ := os.ReadFile("data.xlsx") + + // Open from byte slice + wb, _ := grate.OpenBytes(data) + defer wb.Close() + + sheets, _ := wb.List() + sheet, _ := wb.Get(sheets[0]) + + for sheet.Next() { + fmt.Println(sheet.Strings()) + } +} +``` + # License All source code is licensed under the [MIT License](https://raw.github.com/pbnjay/grate/master/LICENSE). 
From 7e06ef78de1bb78e8225b482e69937a1538b4235 Mon Sep 17 00:00:00 2001
From: constshift
Date: Wed, 28 Jan 2026 09:19:14 +0100
Subject: [PATCH 3/3] Refactor memory file opening example in README for clarity and completeness

---
 README.md | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 50eb8c8..1bf31b6 100644
--- a/README.md
+++ b/README.md
@@ -51,8 +51,8 @@ package main
 
 import (
 	"fmt"
-	"io"
 	"os"
+	"strings"
 
 	"github.com/pbnjay/grate"
 	_ "github.com/pbnjay/grate/simple"
@@ -61,19 +61,17 @@ import (
 )
 
 func main() {
-	// Read file into memory
-	data, _ := os.ReadFile("data.xlsx")
-
-	// Open from byte slice
-	wb, _ := grate.OpenBytes(data)
-	defer wb.Close()
-
-	sheets, _ := wb.List()
-	sheet, _ := wb.Get(sheets[0])
-
-	for sheet.Next() {
-		fmt.Println(sheet.Strings())
+	d, _ := os.ReadFile(os.Args[1]) // Read file into memory
+	wb, _ := grate.OpenBytes(d)     // open the file
+	sheets, _ := wb.List()          // list available sheets
+	for _, s := range sheets {      // enumerate each sheet name
+		sheet, _ := wb.Get(s)   // open the sheet
+		for sheet.Next() {      // enumerate each row of data
+			row := sheet.Strings() // get the row's content as []string
+			fmt.Println(strings.Join(row, "\t"))
+		}
 	}
+	wb.Close()
 }
 ```
 