Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ There are certainly still some bugs and edge cases, but we have run it successfu

Grate provides a simple standard interface for all supported filetypes, allowing access to both named worksheets in spreadsheets and single tables in plaintext formats.

## Opening from File

```go
package main

Expand Down Expand Up @@ -40,6 +42,39 @@ func main() {
}
```

## Opening from Memory

Grate supports opening files directly from memory, which is useful for processing data from HTTP requests, embedded files, or other sources without writing to disk.

```go
package main

import (
"fmt"
"os"
"strings"

"github.com/pbnjay/grate"
_ "github.com/pbnjay/grate/simple"
_ "github.com/pbnjay/grate/xls"
_ "github.com/pbnjay/grate/xlsx"
)

func main() {
d, _ := os.ReadFile(os.Args[1]) // Read file into memory
wb, _ := grate.OpenBytes(d) // open the file
sheets, _ := wb.List() // list available sheets
for _, s := range sheets { // enumerate each sheet name
sheet, _ := wb.Get(s) // open the sheet
for sheet.Next() { // enumerate each row of data
row := sheet.Strings() // get the row's content as []string
fmt.Println(strings.Join(row, "\t"))
}
}
wb.Close()
}
```

# License

All source code is licensed under the [MIT License](https://raw.github.com/pbnjay/grate/master/LICENSE).
40 changes: 40 additions & 0 deletions examples/example_inmemory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Example demonstrating in-memory file processing with grate
package main

import (
"fmt"
"os"

"github.com/pbnjay/grate"
_ "github.com/pbnjay/grate/simple"
_ "github.com/pbnjay/grate/xls"
_ "github.com/pbnjay/grate/xlsx"
)

// Example 1: Opening from byte slice
//
// fromBytes reads ../testdata/basic.xlsx fully into memory, opens it with
// grate.OpenBytes and prints every row of the first sheet to stdout.
// It returns the first error hit while reading the file, opening the
// workbook, listing or fetching sheets, or iterating rows.
func fromBytes() error {
	// Read file into memory
	data, err := os.ReadFile("../testdata/basic.xlsx")
	if err != nil {
		return fmt.Errorf("reading input file: %w", err)
	}

	// Open from memory
	wb, err := grate.OpenBytes(data)
	if err != nil {
		return err
	}
	defer wb.Close()

	sheets, err := wb.List()
	if err != nil {
		return fmt.Errorf("listing sheets: %w", err)
	}
	// Guard the sheets[0] access below: an empty list would otherwise panic.
	if len(sheets) == 0 {
		return fmt.Errorf("workbook contains no sheets")
	}

	sheet, err := wb.Get(sheets[0])
	if err != nil {
		return fmt.Errorf("opening sheet %q: %w", sheets[0], err)
	}

	fmt.Println("Example 1: From Bytes")
	for sheet.Next() {
		fmt.Println(sheet.Strings())
	}
	// Next() returning false may mean end-of-data or a failure; surface the latter.
	return sheet.Err()
}

// main runs the in-memory example and writes any failure to stderr.
func main() {
	err := fromBytes()
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "Error in fromBytes: %v\n", err)
}
40 changes: 35 additions & 5 deletions grate.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ type Collection interface {
Err() error
}

// OpenFunc defines a Source's instantiation function.
// OpenFunc defines a Source's instantiation function from a filename.
// It should return ErrNotInFormat immediately if filename is not of the correct file type.
type OpenFunc func(filename string) (Source, error)

// OpenBytesFunc defines a Source's instantiation function from in-memory data.
// It should return ErrNotInFormat immediately if data is not of the correct file type.
//
// OpenBytes treats an error matching ErrNotInFormat (via errors.Is) as a signal
// to try the next registered format; any other non-nil error stops the search
// and is returned to the caller.
type OpenBytesFunc func(data []byte) (Source, error)

// Open a tabular data file and return a Source for accessing its contents.
func Open(filename string) (Source, error) {
for _, o := range srcTable {
Expand All @@ -71,20 +75,46 @@ func Open(filename string) (Source, error) {
return nil, ErrUnknownFormat
}

// OpenBytes tries each registered format, in srcTable order, against the
// in-memory data and returns the Source from the first opener that succeeds.
//
// Formats registered without an in-memory opener are skipped. An opener
// error matching ErrNotInFormat moves on to the next format; any other
// error is returned as-is. ErrUnknownFormat is returned when no format
// accepts the data.
func OpenBytes(data []byte) (Source, error) {
	for _, entry := range srcTable {
		opener := entry.opBytes
		if opener == nil {
			// This format has no in-memory opener.
			continue
		}

		src, err := opener(data)
		switch {
		case err == nil:
			return src, nil
		case !errors.Is(err, ErrNotInFormat):
			// A real failure, not a format mismatch: stop searching.
			return nil, err
		}

		if Debug {
			log.Println(" data is not in", entry.name, "format")
		}
	}
	return nil, ErrUnknownFormat
}

type srcOpenTab struct {
name string
pri int
op OpenFunc
name string
pri int
op OpenFunc
opBytes OpenBytesFunc
}

var srcTable = make([]*srcOpenTab, 0, 20)

// Register adds the named source to grate's datasource implementations
// using only a filename-based opener. It delegates to RegisterWithBytes
// with a nil in-memory opener.
func Register(name string, priority int, opener OpenFunc) error {
	var noBytesOpener OpenBytesFunc // nil: no in-memory opener supplied
	return RegisterWithBytes(name, priority, opener, noBytesOpener)
}

// RegisterWithBytes registers the named source as a grate datasource implementation with in-memory support.
func RegisterWithBytes(name string, priority int, opener OpenFunc, openerBytes OpenBytesFunc) error {
if Debug {
log.Println("Registering the", name, "format at priority", priority)
}
srcTable = append(srcTable, &srcOpenTab{name: name, pri: priority, op: opener})
srcTable = append(srcTable, &srcOpenTab{name: name, pri: priority, op: opener, opBytes: openerBytes})
sort.Slice(srcTable, func(i, j int) bool {
return srcTable[i].pri < srcTable[j].pri
})
Expand Down
73 changes: 73 additions & 0 deletions inmemory_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package grate_test

import (
"os"
"testing"

"github.com/pbnjay/grate"
_ "github.com/pbnjay/grate/simple"
_ "github.com/pbnjay/grate/xls"
_ "github.com/pbnjay/grate/xlsx"
)

// TestOpenBytes loads each sample file into memory, opens it through
// grate.OpenBytes, and verifies that the first sheet can be listed,
// fetched and iterated, yielding at least one non-empty row without an
// iteration error. A sample file that does not exist skips its subtest.
func TestOpenBytes(t *testing.T) {
	for _, path := range []string{
		"testdata/basic.tsv",
		"testdata/basic.xls",
		"testdata/basic.xlsx",
	} {
		t.Run(path, func(t *testing.T) {
			// Pull the whole file into a byte slice.
			raw, readErr := os.ReadFile(path)
			switch {
			case readErr == nil:
				// loaded; carry on
			case os.IsNotExist(readErr):
				t.Skipf("Test file %s does not exist", path)
				return
			default:
				t.Fatalf("Failed to read test file %s: %v", path, readErr)
			}

			// Hand the bytes to grate instead of a filename.
			book, openErr := grate.OpenBytes(raw)
			if openErr != nil {
				t.Fatalf("Failed to open %s from bytes: %v", path, openErr)
			}
			defer book.Close()

			// Enumerate the sheets and require at least one.
			names, listErr := book.List()
			if listErr != nil {
				t.Fatalf("Failed to list sheets from %s: %v", path, listErr)
			}
			if len(names) == 0 {
				t.Fatalf("Expected at least one sheet in %s", path)
			}

			// Walk the first sheet row by row.
			first, getErr := book.Get(names[0])
			if getErr != nil {
				t.Fatalf("Failed to get sheet from %s: %v", path, getErr)
			}

			seen := 0
			for ; first.Next(); seen++ {
				if cells := first.Strings(); len(cells) == 0 {
					t.Errorf("Expected non-empty row in %s", path)
				}
			}
			if seen == 0 {
				t.Errorf("Expected at least one row in %s", path)
			}

			if first.Err() != nil {
				t.Errorf("Error iterating sheet from %s: %v", path, first.Err())
			}
		})
	}
}
15 changes: 13 additions & 2 deletions simple/csv.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package simple

import (
"bytes"
"encoding/csv"
"os"

"github.com/pbnjay/grate"
)

var _ = grate.Register("csv", 15, OpenCSV)
var _ = grate.RegisterWithBytes("csv", 15, OpenCSV, OpenCSVBytes)

// OpenCSV defines a Source's instantiation function.
// It should return ErrNotInFormat immediately if filename is not of the correct file type.
Expand All @@ -17,12 +18,22 @@ func OpenCSV(filename string) (grate.Source, error) {
return nil, err
}
defer f.Close()
return parseCSV(csv.NewReader(f), filename)
}

// OpenCSVBytes parses CSV content held in a byte slice and returns the
// result as a grate.Source. "<memory>" is passed to parseCSV in place of a
// filename.
func OpenCSVBytes(data []byte) (grate.Source, error) {
	return parseCSV(csv.NewReader(bytes.NewReader(data)), "<memory>")
}

// parseCSV is a helper function that parses CSV data from a csv.Reader.
func parseCSV(s *csv.Reader, filename string) (grate.Source, error) {
t := &simpleFile{
filename: filename,
iterRow: -1,
}

s := csv.NewReader(f)
s.FieldsPerRecord = -1

total := 0
Expand Down
23 changes: 18 additions & 5 deletions simple/tsv.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package simple

import (
"bufio"
"bytes"
"io"
"os"
"strings"

"github.com/pbnjay/grate"
)

var _ = grate.Register("tsv", 10, OpenTSV)
var _ = grate.RegisterWithBytes("tsv", 10, OpenTSV, OpenTSVBytes)

// OpenTSV defines a Source's instantiation function.
// It should return ErrNotInFormat immediately if filename is not of the correct file type.
Expand All @@ -18,19 +20,30 @@ func OpenTSV(filename string) (grate.Source, error) {
return nil, err
}
defer f.Close()
return parseTSV(f, filename)
}

// OpenTSVBytes parses TSV content held in a byte slice and returns the
// result as a grate.Source. "<memory>" is passed to parseTSV in place of a
// filename.
func OpenTSVBytes(data []byte) (grate.Source, error) {
	return parseTSV(bytes.NewReader(data), "<memory>")
}

// parseTSV is a helper function that parses TSV data from an io.Reader.
func parseTSV(r io.Reader, filename string) (grate.Source, error) {
t := &simpleFile{
filename: filename,
iterRow: -1,
}

s := bufio.NewScanner(f)
s := bufio.NewScanner(r)
total := 0
ncols := make(map[int]int)
for s.Scan() {
r := strings.Split(s.Text(), "\t")
ncols[len(r)]++
row := strings.Split(s.Text(), "\t")
ncols[len(row)]++
total++
t.rows = append(t.rows, r)
t.rows = append(t.rows, row)
}
if s.Err() != nil {
// this can only be read errors, not format
Expand Down
12 changes: 12 additions & 0 deletions xls/cfb/interface.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cfb

import (
"bytes"
"fmt"
"io"
"os"
Expand All @@ -20,6 +21,17 @@ func Open(filename string) (*Document, error) {
return d, nil
}

// OpenBytes loads a Compound File Binary Format document from a byte slice
// rather than from a file on disk. It returns a nil Document together with
// the load error if the data cannot be loaded.
func OpenBytes(data []byte) (*Document, error) {
	doc := new(Document)
	if err := doc.load(bytes.NewReader(data)); err != nil {
		return nil, err
	}
	return doc, nil
}

// List the streams contained in the document.
func (d *Document) List() ([]string, error) {
var res []string
Expand Down
20 changes: 16 additions & 4 deletions xls/xls.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"github.com/pbnjay/grate/xls/crypto"
)

var _ = grate.Register("xls", 1, Open)
var _ = grate.RegisterWithBytes("xls", 1, Open, OpenBytes)

// WorkBook represents an Excel workbook containing 1 or more sheets.
type WorkBook struct {
Expand Down Expand Up @@ -54,11 +54,23 @@ func Open(filename string) (grate.Source, error) {
if err != nil {
return nil, err
}
return openWorkbook(doc, filename)
}

b := &WorkBook{
filename: filename,
doc: doc,
// OpenBytes builds an XLS workbook Source from a byte slice. The data is
// first opened as a CFB document; the workbook is then created from that
// document with "<memory>" in place of a filename.
func OpenBytes(data []byte) (grate.Source, error) {
	cfbDoc, openErr := cfb.OpenBytes(data)
	if openErr == nil {
		return openWorkbook(cfbDoc, "<memory>")
	}
	return nil, openErr
}

// openWorkbook is a helper function that creates a WorkBook from a CFB document.
func openWorkbook(doc *cfb.Document, filename string) (grate.Source, error) {
b := &WorkBook{
filename: filename,
doc: doc,
pos2substream: make(map[int64]int, 16),
xfs: make([]uint16, 0, 128),
}
Expand Down
Loading