Skip to content

Commit 4338c7c

Browse files
committed
feat(tests): add comprehensive range format tests for Excel dataset retrieval
1 parent 798dd66 commit 4338c7c

File tree

2 files changed

+167
-10
lines changed

2 files changed

+167
-10
lines changed

core/dbio/filesys/sheet_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,52 @@ func TestExcel(t *testing.T) {
9999
os.RemoveAll("test/test.excel6.xlsx")
100100
}
101101

102+
func TestExcelRangeFormats(t *testing.T) {
103+
t.Parallel()
104+
105+
xls, err := iop.NewExcelFromFile("test/test.excel2.xlsx")
106+
if !assert.NoError(t, err) {
107+
return
108+
}
109+
110+
// Test standard range format "A1:H29"
111+
data, err := xls.GetDatasetFromRange(xls.Sheets[0], "A1:H29")
112+
assert.NoError(t, err)
113+
assert.Equal(t, 8, len(data.Columns))
114+
assert.Equal(t, 28, len(data.Rows))
115+
116+
// Test row-only range format "1:10" - should detect columns with data
117+
data, err = xls.GetDatasetFromRange(xls.Sheets[0], "1:10")
118+
assert.NoError(t, err)
119+
assert.Greater(t, len(data.Columns), 0, "should detect columns with data")
120+
assert.Equal(t, 9, len(data.Rows)) // rows 1-10 = 10 rows, minus header = 9
121+
122+
// Test partial range format "A1:C" - should extend to last row
123+
data, err = xls.GetDatasetFromRange(xls.Sheets[0], "A1:C")
124+
assert.NoError(t, err)
125+
assert.Equal(t, 3, len(data.Columns))
126+
assert.Greater(t, len(data.Rows), 100, "should extend to last row")
127+
128+
// Test single row range "5:5"
129+
data, err = xls.GetDatasetFromRange(xls.Sheets[0], "5:5")
130+
assert.NoError(t, err)
131+
assert.Greater(t, len(data.Columns), 0)
132+
133+
// Test error: reversed row range "10:5"
134+
_, err = xls.GetDatasetFromRange(xls.Sheets[0], "10:5")
135+
assert.Error(t, err, "should error on reversed row range")
136+
assert.Contains(t, err.Error(), "reversed")
137+
138+
// Test error: reversed row range in standard format "A10:C5"
139+
_, err = xls.GetDatasetFromRange(xls.Sheets[0], "A10:C5")
140+
assert.Error(t, err, "should error on reversed row range")
141+
assert.Contains(t, err.Error(), "reversed")
142+
143+
// Test error: invalid range format
144+
_, err = xls.GetDatasetFromRange(xls.Sheets[0], "invalid")
145+
assert.Error(t, err, "should error on invalid range format")
146+
}
147+
102148
func TestGoogleSheet(t *testing.T) {
103149

104150
url := "https://docs.google.com/spreadsheets/d/1Wo7d_2oiYpWy1hYGqHIy0DSPWki24Xif3FnlRjNGzo4/edit#gid=0"

core/dbio/iop/sheet_excel.go

Lines changed: 121 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,13 @@ func NewExcelDataset(reader io.Reader, props map[string]string) (data Dataset, e
3131
sheetName := props["sheet"]
3232
sheetRange := ""
3333

34-
if sheetName == "" {
35-
sheetName = xls.Sheets[0]
36-
} else if sheetNameArr := strings.Split(sheetName, "!"); len(sheetNameArr) == 2 {
34+
if sheetNameArr := strings.Split(sheetName, "!"); len(sheetNameArr) == 2 {
3735
sheetName = sheetNameArr[0]
3836
sheetRange = sheetNameArr[1]
3937
}
38+
if sheetName == "" {
39+
sheetName = xls.Sheets[0] // if sheet is blank, use first
40+
}
4041

4142
if sheetRange != "" {
4243
data, err = xls.GetDatasetFromRange(sheetName, sheetRange)
@@ -139,13 +140,27 @@ func (xls *Excel) GetDataset(sheet string) (data Dataset) {
139140
}
140141

141142
// GetDatasetFromRange returns a dataset of the provided sheet / range
142-
// cellRange example: `$AH$13:$AI$20` or `AH13:AI20` or `A:E`
143+
// cellRange examples:
144+
// - `$AH$13:$AI$20` or `AH13:AI20` - standard cell range
145+
// - `A:E` - column range (all rows)
146+
// - `9:15` - row range (detects columns with data)
147+
// - `A4:C` - partial range (extends to last row)
143148
func (xls *Excel) GetDatasetFromRange(sheet, cellRange string) (data Dataset, err error) {
144149

145150
regexAlpha := *regexp.MustCompile(`[^a-zA-Z]`)
146151
regexNum := *regexp.MustCompile(`[^0-9]`)
147152

148-
cellRange = strings.ReplaceAll(cellRange, "$", "")
153+
allRows, err := xls.File.GetRows(sheet)
154+
if err != nil {
155+
return data, g.Error(err, "could not get rows")
156+
}
157+
158+
// Normalize range format (handles row-only and partial ranges)
159+
cellRange, err = xls.normalizeRange(cellRange, allRows)
160+
if err != nil {
161+
return data, g.Error(err, "could not normalize range")
162+
}
163+
149164
rangeArr := strings.Split(cellRange, ":")
150165
if len(rangeArr) != 2 {
151166
err = g.Error(err, "Invalid range: "+cellRange)
@@ -159,11 +174,6 @@ func (xls *Excel) GetDatasetFromRange(sheet, cellRange string) (data Dataset, er
159174
rowStart := cast.ToInt(regexNum.ReplaceAllString(rangeArr[0], "")) - 1
160175
rowEnd := cast.ToInt(regexNum.ReplaceAllString(rangeArr[1], "")) - 1
161176

162-
allRows, err := xls.File.GetRows(sheet)
163-
if err != nil {
164-
return data, g.Error(err, "could not get rows")
165-
}
166-
167177
if rowStart == -1 {
168178
rowStart = 0
169179
}
@@ -265,6 +275,107 @@ func (xls *Excel) TitleToNumber(s string) int {
265275
return sum - 1
266276
}
267277

278+
// NumberToTitle converts a 0-based column index to Excel column letters.
279+
// For example: 0 -> "A", 25 -> "Z", 26 -> "AA"
280+
func (xls *Excel) NumberToTitle(n int) string {
281+
result := ""
282+
for n >= 0 {
283+
result = string(rune('A'+n%26)) + result
284+
n = n/26 - 1
285+
}
286+
return result
287+
}
288+
289+
// normalizeRange normalizes various range formats into standard Excel range notation.
290+
// Supported formats:
291+
// - "9:15" (row-only) -> "A9:<lastCol>15" (detects last column with data)
292+
// - "A4:C" (partial) -> "A4:C<lastRow>" (extends to last row)
293+
// - "A4:C8" (standard) -> "A4:C8" (no change)
294+
func (xls *Excel) normalizeRange(cellRange string, allRows [][]string) (string, error) {
295+
regexAlpha := regexp.MustCompile(`[^a-zA-Z]`)
296+
regexNum := regexp.MustCompile(`[^0-9]`)
297+
regexRowOnly := regexp.MustCompile(`^\d+:\d+$`)
298+
299+
cellRange = strings.ReplaceAll(cellRange, "$", "")
300+
rangeArr := strings.Split(cellRange, ":")
301+
if len(rangeArr) != 2 {
302+
return "", g.Error("invalid range format: %s", cellRange)
303+
}
304+
305+
startPart := rangeArr[0]
306+
endPart := rangeArr[1]
307+
308+
// Check if it's a row-only range like "9:15"
309+
if regexRowOnly.MatchString(cellRange) {
310+
rowStart := cast.ToInt(startPart)
311+
rowEnd := cast.ToInt(endPart)
312+
313+
if rowStart <= 0 || rowEnd <= 0 {
314+
return "", g.Error("invalid row numbers in range: %s", cellRange)
315+
}
316+
if rowStart > rowEnd {
317+
return "", g.Error("row range %d:%d is reversed", rowStart, rowEnd)
318+
}
319+
320+
// Convert to 0-based indices
321+
rowStartIdx := rowStart - 1
322+
rowEndIdx := rowEnd - 1
323+
324+
if rowStartIdx >= len(allRows) {
325+
return "", g.Error("row range %d:%d exceeds sheet bounds (max row: %d)", rowStart, rowEnd, len(allRows))
326+
}
327+
328+
// Scan rows to find max column with data (limit to SampleSize rows for performance)
329+
maxCol := 0
330+
scannedRows := 0
331+
for r := rowStartIdx; r <= rowEndIdx && r < len(allRows); r++ {
332+
if len(allRows[r]) > maxCol {
333+
maxCol = len(allRows[r])
334+
}
335+
scannedRows++
336+
if scannedRows >= SampleSize {
337+
break
338+
}
339+
}
340+
341+
if maxCol == 0 {
342+
return "", g.Error("no data found in row range %d:%d", rowStart, rowEnd)
343+
}
344+
345+
// Convert to column letter (maxCol is count, so subtract 1 for 0-based index)
346+
endCol := xls.NumberToTitle(maxCol - 1)
347+
return g.F("A%d:%s%d", rowStart, endCol, rowEnd), nil
348+
}
349+
350+
// Extract column and row parts
351+
_ = regexAlpha.ReplaceAllString(startPart, "") // startCol - not needed for normalization
352+
endCol := regexAlpha.ReplaceAllString(endPart, "")
353+
startRowStr := regexNum.ReplaceAllString(startPart, "")
354+
endRowStr := regexNum.ReplaceAllString(endPart, "")
355+
356+
// Check for partial range like "A4:C" (has start row but no end row)
357+
if startRowStr != "" && endRowStr == "" && endCol != "" {
358+
startRow := cast.ToInt(startRowStr)
359+
if startRow <= 0 {
360+
return "", g.Error("invalid start row in range: %s", cellRange)
361+
}
362+
// Extend to last row
363+
return g.F("%s:%s%d", startPart, endCol, len(allRows)), nil
364+
}
365+
366+
// Validate row order if both rows are specified
367+
if startRowStr != "" && endRowStr != "" {
368+
startRow := cast.ToInt(startRowStr)
369+
endRow := cast.ToInt(endRowStr)
370+
if startRow > endRow {
371+
return "", g.Error("row range %d:%d is reversed", startRow, endRow)
372+
}
373+
}
374+
375+
// Standard format or column-only format (A:E), return as-is
376+
return cellRange, nil
377+
}
378+
268379
// WriteSheet write a datastream into a sheet
269380
// mode can be: `new`, `append` or `overwrite`. Default is `new`
270381
func (xls *Excel) WriteSheet(shtName string, ds *Datastream, mode string) (err error) {

0 commit comments

Comments
 (0)