Skip to content

Commit 64fc730

Browse files
poncito and baumgold authored
Compatibility of schemas with nested types (#504)
Hi,

Here is a minimal example of the issue I've encountered.

```julia
struct A
    x::Int
end

struct B
    a::A
end

v = [B(A(i)) for i =1:3]
io = IOBuffer()
Arrow.write(io, v; file=false)
seekstart(io)
Arrow.append(io, v) # throws
```

I don't know if this is really necessary, or if I'm not using this library properly, but this issue makes it difficult to append to Arrow files with nested types. Since I've only added more cases in which the call to `append` can succeed, I do not think that this creates backward-compatibility issues.

Thanks for the review!

---------

Co-authored-by: Ben Baumgold <4933671+baumgold@users.noreply.github.com>
1 parent ac199b0 commit 64fc730

File tree

3 files changed

+44
-3
lines changed

3 files changed

+44
-3
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
name = "Arrow"
1818
uuid = "69666777-d1a9-59fb-9406-91d4454c9d45"
1919
authors = ["quinnj <quinn.jacobd@gmail.com>"]
20-
version = "2.7.1"
20+
version = "2.7.2"
2121

2222
[deps]
2323
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"

src/append.jl

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,9 +282,31 @@ function is_equivalent_schema(sch1::Tables.Schema, sch2::Tables.Schema)
282282
for (t1, t2) in zip(sch1.types, sch2.types)
283283
tt1 = Base.nonmissingtype(t1)
284284
tt2 = Base.nonmissingtype(t2)
285-
if t1 == t2 ||
286-
(tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == eltype(tt2))
285+
if t1 == t2
287286
continue
287+
elseif tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == eltype(tt2)
288+
continue
289+
elseif isstructtype(tt1) && isstructtype(tt2)
290+
is_equivalent_type_by_field(tt1, tt2)
291+
else
292+
return false
293+
end
294+
end
295+
true
296+
end
297+
298+
function is_equivalent_type_by_field(T1, T2)
299+
n1 = fieldcount(T1)
300+
n2 = fieldcount(T2)
301+
n1 != n2 && return false
302+
303+
for i = 1:n1
304+
fieldname(T1, i) == fieldname(T2, i) || return false
305+
306+
if fieldtype(T1, i) == fieldtype(T2, i)
307+
continue
308+
elseif isstructtype(T1) && isstructtype(T2)
309+
is_equivalent_type_by_field(T1, T2) || continue
288310
else
289311
return false
290312
end

test/runtests.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,5 +1042,24 @@ end
10421042
@test tbl.f[2] === Foo493(4, 5)
10431043
end
10441044
end
1045+
1046+
@testset "# 504" begin
1047+
struct Foo504
1048+
x::Int
1049+
end
1050+
1051+
struct Bar504
1052+
a::Foo504
1053+
end
1054+
1055+
v = [Bar504(Foo504(i)) for i = 1:3]
1056+
io = IOBuffer()
1057+
Arrow.write(io, v; file=false)
1058+
seekstart(io)
1059+
Arrow.append(io, v) # testing the compatibility between the schema of the Arrow table and the "schema" of v (using the fallback mechanism of Tables.jl)
1060+
seekstart(io)
1061+
t = Arrow.Table(io)
1062+
@test Arrow.Tables.rowcount(t) == 6
1063+
end
10451064
end # @testset "misc"
10461065
end

0 commit comments

Comments
 (0)