Skip to content

Commit 27fd40c

Browse files
committed
Fix case where 4GB+ files are created as 0 bytes on disk
Signed-off-by: LordKiRon <lordkiron@gmail.com>
1 parent 5f32b8d commit 27fd40c

File tree

4 files changed

+81
-15
lines changed

4 files changed

+81
-15
lines changed

object-file.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2108,6 +2108,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
21082108
struct object_info oi = OBJECT_INFO_INIT;
21092109
struct odb_loose_read_stream *st;
21102110
unsigned long mapsize;
2111+
unsigned long size_ul;
21112112
void *mapped;
21122113

21132114
mapped = odb_source_loose_map_object(source, oid, &mapsize);
@@ -2131,11 +2132,18 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
21312132
goto error;
21322133
}
21332134

2134-
oi.sizep = &st->base.size;
2135+
/*
2136+
* object_info.sizep is unsigned long* (32-bit on Windows), but
2137+
* st->base.size is size_t (64-bit). Use temporary variable.
2138+
* Note: loose objects >4GB would still truncate here, but such
2139+
* large loose objects are uncommon (they'd normally be packed).
2140+
*/
2141+
oi.sizep = &size_ul;
21352142
oi.typep = &st->base.type;
21362143

21372144
if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
21382145
goto error;
2146+
st->base.size = size_ul;
21392147

21402148
st->mapped = mapped;
21412149
st->mapsize = mapsize;

odb/streaming.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,15 +158,26 @@ static int open_istream_incore(struct odb_read_stream **out,
158158
.base.read = read_istream_incore,
159159
};
160160
struct odb_incore_read_stream *st;
161+
unsigned long size_ul;
161162
int ret;
162163

163164
oi.typep = &stream.base.type;
164-
oi.sizep = &stream.base.size;
165+
/*
166+
* object_info.sizep is unsigned long* (32-bit on Windows), but
167+
* stream.base.size is size_t (64-bit). We use a temporary variable
168+
* because the types are incompatible. Note: this path still truncates
169+
* for >4GB objects, but large objects should use pack streaming
170+
* (packfile_store_read_object_stream) which handles size_t properly.
171+
* This incore fallback is only used for small objects or when pack
172+
* streaming is unavailable.
173+
*/
174+
oi.sizep = &size_ul;
165175
oi.contentp = (void **)&stream.buf;
166176
ret = odb_read_object_info_extended(odb, oid, &oi,
167177
OBJECT_INFO_DIE_IF_CORRUPT);
168178
if (ret)
169179
return ret;
180+
stream.base.size = size_ul;
170181

171182
CALLOC_ARRAY(st, 1);
172183
*st = stream;

odb/streaming.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ struct odb_read_stream {
2121
odb_read_stream_close_fn close;
2222
odb_read_stream_read_fn read;
2323
enum object_type type;
24-
unsigned long size; /* inflated size of full object */
24+
size_t size; /* inflated size of full object */
2525
};
2626

2727
/*

packfile.c

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,8 +1130,8 @@ unsigned long repo_approximate_object_count(struct repository *r)
11301130
return r->objects->approximate_object_count;
11311131
}
11321132

1133-
unsigned long unpack_object_header_buffer(const unsigned char *buf,
1134-
unsigned long len, enum object_type *type, unsigned long *sizep)
1133+
static unsigned long unpack_object_header_buffer_internal(const unsigned char *buf,
1134+
unsigned long len, enum object_type *type, size_t *sizep)
11351135
{
11361136
unsigned shift;
11371137
size_t size, c;
@@ -1142,7 +1142,11 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
11421142
size = c & 15;
11431143
shift = 4;
11441144
while (c & 0x80) {
1145-
if (len <= used || (bitsizeof(long) - 7) < shift) {
1145+
/*
1146+
* Each continuation byte adds 7 bits. Reject the header if the next
1147+
* shift would exceed the width of size_t (not long, which is 32-bit on Windows).
1148+
*/
1149+
if (len <= used || (bitsizeof(size_t) - 7) < shift) {
11461150
error("bad object header");
11471151
size = used = 0;
11481152
break;
@@ -1151,6 +1155,15 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
11511155
size = st_add(size, st_left_shift(c & 0x7f, shift));
11521156
shift += 7;
11531157
}
1158+
*sizep = size;
1159+
return used;
1160+
}
1161+
1162+
unsigned long unpack_object_header_buffer(const unsigned char *buf,
1163+
unsigned long len, enum object_type *type, unsigned long *sizep)
1164+
{
1165+
size_t size;
1166+
unsigned long used = unpack_object_header_buffer_internal(buf, len, type, &size);
11541167
*sizep = cast_size_t_to_ulong(size);
11551168
return used;
11561169
}
@@ -1210,6 +1223,32 @@ unsigned long get_size_from_delta(struct packed_git *p,
12101223
return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
12111224
}
12121225

1226+
/*
1227+
* Like unpack_object_header(), but returns size via size_t* instead of
1228+
* unsigned long*. This is needed for >4GB objects on Windows where
1229+
* unsigned long is 32-bit but size_t is 64-bit. Used by streaming code
1230+
* to get the correct untruncated object size.
1231+
*/
1232+
static int unpack_object_header_sz(struct packed_git *p,
1233+
struct pack_window **w_curs,
1234+
off_t *curpos,
1235+
size_t *sizep)
1236+
{
1237+
unsigned char *base;
1238+
unsigned long left;
1239+
unsigned long used;
1240+
enum object_type type;
1241+
1242+
base = use_pack(p, w_curs, *curpos, &left);
1243+
used = unpack_object_header_buffer_internal(base, left, &type, sizep);
1244+
if (!used) {
1245+
type = OBJ_BAD;
1246+
} else
1247+
*curpos += used;
1248+
1249+
return type;
1250+
}
1251+
12131252
int unpack_object_header(struct packed_git *p,
12141253
struct pack_window **w_curs,
12151254
off_t *curpos,
@@ -2561,21 +2600,29 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
25612600
struct pack_window *window = NULL;
25622601
struct object_info oi = OBJECT_INFO_INIT;
25632602
enum object_type in_pack_type;
2564-
unsigned long size;
2565-
2566-
oi.sizep = &size;
2603+
size_t size;
25672604

2605+
/*
2606+
* We need to check if this is a delta or if the object is smaller
2607+
* than the big file threshold. For the initial check, we don't need
2608+
* the exact size, just whether it qualifies for streaming.
2609+
*/
25682610
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
25692611
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
2570-
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
2571-
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
2612+
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA)
25722613
return -1;
25732614

2574-
in_pack_type = unpack_object_header(oi.u.packed.pack,
2575-
&window,
2576-
&oi.u.packed.offset,
2577-
&size);
2615+
/* Read the actual size using size_t to handle >4GB objects on Windows */
2616+
in_pack_type = unpack_object_header_sz(oi.u.packed.pack,
2617+
&window,
2618+
&oi.u.packed.offset,
2619+
&size);
25782620
unuse_pack(&window);
2621+
2622+
/* Now check the big file threshold with the correct size */
2623+
if (repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
2624+
return -1;
2625+
25792626
switch (in_pack_type) {
25802627
default:
25812628
return -1; /* we do not do deltas for now */

0 commit comments

Comments
 (0)