From 6bdda3a3b00fff9a1d64d1bb4732f0c446d7012c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:26 +0100 Subject: [PATCH 01/32] streaming: rename `git_istream` into `odb_read_stream` In the following patches we are about to make the `git_istream` more generic so that it becomes fully controlled by the specific object source that wants to create it. As part of these refactorings we'll fully move the structure into the object database subsystem. Prepare for this change by renaming the structure from `git_istream` to `odb_read_stream`. This mirrors the `odb_write_stream` structure that we already have. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 4 +-- object-file.c | 2 +- streaming.c | 62 +++++++++++++++++++++--------------------- streaming.h | 12 ++++---- 7 files changed, 43 insertions(+), 43 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41..dc1eda09e0 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -129,7 +129,7 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; char buf[BLOCKSIZE]; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43d..40a9c93ff9 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4..5f90f12f92 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 69e80b1443..c693d948e1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f, const struct object_id *oid) { git_zstream stream; @@ -513,7 +513,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { diff --git a/object-file.c b/object-file.c index 811c569ed3..b62b21a452 100644 --- a/object-file.c +++ b/object-file.c @@ -134,7 +134,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) struct object_id real_oid; unsigned long size; enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; diff --git a/streaming.c b/streaming.c index 00ad649ae3..1fb4b7c1c0 100644 --- a/streaming.c +++ b/streaming.c @@ -14,17 +14,17 @@ #include "replace-object.h" #include "packfile.h" -typedef int 
(*open_istream_fn)(struct git_istream *, +typedef int (*open_istream_fn)(struct odb_read_stream *, struct repository *, const struct object_id *, enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); +typedef int (*close_istream_fn)(struct odb_read_stream *); +typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) struct filtered_istream { - struct git_istream *upstream; + struct odb_read_stream *upstream; struct stream_filter *filter; char ibuf[FILTER_BUFFER]; char obuf[FILTER_BUFFER]; @@ -33,7 +33,7 @@ struct filtered_istream { int input_finished; }; -struct git_istream { +struct odb_read_stream { open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -71,7 +71,7 @@ struct git_istream { * *****************************************************************/ -static void close_deflated_stream(struct git_istream *st) +static void close_deflated_stream(struct odb_read_stream *st) { if (st->z_state == z_used) git_inflate_end(&st->z); @@ -84,13 +84,13 @@ static void close_deflated_stream(struct git_istream *st) * *****************************************************************/ -static int close_istream_filtered(struct git_istream *st) +static int close_istream_filtered(struct odb_read_stream *st) { free_stream_filter(st->u.filtered.filter); return close_istream(st->u.filtered.upstream); } -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, size_t sz) { struct filtered_istream *fs = &(st->u.filtered); @@ -150,10 +150,10 @@ static ssize_t read_istream_filtered(struct git_istream *st, char *buf, return filled; } -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) { - struct git_istream *ifs = xmalloc(sizeof(*ifs)); + struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); struct filtered_istream *fs = &(ifs->u.filtered); ifs->close = close_istream_filtered; @@ -173,7 +173,7 @@ static struct git_istream *attach_stream_filter(struct git_istream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -218,14 +218,14 @@ static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) return total_read; } -static int close_istream_loose(struct git_istream *st) +static int close_istream_loose(struct odb_read_stream *st) { close_deflated_stream(st); munmap(st->u.loose.mapped, st->u.loose.mapsize); return 0; } -static int open_istream_loose(struct git_istream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { @@ -277,7 +277,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -336,13 +336,13 @@ static ssize_t read_istream_pack_non_delta(struct git_istream 
*st, char *buf, return total_read; } -static int close_istream_pack_non_delta(struct git_istream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *st) { close_deflated_stream(st); return 0; } -static int open_istream_pack_non_delta(struct git_istream *st, +static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, const struct object_id *oid UNUSED, enum object_type *type UNUSED) @@ -380,13 +380,13 @@ static int open_istream_pack_non_delta(struct git_istream *st, * *****************************************************************/ -static int close_istream_incore(struct git_istream *st) +static int close_istream_incore(struct odb_read_stream *st) { free(st->u.incore.buf); return 0; } -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) { size_t read_size = sz; size_t remainder = st->size - st->u.incore.read_ptr; @@ -400,7 +400,7 @@ static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) return read_size; } -static int open_istream_incore(struct git_istream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; @@ -420,7 +420,7 @@ static int open_istream_incore(struct git_istream *st, struct repository *r, * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct git_istream *st, +static int istream_source(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) @@ -458,25 +458,25 @@ static int istream_source(struct git_istream *st, * Users of streaming interface ****************************************************************/ -int close_istream(struct git_istream *st) +int close_istream(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) +ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *open_istream(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { - struct git_istream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); int ret = istream_source(st, r, real, type); @@ -493,7 +493,7 @@ struct git_istream *open_istream(struct repository *r, } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); + struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { close_istream(st); return NULL; @@ -508,7 +508,7 @@ struct git_istream *open_istream(struct repository *r, int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, int can_seek) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; ssize_t kept = 0; diff --git a/streaming.h b/streaming.h index bd27f59e57..f5ff5d7ac9 100644 --- a/streaming.h +++ 
b/streaming.h @@ -7,14 +7,14 @@ #include "object.h" /* opaque */ -struct git_istream; +struct odb_read_stream; struct stream_filter; -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); +struct odb_read_stream *open_istream(struct repository *, const struct object_id *, + enum object_type *, unsigned long *, + struct stream_filter *); +int close_istream(struct odb_read_stream *); +ssize_t read_istream(struct odb_read_stream *, void *, size_t); int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); -- GitLab From 70c8b5f5453b9f128a72fad4398acfb9e7d869c4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:27 +0100 Subject: [PATCH 02/32] streaming: drop the `open()` callback function When creating a read stream we first populate the structure with the open callback function and then subsequently call the function. This layout is somewhat weird though: - The structure needs to be allocated and partially populated with the open function before we can properly initialize it. - We only ever call the `open()` callback function right after having populated the `struct odb_read_stream::open` member, and it's never called thereafter again. So it is somewhat pointless to store the callback in the first place. Especially the first point creates a problem for us. In subsequent commits we'll want to fully move construction of the read source into the respective object sources. E.g., the loose object source will be the one that is responsible for creating the structure. But this creates a problem: if we first need to create the structure so that we can call the source-specific callback we cannot fully handle creation of the structure in the source itself. We could of course work around that and have the loose object source create the structure and populate its `open()` callback, only. But this doesn't really buy us anything due to the second bullet point above. Instead, drop the callback entirely and refactor `istream_source()` so that we open the streams immediately. This unblocks a subsequent step, where we'll also start to allocate the structure in the source-specific logic. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/streaming.c b/streaming.c index 1fb4b7c1c0..1bb3f393b8 100644 --- a/streaming.c +++ b/streaming.c @@ -14,10 +14,6 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct odb_read_stream *, - struct repository *, - const struct object_id *, - enum object_type *); typedef int (*close_istream_fn)(struct odb_read_stream *); typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); @@ -34,7 +30,6 @@ struct filtered_istream { }; struct odb_read_stream { - open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -437,21 +432,25 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - st->open = open_istream_loose; + if (open_istream_loose(st, r, oid, type) < 0) + break; return 0; case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; + if (oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + break; + + st->u.in_pack.pack = oi.u.packed.pack; + st->u.in_pack.pos = oi.u.packed.offset; + if (open_istream_pack_non_delta(st, r, oid, type) < 0) + break; + return 0; + default: + break; } + + return open_istream_incore(st, r, oid, type); } /**************************************************************** @@ -485,12 +484,6 @@ struct odb_read_stream *open_istream(struct repository *r, return NULL; } - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); -- GitLab From 3f64deabdf0a2a9664acec61698affc449e07496 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:28 +0100 Subject: [PATCH 03/32] streaming: propagate final object type via the stream When opening the read stream for a specific object the caller is also expected to pass in a pointer to the object type. This type is passed down via multiple levels and will eventually be populated with the type of the looked-up object. The way we propagate down the pointer though is somewhat non-obvious. While `istream_source()` still expects the pointer and looks it up via `odb_read_object_info_extended()`, we also pass it down even further into the format-specific callbacks that perform another lookup. This is quite confusing overall. Refactor the code so that the responsibility to populate the object type rests solely with the format-specific callbacks. This will allow us to drop the call to `odb_read_object_info_extended()` in `istream_source()` entirely in a subsequent patch. Furthermore, instead of propagating the type via an in-pointer, we now propagate the type via a new field in the object stream. It already has a `size` field, so it's only natural to have a second field that contains the object type. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/streaming.c b/streaming.c index 1bb3f393b8..665624ddc0 100644 --- a/streaming.c +++ b/streaming.c @@ -33,6 +33,7 @@ struct odb_read_stream { close_istream_fn close; read_istream_fn read; + enum object_type type; unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; @@ -159,6 +160,7 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, fs->o_end = fs->o_ptr = 0; fs->input_finished = 0; ifs->size = -1; /* unknown */ + ifs->type = st->type; return ifs; } @@ -221,14 +223,13 @@ static int close_istream_loose(struct odb_read_stream *st) } static int open_istream_loose(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_source *source; oi.sizep = &st->size; - oi.typep = type; + oi.typep = &st->type; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { @@ -249,7 +250,7 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; @@ -339,8 +340,7 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) + const struct object_id *oid UNUSED) { struct pack_window *window; enum object_type in_pack_type; @@ -361,6 +361,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case OBJ_TAG: break; } + st->type = in_pack_type; st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; @@ -396,7 +397,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t } static int open_istream_incore(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -404,7 +405,7 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, st->close = close_istream_incore; st->read = read_istream_incore; - oi.typep = type; + oi.typep = &st->type; oi.sizep = &st->size; oi.contentp = (void **)&st->u.incore.buf; return odb_read_object_info_extended(r->objects, oid, &oi, @@ -417,14 +418,12 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, static int istream_source(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { unsigned long size; int status; struct object_info oi = OBJECT_INFO_INIT; - oi.typep = type; oi.sizep = &size; status = odb_read_object_info_extended(r->objects, oid, &oi, 0); if (status < 0) @@ -432,7 +431,7 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid, type) < 0) + if (open_istream_loose(st, r, oid) < 0) break; return 0; case OI_PACKED: @@ -442,7 +441,7 @@ static int istream_source(struct 
odb_read_stream *st, st->u.in_pack.pack = oi.u.packed.pack; st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid, type) < 0) + if (open_istream_pack_non_delta(st, r, oid) < 0) break; return 0; @@ -450,7 +449,7 @@ static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid, type); + return open_istream_incore(st, r, oid); } /**************************************************************** @@ -477,7 +476,7 @@ struct odb_read_stream *open_istream(struct repository *r, { struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); + int ret = istream_source(st, r, real); if (ret) { free(st); @@ -495,6 +494,7 @@ struct odb_read_stream *open_istream(struct repository *r, } *size = st->size; + *type = st->type; return st; } -- GitLab From 3c7722dd4d376e0fce4c48f723fe8b69af785998 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:29 +0100 Subject: [PATCH 04/32] streaming: explicitly pass packfile info when streaming a packed object When streaming a packed object we first populate the stream with information about the pack that contains the object before calling `open_istream_pack_non_delta()`. This is done because we have already looked up both the pack and the object's offset, so it would be a waste of time to look up this information again. But the way this is done makes for a somewhat awkward calling interface, as the caller now needs to be aware of how exactly the function itself behaves. Refactor the code so that we instead explicitly pass the packfile info into `open_istream_pack_non_delta()`. This makes the calling convention explicit, but more importantly this allows us to refactor the function so that it becomes its responsibility to allocate the stream itself in a subsequent patch. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/streaming.c b/streaming.c index 665624ddc0..bf277daadd 100644 --- a/streaming.c +++ b/streaming.c @@ -340,16 +340,18 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED) + const struct object_id *oid UNUSED, + struct packed_git *pack, + off_t offset) { struct pack_window *window; enum object_type in_pack_type; window = NULL; - in_pack_type = unpack_object_header(st->u.in_pack.pack, + in_pack_type = unpack_object_header(pack, &window, - &st->u.in_pack.pos, + &offset, &st->size); unuse_pack(&window); switch (in_pack_type) { @@ -365,6 +367,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; + st->u.in_pack.pack = pack; + st->u.in_pack.pos = offset; return 0; } @@ -436,14 +440,10 @@ static int istream_source(struct odb_read_stream *st, return 0; case OI_PACKED: if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(the_repository) >= size || + open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + oi.u.packed.offset) < 0) break; - - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid) < 0) - break; - return 0; default: break; -- GitLab From 595296e124f5e8a67c4669fcaeb1b28e71c2d751 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:30 +0100 Subject: [PATCH 05/32] streaming: allocate stream inside the backend-specific logic When creating a new stream we first allocate it and then call into backend-specific logic to populate the stream. This design requires that the stream itself contains a `union` with backend-specific members that then ultimately get populated by the backend-specific logic. This works, but it's awkward in the context of pluggable object databases. Each backend will need its own member in that union, and as the structure itself is completely opaque (it's only defined in "streaming.c") it also has the consequence that we must have the logic that is specific to backends in "streaming.c". Ideally though, the infrastructure would be reversed: we have a generic `struct odb_read_stream` and some helper functions in "streaming.c", whereas the backend-specific logic sits in the backend's subsystem itself. This can be realized by using a design that is similar to how we handle reference databases: instead of having a union of members, we instead have backend-specific structures with a `struct odb_read_stream base` as its first member. The backends would thus hand out the pointer to the base, but internally they know to cast back to the backend-specific type. This means though that we need to allocate different structures depending on the backend. To prepare for this, move allocation of the structure into the backend-specific functions that open a new stream. Subsequent commits will then create those new backend-specific structs. 
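To sketch the intended layout (the names here are illustrative only; the
concrete structures follow in the next patches, and the example read
function simply mirrors the in-core stream):

    /* Generic part of the stream, shared by all backends. */
    struct odb_read_stream {
            int (*close)(struct odb_read_stream *);
            ssize_t (*read)(struct odb_read_stream *, char *, size_t);
            enum object_type type;
            unsigned long size;
    };

    /* Backend-specific stream that embeds the base as its first member. */
    struct odb_example_read_stream {
            struct odb_read_stream base;
            char *buf;
            unsigned long read_ptr;
    };

    static ssize_t example_read(struct odb_read_stream *_st, char *buf, size_t sz)
    {
            /* Valid because `base` is the first member of the structure. */
            struct odb_example_read_stream *st = (struct odb_example_read_stream *)_st;
            size_t remainder = st->base.size - st->read_ptr;
            size_t read_size = sz < remainder ? sz : remainder;

            memcpy(buf, st->buf + st->read_ptr, read_size);
            st->read_ptr += read_size;
            return read_size;
    }

The backend allocates its own structure, fills in the callbacks on `base`
and hands out `&st->base`; generic code only ever deals with the base
pointer.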
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 103 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/streaming.c b/streaming.c index bf277daadd..a2c2d88738 100644 --- a/streaming.c +++ b/streaming.c @@ -222,27 +222,34 @@ static int close_istream_loose(struct odb_read_stream *st) return 0; } -static int open_istream_loose(struct odb_read_stream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_read_stream *st; struct odb_source *source; - - oi.sizep = &st->size; - oi.typep = &st->type; + unsigned long mapsize; + void *mapped; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - st->u.loose.mapped = odb_source_loose_map_object(source, oid, - &st->u.loose.mapsize); - if (st->u.loose.mapped) + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (mapped) break; } - if (!st->u.loose.mapped) + if (!mapped) return -1; - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { case ULHR_OK: break; @@ -250,19 +257,28 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } + + oi.sizep = &st->size; + oi.typep = &st->type; + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; + st->u.loose.mapped = mapped; + st->u.loose.mapsize = mapsize; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; st->u.loose.hdr_avail = st->z.total_out; st->z_state = z_used; st->close = close_istream_loose; st->read = read_istream_loose; + *out = st; + return 0; error: git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); + free(st); return -1; } @@ -338,12 +354,16 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) return 0; } -static int open_istream_pack_non_delta(struct odb_read_stream *st, +static int open_istream_pack_non_delta(struct odb_read_stream **out, struct repository *r UNUSED, const struct object_id *oid UNUSED, struct packed_git *pack, off_t offset) { + struct odb_read_stream stream = { + .close = close_istream_pack_non_delta, + .read = read_istream_pack_non_delta, + }; struct pack_window *window; enum object_type in_pack_type; @@ -352,7 +372,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, in_pack_type = unpack_object_header(pack, &window, &offset, - &st->size); + &stream.size); unuse_pack(&window); switch (in_pack_type) { default: @@ -363,12 +383,13 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case OBJ_TAG: break; } - st->type = in_pack_type; - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - st->u.in_pack.pack = pack; - st->u.in_pack.pos = offset; + stream.type = in_pack_type; + stream.z_state = z_unused; + stream.u.in_pack.pack = pack; + stream.u.in_pack.pos = offset; + + CALLOC_ARRAY(*out, 1); + **out = stream; return 0; } @@ -400,27 +421,35 @@ 
static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t return read_size; } -static int open_istream_incore(struct odb_read_stream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; - - oi.typep = &st->type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); + struct odb_read_stream stream = { + .close = close_istream_incore, + .read = read_istream_incore, + }; + int ret; + + oi.typep = &stream.type; + oi.sizep = &stream.size; + oi.contentp = (void **)&stream.u.incore.buf; + ret = odb_read_object_info_extended(r->objects, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; + + CALLOC_ARRAY(*out, 1); + **out = stream; + return 0; } /***************************************************************************** * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct odb_read_stream *st, +static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { @@ -435,13 +464,13 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid) < 0) + if (open_istream_loose(out, r, oid) < 0) break; return 0; case OI_PACKED: if (oi.u.packed.is_delta || repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, oi.u.packed.offset) < 0) break; return 0; @@ -449,7 +478,7 @@ static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid); + return open_istream_incore(out, r, oid); } /**************************************************************** @@ -474,14 +503,12 @@ struct odb_read_stream *open_istream(struct repository *r, unsigned long *size, struct stream_filter *filter) { - struct odb_read_stream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st; const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real); + int ret = istream_source(&st, r, real); - if (ret) { - free(st); + if (ret) return NULL; - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ -- GitLab From e030d0aeb5ebf79cdc4910e79d59e33998de78cd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:31 +0100 Subject: [PATCH 06/32] streaming: create structure for in-core object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for in-core object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/streaming.c b/streaming.c index a2c2d88738..35307d7229 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - struct { void *mapped; unsigned long mapsize; @@ -401,22 +396,30 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, * *****************************************************************/ -static int close_istream_incore(struct odb_read_stream *st) +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) { - free(st->u.incore.buf); + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); return 0; } -static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; + size_t remainder = st->base.size - st->read_ptr; if (remainder <= read_size) read_size = remainder; if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; } return read_size; } @@ -426,22 +429,25 @@ static int open_istream_incore(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream stream = { - .close = close_istream_incore, - .read = read_istream_incore, + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, }; + struct odb_incore_read_stream *st; int ret; - oi.typep = &stream.type; - oi.sizep = &stream.size; - oi.contentp = (void **)&stream.u.incore.buf; + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; ret = odb_read_object_info_extended(r->objects, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + return 0; } -- GitLab From b7774c0f0de43379c40984b4ede265a512c1a4f0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:32 +0100 Subject: [PATCH 07/32] streaming: create structure for loose object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for loose object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 85 +++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 35307d7229..ac7b3026f5 100644 --- a/streaming.c +++ b/streaming.c @@ -39,14 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - struct { struct packed_git *pack; off_t pos; @@ -165,11 +157,21 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) +struct odb_loose_read_stream { + struct odb_read_stream base; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_done: return 0; case z_error: @@ -178,42 +180,43 @@ static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t break; } - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; if (sz < to_copy) to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; total_read += to_copy; } while (total_read < sz) { int status; - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + status = git_inflate(&st->base.z, Z_FINISH); - total_read = st->z.next_out - (unsigned char *)buf; + total_read = st->base.z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_loose(struct odb_read_stream *st) +static int close_istream_loose(struct odb_read_stream *_st) { - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + close_deflated_stream(&st->base); + munmap(st->mapped, st->mapsize); return 0; } @@ -222,7 +225,7 @@ static int open_istream_loose(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream *st; + struct odb_loose_read_stream *st; struct odb_source *source; unsigned long mapsize; void *mapped; @@ -244,8 +247,8 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { + switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { case 
ULHR_OK: break; case ULHR_BAD: @@ -253,26 +256,26 @@ static int open_istream_loose(struct odb_read_stream **out, goto error; } - oi.sizep = &st->size; - oi.typep = &st->type; + oi.sizep = &st->base.size; + oi.typep = &st->base.type; - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) goto error; - st->u.loose.mapped = mapped; - st->u.loose.mapsize = mapsize; - st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->base.z.total_out; + st->base.z_state = z_used; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; - *out = st; + *out = &st->base; return 0; error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + git_inflate_end(&st->base.z); + munmap(st->mapped, st->mapsize); free(st); return -1; } -- GitLab From 5f0d8d2e8d3f992f58af247b6d21509c3c7595ca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:33 +0100 Subject: [PATCH 08/32] streaming: create structure for packed object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for packed object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 75 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/streaming.c b/streaming.c index ac7b3026f5..788f04e83e 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - struct filtered_istream filtered; } u; }; @@ -287,16 +282,23 @@ static int open_istream_loose(struct odb_read_stream **out, * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; + memset(&st->base.z, 0, sizeof(st->base.z)); + git_inflate_init(&st->base.z); + st->base.z_state = z_used; break; case z_done: return 0; @@ -311,21 +313,21 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf struct pack_window *window = NULL; unsigned char *mapped; - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); + mapped = use_pack(st->pack, &window, + st->pos, &st->base.z.avail_in); - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + st->base.z.next_in = mapped; + status = git_inflate(&st->base.z, Z_FINISH); - st->u.in_pack.pos += st->z.next_in - 
mapped; - total_read = st->z.next_out - (unsigned char *)buf; + st->pos += st->base.z.next_in - mapped; + total_read = st->base.z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } @@ -338,17 +340,18 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_pack_non_delta(struct odb_read_stream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *_st) { - close_deflated_stream(st); + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + close_deflated_stream(&st->base); return 0; } @@ -358,19 +361,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, struct packed_git *pack, off_t offset) { - struct odb_read_stream stream = { - .close = close_istream_pack_non_delta, - .read = read_istream_pack_non_delta, - }; + struct odb_packed_read_stream *stream; struct pack_window *window; enum object_type in_pack_type; + size_t size; window = NULL; in_pack_type = unpack_object_header(pack, &window, &offset, - &stream.size); + &size); unuse_pack(&window); switch (in_pack_type) { default: @@ -381,13 +382,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, case OBJ_TAG: break; } - stream.type = in_pack_type; - stream.z_state = z_unused; - stream.u.in_pack.pack = pack; - stream.u.in_pack.pos = offset; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->base.z_state = z_unused; + stream->pack = pack; + stream->pos = offset; + + *out = &stream->base; return 0; } -- GitLab From 1154b2d2e511113e9b7d567788b72acb05713915 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:34 +0100 Subject: [PATCH 09/32] streaming: create structure for filtered object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for filtered object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 54 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/streaming.c b/streaming.c index 788f04e83e..199cca5abb 100644 --- a/streaming.c +++ b/streaming.c @@ -19,16 +19,6 @@ typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) -struct filtered_istream { - struct odb_read_stream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - struct odb_read_stream { close_istream_fn close; read_istream_fn read; @@ -37,10 +27,6 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct filtered_istream filtered; - } u; }; /***************************************************************** @@ -62,16 +48,28 @@ static void close_deflated_stream(struct odb_read_stream *st) * *****************************************************************/ -static int close_istream_filtered(struct odb_read_stream *st) +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) { - free_stream_filter(st->u.filtered.filter); - return close_istream(st->u.filtered.upstream); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return close_istream(fs->upstream); } -static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, size_t sz) { - struct filtered_istream *fs = &(st->u.filtered); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; size_t filled = 0; while (sz) { @@ -131,19 +129,17 @@ static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct stream_filter *filter) { - struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); + struct odb_filtered_read_stream *fs; - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; fs->upstream = st; fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - ifs->type = st->type; - return ifs; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; } /***************************************************************** -- GitLab From eb5abbb4e6a8c06f5c6275bbb541bf7d736171c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:35 +0100 Subject: [PATCH 10/32] streaming: move zlib stream into backends While all backend-specific data is now contained in a backend-specific structure, we still share the zlib stream across the loose and packed objects. Refactor the code and move it into the specific structures so that we fully detangle the different backends from one another. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 104 ++++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/streaming.c b/streaming.c index 199cca5abb..46fddaf2ca 100644 --- a/streaming.c +++ b/streaming.c @@ -25,23 +25,8 @@ struct odb_read_stream { enum object_type type; unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; }; -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct odb_read_stream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - /***************************************************************** * * Filtered stream @@ -150,6 +135,12 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct odb_loose_read_stream { struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; void *mapped; unsigned long mapsize; char hdr[32]; @@ -162,10 +153,10 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_done: + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: return 0; - case z_error: + case ODB_LOOSE_READ_STREAM_ERROR: return -1; default: break; @@ -183,20 +174,20 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t while (total_read < sz) { int status; - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); - total_read = st->base.z.next_out - (unsigned char *)buf; + total_read = st->z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; return -1; } } @@ -206,7 +197,8 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t static int close_istream_loose(struct odb_read_stream *_st) { struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); return 0; } @@ -238,7 +230,7 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, sizeof(st->hdr))) { case ULHR_OK: break; @@ -256,8 +248,8 @@ static int open_istream_loose(struct odb_read_stream **out, st->mapped = mapped; st->mapsize = mapsize; st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->base.z.total_out; - st->base.z_state = z_used; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; st->base.close = close_istream_loose; 
st->base.read = read_istream_loose; @@ -265,7 +257,7 @@ static int open_istream_loose(struct odb_read_stream **out, return 0; error: - git_inflate_end(&st->base.z); + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); free(st); return -1; @@ -281,6 +273,13 @@ static int open_istream_loose(struct odb_read_stream **out, struct odb_packed_read_stream { struct odb_read_stream base; struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; off_t pos; }; @@ -290,17 +289,17 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_unused: - memset(&st->base.z, 0, sizeof(st->base.z)); - git_inflate_init(&st->base.z); - st->base.z_state = z_used; + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; break; - case z_done: + case ODB_PACKED_READ_STREAM_DONE: return 0; - case z_error: + case ODB_PACKED_READ_STREAM_ERROR: return -1; - case z_used: + case ODB_PACKED_READ_STREAM_INUSE: break; } @@ -310,20 +309,20 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu unsigned char *mapped; mapped = use_pack(st->pack, &window, - st->pos, &st->base.z.avail_in); + st->pos, &st->z.avail_in); - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - st->base.z.next_in = mapped; - status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); - st->pos += st->base.z.next_in - mapped; - total_read = st->base.z.next_out - (unsigned char *)buf; + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; break; } @@ -336,8 +335,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu * or truncated), then use_pack() catches that and will die(). 
*/ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; return -1; } } @@ -347,7 +346,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu static int close_istream_pack_non_delta(struct odb_read_stream *_st) { struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); return 0; } @@ -384,7 +384,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.read = read_istream_pack_non_delta; stream->base.type = in_pack_type; stream->base.size = size; - stream->base.z_state = z_unused; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; stream->pack = pack; stream->pos = offset; -- GitLab From 385e18810f10ec0ce0a266d25da4e1878c8ce15a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:36 +0100 Subject: [PATCH 11/32] packfile: introduce function to read object info from a store Extract the logic to read object info for a packed object from `do_oid_object_into_extended()` into a standalone function that operates on the packfile store. This function will be used in a subsequent commit. Note that this change allows us to make `find_pack_entry()` an internal implementation detail. As a consequence though we have to move around `packfile_store_freshen_object()` so that it is defined after that function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 29 +++------------------- packfile.c | 71 +++++++++++++++++++++++++++++++++++++++++------------- packfile.h | 12 ++++++++- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/odb.c b/odb.c index 3ec21ef24e..f4cbee4b04 100644 --- a/odb.c +++ b/odb.c @@ -666,8 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -702,8 +700,8 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) @@ -713,8 +711,8 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; } /* @@ -747,25 +745,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. 
- */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, diff --git a/packfile.c b/packfile.c index 40f733dd23..b4bc40d895 100644 --- a/packfile.c +++ b/packfile.c @@ -819,22 +819,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } -int packfile_store_freshen_object(struct packfile_store *store, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (utime(e.p->pack_name, NULL)) - return 0; - e.p->freshened = 1; - return 1; -} - void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -2064,7 +2048,9 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct repository *r, + const struct object_id *oid, + struct pack_entry *e) { struct list_head *pos; @@ -2087,6 +2073,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags UNUSED) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (oi == &blank_oi) + return 0; + + rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + + if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { diff --git a/packfile.h b/packfile.h index 58fcc88e20..0a98bddd81 100644 --- a/packfile.h +++ b/packfile.h @@ -144,6 +144,17 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. 
+ */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -357,7 +368,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. */ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); int has_object_pack(struct repository *r, const struct object_id *oid); -- GitLab From 4c89d31494bff4bde6079a0e0821f1437e37d07b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:37 +0100 Subject: [PATCH 12/32] streaming: rely on object sources to create object stream When creating an object stream we first look up the object info and, if it's present, we call into the respective backend that contains the object to create a new stream for it. This has the consequence that, for loose object source, we basically iterate through the object sources twice: we first discover that the file exists as a loose object in the first place by iterating through all sources. And, once we have discovered it, we again walk through all sources to try and map the object. The same issue will eventually also surface once the packfile store becomes per-object-source. Furthermore, it feels rather pointless to first look up the object only to then try and read it. Refactor the logic to be centered around sources instead. Instead of first reading the object, we immediately ask the source to create the object stream for us. If the object exists we get stream, otherwise we'll try the next source. Like this we only have to iterate through sources once. But even more importantly, this change also helps us to make the whole logic pluggable. The object read stream subsystem does not need to be aware of the different source backends anymore, but eventually it'll only have to call the source's callback function. Note that at the current point in time we aren't fully there yet: - The packfile store still sits on the object database level and is thus agnostic of the sources. - We still have to call into both the packfile store and the loose object source. But both of these issues will soon be addressed. This refactoring results in a slight change to semantics: previously, it was `odb_read_object_info_extended()` that picked the source for us, and it would have favored packed (non-deltified) objects over loose objects. And while we still favor packed over loose objects for a single source with the new logic, we'll now favor a loose object from an earlier source over a packed object from a later source. Ultimately this shouldn't matter though: the stream doesn't indicate to the caller which source it is from and whether it was created from a packed or loose object, so such details are opaque to the caller. And other than that we should be able to assume that two objects with the same object ID should refer to the same content, so the streamed data would be the same, too. 
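To make the resulting control flow easier to follow, here is the new
dispatch in `istream_source()` as introduced by the diff below, with
comments added (a simplified sketch; error handling is unchanged):

    static int istream_source(struct odb_read_stream **out,
                              struct repository *r,
                              const struct object_id *oid)
    {
        struct odb_source *source;

        /* Packed non-delta objects are still served by the ODB-level
         * packfile store for now. */
        if (!open_istream_pack_non_delta(out, r->objects, oid))
            return 0;

        /* Otherwise ask each object source in turn for a loose object. */
        odb_prepare_alternates(r->objects);
        for (source = r->objects->sources; source; source = source->next)
            if (!open_istream_loose(out, source, oid))
                return 0;

        /* Fall back to reading the whole object into memory. */
        return open_istream_incore(out, r, oid);
    }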
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 65 ++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 46fddaf2ca..f0f7d31956 100644 --- a/streaming.c +++ b/streaming.c @@ -204,21 +204,15 @@ static int close_istream_loose(struct odb_read_stream *_st) } static int open_istream_loose(struct odb_read_stream **out, - struct repository *r, + struct odb_source *source, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; - struct odb_source *source; unsigned long mapsize; void *mapped; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (mapped) - break; - } + mapped = odb_source_loose_map_object(source, oid, &mapsize); if (!mapped) return -1; @@ -352,21 +346,25 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st) } static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct repository *r UNUSED, - const struct object_id *oid UNUSED, - struct packed_git *pack, - off_t offset) + struct object_database *odb, + const struct object_id *oid) { struct odb_packed_read_stream *stream; - struct pack_window *window; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; - size_t size; + unsigned long size; - window = NULL; + oi.sizep = &size; + + if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + return -1; - in_pack_type = unpack_object_header(pack, + in_pack_type = unpack_object_header(oi.u.packed.pack, &window, - &offset, + &oi.u.packed.offset, &size); unuse_pack(&window); switch (in_pack_type) { @@ -385,8 +383,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.type = in_pack_type; stream->base.size = size; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = pack; - stream->pos = offset; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; *out = &stream->base; @@ -463,30 +461,15 @@ static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; + struct odb_source *source; - switch (oi.whence) { - case OI_LOOSE: - if (open_istream_loose(out, r, oid) < 0) - break; - return 0; - case OI_PACKED: - if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, - oi.u.packed.offset) < 0) - break; + if (!open_istream_pack_non_delta(out, r->objects, oid)) return 0; - default: - break; - } + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) + if (!open_istream_loose(out, source, oid)) + return 0; return open_istream_incore(out, r, oid); } -- GitLab From c26da3446e98ad4aa98ec9154c70c6fd35cb9ad6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:38 +0100 Subject: [PATCH 13/32] streaming: get rid of `the_repository` Subsequent commits will move the backend-specific logic of object streaming into their respective 
subsystems. These subsystems have gotten rid of `the_repository` already, but we still use it in two locations in the streaming subsystem. Prepare for the move by fixing those two cases. Converting the logic in `open_istream_pack_non_delta()` is trivial as we already got the object database as input. But for `stream_blob_to_fd()` we have to add a new parameter to make it accessible. So, as we already have to adjust all callers anyway, rename the function to `odb_stream_blob_to_fd()` to indicate it's part of the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/fsck.c | 3 ++- builtin/log.c | 4 ++-- entry.c | 2 +- parallel-checkout.c | 3 ++- streaming.c | 13 +++++++------ streaming.h | 18 +++++++++++++++++- 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837..120d626d66 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index b1a650c673..1a348d43c2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1)) + if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); diff --git a/builtin/log.c b/builtin/log.c index c8319b8af3..e7b83a6e00 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/entry.c b/entry.c index cae02eb503..38dfe670f7 100644 --- a/entry.c +++ b/entry.c @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6..1cb6701b92 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + 
&pc_item->ce->oid, filter, 1)) { /* On error, reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/streaming.c b/streaming.c index f0f7d31956..807a6e03a8 100644 --- a/streaming.c +++ b/streaming.c @@ -2,8 +2,6 @@ * Copyright (c) 2011, Google Inc. */ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "convert.h" #include "environment.h" @@ -359,7 +357,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, @@ -518,8 +516,11 @@ struct odb_read_stream *open_istream(struct repository *r, return st; } -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) { struct odb_read_stream *st; enum object_type type; @@ -527,7 +528,7 @@ int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter ssize_t kept = 0; int result = -1; - st = open_istream(the_repository, oid, &type, &sz, filter); + st = open_istream(odb->repo, oid, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter); diff --git a/streaming.h b/streaming.h index f5ff5d7ac9..148f6b3069 100644 --- a/streaming.h +++ b/streaming.h @@ -6,6 +6,7 @@ #include "object.h" +struct object_database; /* opaque */ struct odb_read_stream; struct stream_filter; @@ -16,6 +17,21 @@ struct odb_read_stream *open_istream(struct repository *, const struct object_id int close_istream(struct odb_read_stream *); ssize_t read_istream(struct odb_read_stream *, void *, size_t); -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); +/* + * Look up the object by its ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. + */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); #endif /* STREAMING_H */ -- GitLab From ffc9a3448500caa50766876ef2169e0f26ad3b3c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:39 +0100 Subject: [PATCH 14/32] streaming: make the `odb_read_stream` definition public Subsequent commits will move the backend-specific logic of setting up an object read stream into the specific subsystems. As the backends are now the ones that are responsible for allocating the stream they'll need to have the stream definition available to them. Make the stream definition public to prepare for this. 
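To illustrate why the backends need the definition, a backend-specific
stream embeds the base structure as its first member and wires up the
callbacks roughly like this (the `example_*` names are placeholders;
the real implementations are the loose and packed read streams in
later patches):

    struct example_read_stream {
        struct odb_read_stream base; /* must stay the first member */
        /* backend-specific state, e.g. a git_zstream, follows */
    };

    static int example_close(struct odb_read_stream *st);
    static ssize_t example_read(struct odb_read_stream *st, char *buf, size_t sz);

    static int example_open(struct odb_read_stream **out,
                            struct odb_source *source,
                            const struct object_id *oid)
    {
        struct example_read_stream *st;

        CALLOC_ARRAY(st, 1);
        st->base.close = example_close;
        st->base.read = example_read;
        /* base.type and base.size get filled in from the object header */

        *out = &st->base;
        return 0;
    }

Because `base` is the first member, the callbacks can cast the
`struct odb_read_stream *` they receive back to the backend's type.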
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 11 ----------- streaming.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/streaming.c b/streaming.c index 807a6e03a8..0635b7c12e 100644 --- a/streaming.c +++ b/streaming.c @@ -12,19 +12,8 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*close_istream_fn)(struct odb_read_stream *); -typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); - #define FILTER_BUFFER (1024*16) -struct odb_read_stream { - close_istream_fn close; - read_istream_fn read; - - enum object_type type; - unsigned long size; /* inflated size of full object */ -}; - /***************************************************************** * * Filtered stream diff --git a/streaming.h b/streaming.h index 148f6b3069..acfdef1598 100644 --- a/streaming.h +++ b/streaming.h @@ -7,10 +7,23 @@ #include "object.h" struct object_database; -/* opaque */ struct odb_read_stream; struct stream_filter; +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + struct odb_read_stream *open_istream(struct repository *, const struct object_id *, enum object_type *, unsigned long *, struct stream_filter *); -- GitLab From bc30a2f5dff6dd39966819ca3771ab5e9e072123 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:40 +0100 Subject: [PATCH 15/32] streaming: move logic to read loose objects streams into backend Move the logic to read loose object streams into the respective subsystem. This allows us to make a couple of function declarations private. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++--- object-file.h | 42 ++----------- streaming.c | 133 +--------------------------------------- 3 files changed, 164 insertions(+), 178 deletions(-) diff --git a/object-file.c b/object-file.c index b62b21a452..8c67847fea 100644 --- a/object-file.c +++ b/object-file.c @@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; int fd = open_loose_object(source->loose, oid, &p); @@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source, return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. 
+ */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting " \0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the populate the "typep" in the "struct + * object_info". It will be OBJ_BAD if the object type is unknown. The + * parsed can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose) loose_object_map_clear(&loose->map); free(loose); } + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. 
+ */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index eeffa67bbd..1229d5f675 100644 --- a/object-file.h +++ b/object-file.h @@ -16,6 +16,8 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; struct odb_source_loose { @@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, int flags); -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size); +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); /* * Return true iff an object database source has a loose object @@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting " \0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest(). 
- */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/streaming.c b/streaming.c index 0635b7c12e..d5acc1c396 100644 --- a/streaming.c +++ b/streaming.c @@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -struct odb_loose_read_stream { - struct odb_read_stream base; - git_zstream z; - enum { - ODB_LOOSE_READ_STREAM_INUSE, - ODB_LOOSE_READ_STREAM_DONE, - ODB_LOOSE_READ_STREAM_ERROR, - } z_state; - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; -}; - -static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_LOOSE_READ_STREAM_DONE: - return 0; - case ODB_LOOSE_READ_STREAM_ERROR: - return -1; - default: - break; - } - - if (st->hdr_used < st->hdr_avail) { - size_t to_copy = st->hdr_avail - st->hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->hdr + st->hdr_used, to_copy); - st->hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_DONE; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct odb_read_stream *_st) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - return 0; -} - -static int open_istream_loose(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_loose_read_stream *st; - unsigned long mapsize; - void *mapped; - - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (!mapped) - return -1; - - /* - * Note: we must allocate this structure early even though we may still - * fail. This is because we need to initialize the zlib stream, and it - * is not possible to copy the stream around after the fact because it - * has self-referencing pointers. 
- */ - CALLOC_ARRAY(st, 1); - - switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, - sizeof(st->hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - - oi.sizep = &st->base.size; - oi.typep = &st->base.type; - - if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) - goto error; - - st->mapped = mapped; - st->mapsize = mapsize; - st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->z.total_out; - st->z_state = ODB_LOOSE_READ_STREAM_INUSE; - st->base.close = close_istream_loose; - st->base.read = read_istream_loose; - - *out = &st->base; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - free(st); - return -1; -} - - /***************************************************************** * * Non-delta packed object stream @@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out, odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) - if (!open_istream_loose(out, source, oid)) + if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; return open_istream_incore(out, r, oid); -- GitLab From 8c1b84bc977bf1e4515efe0386de87257ec28689 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:41 +0100 Subject: [PATCH 16/32] streaming: move logic to read packed objects streams into backend Move the logic to read packed object streams into the respective subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ streaming.c | 136 +--------------------------------------------------- 3 files changed, 134 insertions(+), 135 deletions(-) diff --git a/packfile.c b/packfile.c index b4bc40d895..ad56ce0b90 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -2406,3 +2407,130 @@ void packfile_store_close(struct packfile_store *store) close_pack(p); } } + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, &st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose 
object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(store->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; +} diff --git a/packfile.h b/packfile.h index 0a98bddd81..3fcc5ae6e0 100644 --- a/packfile.h +++ b/packfile.h @@ -8,6 +8,7 @@ /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct hashmap_entry packmap_ent; @@ -144,6 +145,10 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); + /* * Try to read the object identified by its ID from the object store and * populate the object info with its data. 
Returns 1 in case the object was diff --git a/streaming.c b/streaming.c index d5acc1c396..3140728a70 100644 --- a/streaming.c +++ b/streaming.c @@ -114,140 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -struct odb_packed_read_stream { - struct odb_read_stream base; - struct packed_git *pack; - git_zstream z; - enum { - ODB_PACKED_READ_STREAM_UNINITIALIZED, - ODB_PACKED_READ_STREAM_INUSE, - ODB_PACKED_READ_STREAM_DONE, - ODB_PACKED_READ_STREAM_ERROR, - } z_state; - off_t pos; -}; - -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, - size_t sz) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_PACKED_READ_STREAM_UNINITIALIZED: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_INUSE; - break; - case ODB_PACKED_READ_STREAM_DONE: - return 0; - case ODB_PACKED_READ_STREAM_ERROR: - return -1; - case ODB_PACKED_READ_STREAM_INUSE: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->pack, &window, - st->pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_DONE; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). 
- */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct odb_read_stream *_st) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) - git_inflate_end(&st->z); - return 0; -} - -static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct object_database *odb, - const struct object_id *oid) -{ - struct odb_packed_read_stream *stream; - struct pack_window *window = NULL; - struct object_info oi = OBJECT_INFO_INIT; - enum object_type in_pack_type; - unsigned long size; - - oi.sizep = &size; - - if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || - oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(odb->repo) >= size) - return -1; - - in_pack_type = unpack_object_header(oi.u.packed.pack, - &window, - &oi.u.packed.offset, - &size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - - CALLOC_ARRAY(stream, 1); - stream->base.close = close_istream_pack_non_delta; - stream->base.read = read_istream_pack_non_delta; - stream->base.type = in_pack_type; - stream->base.size = size; - stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = oi.u.packed.pack; - stream->pos = oi.u.packed.offset; - - *out = &stream->base; - - return 0; -} - - /***************************************************************** * * In-core stream @@ -319,7 +185,7 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!open_istream_pack_non_delta(out, r->objects, oid)) + if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) return 0; odb_prepare_alternates(r->objects); -- GitLab From 378ec56beba161abbef6e2c87d9bc2ac43c355f3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:42 +0100 Subject: [PATCH 17/32] streaming: refactor interface to be object-database-centric Refactor the streaming interface to be centered around object databases instead of centered around the repository. Rename the functions accordingly. 
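A typical caller after the rename follows this pattern (a sketch only;
`repo`, `oid` and `consume()` stand in for the caller's own context,
compare for example the archive-tar.c hunk below):

    struct odb_read_stream *st;
    enum object_type type;
    unsigned long size;
    char buf[16384];
    ssize_t readlen;

    st = odb_read_stream_open(repo->objects, oid, &type, &size, NULL);
    if (!st)
        return error(_("cannot stream blob %s"), oid_to_hex(oid));
    for (;;) {
        readlen = odb_read_stream_read(st, buf, sizeof(buf));
        if (readlen <= 0)
            break;
        consume(buf, readlen); /* hand the chunk to the caller's sink */
    }
    odb_read_stream_close(st);

A negative `readlen` signals a read error and zero signals EOF; both
end the loop, just like in the converted callers.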
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 6 +++--- archive-zip.c | 12 ++++++------ builtin/index-pack.c | 8 ++++---- builtin/pack-objects.c | 14 +++++++------- object-file.c | 8 ++++---- streaming.c | 44 +++++++++++++++++++++--------------------- streaming.h | 30 +++++++++++++++++++++++----- 7 files changed, 71 insertions(+), 51 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index dc1eda09e0..4d87b28504 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -135,16 +135,16 @@ static int stream_blocked(struct repository *r, const struct object_id *oid) char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index 40a9c93ff9..c44684aebc 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -348,8 +348,8 @@ static int write_zip_entry(struct archiver_args *args, if (!buffer) { enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = odb_read_stream_open(args->repo->objects, oid, + &type, &size, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); @@ -429,7 +429,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +439,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +462,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +486,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5f90f12f92..fb76ef0f4c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -807,15 +807,15 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL); if (!data.st) return -1; if (size != entry->size || type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } diff --git 
a/builtin/pack-objects.c b/builtin/pack-objects.c index c693d948e1..1353c2384c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct odb_read_stream *st, struct ha for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -520,8 +520,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL)) != NULL) buf = NULL; else { buf = odb_read_object(the_repository->objects, @@ -577,7 +577,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +591,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +601,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +609,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); diff --git a/object-file.c b/object-file.c index 8c67847fea..9ba40a848c 100644 --- a/object-file.c +++ b/object-file.c @@ -139,7 +139,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); if (!st) return -1; @@ -151,10 +151,10 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,7 +162,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? 
-1 : 0; } diff --git a/streaming.c b/streaming.c index 3140728a70..06993a751c 100644 --- a/streaming.c +++ b/streaming.c @@ -35,7 +35,7 @@ static int close_istream_filtered(struct odb_read_stream *_fs) { struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; free_stream_filter(fs->filter); - return close_istream(fs->upstream); + return odb_read_stream_close(fs->upstream); } static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, @@ -87,7 +87,7 @@ static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, /* refill the input from the upstream */ if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); if (fs->i_end < 0) return -1; if (fs->i_end) @@ -149,7 +149,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_ } static int open_istream_incore(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -163,7 +163,7 @@ static int open_istream_incore(struct odb_read_stream **out, oi.typep = &stream.base.type; oi.sizep = &stream.base.size; oi.contentp = (void **)&stream.buf; - ret = odb_read_object_info_extended(r->objects, oid, &oi, + ret = odb_read_object_info_extended(odb, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; @@ -180,47 +180,47 @@ static int open_istream_incore(struct odb_read_stream **out, *****************************************************************************/ static int istream_source(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct odb_source *source; - if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) + if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) return 0; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; - return open_istream_incore(out, r, oid); + return open_istream_incore(out, odb, oid); } /**************************************************************** * Users of streaming interface ****************************************************************/ -int close_istream(struct odb_read_stream *st) +int odb_read_stream_close(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct odb_read_stream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { struct odb_read_stream *st; - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(&st, r, real); + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); if (ret) return NULL; @@ -229,7 +229,7 @@ struct odb_read_stream *open_istream(struct repository *r, /* Add "&& !is_null_stream_filter(filter)" 
for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { - close_istream(st); + odb_read_stream_close(st); return NULL; } st = nst; @@ -252,7 +252,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, ssize_t kept = 0; int result = -1; - st = open_istream(odb->repo, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter); @@ -263,7 +263,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, for (;;) { char buf[1024 * 16]; ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) goto close_and_exit; @@ -294,6 +294,6 @@ int odb_stream_blob_to_fd(struct object_database *odb, result = 0; close_and_exit: - close_istream(st); + odb_read_stream_close(st); return result; } diff --git a/streaming.h b/streaming.h index acfdef1598..7cb55213b7 100644 --- a/streaming.h +++ b/streaming.h @@ -24,11 +24,31 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ }; -struct odb_read_stream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct odb_read_stream *); -ssize_t read_istream(struct odb_read_stream *, void *, size_t); +/* + * Create a new object stream for the given object database. Populates the type + * and size pointers with the object's info. An optional filter can be used to + * transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. + */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); /* * Look up the object by its ID and write the full contents to the file -- GitLab From 1599b68d5e960a12f5ac624f81c70ece317db5a6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:43 +0100 Subject: [PATCH 18/32] streaming: move into object database subsystem The "streaming" terminology is somewhat generic, so it may not be immediately obvious that "streaming.{c,h}" is specific to the object database. Rectify this by moving it into the "odb/" directory so that it can be immediately attributed to the object subsystem. 
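For callers nothing changes apart from the include path and the build
system entries, for example:

    -#include "streaming.h"
    +#include "odb/streaming.h"

as shown in the conversions below.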
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 2 +- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/cat-file.c | 2 +- builtin/fsck.c | 2 +- builtin/index-pack.c | 2 +- builtin/log.c | 2 +- builtin/pack-objects.c | 2 +- entry.c | 2 +- meson.build | 2 +- object-file.c | 2 +- streaming.c => odb/streaming.c | 2 +- streaming.h => odb/streaming.h | 0 packfile.c | 2 +- parallel-checkout.c | 2 +- 15 files changed, 14 insertions(+), 14 deletions(-) rename streaming.c => odb/streaming.c (99%) rename streaming.h => odb/streaming.h (100%) diff --git a/Makefile b/Makefile index 7e0f77e298..6d8dcc4622 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 4d87b28504..494b9f0667 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" #include "run-command.h" #include "write-or-die.h" diff --git a/archive-zip.c b/archive-zip.c index c44684aebc..a0bdc2fe3b 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" -#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 120d626d66..505ddaa12f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" diff --git a/builtin/fsck.c b/builtin/fsck.c index 1a348d43c2..c7d2eea287 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" diff --git a/builtin/index-pack.c b/builtin/index-pack.c index fb76ef0f4c..581023495f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" diff --git a/builtin/log.c b/builtin/log.c index e7b83a6e00..d4cf9c59c8 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include 
"version.h" #include "mailmap.h" #include "progress.h" diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1353c2384c..f109e26786 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" diff --git a/entry.c b/entry.c index 38dfe670f7..7817aee362 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" diff --git a/meson.build b/meson.build index 1f95a06edb..fc82929b37 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +491,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/object-file.c b/object-file.c index 9ba40a848c..9601fdb12d 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" #include "setup.h" -#include "streaming.h" #include "tempfile.h" #include "tmp-objdir.h" diff --git a/streaming.c b/odb/streaming.c similarity index 99% rename from streaming.c rename to odb/streaming.c index 06993a751c..7ef58adaa2 100644 --- a/streaming.c +++ b/odb/streaming.c @@ -5,10 +5,10 @@ #include "git-compat-util.h" #include "convert.h" #include "environment.h" -#include "streaming.h" #include "repository.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "packfile.h" diff --git a/streaming.h b/odb/streaming.h similarity index 100% rename from streaming.h rename to odb/streaming.h diff --git a/packfile.c b/packfile.c index ad56ce0b90..7a16aaa90d 100644 --- a/packfile.c +++ b/packfile.c @@ -20,7 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" -#include "streaming.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" diff --git a/parallel-checkout.c b/parallel-checkout.c index 1cb6701b92..0bf4bd6d4a 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" -- GitLab From 7b940286527ec2175dffbb317f47e080bb37cf3e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:44 +0100 Subject: [PATCH 19/32] streaming: drop redundant type and size pointers In the preceding commits we have turned `struct odb_read_stream` into a publicly visible structure. Furthermore, this structure now contains the type and size of the object that we are about to stream. 
Consequently, the out-pointers that we used before to propagate the type and size of the streamed object are now somewhat redundant with the data contained in the structure itself. Drop these out-pointers and adapt callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 4 +--- archive-zip.c | 5 ++--- builtin/index-pack.c | 7 ++----- builtin/pack-objects.c | 6 ++++-- object-file.c | 6 ++---- odb/streaming.c | 10 ++-------- odb/streaming.h | 7 ++----- 7 files changed, 15 insertions(+), 30 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 494b9f0667..0fc70d13a8 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -130,12 +130,10 @@ static void write_trailer(void) static int stream_blocked(struct repository *r, const struct object_id *oid) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; char buf[BLOCKSIZE]; ssize_t readlen; - st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { diff --git a/archive-zip.c b/archive-zip.c index a0bdc2fe3b..97ea8d60d6 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = odb_read_stream_open(args->repo->objects, oid, - &type, &size, NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 581023495f..b01cb77f4a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,11 +805,10 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f109e26786..0d1d6995bf 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -521,9 +521,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL)) != NULL) + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); diff --git a/object-file.c b/object-file.c index 9601fdb12d..12177a7dd7 100644 --- a/object-file.c +++ b/object-file.c @@ -132,19 +132,17 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int 
stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. */ r->hash_algo->init_fn(&c); diff --git a/odb/streaming.c b/odb/streaming.c index 7ef58adaa2..745cd486fb 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -214,8 +214,6 @@ ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter) { struct odb_read_stream *st; @@ -235,8 +233,6 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, st = nst; } - *size = st->size; - *type = st->type; return st; } @@ -247,18 +243,16 @@ int odb_stream_blob_to_fd(struct object_database *odb, int can_seek) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; ssize_t kept = 0; int result = -1; - st = odb_read_stream_open(odb, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, filter); if (!st) { if (filter) free_stream_filter(filter); return result; } - if (type != OBJ_BLOB) + if (st->type != OBJ_BLOB) goto close_and_exit; for (;;) { char buf[1024 * 16]; diff --git a/odb/streaming.h b/odb/streaming.h index 7cb55213b7..c7861f7e13 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -25,16 +25,13 @@ struct odb_read_stream { }; /* - * Create a new object stream for the given object database. Populates the type - * and size pointers with the object's info. An optional filter can be used to - * transform the object's content. + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. * * Returns the stream on success, a `NULL` pointer otherwise. */ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter); /* -- GitLab From fc98532f06d2a18ce219f165830a288f1de06c98 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 08:19:58 +0100 Subject: [PATCH 20/32] builtin/gc: fix condition for whether to write commit graphs When performing auto-maintenance we check whether commit graphs need to be generated by counting the number of commits that are reachable by any reference, but not covered by a commit graph. This search is performed by iterating through all references and then doing a depth-first search until we have found enough commits that are not present in the commit graph. 
This logic has a memory leak though: Direct leak of 16 byte(s) in 1 object(s) allocated from: #0 0x55555562e433 in malloc (git+0xda433) #1 0x555555964322 in do_xmalloc ../wrapper.c:55:8 #2 0x5555559642e6 in xmalloc ../wrapper.c:76:9 #3 0x55555579bf29 in commit_list_append ../commit.c:1872:35 #4 0x55555569f160 in dfs_on_ref ../builtin/gc.c:1165:4 #5 0x5555558c33fd in do_for_each_ref_iterator ../refs/iterator.c:431:12 #6 0x5555558af520 in do_for_each_ref ../refs.c:1828:9 #7 0x5555558ac317 in refs_for_each_ref ../refs.c:1833:9 #8 0x55555569e207 in should_write_commit_graph ../builtin/gc.c:1188:11 #9 0x55555569c915 in maintenance_is_needed ../builtin/gc.c:3492:8 #10 0x55555569b76a in cmd_maintenance ../builtin/gc.c:3542:9 #11 0x55555575166a in run_builtin ../git.c:506:11 #12 0x5555557502f0 in handle_builtin ../git.c:779:9 #13 0x555555751127 in run_argv ../git.c:862:4 #14 0x55555575007b in cmd_main ../git.c:984:19 #15 0x5555557523aa in main ../common-main.c:9:11 #16 0x7ffff7a2a4d7 in __libc_start_call_main (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x2a4d7) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) #17 0x7ffff7a2a59a in __libc_start_main@GLIBC_2.2.5 (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x2a59a) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) #18 0x5555555f0934 in _start (git+0x9c934) The root cause of this memory leak is our use of `commit_list_append()`. This function expects as parameters the item to append and the _tail_ of the list to append to. This tail will then be overwritten with the new tail of the list so that it can be used in subsequent calls. But we call it with `commit_list_append(parent->item, &stack)`, so we end up losing everything but the new item. This issue only surfaces when counting merge commits. Besides being a memory leak, it also shows that we're in fact miscounting, as we only follow the last parent of a merge. All previous parents are discarded, so their ancestors will be disregarded unless they are reachable via another reference. While crafting a test case for the issue I was puzzled that I couldn't establish the exact boundary at which the auto-condition would be fulfilled. As it turns out, there's another bug: if an object is at the tip of any reference we don't mark it as seen. Consequently, if it is reachable via any other reference, we'd count that object twice. Fix both of these bugs so that we properly count objects without leaking any memory.
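To make the difference between the two list helpers concrete, here is a small sketch; it is only an illustration and not part of the change itself, with `a` and `b` standing in for arbitrary `struct commit` pointers:

    struct commit_list *stack = NULL;

    /* commit_list_insert() pushes onto the head, so earlier entries survive: */
    commit_list_insert(a, &stack);              /* stack: a */
    commit_list_insert(b, &stack);              /* stack: b -> a */

    /*
     * commit_list_append() instead expects a pointer to the current tail
     * and hands back the tail to use for the next call:
     */
    struct commit_list *list = NULL, **tail = &list;
    tail = commit_list_append(a, tail);         /* list: a */
    tail = commit_list_append(b, tail);         /* list: a -> b */

    /*
     * Passing &stack to commit_list_append() on every call resets the head
     * each time, dropping whatever was queued before: that is both the
     * leak and the reason we miss commits during the walk.
     */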
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 8 +++++--- t/t7900-maintenance.sh | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 92c6e7b954..17ff68cbd9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1130,8 +1130,10 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) return 0; commit = lookup_commit(the_repository, maybe_peeled); - if (!commit) + if (!commit || commit->object.flags & SEEN) return 0; + commit->object.flags |= SEEN; + if (repo_parse_commit(the_repository, commit) || commit_graph_position(commit) != COMMIT_NOT_FROM_GRAPH) return 0; @@ -1141,7 +1143,7 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) if (data->num_not_in_graph >= data->limit) return 1; - commit_list_append(commit, &stack); + commit_list_insert(commit, &stack); while (!result && stack) { struct commit_list *parent; @@ -1162,7 +1164,7 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) break; } - commit_list_append(parent->item, &stack); + commit_list_insert(parent->item, &stack); } } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 6b36f52df7..a2b4403595 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -206,6 +206,31 @@ test_expect_success 'commit-graph auto condition' ' test_subcommand $COMMIT_GRAPH_WRITE err && test_grep "is not a valid task" err -- GitLab From 891588168607a1bafe4d0a1038864d655c35e646 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 08:19:59 +0100 Subject: [PATCH 21/32] odb: properly close sources before freeing them It is possible to hit a memory leak when reading data from a submodule via git-grep(1): Direct leak of 192 byte(s) in 1 object(s) allocated from: #0 0x55555562e726 in calloc (git+0xda726) #1 0x555555964734 in xcalloc ../wrapper.c:154:8 #2 0x555555835136 in load_multi_pack_index_one ../midx.c:135:2 #3 0x555555834fd6 in load_multi_pack_index ../midx.c:382:6 #4 0x5555558365b6 in prepare_multi_pack_index_one ../midx.c:716:17 #5 0x55555586c605 in packfile_store_prepare ../packfile.c:1103:3 #6 0x55555586c90c in packfile_store_reprepare ../packfile.c:1118:2 #7 0x5555558546b3 in odb_reprepare ../odb.c:1106:2 #8 0x5555558539e4 in do_oid_object_info_extended ../odb.c:715:4 #9 0x5555558533d1 in odb_read_object_info_extended ../odb.c:862:8 #10 0x5555558540bd in odb_read_object ../odb.c:920:6 #11 0x55555580a330 in grep_source_load_oid ../grep.c:1934:12 #12 0x55555580a13a in grep_source_load ../grep.c:1986:10 #13 0x555555809103 in grep_source_is_binary ../grep.c:2014:7 #14 0x555555807574 in grep_source_1 ../grep.c:1625:8 #15 0x555555807322 in grep_source ../grep.c:1837:10 #16 0x5555556a5c58 in run ../builtin/grep.c:208:10 #17 0x55555562bb42 in void* ThreadStartFunc(void*) lsan_interceptors.cpp.o #18 0x7ffff7a9a979 in start_thread (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x9a979) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) #19 0x7ffff7b22d2b in __GI___clone3 (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x122d2b) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) The root cause of this leak is the way we set up and release the submodule: 1. We use `repo_submodule_init()` to initialize a new repository. This repository is stored in `repos_to_free`. 2. We now read data from the submodule repository. 3. We then call `repo_clear()` on the submodule repositories. 4. `repo_clear()` calls `odb_free()`. 5.
`odb_free()` calls `odb_free_sources()` followed by `odb_close()`. The issue here is the 5th step: we call `odb_free_sources()` _before_ we call `odb_close()`. But `odb_free_sources()` already frees all sources, so the logic that closes them in `odb_close()` now becomes a no-op. As a consequence, we never explicitly close sources at all. Fix the leak by closing the store before we free the sources. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/odb.c b/odb.c index dc8f292f3d..8e67afe185 100644 --- a/odb.c +++ b/odb.c @@ -1132,13 +1132,13 @@ void odb_free(struct object_database *o) oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); + odb_close(o); odb_free_sources(o); for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - odb_close(o); packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); -- GitLab From 098fd0d221fee2b9fa8782d6bd0d22b3d6dca8f6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 10:54:54 +0100 Subject: [PATCH 22/32] Start tracking packfiles per object database source Hi, the `struct packfile_store` tracks packfiles we have in the repository so that we can look up objects stored therein. Right now, the packfile store is tracked on the object database level -- each object database has exactly one packfile store. Consequently, we track packfiles that are part of different object database sources via the same packfile store. This patch series refactors this so that we instead have one packfile store per ODB source. This means that access to any object, regardless of whether it is stored in a packfile or in a loose object, is always done via its owning source. This is the last step required for pluggable object databases: all object access is routed through sources, and we can thus now abstract these sources and then plug in a different implementation. Of course, these abstractions are still very leaky, and we still reach into the implementation details in a bunch of files. But this is something that will be addressed over subsequent steps. This series is built on top of d8af7cadaa (The eighth batch, 2025-12-14) with the following two series merged into it: - ps/object-read-stream at 7b94028652 (streaming: drop redundant type and size pointers, 2025-11-23). - ps/odb-misc-fixes at 8915881686 (odb: properly close sources before freeing them, 2025-12-11). The latter topic isn't in "next" yet, but the second version of this topic only contains two small memory leak fixes. I don't expect it to change, and I guess it should land soonish anyway. Thanks! Patrick To: git@vger.kernel.org --- Changes in v2: - EDITME: describe what is new in this series revision. - EDITME: use bulletpoints and terse descriptions. - Link to v1: https://lore.kernel.org/r/20251215-b4-pks-pack-store-via-source-v1-0-433aac465295@pks.im --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. 
{ "series": { "revision": 2, "change-id": "20251201-b4-pks-pack-store-via-source-fd43dc0765a7", "prefixes": [], "history": { "v1": [ "20251215-b4-pks-pack-store-via-source-v1-0-433aac465295@pks.im" ] } } } -- GitLab From 01c5fba2c13b9c6b7468038e26afda7c35412d8c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 7 Nov 2025 10:53:52 +0100 Subject: [PATCH 23/32] packfile: create store via its owning source In subsequent patches we're about to move the packfile store from the object database layer into the object database source layer. Once done, we'll have one packfile store per source, where the source is owning the store. Prepare for this future and refactor `packfile_store_new()` to be initialized via an object database source instead of via the object database itself. This refactoring leads to a weird in-between state where the store is owned by the object database but created via the source. But this makes subsequent refactorings easier because we can now start to access the owning source of a given store. Signed-off-by: Patrick Steinhardt --- odb.c | 2 +- packfile.c | 20 ++++++++++---------- packfile.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index 45b6600800..94144a69f5 100644 --- a/odb.c +++ b/odb.c @@ -1056,7 +1056,6 @@ struct object_database *odb_new(struct repository *repo, memset(o, 0, sizeof(*o)); o->repo = repo; - o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); @@ -1065,6 +1064,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->packfiles = packfile_store_new(o->sources); free(to_free); diff --git a/packfile.c b/packfile.c index c88bd92619..0a05a10daa 100644 --- a/packfile.c +++ b/packfile.c @@ -876,7 +876,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, p = strmap_get(&store->packs_by_path, key.buf); if (!p) { - p = add_packed_git(store->odb->repo, idx_path, + p = add_packed_git(store->source->odb->repo, idx_path, strlen(idx_path), local); if (p) packfile_store_add_pack(store, p); @@ -1068,8 +1068,8 @@ void packfile_store_prepare(struct packfile_store *store) if (store->initialized) return; - odb_prepare_alternates(store->odb); - for (source = store->odb->sources; source; source = source->next) { + odb_prepare_alternates(store->source->odb); + for (source = store->source->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } @@ -1092,7 +1092,7 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->odb->sources; source; source = source->next) { + for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -2121,7 +2121,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2142,7 +2142,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if 
(!find_pack_entry(store->source->odb->repo, oid, &e)) return 1; /* @@ -2152,7 +2152,7 @@ int packfile_store_read_object_info(struct packfile_store *store, if (oi == &blank_oi) return 0; - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, oid); return -1; @@ -2411,11 +2411,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -struct packfile_store *packfile_store_new(struct object_database *odb) +struct packfile_store *packfile_store_new(struct odb_source *source) { struct packfile_store *store; CALLOC_ARRAY(store, 1); - store->odb = odb; + store->source = source; strmap_init(&store->packs_by_path); return store; } @@ -2534,7 +2534,7 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, if (packfile_store_read_object_info(store, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(store->odb->repo) >= size) + repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, diff --git a/packfile.h b/packfile.h index 59d162a3f4..33cc1c1654 100644 --- a/packfile.h +++ b/packfile.h @@ -77,7 +77,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, * A store that manages packfiles for a given object database. */ struct packfile_store { - struct object_database *odb; + struct odb_source *source; /* * The list of packfiles in the order in which they have been most @@ -129,9 +129,9 @@ struct packfile_store { /* * Allocate and initialize a new empty packfile store for the given object - * database. + * database source. */ -struct packfile_store *packfile_store_new(struct object_database *odb); +struct packfile_store *packfile_store_new(struct odb_source *source); /* * Free the packfile store and all its associated state. All packfiles -- GitLab From 2b7c0fb30793fa221c20446a8dcda395ffe081f5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 09:48:32 +0200 Subject: [PATCH 24/32] packfile: pass source to `prepare_pack()` When preparing a packfile we pass various pieces attached to the pack's object database source via the `struct prepare_pack_data`. Refactor this code to instead pass in the source directly. This reduces the number of variables we need to pass and allows for a subsequent refactoring where we start to prepare the pack via the source. 
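As a rough sketch of the resulting shape (for illustration only; the real change is in the diff below), the callback data now carries just the source and the garbage list, and the callback reaches through the source for everything else:

    struct prepare_pack_data data = {
        .source = source,    /* replaces the old .r, .local and .m members */
        .garbage = &garbage,
    };

    /* ... and inside prepare_pack(): */
    if (!(data->source->midx &&
          midx_contains_pack(data->source->midx, file_name)))
        /* load the pack via data->source, using data->source->local */;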
Signed-off-by: Patrick Steinhardt --- packfile.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packfile.c b/packfile.c index 0a05a10daa..ab86afa01d 100644 --- a/packfile.c +++ b/packfile.c @@ -975,10 +975,8 @@ void for_each_file_in_pack_dir(const char *objdir, } struct prepare_pack_data { - struct repository *r; + struct odb_source *source; struct string_list *garbage; - int local; - struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -988,10 +986,10 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->m && midx_contains_pack(data->m, file_name))) { + !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->r->objects->packfiles, - trimmed_path, data->local); + packfile_store_load_pack(data->source->odb->packfiles, + trimmed_path, data->source->local); free(trimmed_path); } @@ -1020,10 +1018,8 @@ static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { - .m = source->midx, - .r = source->odb->repo, + .source = source, .garbage = &garbage, - .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); -- GitLab From d49080f286348aac51e2e57c5f0854d12e7d0f7b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:23:43 +0200 Subject: [PATCH 25/32] packfile: refactor kept-pack cache to work with packfile stores The kept pack cache is a cache of packfiles that are marked as kept either via an accompanying ".kept" file or via an in-memory flag. The cache can be retrieved via `kept_pack_cache()`, where one needs to pass in a repository. Ultimately though the kept-pack cache is a property of the packfile store, and this causes problems in a subsequent commit where we want to move down the packfile store to be a per-object-source entity. Prepare for this and refactor the kept-pack cache to work on top of a packfile store instead. While at it, rename both the function and flags specific to the kept-pack cache so that they can be properly attributed to the respective subsystems. 
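For illustration, this is roughly how a caller consumes the renamed API after this patch; a sketch only, with `store` assumed to be a `struct packfile_store *`. The cache is a NULL-terminated array that gets rebuilt whenever the requested flags change:

    struct packed_git **cache =
        packfile_store_get_kept_pack_cache(store,
                                           KEPT_PACK_ON_DISK | KEPT_PACK_IN_CORE);

    for (; *cache; cache++) {
        struct packed_git *p = *cache;
        /* p has an on-disk ".keep" file or is marked as kept in core */
    }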
Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 12 ++++++------ packfile.c | 37 ++++++++++++++++++++----------------- packfile.h | 25 +++++++++++++++++-------- reachable.c | 2 +- revision.c | 8 ++++---- 5 files changed, 48 insertions(+), 36 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1ce8d6ee21..e86b8f387a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,9 +1529,9 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; off_t ofs; uint32_t candidate_mtime; @@ -1624,9 +1624,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= KEPT_PACK_IN_CORE; /* * If the object is in a pack that we want to ignore, *and* we @@ -3931,7 +3931,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) * an optimization during delta selection. */ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; @@ -4030,7 +4030,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS); + return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); } static int cruft_include_check(struct commit *commit, void *data) diff --git a/packfile.c b/packfile.c index ab86afa01d..191344eb1c 100644 --- a/packfile.c +++ b/packfile.c @@ -2164,25 +2164,26 @@ int packfile_store_read_object_info(struct packfile_store *store, return 0; } -static void maybe_invalidate_kept_pack_cache(struct repository *r, +static void maybe_invalidate_kept_pack_cache(struct packfile_store *store, unsigned flags) { - if (!r->objects->packfiles->kept_cache.packs) + if (!store->kept_cache.packs) return; - if (r->objects->packfiles->kept_cache.flags == flags) + if (store->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); - r->objects->packfiles->kept_cache.flags = 0; + FREE_AND_NULL(store->kept_cache.packs); + store->kept_cache.flags = 0; } -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags) { - maybe_invalidate_kept_pack_cache(r, flags); + maybe_invalidate_kept_pack_cache(store, flags); - if (!r->objects->packfiles->kept_cache.packs) { + if (!store->kept_cache.packs) { struct packed_git **packs = NULL; + struct packfile_list_entry *e; size_t nr = 0, alloc = 0; - struct packed_git *p; /* * We want "all" packs here, because we need to cover ones that @@ -2192,9 +2193,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. 
*/ - repo_for_each_pack(r, p) { - if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || - (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } @@ -2202,11 +2205,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->packfiles->kept_cache.packs = packs; - r->objects->packfiles->kept_cache.flags = flags; + store->kept_cache.packs = packs; + store->kept_cache.flags = flags; } - return r->objects->packfiles->kept_cache.packs; + return store->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, @@ -2214,9 +2217,9 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; if (fill_pack_entry(oid, e, p)) return 1; diff --git a/packfile.h b/packfile.h index 33cc1c1654..410f85f03d 100644 --- a/packfile.h +++ b/packfile.h @@ -90,9 +90,10 @@ struct packfile_store { * is an on-disk ".keep" file or because they are marked as "kept" in * memory. * - * Should not be accessed directly, but via `kept_pack_cache()`. The - * list of packs gets invalidated when the stored flags and the flags - * passed to `kept_pack_cache()` mismatch. + * Should not be accessed directly, but via + * `packfile_store_get_kept_pack_cache()`. The list of packs gets + * invalidated when the stored flags and the flags passed to + * `packfile_store_get_kept_pack_cache()` mismatch. */ struct { struct packed_git **packs; @@ -210,6 +211,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid); +enum kept_pack_type { + KEPT_PACK_ON_DISK = (1 << 0), + KEPT_PACK_IN_CORE = (1 << 1), +}; + +/* + * Retrieve the cache of kept packs from the given packfile store. Accepts a + * combination of `kept_pack_type` flags. The cache is computed on demand and + * will be recomputed whenever the flags change. + */ +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -385,9 +399,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 - /* * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. @@ -398,8 +409,6 @@ int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. 
diff --git a/reachable.c b/reachable.c index b753c39553..4b532039d5 100644 --- a/reachable.c +++ b/reachable.c @@ -242,7 +242,7 @@ static int want_recent_object(struct recent_data *data, const struct object_id *oid) { if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS)) + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) return 0; return 1; } diff --git a/revision.c b/revision.c index 5f0850ae5c..64d223a7c6 100644 --- a/revision.c +++ b/revision.c @@ -2541,14 +2541,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked= no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; -- GitLab From 64ef848088831a25b2c3f4f1a123a9f189cb64d9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 19 Oct 2025 17:19:31 +0200 Subject: [PATCH 26/32] packfile: refactor misleading code when unusing pack windows The function `unuse_one_window()` is responsible for unmapping one of the packfile windows, which is done when we have exceeded the allowed number of windows. The function receives a `struct packed_git` as input, which serves as an additional packfile that should be considered to be closed. If not given, we seemingly skip that and instead go through all of the repository's packfiles. The conditional that checks whether we have a packfile though does not make much sense anymore, as we dereference the packfile regardless of whether or not it is a `NULL` pointer to derive the repository's packfile store. The function was originally introduced via f0e17e86e1 (pack: move release_pack_memory(), 2017-08-18), and here we indeed had a caller that passed a `NULL` pointer. That caller was later removed via 9827d4c185 (packfile: drop release_pack_memory(), 2019-08-12), so starting with that commit we always pass a `struct packed_git`. In 9c5ce06d74 (packfile: use `repository` from `packed_git` directly, 2024-12-03) we then inadvertently started to rely on the fact that the pointer is never `NULL` because we now use it to identify the repository. Arguably, it didn't really make sense in the first place that the caller provides a packfile, as the selected window would have been overridden anyway by the subsequent loop over all packfiles if there was an older window. So the overall logic is quite misleading. The only case where it _could_ make a difference is when there were two packfiles with the same `last_used` value, but that case doesn't ever happen because the `pack_used_ctr` is strictly increasing. Refactor the code so that we instead pass in the object database to help make the code less misleading.
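For reference, a minimal sketch of the call site after this change, mirroring the hunk below: the caller simply hands over the object database and the function itself walks all packs to find the least-recently-used window.

    while (settings->packed_git_limit < pack_mapped &&
           unuse_one_window(p->repo->objects))
        ; /* evict windows until we are back under the mapping limit */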
Signed-off-by: Patrick Steinhardt --- packfile.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packfile.c b/packfile.c index 191344eb1c..3700612465 100644 --- a/packfile.c +++ b/packfile.c @@ -355,16 +355,15 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct object_database *odb) { struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + for (e = odb->packfiles->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -740,8 +739,8 @@ unsigned char *use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; - while (settings->packed_git_limit < pack_mapped - && unuse_one_window(p)) + while (settings->packed_git_limit < pack_mapped && + unuse_one_window(p->repo->objects)) ; /* nothing */ win->base = xmmap_gently(NULL, win->len, PROT_READ, MAP_PRIVATE, -- GitLab From 6fa20979befa175c6b47d1339e1dedb098ec6591 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 12:56:45 +0100 Subject: [PATCH 27/32] packfile: move packfile store into object source The packfile store is a member of `struct object_database`, which means that we have a single store per database. This doesn't really make much sense though: each source connected to the database has its own set of packfiles, so there is a conceptual mismatch here. This hasn't really caused much of a problem in the past, but with the advent of pluggable object databases this is becoming more of a problem because some of the sources may not even use packfiles in the first place. Move the packfile store down by one level from the object database into the object database source. This ensures that each source now has its own packfile store, and we can eventually start to abstract it away entirely so that the caller doesn't even know what kind of store it uses. Note that we only need to adjust a relatively small number of callers, way less than one might expect. This is because most callers are using `repo_for_each_pack()`, which handles enumeration of all packfiles that exist in the repository. So for now, none of these callers need to be adapted. The remaining callers that iterate through the packfiles directly and that need adjustment are those that are a bit more tangled with packfiles. These will be adjusted over time. Note that this patch only moves the packfile store, and there is still a bunch of functions that seemingly operate on a packfile store but that end up iterating over all sources. These will be adjusted in subsequent commits. 
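To illustrate the new layout (a sketch, not part of the patch; `odb`, `oid`, `oi` and `flags` are assumed to be in scope as in the object-lookup path), code that previously consulted the single per-database store now walks the sources and asks each per-source store in turn:

    struct odb_source *source;

    odb_prepare_alternates(odb);
    for (source = odb->sources; source; source = source->next)
        if (!packfile_store_read_object_info(source->packfiles, oid, oi, flags))
            return 0; /* found in one of this source's packfiles */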
Signed-off-by: Patrick Steinhardt --- builtin/fast-import.c | 37 +++++++----- builtin/grep.c | 6 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 96 ++++++++++++++++--------------- http.c | 2 +- midx.c | 5 +- odb.c | 36 ++++++------ odb.h | 6 +- odb/streaming.c | 9 ++- packfile.c | 127 ++++++++++++++++++++++++++--------------- packfile.h | 62 +++++++++++++++++--- 11 files changed, 243 insertions(+), 145 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 7849005ccb..b8a7757cfd 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -900,7 +900,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + new_p = packfile_store_load_pack(pack_data->repo->objects->sources->packfiles, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); @@ -955,7 +955,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct odb_source *source; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -979,7 +979,11 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + } + + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1096,10 +1100,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); + struct odb_source *source; struct object_entry *e; struct object_id oid; unsigned long hdrlen; @@ -1179,24 +1183,29 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); + goto out; + } - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; + goto out; } + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + +out: free(in_buf); free(out_buf); } diff --git a/builtin/grep.c b/builtin/grep.c index 53cccf2d25..4855b871dd 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,8 +1213,12 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); + /* + * Note: `packfile_store_prepare()` prepares stores from all + * sources. This will be fixed in a subsequent commit. 
+ */ if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->packfiles); + packfile_store_prepare(the_repository->objects->sources->packfiles); start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a7e901e49c..b67fb0256c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1638,7 +1638,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, hash, "idx", 1); if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->packfiles, + packfile_store_load_pack(the_repository->objects->sources->packfiles, final_index_name, 0); if (!from_stdin) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e86b8f387a..7fd90a9996 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,49 +1529,53 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) { - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); - ofs = find_pack_entry_one(oid, p); - if (!ofs) - continue; + for (; *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. - */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) + ofs = find_pack_entry_one(oid, p); + if (!ofs) continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. + */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. 
+ */ + if (mtime <= candidate_mtime) + return 0; + } } return -1; @@ -1749,13 +1753,15 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->packs, p); - if (want != -1) - return want; + for (source = the_repository->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); + if (!exclude && want > 0) + packfile_list_prepend(&source->packfiles->packs, p); + if (want != -1) + return want; + } } if (uri_protocols.nr) { diff --git a/http.c b/http.c index 41f850db16..7815f144de 100644 --- a/http.c +++ b/http.c @@ -2544,7 +2544,7 @@ void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->packfiles, p); + packfile_store_add_pack(the_repository->objects->sources->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/midx.c b/midx.c index 24e1e72175..dbb2aa68ba 100644 --- a/midx.c +++ b/midx.c @@ -95,7 +95,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - packfile_store_prepare(source->odb->packfiles); + packfile_store_prepare(source->packfiles); return source->midx; } @@ -447,7 +447,6 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -460,7 +459,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(r->objects->packfiles, + p = packfile_store_load_pack(m->source->packfiles, pack_name.buf, m->source->local); strbuf_release(&pack_name); diff --git a/odb.c b/odb.c index 94144a69f5..f159fbdd99 100644 --- a/odb.c +++ b/odb.c @@ -155,6 +155,7 @@ static struct odb_source *odb_source_new(struct object_database *odb, source->local = local; source->path = xstrdup(path); source->loose = odb_source_loose_new(source); + source->packfiles = packfile_store_new(source); return source; } @@ -373,6 +374,7 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); + packfile_store_free(source->packfiles); free(source); } @@ -704,19 +706,19 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; - /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags) || + !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; + } /* Not a loose object; someone else may have just packed it. 
*/ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags)) + return 0; } /* @@ -975,13 +977,14 @@ int odb_freshen_object(struct object_database *odb, { struct odb_source *source; - if (packfile_store_freshen_object(odb->packfiles, oid)) - return 1; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) + for (source = odb->sources; source; source = source->next) { + if (packfile_store_freshen_object(source->packfiles, oid)) + return 1; + if (odb_source_loose_freshen_object(source, oid)) return 1; + } return 0; } @@ -1064,7 +1067,6 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->packfiles = packfile_store_new(o->sources); free(to_free); @@ -1077,9 +1079,8 @@ void odb_close(struct object_database *o) { struct odb_source *source; - packfile_store_close(o->packfiles); - for (source = o->sources; source; source = source->next) { + packfile_store_close(source->packfiles); if (source->midx) close_midx(source->midx); source->midx = NULL; @@ -1118,7 +1119,6 @@ void odb_free(struct object_database *o) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); chdir_notify_unregister(NULL, odb_update_commondir, o); @@ -1141,13 +1141,13 @@ void odb_reprepare(struct object_database *o) o->loaded_alternates = 0; odb_prepare_alternates(o); - for (source = o->sources; source; source = source->next) + for (source = o->sources; source; source = source->next) { odb_source_loose_reprepare(source); + packfile_store_reprepare(source->packfiles); + } o->approximate_object_count_valid = 0; - packfile_store_reprepare(o->packfiles); - obj_read_unlock(); } diff --git a/odb.h b/odb.h index 014cd9585a..c97b41c58c 100644 --- a/odb.h +++ b/odb.h @@ -51,6 +51,9 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; + /* Should only be accessed directly by packfile.c and midx.c. */ + struct packfile_store *packfiles; + /* * private data * @@ -128,9 +131,6 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* Should only be accessed directly by packfile.c and midx.c. 
*/ - struct packfile_store *packfiles; - /* * This is meant to hold a *small* number of objects that you would * want odb_read_object() to be able to return, but yet you do not want diff --git a/odb/streaming.c b/odb/streaming.c index 745cd486fb..4a4474f891 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -185,13 +185,12 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) - return 0; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_stream(out, source, oid)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_stream(out, source->packfiles, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) return 0; + } return open_istream_incore(out, odb, oid); } diff --git a/packfile.c b/packfile.c index 3700612465..a0225cb2cb 100644 --- a/packfile.c +++ b/packfile.c @@ -357,12 +357,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct object_database *odb) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - for (e = odb->packfiles->packs.head; e; e = e->next) - scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + for (source = odb->sources; source; source = source->next) + for (e = source->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -528,15 +530,18 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (e = r->objects->packfiles->packs.head; e; e = e->next) { - if (e->pack->pack_fd == -1) - continue; - find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + for (source = r->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) + continue; + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + } } if (lru_p) @@ -987,7 +992,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->source->odb->packfiles, + packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); free(trimmed_path); } @@ -1245,11 +1250,15 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packfile_list_entry *e; + struct odb_source *source; + + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + for (e = source->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; + } - for (e = r->objects->packfiles->packs.head; e; e = e->next) - if (oidset_contains(&e->pack->bad_objects, oid)) - return e->pack; return NULL; } @@ -2089,26 +2098,32 @@ static int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - 
struct packfile_list_entry *l; + struct odb_source *source; - packfile_store_prepare(r->objects->packfiles); + /* + * Note: `packfile_store_prepare()` prepares stores from all sources. + * This will be fixed in a subsequent commit. + */ + packfile_store_prepare(r->objects->sources->packfiles); - for (struct odb_source *source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs.head) - return 0; + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *l; - for (l = r->objects->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = source->packfiles->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!r->objects->packfiles->skip_mru_updates) - packfile_list_prepend(&r->objects->packfiles->packs, p); - return 1; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!source->packfiles->skip_mru_updates) + packfile_list_prepend(&source->packfiles->packs, p); + return 1; + } } } + return 0; } @@ -2216,12 +2231,18 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + + for (; *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, e, p)) + return 1; + } } return 0; @@ -2287,32 +2308,46 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) { - struct packed_git *p; + struct odb_source *source; int r = 0; int pack_errors = 0; - repo->objects->packfiles->skip_mru_updates = true; - repo_for_each_pack(repo, p) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + + source->packfiles->skip_mru_updates = true; + + for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + r = for_each_object_in_pack(p, cb, data, flags); + if (r) + break; } - r = for_each_object_in_pack(p, cb, data, flags); + + source->packfiles->skip_mru_updates = false; + if (r) break; } - 
repo->objects->packfiles->skip_mru_updates = false; return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index 410f85f03d..07f7cdbad1 100644 --- a/packfile.h +++ b/packfile.h @@ -5,6 +5,7 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "repository.h" #include "strmap.h" /* in odb.h */ @@ -170,14 +171,65 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); + +struct repo_for_each_pack_data { + struct odb_source *source; + struct packfile_list_entry *entry; +}; + +static inline struct repo_for_each_pack_data repo_for_each_pack_data_init(struct repository *repo) +{ + struct repo_for_each_pack_data data = { 0 }; + + odb_prepare_alternates(repo->objects); + + for (struct odb_source *source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data.source = source; + data.entry = entry; + break; + } + + return data; +} + +static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data) +{ + struct odb_source *source; + + data->entry = data->entry->next; + if (data->entry) + return; + + for (source = data->source->next; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data->source = source; + data->entry = entry; + return; + } + + data->source = NULL; + data->entry = NULL; +} + /* * Load and iterate through all packs of the given repository. This helper * function will yield packfiles from all object sources connected to the * repository. */ #define repo_for_each_pack(repo, p) \ - for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ - ((p) = (e ? e->pack : NULL)); e = e->next) + for (struct repo_for_each_pack_data each_pack_data = repo_for_each_pack_data_init(repo); \ + ((p) = (each_pack_data.entry ? each_pack_data.entry->pack : NULL)); \ + repo_for_each_pack_data_next(&each_pack_data)) int packfile_store_read_object_stream(struct odb_read_stream **out, struct packfile_store *store, @@ -194,12 +246,6 @@ int packfile_store_read_object_info(struct packfile_store *store, struct object_info *oi, unsigned flags); -/* - * Get all packs managed by the given store, including packfiles that are - * referenced by multi-pack indices. - */ -struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a -- GitLab From c540fa7460189168b6ac14f4b1369ec5f6557d8a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 12:56:26 +0200 Subject: [PATCH 28/32] packfile: only prepare owning store in `packfile_store_get_packs()` When calling `packfile_store_get_packs()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Adapt the code so that we only prepare the MIDX of the provided store.
All callers only work in the context of a single store or call the function in a loop over all sources, so this change shouldn't have any practical effects. Signed-off-by: Patrick Steinhardt --- packfile.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index a0225cb2cb..c46d53b75d 100644 --- a/packfile.c +++ b/packfile.c @@ -1092,10 +1092,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { - struct multi_pack_index *m = source->midx; - if (!m) - continue; + if (store->source->midx) { + struct multi_pack_index *m = store->source->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } -- GitLab From dae25f3b7c3b9f47c764d991c18aea7ac43d4e8b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 12:59:02 +0200 Subject: [PATCH 29/32] packfile: only prepare owning store in `packfile_store_prepare()` When calling `packfile_store_prepare()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Refactor the code so that we only prepare the single packfile store passed by the caller. Adapt callers accordingly. Signed-off-by: Patrick Steinhardt --- builtin/grep.c | 14 ++++++++------ packfile.c | 19 +++++-------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 4855b871dd..5b8b87b1ac 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,12 +1213,14 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); - /* - * Note: `packfile_store_prepare()` prepares stores from all - * sources. This will be fixed in a subsequent commit. - */ - if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->sources->packfiles); + + if (startup_info->have_repository) { + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) + packfile_store_prepare(source->packfiles); + } start_threads(&opt); } else { diff --git a/packfile.c b/packfile.c index c46d53b75d..23d8f7cb93 100644 --- a/packfile.c +++ b/packfile.c @@ -1063,16 +1063,11 @@ static int sort_pack(const struct packfile_list_entry *a, void packfile_store_prepare(struct packfile_store *store) { - struct odb_source *source; - if (store->initialized) return; - odb_prepare_alternates(store->source->odb); - for (source = store->source->odb->sources; source; source = source->next) { - prepare_multi_pack_index_one(source); - prepare_packed_git_one(source); - } + prepare_multi_pack_index_one(store->source); + prepare_packed_git_one(store->source); sort_packs(&store->packs.head, sort_pack); for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) @@ -2098,15 +2093,11 @@ static int find_pack_entry(struct repository *r, { struct odb_source *source; - /* - * Note: `packfile_store_prepare()` prepares stores from all sources. - * This will be fixed in a subsequent commit. 
- */ - packfile_store_prepare(r->objects->sources->packfiles); - - for (source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) { + packfile_store_prepare(r->objects->sources->packfiles); if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; + } for (source = r->objects->sources; source; source = source->next) { struct packfile_list_entry *l; -- GitLab From 9b074ee864887dcf0b2de9743fb8ebef6e249189 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:26:46 +0200 Subject: [PATCH 30/32] packfile: inline `find_kept_pack_entry()` The `find_kept_pack_entry()` function is only used in `has_object_kept_pack()`, which is only a trivial wrapper itself. Inline the former into the latter. Furthermore, reorder the code so that we can drop the declaration of the function in "packfile.h". This allows us to make the function file-local. Signed-off-by: Patrick Steinhardt --- packfile.c | 28 ++++++++++------------------ packfile.h | 6 ------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/packfile.c b/packfile.c index 23d8f7cb93..3bce1b150d 100644 --- a/packfile.c +++ b/packfile.c @@ -2215,12 +2215,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st return store->kept_cache.packs; } -int find_kept_pack_entry(struct repository *r, - const struct object_id *oid, - unsigned flags, - struct pack_entry *e) +int has_object_pack(struct repository *r, const struct object_id *oid) +{ + struct pack_entry e; + return find_pack_entry(r, oid, &e); +} + +int has_object_kept_pack(struct repository *r, const struct object_id *oid, + unsigned flags) { struct odb_source *source; + struct pack_entry e; for (source = r->objects->sources; source; source = source->next) { struct packed_git **cache; @@ -2229,7 +2234,7 @@ int find_kept_pack_entry(struct repository *r, for (; *cache; cache++) { struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) + if (fill_pack_entry(oid, &e, p)) return 1; } } @@ -2237,19 +2242,6 @@ int find_kept_pack_entry(struct repository *r, return 0; } -int has_object_pack(struct repository *r, const struct object_id *oid) -{ - struct pack_entry e; - return find_pack_entry(r, oid, &e); -} - -int has_object_kept_pack(struct repository *r, const struct object_id *oid, - unsigned flags) -{ - struct pack_entry e; - return find_kept_pack_entry(r, oid, flags, &e); -} - int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) diff --git a/packfile.h b/packfile.h index 07f7cdbad1..08a666d538 100644 --- a/packfile.h +++ b/packfile.h @@ -445,12 +445,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -/* - * Iff a pack file in the given repository contains the object named by sha1, - * return true and store its location to e.
- */ -int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); - int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -- GitLab From 54ab47d344bef7f95fc2bbf29b925c0b53da9daa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:29:38 +0200 Subject: [PATCH 31/32] packfile: refactor `find_pack_entry()` to work on the packfile store The function `find_pack_entry()` doesn't work on a specific packfile store, but instead works on the whole repository. This causes a bit of a conceptual mismatch in its callers: - `packfile_store_freshen_object()` supposedly acts on a store, and its callers know to iterate through all sources already. - `packfile_store_read_object_info()` behaves likewise. The only exception that doesn't know to handle iteration through sources is `has_object_pack()`, but that function is trivial to adapt. Refactor the code so that `find_pack_entry()` works on the packfile store level instead. Signed-off-by: Patrick Steinhardt --- packfile.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/packfile.c b/packfile.c index 3bce1b150d..0e4c63e11d 100644 --- a/packfile.c +++ b/packfile.c @@ -2087,29 +2087,23 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -static int find_pack_entry(struct repository *r, +static int find_pack_entry(struct packfile_store *store, const struct object_id *oid, struct pack_entry *e) { - struct odb_source *source; - - for (source = r->objects->sources; source; source = source->next) { - packfile_store_prepare(r->objects->sources->packfiles); - if (source->midx && fill_midx_entry(source->midx, oid, e)) - return 1; - } + struct packfile_list_entry *l; - for (source = r->objects->sources; source; source = source->next) { - struct packfile_list_entry *l; + packfile_store_prepare(store); + if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + return 1; - for (l = source->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = store->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!source->packfiles->skip_mru_updates) - packfile_list_prepend(&source->packfiles->packs, p); - return 1; - } + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!store->skip_mru_updates) + packfile_list_prepend(&store->packs, p); + return 1; } } @@ -2120,7 +2114,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2141,7 +2135,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 1; /* @@ -2217,8 +2211,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st int has_object_pack(struct repository *r, const struct object_id *oid) { + struct odb_source *source; struct pack_entry e; - return find_pack_entry(r, oid, &e); + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + int ret = find_pack_entry(source->packfiles, oid, &e); + if 
(ret) + return ret; + } + + return 0; } int has_object_kept_pack(struct repository *r, const struct object_id *oid, -- GitLab From f9887d0b53af3ef15502c22423b7203df958a299 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 19 Oct 2025 16:44:30 +0200 Subject: [PATCH 32/32] packfile: move MIDX into packfile store The multi-pack index still is tracked as a member of the object database source, but ultimately the MIDX is always tied to one specific packfile store. Move the structure into `struct packfile_store` accordingly. This ensures that the packfile store now keeps track of all data related to packfiles. Signed-off-by: Patrick Steinhardt --- midx.c | 14 +++++++------- odb.c | 8 +------- odb.h | 7 ------- packfile.c | 12 ++++++++---- packfile.h | 3 +++ 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/midx.c b/midx.c index dbb2aa68ba..fa7a7e5d13 100644 --- a/midx.c +++ b/midx.c @@ -96,7 +96,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { packfile_store_prepare(source->packfiles); - return source->midx; + return source->packfiles->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -709,12 +709,12 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (!r->settings.core_multi_pack_index) return 0; - if (source->midx) + if (source->packfiles->midx) return 1; - source->midx = load_multi_pack_index(source); + source->packfiles->midx = load_multi_pack_index(source); - return !!source->midx; + return !!source->packfiles->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -803,9 +803,9 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; + if (source->packfiles->midx) + close_midx(source->packfiles->midx); + source->packfiles->midx = NULL; } } diff --git a/odb.c b/odb.c index f159fbdd99..902251f9ed 100644 --- a/odb.c +++ b/odb.c @@ -1078,14 +1078,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - - for (source = o->sources; source; source = source->next) { + for (source = o->sources; source; source = source->next) packfile_store_close(source->packfiles); - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - close_commit_graph(o); } diff --git a/odb.h b/odb.h index c97b41c58c..300c3c0c46 100644 --- a/odb.h +++ b/odb.h @@ -54,13 +54,6 @@ struct odb_source { /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* - * private data - * - * should only be accessed directly by packfile.c and midx.c - */ - struct multi_pack_index *midx; - /* * Figure out whether this is the local source of the owning * repository, which would typically be its ".git/objects" directory. 
diff --git a/packfile.c b/packfile.c index 0e4c63e11d..097dd8d85d 100644 --- a/packfile.c +++ b/packfile.c @@ -990,7 +990,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { + !(data->source->packfiles->midx && + midx_contains_pack(data->source->packfiles->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); @@ -1087,8 +1088,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - if (store->source->midx) { - struct multi_pack_index *m = store->source->midx; + if (store->midx) { + struct multi_pack_index *m = store->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } @@ -2094,7 +2095,7 @@ static int find_pack_entry(struct packfile_store *store, struct packfile_list_entry *l; packfile_store_prepare(store); - if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + if (store->midx && fill_midx_entry(store->midx, oid, e)) return 1; for (l = store->packs.head; l; l = l->next) { @@ -2454,6 +2455,9 @@ void packfile_store_close(struct packfile_store *store) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } + if (store->midx) + close_midx(store->midx); + store->midx = NULL; } struct odb_packed_read_stream { diff --git a/packfile.h b/packfile.h index 08a666d538..92baf8ee88 100644 --- a/packfile.h +++ b/packfile.h @@ -101,6 +101,9 @@ struct packfile_store { unsigned flags; } kept_cache; + /* The multi-pack index that belongs to this specific packfile store. */ + struct multi_pack_index *midx; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. -- GitLab
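
For readers who want to see the end state of the series in one place: the sketch below shows how a caller could walk every packfile across all object sources now that each `odb_source` owns its own `packfile_store` (including its MIDX). The helper `count_local_packs()` is hypothetical and purely illustrative; it only leans on interfaces visible in the hunks above (`odb_prepare_alternates()`, `packfile_store_get_packs()`, `source->packfiles`) and is not part of the series itself.

#include "git-compat-util.h"
#include "odb.h"
#include "packfile.h"

/*
 * Hypothetical helper, not part of this series: count the local packfiles
 * reachable from a repository by walking every object source and asking its
 * packfile store for the packs it manages (including MIDX-referenced ones).
 */
static size_t count_local_packs(struct repository *repo)
{
	struct odb_source *source;
	size_t nr = 0;

	/* Load alternates so that every source shows up on the list. */
	odb_prepare_alternates(repo->objects);

	for (source = repo->objects->sources; source; source = source->next) {
		struct packfile_list_entry *e;

		for (e = packfile_store_get_packs(source->packfiles); e; e = e->next)
			if (e->pack->pack_local)
				nr++;
	}

	return nr;
}

Most call sites would not open-code this walk: the `repo_for_each_pack()` macro introduced earlier in the series performs the same two-level iteration over sources and their packfile stores.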