From 6bdda3a3b00fff9a1d64d1bb4732f0c446d7012c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:26 +0100 Subject: [PATCH 001/110] streaming: rename `git_istream` into `odb_read_stream` In the following patches we are about to make the `git_istream` more generic so that it becomes fully controlled by the specific object source that wants to create it. As part of these refactorings we'll fully move the structure into the object database subsystem. Prepare for this change by renaming the structure from `git_istream` to `odb_read_stream`. This mirrors the `odb_write_stream` structure that we already have. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 4 +-- object-file.c | 2 +- streaming.c | 62 +++++++++++++++++++++--------------------- streaming.h | 12 ++++---- 7 files changed, 43 insertions(+), 43 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41..dc1eda09e0 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -129,7 +129,7 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; char buf[BLOCKSIZE]; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43d..40a9c93ff9 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4..5f90f12f92 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct 
object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 69e80b1443..c693d948e1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f, const struct object_id *oid) { git_zstream stream; @@ -513,7 +513,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { diff --git a/object-file.c b/object-file.c index 811c569ed3..b62b21a452 100644 --- a/object-file.c +++ b/object-file.c @@ -134,7 +134,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) struct object_id real_oid; unsigned long size; enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; diff --git a/streaming.c b/streaming.c index 00ad649ae3..1fb4b7c1c0 100644 --- a/streaming.c +++ b/streaming.c @@ -14,17 +14,17 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct git_istream *, +typedef int (*open_istream_fn)(struct odb_read_stream *, struct repository *, const struct object_id *, enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); +typedef int (*close_istream_fn)(struct odb_read_stream *); +typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); 
#define FILTER_BUFFER (1024*16) struct filtered_istream { - struct git_istream *upstream; + struct odb_read_stream *upstream; struct stream_filter *filter; char ibuf[FILTER_BUFFER]; char obuf[FILTER_BUFFER]; @@ -33,7 +33,7 @@ struct filtered_istream { int input_finished; }; -struct git_istream { +struct odb_read_stream { open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -71,7 +71,7 @@ struct git_istream { * *****************************************************************/ -static void close_deflated_stream(struct git_istream *st) +static void close_deflated_stream(struct odb_read_stream *st) { if (st->z_state == z_used) git_inflate_end(&st->z); @@ -84,13 +84,13 @@ static void close_deflated_stream(struct git_istream *st) * *****************************************************************/ -static int close_istream_filtered(struct git_istream *st) +static int close_istream_filtered(struct odb_read_stream *st) { free_stream_filter(st->u.filtered.filter); return close_istream(st->u.filtered.upstream); } -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, size_t sz) { struct filtered_istream *fs = &(st->u.filtered); @@ -150,10 +150,10 @@ static ssize_t read_istream_filtered(struct git_istream *st, char *buf, return filled; } -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) { - struct git_istream *ifs = xmalloc(sizeof(*ifs)); + struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); struct filtered_istream *fs = &(ifs->u.filtered); ifs->close = close_istream_filtered; @@ -173,7 +173,7 @@ static struct git_istream *attach_stream_filter(struct git_istream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct git_istream *st, char 
*buf, size_t sz) +static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -218,14 +218,14 @@ static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) return total_read; } -static int close_istream_loose(struct git_istream *st) +static int close_istream_loose(struct odb_read_stream *st) { close_deflated_stream(st); munmap(st->u.loose.mapped, st->u.loose.mapsize); return 0; } -static int open_istream_loose(struct git_istream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { @@ -277,7 +277,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -336,13 +336,13 @@ static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, return total_read; } -static int close_istream_pack_non_delta(struct git_istream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *st) { close_deflated_stream(st); return 0; } -static int open_istream_pack_non_delta(struct git_istream *st, +static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, const struct object_id *oid UNUSED, enum object_type *type UNUSED) @@ -380,13 +380,13 @@ static int open_istream_pack_non_delta(struct git_istream *st, * *****************************************************************/ -static int close_istream_incore(struct git_istream *st) +static int close_istream_incore(struct odb_read_stream *st) { free(st->u.incore.buf); return 0; } -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct 
odb_read_stream *st, char *buf, size_t sz) { size_t read_size = sz; size_t remainder = st->size - st->u.incore.read_ptr; @@ -400,7 +400,7 @@ static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) return read_size; } -static int open_istream_incore(struct git_istream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; @@ -420,7 +420,7 @@ static int open_istream_incore(struct git_istream *st, struct repository *r, * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct git_istream *st, +static int istream_source(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) @@ -458,25 +458,25 @@ static int istream_source(struct git_istream *st, * Users of streaming interface ****************************************************************/ -int close_istream(struct git_istream *st) +int close_istream(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) +ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *open_istream(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { - struct git_istream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); int ret = istream_source(st, r, real, type); @@ -493,7 +493,7 @@ struct git_istream 
*open_istream(struct repository *r, } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); + struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { close_istream(st); return NULL; @@ -508,7 +508,7 @@ struct git_istream *open_istream(struct repository *r, int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, int can_seek) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; ssize_t kept = 0; diff --git a/streaming.h b/streaming.h index bd27f59e57..f5ff5d7ac9 100644 --- a/streaming.h +++ b/streaming.h @@ -7,14 +7,14 @@ #include "object.h" /* opaque */ -struct git_istream; +struct odb_read_stream; struct stream_filter; -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); +struct odb_read_stream *open_istream(struct repository *, const struct object_id *, + enum object_type *, unsigned long *, + struct stream_filter *); +int close_istream(struct odb_read_stream *); +ssize_t read_istream(struct odb_read_stream *, void *, size_t); int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); -- GitLab From 70c8b5f5453b9f128a72fad4398acfb9e7d869c4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:27 +0100 Subject: [PATCH 002/110] streaming: drop the `open()` callback function When creating a read stream we first populate the structure with the open callback function and then subsequently call the function. This layout is somewhat weird though: - The structure needs to be allocated and partially populated with the open function before we can properly initialize it. 
- We only ever call the `open()` callback function right after having populated the `struct odb_read_stream::open` member, and it's never called thereafter again. So it is somewhat pointless to store the callback in the first place. Especially the first point creates a problem for us. In subsequent commits we'll want to fully move construction of the read source into the respective object sources. E.g., the loose object source will be the one that is responsible for creating the structure. But this creates a problem: if we first need to create the structure so that we can call the source-specific callback we cannot fully handle creation of the structure in the source itself. We could of course work around that and have the loose object source create the structure and populate its `open()` callback, only. But this doesn't really buy us anything due to the second bullet point above. Instead, drop the callback entirely and refactor `istream_source()` so that we open the streams immediately. This unblocks a subsequent step, where we'll also start to allocate the structure in the source-specific logic. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/streaming.c b/streaming.c index 1fb4b7c1c0..1bb3f393b8 100644 --- a/streaming.c +++ b/streaming.c @@ -14,10 +14,6 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct odb_read_stream *, - struct repository *, - const struct object_id *, - enum object_type *); typedef int (*close_istream_fn)(struct odb_read_stream *); typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); @@ -34,7 +30,6 @@ struct filtered_istream { }; struct odb_read_stream { - open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -437,21 +432,25 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - st->open = open_istream_loose; + if (open_istream_loose(st, r, oid, type) < 0) + break; return 0; case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; + if (oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + break; + + st->u.in_pack.pack = oi.u.packed.pack; + st->u.in_pack.pos = oi.u.packed.offset; + if (open_istream_pack_non_delta(st, r, oid, type) < 0) + break; + return 0; + default: + break; } + + return open_istream_incore(st, r, oid, type); } /**************************************************************** @@ -485,12 +484,6 @@ struct odb_read_stream *open_istream(struct repository *r, return NULL; } - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ struct 
odb_read_stream *nst = attach_stream_filter(st, filter); -- GitLab From 3f64deabdf0a2a9664acec61698affc449e07496 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:28 +0100 Subject: [PATCH 003/110] streaming: propagate final object type via the stream When opening the read stream for a specific object the caller is also expected to pass in a pointer to the object type. This type is passed down via multiple levels and will eventually be populated with the type of the looked-up object. The way we propagate down the pointer though is somewhat non-obvious. While `istream_source()` still expects the pointer and looks it up via `odb_read_object_info_extended()`, we also pass it down even further into the format-specific callbacks that perform another lookup. This is quite confusing overall. Refactor the code so that the responsibility to populate the object type rests solely with the format-specific callbacks. This will allow us to drop the call to `odb_read_object_info_extended()` in `istream_source()` entirely in a subsequent patch. Furthermore, instead of propagating the type via an in-pointer, we now propagate the type via a new field in the object stream. It already has a `size` field, so it's only natural to have a second field that contains the object type. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/streaming.c b/streaming.c index 1bb3f393b8..665624ddc0 100644 --- a/streaming.c +++ b/streaming.c @@ -33,6 +33,7 @@ struct odb_read_stream { close_istream_fn close; read_istream_fn read; + enum object_type type; unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; @@ -159,6 +160,7 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, fs->o_end = fs->o_ptr = 0; fs->input_finished = 0; ifs->size = -1; /* unknown */ + ifs->type = st->type; return ifs; } @@ -221,14 +223,13 @@ static int close_istream_loose(struct odb_read_stream *st) } static int open_istream_loose(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_source *source; oi.sizep = &st->size; - oi.typep = type; + oi.typep = &st->type; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { @@ -249,7 +250,7 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; @@ -339,8 +340,7 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) + const struct object_id *oid UNUSED) { struct pack_window *window; enum object_type in_pack_type; @@ -361,6 +361,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case 
OBJ_TAG: break; } + st->type = in_pack_type; st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; @@ -396,7 +397,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t } static int open_istream_incore(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -404,7 +405,7 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, st->close = close_istream_incore; st->read = read_istream_incore; - oi.typep = type; + oi.typep = &st->type; oi.sizep = &st->size; oi.contentp = (void **)&st->u.incore.buf; return odb_read_object_info_extended(r->objects, oid, &oi, @@ -417,14 +418,12 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, static int istream_source(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { unsigned long size; int status; struct object_info oi = OBJECT_INFO_INIT; - oi.typep = type; oi.sizep = &size; status = odb_read_object_info_extended(r->objects, oid, &oi, 0); if (status < 0) @@ -432,7 +431,7 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid, type) < 0) + if (open_istream_loose(st, r, oid) < 0) break; return 0; case OI_PACKED: @@ -442,7 +441,7 @@ static int istream_source(struct odb_read_stream *st, st->u.in_pack.pack = oi.u.packed.pack; st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid, type) < 0) + if (open_istream_pack_non_delta(st, r, oid) < 0) break; return 0; @@ -450,7 +449,7 @@ static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid, type); + return open_istream_incore(st, r, oid); } 
/**************************************************************** @@ -477,7 +476,7 @@ struct odb_read_stream *open_istream(struct repository *r, { struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); + int ret = istream_source(st, r, real); if (ret) { free(st); @@ -495,6 +494,7 @@ struct odb_read_stream *open_istream(struct repository *r, } *size = st->size; + *type = st->type; return st; } -- GitLab From 3c7722dd4d376e0fce4c48f723fe8b69af785998 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:29 +0100 Subject: [PATCH 004/110] streaming: explicitly pass packfile info when streaming a packed object When streaming a packed object we first populate the stream with information about the pack that contains the object before calling `open_istream_pack_non_delta()`. This is done because we have already looked up both the pack and the object's offset, so it would be a waste of time to look up this information again. But the way this is done makes for a somewhat awkward calling interface, as the caller now needs to be aware of how exactly the function itself behaves. Refactor the code so that we instead explicitly pass the packfile info into `open_istream_pack_non_delta()`. This makes the calling convention explicit, but more importantly this allows us to refactor the function so that it becomes its responsibility to allocate the stream itself in a subsequent patch. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/streaming.c b/streaming.c index 665624ddc0..bf277daadd 100644 --- a/streaming.c +++ b/streaming.c @@ -340,16 +340,18 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED) + const struct object_id *oid UNUSED, + struct packed_git *pack, + off_t offset) { struct pack_window *window; enum object_type in_pack_type; window = NULL; - in_pack_type = unpack_object_header(st->u.in_pack.pack, + in_pack_type = unpack_object_header(pack, &window, - &st->u.in_pack.pos, + &offset, &st->size); unuse_pack(&window); switch (in_pack_type) { @@ -365,6 +367,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; + st->u.in_pack.pack = pack; + st->u.in_pack.pos = offset; return 0; } @@ -436,14 +440,10 @@ static int istream_source(struct odb_read_stream *st, return 0; case OI_PACKED: if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(the_repository) >= size || + open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + oi.u.packed.offset) < 0) break; - - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid) < 0) - break; - return 0; default: break; -- GitLab From 595296e124f5e8a67c4669fcaeb1b28e71c2d751 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:30 +0100 Subject: [PATCH 005/110] streaming: allocate stream inside the backend-specific logic When creating a new stream we first allocate it and then call into backend-specific logic to populate the stream. 
This design requires that the stream itself contains a `union` with backend-specific members that then ultimately get populated by the backend-specific logic. This works, but it's awkward in the context of pluggable object databases. Each backend will need its own member in that union, and as the structure itself is completely opaque (it's only defined in "streaming.c") it also has the consequence that we must have the logic that is specific to backends in "streaming.c". Ideally though, the infrastructure would be reversed: we have a generic `struct odb_read_stream` and some helper functions in "streaming.c", whereas the backend-specific logic sits in the backend's subsystem itself. This can be realized by using a design that is similar to how we handle reference databases: instead of having a union of members, we instead have backend-specific structures with a `struct odb_read_stream base` as its first member. The backends would thus hand out the pointer to the base, but internally they know to cast back to the backend-specific type. This means though that we need to allocate different structures depending on the backend. To prepare for this, move allocation of the structure into the backend-specific functions that open a new stream. Subsequent commits will then create those new backend-specific structs. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 103 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/streaming.c b/streaming.c index bf277daadd..a2c2d88738 100644 --- a/streaming.c +++ b/streaming.c @@ -222,27 +222,34 @@ static int close_istream_loose(struct odb_read_stream *st) return 0; } -static int open_istream_loose(struct odb_read_stream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_read_stream *st; struct odb_source *source; - - oi.sizep = &st->size; - oi.typep = &st->type; + unsigned long mapsize; + void *mapped; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - st->u.loose.mapped = odb_source_loose_map_object(source, oid, - &st->u.loose.mapsize); - if (st->u.loose.mapped) + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (mapped) break; } - if (!st->u.loose.mapped) + if (!mapped) return -1; - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. 
+ */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { case ULHR_OK: break; @@ -250,19 +257,28 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } + + oi.sizep = &st->size; + oi.typep = &st->type; + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; + st->u.loose.mapped = mapped; + st->u.loose.mapsize = mapsize; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; st->u.loose.hdr_avail = st->z.total_out; st->z_state = z_used; st->close = close_istream_loose; st->read = read_istream_loose; + *out = st; + return 0; error: git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); + free(st); return -1; } @@ -338,12 +354,16 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) return 0; } -static int open_istream_pack_non_delta(struct odb_read_stream *st, +static int open_istream_pack_non_delta(struct odb_read_stream **out, struct repository *r UNUSED, const struct object_id *oid UNUSED, struct packed_git *pack, off_t offset) { + struct odb_read_stream stream = { + .close = close_istream_pack_non_delta, + .read = read_istream_pack_non_delta, + }; struct pack_window *window; enum object_type in_pack_type; @@ -352,7 +372,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, in_pack_type = unpack_object_header(pack, &window, &offset, - &st->size); + &stream.size); unuse_pack(&window); switch (in_pack_type) { default: @@ -363,12 +383,13 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case OBJ_TAG: break; } - st->type = in_pack_type; - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - st->u.in_pack.pack = pack; - st->u.in_pack.pos = offset; + stream.type = in_pack_type; + stream.z_state = z_unused; + stream.u.in_pack.pack = pack; + stream.u.in_pack.pos = offset; + + 
CALLOC_ARRAY(*out, 1); + **out = stream; return 0; } @@ -400,27 +421,35 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t return read_size; } -static int open_istream_incore(struct odb_read_stream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; - - oi.typep = &st->type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); + struct odb_read_stream stream = { + .close = close_istream_incore, + .read = read_istream_incore, + }; + int ret; + + oi.typep = &stream.type; + oi.sizep = &stream.size; + oi.contentp = (void **)&stream.u.incore.buf; + ret = odb_read_object_info_extended(r->objects, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; + + CALLOC_ARRAY(*out, 1); + **out = stream; + return 0; } /***************************************************************************** * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct odb_read_stream *st, +static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { @@ -435,13 +464,13 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid) < 0) + if (open_istream_loose(out, r, oid) < 0) break; return 0; case OI_PACKED: if (oi.u.packed.is_delta || repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, oi.u.packed.offset) < 0) break; return 0; @@ -449,7 +478,7 @@ 
static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid); + return open_istream_incore(out, r, oid); } /**************************************************************** @@ -474,14 +503,12 @@ struct odb_read_stream *open_istream(struct repository *r, unsigned long *size, struct stream_filter *filter) { - struct odb_read_stream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st; const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real); + int ret = istream_source(&st, r, real); - if (ret) { - free(st); + if (ret) return NULL; - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ -- GitLab From e030d0aeb5ebf79cdc4910e79d59e33998de78cd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:31 +0100 Subject: [PATCH 006/110] streaming: create structure for in-core object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for in-core object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/streaming.c b/streaming.c index a2c2d88738..35307d7229 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - struct { void *mapped; unsigned long mapsize; @@ -401,22 +396,30 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, * *****************************************************************/ -static int close_istream_incore(struct odb_read_stream *st) +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) { - free(st->u.incore.buf); + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); return 0; } -static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; + size_t remainder = st->base.size - st->read_ptr; if (remainder <= read_size) read_size = remainder; if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; } return read_size; } @@ -426,22 +429,25 @@ static int open_istream_incore(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream stream = { - .close = close_istream_incore, - 
.read = read_istream_incore, + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, }; + struct odb_incore_read_stream *st; int ret; - oi.typep = &stream.type; - oi.sizep = &stream.size; - oi.contentp = (void **)&stream.u.incore.buf; + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; ret = odb_read_object_info_extended(r->objects, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + return 0; } -- GitLab From b7774c0f0de43379c40984b4ede265a512c1a4f0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:32 +0100 Subject: [PATCH 007/110] streaming: create structure for loose object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for loose object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 85 +++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 35307d7229..ac7b3026f5 100644 --- a/streaming.c +++ b/streaming.c @@ -39,14 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - struct { struct packed_git *pack; off_t pos; @@ -165,11 +157,21 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) +struct odb_loose_read_stream { + struct odb_read_stream base; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_done: return 0; case z_error: @@ -178,42 +180,43 @@ static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t break; } - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; if (sz < to_copy) to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; total_read += to_copy; } while (total_read < sz) { int status; - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = 
(unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + status = git_inflate(&st->base.z, Z_FINISH); - total_read = st->z.next_out - (unsigned char *)buf; + total_read = st->base.z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_loose(struct odb_read_stream *st) +static int close_istream_loose(struct odb_read_stream *_st) { - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + close_deflated_stream(&st->base); + munmap(st->mapped, st->mapsize); return 0; } @@ -222,7 +225,7 @@ static int open_istream_loose(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream *st; + struct odb_loose_read_stream *st; struct odb_source *source; unsigned long mapsize; void *mapped; @@ -244,8 +247,8 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { + switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { case ULHR_OK: break; case ULHR_BAD: @@ -253,26 +256,26 @@ static int open_istream_loose(struct odb_read_stream **out, goto error; } - oi.sizep = &st->size; - oi.typep = &st->type; + oi.sizep = &st->base.size; + oi.typep = &st->base.type; - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) goto error; - st->u.loose.mapped = mapped; - st->u.loose.mapsize = mapsize; - 
st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->base.z.total_out; + st->base.z_state = z_used; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; - *out = st; + *out = &st->base; return 0; error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + git_inflate_end(&st->base.z); + munmap(st->mapped, st->mapsize); free(st); return -1; } -- GitLab From 5f0d8d2e8d3f992f58af247b6d21509c3c7595ca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:33 +0100 Subject: [PATCH 008/110] streaming: create structure for packed object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for packed object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 75 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/streaming.c b/streaming.c index ac7b3026f5..788f04e83e 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - struct filtered_istream filtered; } u; }; @@ -287,16 +282,23 @@ static int open_istream_loose(struct odb_read_stream **out, * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; + memset(&st->base.z, 0, sizeof(st->base.z)); + git_inflate_init(&st->base.z); + st->base.z_state = z_used; break; case z_done: return 0; @@ -311,21 +313,21 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf struct pack_window *window = NULL; unsigned char *mapped; - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); + mapped = use_pack(st->pack, &window, + st->pos, &st->base.z.avail_in); - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + st->base.z.next_in = mapped; + status = git_inflate(&st->base.z, 
Z_FINISH); - st->u.in_pack.pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; + st->pos += st->base.z.next_in - mapped; + total_read = st->base.z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } @@ -338,17 +340,18 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_pack_non_delta(struct odb_read_stream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *_st) { - close_deflated_stream(st); + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + close_deflated_stream(&st->base); return 0; } @@ -358,19 +361,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, struct packed_git *pack, off_t offset) { - struct odb_read_stream stream = { - .close = close_istream_pack_non_delta, - .read = read_istream_pack_non_delta, - }; + struct odb_packed_read_stream *stream; struct pack_window *window; enum object_type in_pack_type; + size_t size; window = NULL; in_pack_type = unpack_object_header(pack, &window, &offset, - &stream.size); + &size); unuse_pack(&window); switch (in_pack_type) { default: @@ -381,13 +382,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, case OBJ_TAG: break; } - stream.type = in_pack_type; - stream.z_state = z_unused; - stream.u.in_pack.pack = pack; - stream.u.in_pack.pos = offset; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type 
= in_pack_type; + stream->base.size = size; + stream->base.z_state = z_unused; + stream->pack = pack; + stream->pos = offset; + + *out = &stream->base; return 0; } -- GitLab From 1154b2d2e511113e9b7d567788b72acb05713915 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:34 +0100 Subject: [PATCH 009/110] streaming: create structure for filtered object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for filtered object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 54 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/streaming.c b/streaming.c index 788f04e83e..199cca5abb 100644 --- a/streaming.c +++ b/streaming.c @@ -19,16 +19,6 @@ typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) -struct filtered_istream { - struct odb_read_stream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - struct odb_read_stream { close_istream_fn close; read_istream_fn read; @@ -37,10 +27,6 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct filtered_istream filtered; - } u; }; /***************************************************************** @@ -62,16 +48,28 @@ static void close_deflated_stream(struct odb_read_stream *st) * *****************************************************************/ -static int close_istream_filtered(struct odb_read_stream *st) +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char 
obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) { - free_stream_filter(st->u.filtered.filter); - return close_istream(st->u.filtered.upstream); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return close_istream(fs->upstream); } -static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, size_t sz) { - struct filtered_istream *fs = &(st->u.filtered); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; size_t filled = 0; while (sz) { @@ -131,19 +129,17 @@ static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct stream_filter *filter) { - struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); + struct odb_filtered_read_stream *fs; - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; fs->upstream = st; fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - ifs->type = st->type; - return ifs; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; } /***************************************************************** -- GitLab From eb5abbb4e6a8c06f5c6275bbb541bf7d736171c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:35 +0100 Subject: [PATCH 010/110] streaming: move zlib stream into backends While all backend-specific data is now contained in a backend-specific structure, we still share the zlib stream across the loose and packed objects. 
Refactor the code and move it into the specific structures so that we fully detangle the different backends from one another. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 104 ++++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/streaming.c b/streaming.c index 199cca5abb..46fddaf2ca 100644 --- a/streaming.c +++ b/streaming.c @@ -25,23 +25,8 @@ struct odb_read_stream { enum object_type type; unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; }; -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct odb_read_stream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - /***************************************************************** * * Filtered stream @@ -150,6 +135,12 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct odb_loose_read_stream { struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; void *mapped; unsigned long mapsize; char hdr[32]; @@ -162,10 +153,10 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_done: + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: return 0; - case z_error: + case ODB_LOOSE_READ_STREAM_ERROR: return -1; default: break; @@ -183,20 +174,20 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t while (total_read < sz) { int status; - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - status = 
git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); - total_read = st->base.z.next_out - (unsigned char *)buf; + total_read = st->z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; return -1; } } @@ -206,7 +197,8 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t static int close_istream_loose(struct odb_read_stream *_st) { struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); return 0; } @@ -238,7 +230,7 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, sizeof(st->hdr))) { case ULHR_OK: break; @@ -256,8 +248,8 @@ static int open_istream_loose(struct odb_read_stream **out, st->mapped = mapped; st->mapsize = mapsize; st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->base.z.total_out; - st->base.z_state = z_used; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; st->base.close = close_istream_loose; st->base.read = read_istream_loose; @@ -265,7 +257,7 @@ static int open_istream_loose(struct odb_read_stream **out, return 0; error: - git_inflate_end(&st->base.z); + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); free(st); return -1; @@ -281,6 +273,13 @@ static int open_istream_loose(struct 
odb_read_stream **out, struct odb_packed_read_stream { struct odb_read_stream base; struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; off_t pos; }; @@ -290,17 +289,17 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_unused: - memset(&st->base.z, 0, sizeof(st->base.z)); - git_inflate_init(&st->base.z); - st->base.z_state = z_used; + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; break; - case z_done: + case ODB_PACKED_READ_STREAM_DONE: return 0; - case z_error: + case ODB_PACKED_READ_STREAM_ERROR: return -1; - case z_used: + case ODB_PACKED_READ_STREAM_INUSE: break; } @@ -310,20 +309,20 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu unsigned char *mapped; mapped = use_pack(st->pack, &window, - st->pos, &st->base.z.avail_in); + st->pos, &st->z.avail_in); - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - st->base.z.next_in = mapped; - status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); - st->pos += st->base.z.next_in - mapped; - total_read = st->base.z.next_out - (unsigned char *)buf; + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; break; } @@ -336,8 +335,8 @@ 
static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; return -1; } } @@ -347,7 +346,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu static int close_istream_pack_non_delta(struct odb_read_stream *_st) { struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); return 0; } @@ -384,7 +384,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.read = read_istream_pack_non_delta; stream->base.type = in_pack_type; stream->base.size = size; - stream->base.z_state = z_unused; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; stream->pack = pack; stream->pos = offset; -- GitLab From 385e18810f10ec0ce0a266d25da4e1878c8ce15a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:36 +0100 Subject: [PATCH 011/110] packfile: introduce function to read object info from a store Extract the logic to read object info for a packed object from `do_oid_object_info_extended()` into a standalone function that operates on the packfile store. This function will be used in a subsequent commit. Note that this change allows us to make `find_pack_entry()` an internal implementation detail. As a consequence though we have to move around `packfile_store_freshen_object()` so that it is defined after that function.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 29 +++------------------- packfile.c | 71 +++++++++++++++++++++++++++++++++++++++++------------- packfile.h | 12 ++++++++- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/odb.c b/odb.c index 3ec21ef24e..f4cbee4b04 100644 --- a/odb.c +++ b/odb.c @@ -666,8 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -702,8 +700,8 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) @@ -713,8 +711,8 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; } /* @@ -747,25 +745,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. 
- */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, diff --git a/packfile.c b/packfile.c index 40f733dd23..b4bc40d895 100644 --- a/packfile.c +++ b/packfile.c @@ -819,22 +819,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } -int packfile_store_freshen_object(struct packfile_store *store, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (utime(e.p->pack_name, NULL)) - return 0; - e.p->freshened = 1; - return 1; -} - void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -2064,7 +2048,9 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct repository *r, + const struct object_id *oid, + struct pack_entry *e) { struct list_head *pos; @@ -2087,6 +2073,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct 
object_info *oi, + unsigned flags UNUSED) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (oi == &blank_oi) + return 0; + + rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + + if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { diff --git a/packfile.h b/packfile.h index 58fcc88e20..0a98bddd81 100644 --- a/packfile.h +++ b/packfile.h @@ -144,6 +144,17 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -357,7 +368,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. 
*/ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); int has_object_pack(struct repository *r, const struct object_id *oid); -- GitLab From 4c89d31494bff4bde6079a0e0821f1437e37d07b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:37 +0100 Subject: [PATCH 012/110] streaming: rely on object sources to create object stream When creating an object stream we first look up the object info and, if it's present, we call into the respective backend that contains the object to create a new stream for it. This has the consequence that, for the loose object source, we basically iterate through the object sources twice: we first discover that the file exists as a loose object in the first place by iterating through all sources. And, once we have discovered it, we again walk through all sources to try and map the object. The same issue will eventually also surface once the packfile store becomes per-object-source. Furthermore, it feels rather pointless to first look up the object only to then try and read it. Refactor the logic to be centered around sources instead. Instead of first reading the object, we immediately ask the source to create the object stream for us. If the object exists we get a stream, otherwise we'll try the next source. Like this we only have to iterate through sources once. But even more importantly, this change also helps us to make the whole logic pluggable. The object read stream subsystem does not need to be aware of the different source backends anymore, but eventually it'll only have to call the source's callback function. Note that at the current point in time we aren't fully there yet: - The packfile store still sits on the object database level and is thus agnostic of the sources. - We still have to call into both the packfile store and the loose object source.
But both of these issues will soon be addressed. This refactoring results in a slight change to semantics: previously, it was `odb_read_object_info_extended()` that picked the source for us, and it would have favored packed (non-deltified) objects over loose objects. And while we still favor packed over loose objects for a single source with the new logic, we'll now favor a loose object from an earlier source over a packed object from a later source. Ultimately this shouldn't matter though: the stream doesn't indicate to the caller which source it is from and whether it was created from a packed or loose object, so such details are opaque to the caller. And other than that we should be able to assume that two objects with the same object ID should refer to the same content, so the streamed data would be the same, too. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 65 ++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 46fddaf2ca..f0f7d31956 100644 --- a/streaming.c +++ b/streaming.c @@ -204,21 +204,15 @@ static int close_istream_loose(struct odb_read_stream *_st) } static int open_istream_loose(struct odb_read_stream **out, - struct repository *r, + struct odb_source *source, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; - struct odb_source *source; unsigned long mapsize; void *mapped; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (mapped) - break; - } + mapped = odb_source_loose_map_object(source, oid, &mapsize); if (!mapped) return -1; @@ -352,21 +346,25 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st) } static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct repository *r UNUSED, - const struct 
object_id *oid UNUSED, - struct packed_git *pack, - off_t offset) + struct object_database *odb, + const struct object_id *oid) { struct odb_packed_read_stream *stream; - struct pack_window *window; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; - size_t size; + unsigned long size; - window = NULL; + oi.sizep = &size; + + if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + return -1; - in_pack_type = unpack_object_header(pack, + in_pack_type = unpack_object_header(oi.u.packed.pack, &window, - &offset, + &oi.u.packed.offset, &size); unuse_pack(&window); switch (in_pack_type) { @@ -385,8 +383,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.type = in_pack_type; stream->base.size = size; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = pack; - stream->pos = offset; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; *out = &stream->base; @@ -463,30 +461,15 @@ static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; + struct odb_source *source; - switch (oi.whence) { - case OI_LOOSE: - if (open_istream_loose(out, r, oid) < 0) - break; - return 0; - case OI_PACKED: - if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, - oi.u.packed.offset) < 0) - break; + if (!open_istream_pack_non_delta(out, r->objects, oid)) return 0; - default: - break; - } + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) + if 
(!open_istream_loose(out, source, oid)) + return 0; return open_istream_incore(out, r, oid); } -- GitLab From c26da3446e98ad4aa98ec9154c70c6fd35cb9ad6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:38 +0100 Subject: [PATCH 013/110] streaming: get rid of `the_repository` Subsequent commits will move the backend-specific logic of object streaming into their respective subsystems. These subsystems have gotten rid of `the_repository` already, but we still use it in two locations in the streaming subsystem. Prepare for the move by fixing those two cases. Converting the logic in `open_istream_pack_non_delta()` is trivial as we already got the object database as input. But for `stream_blob_to_fd()` we have to add a new parameter to make it accessible. So, as we already have to adjust all callers anyway, rename the function to `odb_stream_blob_to_fd()` to indicate it's part of the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/fsck.c | 3 ++- builtin/log.c | 4 ++-- entry.c | 2 +- parallel-checkout.c | 3 ++- streaming.c | 13 +++++++------ streaming.h | 18 +++++++++++++++++- 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837..120d626d66 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index b1a650c673..1a348d43c2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1)) 
+ if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); diff --git a/builtin/log.c b/builtin/log.c index c8319b8af3..e7b83a6e00 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/entry.c b/entry.c index cae02eb503..38dfe670f7 100644 --- a/entry.c +++ b/entry.c @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6..1cb6701b92 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + &pc_item->ce->oid, filter, 1)) { /* On error, 
reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/streaming.c b/streaming.c index f0f7d31956..807a6e03a8 100644 --- a/streaming.c +++ b/streaming.c @@ -2,8 +2,6 @@ * Copyright (c) 2011, Google Inc. */ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "convert.h" #include "environment.h" @@ -359,7 +357,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, @@ -518,8 +516,11 @@ struct odb_read_stream *open_istream(struct repository *r, return st; } -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) { struct odb_read_stream *st; enum object_type type; @@ -527,7 +528,7 @@ int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter ssize_t kept = 0; int result = -1; - st = open_istream(the_repository, oid, &type, &sz, filter); + st = open_istream(odb->repo, oid, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter); diff --git a/streaming.h b/streaming.h index f5ff5d7ac9..148f6b3069 100644 --- a/streaming.h +++ b/streaming.h @@ -6,6 +6,7 @@ #include "object.h" +struct object_database; /* opaque */ struct odb_read_stream; struct stream_filter; @@ -16,6 +17,21 @@ struct odb_read_stream *open_istream(struct repository *, const struct object_id int close_istream(struct odb_read_stream *); ssize_t read_istream(struct odb_read_stream *, void *, size_t); -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); +/* + * Look up the object by its 
ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. + */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); #endif /* STREAMING_H */ -- GitLab From ffc9a3448500caa50766876ef2169e0f26ad3b3c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:39 +0100 Subject: [PATCH 014/110] streaming: make the `odb_read_stream` definition public Subsequent commits will move the backend-specific logic of setting up an object read stream into the specific subsystems. As the backends are now the ones that are responsible for allocating the stream they'll need to have the stream definition available to them. Make the stream definition public to prepare for this. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 11 ----------- streaming.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/streaming.c b/streaming.c index 807a6e03a8..0635b7c12e 100644 --- a/streaming.c +++ b/streaming.c @@ -12,19 +12,8 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*close_istream_fn)(struct odb_read_stream *); -typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); - #define FILTER_BUFFER (1024*16) -struct odb_read_stream { - close_istream_fn close; - read_istream_fn read; - - enum object_type type; - unsigned long size; /* inflated size of full object */ -}; - /***************************************************************** * * Filtered stream diff --git a/streaming.h b/streaming.h index 148f6b3069..acfdef1598 100644 --- a/streaming.h +++ b/streaming.h @@ -7,10 +7,23 @@ #include "object.h" struct object_database; -/* opaque */ struct odb_read_stream; struct stream_filter; +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + struct odb_read_stream *open_istream(struct repository *, const struct object_id *, enum object_type *, unsigned long *, struct stream_filter *); -- GitLab From bc30a2f5dff6dd39966819ca3771ab5e9e072123 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:40 +0100 Subject: [PATCH 015/110] streaming: move logic to read loose objects streams into backend Move the logic to read loose object streams into the respective subsystem. 
This allows us to make a couple of function declarations private. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++--- object-file.h | 42 ++----------- streaming.c | 133 +--------------------------------------- 3 files changed, 164 insertions(+), 178 deletions(-) diff --git a/object-file.c b/object-file.c index b62b21a452..8c67847fea 100644 --- a/object-file.c +++ b/object-file.c @@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; int fd = open_loose_object(source->loose, oid, &p); @@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source, return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. + */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting "<type> <size>\0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the <type> populate the "typep" in the "struct + * object_info".
It will be OBJ_BAD if the object type is unknown. The + * parsed <size> can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose) loose_object_map_clear(&loose->map); free(loose); } + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return
total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index eeffa67bbd..1229d5f675 100644 --- a/object-file.h +++ b/object-file.h @@ -16,6 +16,8 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat 
*st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; struct odb_source_loose { @@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, int flags); -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size); +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); /* * Return true iff an object database source has a loose object @@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting "<type> <size>\0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the <type> populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed <size> can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest().
- */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/streaming.c b/streaming.c index 0635b7c12e..d5acc1c396 100644 --- a/streaming.c +++ b/streaming.c @@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -struct odb_loose_read_stream { - struct odb_read_stream base; - git_zstream z; - enum { - ODB_LOOSE_READ_STREAM_INUSE, - ODB_LOOSE_READ_STREAM_DONE, - ODB_LOOSE_READ_STREAM_ERROR, - } z_state; - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; -}; - -static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_LOOSE_READ_STREAM_DONE: - return 0; - case ODB_LOOSE_READ_STREAM_ERROR: - return -1; - default: - break; - } - - if (st->hdr_used < st->hdr_avail) { - size_t to_copy = st->hdr_avail - st->hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->hdr + st->hdr_used, to_copy); - st->hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_DONE; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int 
close_istream_loose(struct odb_read_stream *_st) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - return 0; -} - -static int open_istream_loose(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_loose_read_stream *st; - unsigned long mapsize; - void *mapped; - - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (!mapped) - return -1; - - /* - * Note: we must allocate this structure early even though we may still - * fail. This is because we need to initialize the zlib stream, and it - * is not possible to copy the stream around after the fact because it - * has self-referencing pointers. - */ - CALLOC_ARRAY(st, 1); - - switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, - sizeof(st->hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - - oi.sizep = &st->base.size; - oi.typep = &st->base.type; - - if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) - goto error; - - st->mapped = mapped; - st->mapsize = mapsize; - st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->z.total_out; - st->z_state = ODB_LOOSE_READ_STREAM_INUSE; - st->base.close = close_istream_loose; - st->base.read = read_istream_loose; - - *out = &st->base; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - free(st); - return -1; -} - - /***************************************************************** * * Non-delta packed object stream @@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out, odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) - if (!open_istream_loose(out, source, oid)) + if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; return 
open_istream_incore(out, r, oid); -- GitLab From 8c1b84bc977bf1e4515efe0386de87257ec28689 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:41 +0100 Subject: [PATCH 016/110] streaming: move logic to read packed objects streams into backend Move the logic to read packed object streams into the respective subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ streaming.c | 136 +--------------------------------------------------- 3 files changed, 134 insertions(+), 135 deletions(-) diff --git a/packfile.c b/packfile.c index b4bc40d895..ad56ce0b90 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -2406,3 +2407,130 @@ void packfile_store_close(struct packfile_store *store) close_pack(p); } } + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, 
&st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(store->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case 
OBJ_TAG: + break; + } + + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; +} diff --git a/packfile.h b/packfile.h index 0a98bddd81..3fcc5ae6e0 100644 --- a/packfile.h +++ b/packfile.h @@ -8,6 +8,7 @@ /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct hashmap_entry packmap_ent; @@ -144,6 +145,10 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); + /* * Try to read the object identified by its ID from the object store and * populate the object info with its data. 
Returns 1 in case the object was diff --git a/streaming.c b/streaming.c index d5acc1c396..3140728a70 100644 --- a/streaming.c +++ b/streaming.c @@ -114,140 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -struct odb_packed_read_stream { - struct odb_read_stream base; - struct packed_git *pack; - git_zstream z; - enum { - ODB_PACKED_READ_STREAM_UNINITIALIZED, - ODB_PACKED_READ_STREAM_INUSE, - ODB_PACKED_READ_STREAM_DONE, - ODB_PACKED_READ_STREAM_ERROR, - } z_state; - off_t pos; -}; - -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, - size_t sz) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_PACKED_READ_STREAM_UNINITIALIZED: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_INUSE; - break; - case ODB_PACKED_READ_STREAM_DONE: - return 0; - case ODB_PACKED_READ_STREAM_ERROR: - return -1; - case ODB_PACKED_READ_STREAM_INUSE: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->pack, &window, - st->pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_DONE; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. 
If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). - */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct odb_read_stream *_st) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) - git_inflate_end(&st->z); - return 0; -} - -static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct object_database *odb, - const struct object_id *oid) -{ - struct odb_packed_read_stream *stream; - struct pack_window *window = NULL; - struct object_info oi = OBJECT_INFO_INIT; - enum object_type in_pack_type; - unsigned long size; - - oi.sizep = &size; - - if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || - oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(odb->repo) >= size) - return -1; - - in_pack_type = unpack_object_header(oi.u.packed.pack, - &window, - &oi.u.packed.offset, - &size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - - CALLOC_ARRAY(stream, 1); - stream->base.close = close_istream_pack_non_delta; - stream->base.read = read_istream_pack_non_delta; - stream->base.type = in_pack_type; - stream->base.size = size; - stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = oi.u.packed.pack; - stream->pos = oi.u.packed.offset; - - *out = &stream->base; - - return 0; -} - - /***************************************************************** * * In-core stream @@ -319,7 +185,7 @@ static int istream_source(struct odb_read_stream 
**out, { struct odb_source *source; - if (!open_istream_pack_non_delta(out, r->objects, oid)) + if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) return 0; odb_prepare_alternates(r->objects); -- GitLab From 378ec56beba161abbef6e2c87d9bc2ac43c355f3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:42 +0100 Subject: [PATCH 017/110] streaming: refactor interface to be object-database-centric Refactor the streaming interface to be centered around object databases instead of centered around the repository. Rename the functions accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 6 +++--- archive-zip.c | 12 ++++++------ builtin/index-pack.c | 8 ++++---- builtin/pack-objects.c | 14 +++++++------- object-file.c | 8 ++++---- streaming.c | 44 +++++++++++++++++++++--------------------- streaming.h | 30 +++++++++++++++++++++++----- 7 files changed, 71 insertions(+), 51 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index dc1eda09e0..4d87b28504 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -135,16 +135,16 @@ static int stream_blocked(struct repository *r, const struct object_id *oid) char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index 40a9c93ff9..c44684aebc 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -348,8 +348,8 @@ static int write_zip_entry(struct archiver_args *args, if (!buffer) { enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = 
odb_read_stream_open(args->repo->objects, oid, + &type, &size, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); @@ -429,7 +429,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +439,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +462,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +486,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5f90f12f92..fb76ef0f4c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -807,15 +807,15 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL); if (!data.st) return -1; if (size != entry->size || type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), 
oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c693d948e1..1353c2384c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct odb_read_stream *st, struct ha for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -520,8 +520,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL)) != NULL) buf = NULL; else { buf = odb_read_object(the_repository->objects, @@ -577,7 +577,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +591,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +601,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +609,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct 
object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); diff --git a/object-file.c b/object-file.c index 8c67847fea..9ba40a848c 100644 --- a/object-file.c +++ b/object-file.c @@ -139,7 +139,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); if (!st) return -1; @@ -151,10 +151,10 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,7 +162,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? 
-1 : 0; } diff --git a/streaming.c b/streaming.c index 3140728a70..06993a751c 100644 --- a/streaming.c +++ b/streaming.c @@ -35,7 +35,7 @@ static int close_istream_filtered(struct odb_read_stream *_fs) { struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; free_stream_filter(fs->filter); - return close_istream(fs->upstream); + return odb_read_stream_close(fs->upstream); } static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, @@ -87,7 +87,7 @@ static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, /* refill the input from the upstream */ if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); if (fs->i_end < 0) return -1; if (fs->i_end) @@ -149,7 +149,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_ } static int open_istream_incore(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -163,7 +163,7 @@ static int open_istream_incore(struct odb_read_stream **out, oi.typep = &stream.base.type; oi.sizep = &stream.base.size; oi.contentp = (void **)&stream.buf; - ret = odb_read_object_info_extended(r->objects, oid, &oi, + ret = odb_read_object_info_extended(odb, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; @@ -180,47 +180,47 @@ static int open_istream_incore(struct odb_read_stream **out, *****************************************************************************/ static int istream_source(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct odb_source *source; - if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) + if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) return 0; - odb_prepare_alternates(r->objects); - for (source = 
r->objects->sources; source; source = source->next) + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; - return open_istream_incore(out, r, oid); + return open_istream_incore(out, odb, oid); } /**************************************************************** * Users of streaming interface ****************************************************************/ -int close_istream(struct odb_read_stream *st) +int odb_read_stream_close(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct odb_read_stream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { struct odb_read_stream *st; - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(&st, r, real); + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); if (ret) return NULL; @@ -229,7 +229,7 @@ struct odb_read_stream *open_istream(struct repository *r, /* Add "&& !is_null_stream_filter(filter)" for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { - close_istream(st); + odb_read_stream_close(st); return NULL; } st = nst; @@ -252,7 +252,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, ssize_t kept = 0; int result = -1; - st = open_istream(odb->repo, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, &type, &sz, filter); if (!st) { if (filter) 
free_stream_filter(filter); @@ -263,7 +263,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, for (;;) { char buf[1024 * 16]; ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) goto close_and_exit; @@ -294,6 +294,6 @@ int odb_stream_blob_to_fd(struct object_database *odb, result = 0; close_and_exit: - close_istream(st); + odb_read_stream_close(st); return result; } diff --git a/streaming.h b/streaming.h index acfdef1598..7cb55213b7 100644 --- a/streaming.h +++ b/streaming.h @@ -24,11 +24,31 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ }; -struct odb_read_stream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct odb_read_stream *); -ssize_t read_istream(struct odb_read_stream *, void *, size_t); +/* + * Create a new object stream for the given object database. Populates the type + * and size pointers with the object's info. An optional filter can be used to + * transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. 
+ */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); /* * Look up the object by its ID and write the full contents to the file -- GitLab From 1599b68d5e960a12f5ac624f81c70ece317db5a6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:43 +0100 Subject: [PATCH 018/110] streaming: move into object database subsystem The "streaming" terminology is somewhat generic, so it may not be immediately obvious that "streaming.{c,h}" is specific to the object database. Rectify this by moving it into the "odb/" directory so that it can be immediately attributed to the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 2 +- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/cat-file.c | 2 +- builtin/fsck.c | 2 +- builtin/index-pack.c | 2 +- builtin/log.c | 2 +- builtin/pack-objects.c | 2 +- entry.c | 2 +- meson.build | 2 +- object-file.c | 2 +- streaming.c => odb/streaming.c | 2 +- streaming.h => odb/streaming.h | 0 packfile.c | 2 +- parallel-checkout.c | 2 +- 15 files changed, 14 insertions(+), 14 deletions(-) rename streaming.c => odb/streaming.c (99%) rename streaming.h => odb/streaming.h (100%) diff --git a/Makefile b/Makefile index 7e0f77e298..6d8dcc4622 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 4d87b28504..494b9f0667 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" 
#include "run-command.h" #include "write-or-die.h" diff --git a/archive-zip.c b/archive-zip.c index c44684aebc..a0bdc2fe3b 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" -#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 120d626d66..505ddaa12f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" diff --git a/builtin/fsck.c b/builtin/fsck.c index 1a348d43c2..c7d2eea287 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" diff --git a/builtin/index-pack.c b/builtin/index-pack.c index fb76ef0f4c..581023495f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" diff --git a/builtin/log.c b/builtin/log.c index e7b83a6e00..d4cf9c59c8 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" #include "object-name.h" #include "odb.h" 
+#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include "version.h" #include "mailmap.h" #include "progress.h" diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1353c2384c..f109e26786 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" diff --git a/entry.c b/entry.c index 38dfe670f7..7817aee362 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" diff --git a/meson.build b/meson.build index 1f95a06edb..fc82929b37 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +491,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/object-file.c b/object-file.c index 9ba40a848c..9601fdb12d 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" #include "setup.h" -#include "streaming.h" #include "tempfile.h" #include 
"tmp-objdir.h" diff --git a/streaming.c b/odb/streaming.c similarity index 99% rename from streaming.c rename to odb/streaming.c index 06993a751c..7ef58adaa2 100644 --- a/streaming.c +++ b/odb/streaming.c @@ -5,10 +5,10 @@ #include "git-compat-util.h" #include "convert.h" #include "environment.h" -#include "streaming.h" #include "repository.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "packfile.h" diff --git a/streaming.h b/odb/streaming.h similarity index 100% rename from streaming.h rename to odb/streaming.h diff --git a/packfile.c b/packfile.c index ad56ce0b90..7a16aaa90d 100644 --- a/packfile.c +++ b/packfile.c @@ -20,7 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" -#include "streaming.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" diff --git a/parallel-checkout.c b/parallel-checkout.c index 1cb6701b92..0bf4bd6d4a 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" -- GitLab From 7b940286527ec2175dffbb317f47e080bb37cf3e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:44 +0100 Subject: [PATCH 019/110] streaming: drop redundant type and size pointers In the preceding commits we have turned `struct odb_read_stream` into a publicly visible structure. Furthermore, this structure now contains the type and size of the object that we are about to stream. Consequently, the out-pointers that we used before to propagate the type and size of the streamed object are now somewhat redundant with the data contained in the structure itself. Drop these out-pointers and adapt callers accordingly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 4 +--- archive-zip.c | 5 ++--- builtin/index-pack.c | 7 ++----- builtin/pack-objects.c | 6 ++++-- object-file.c | 6 ++---- odb/streaming.c | 10 ++-------- odb/streaming.h | 7 ++----- 7 files changed, 15 insertions(+), 30 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 494b9f0667..0fc70d13a8 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -130,12 +130,10 @@ static void write_trailer(void) static int stream_blocked(struct repository *r, const struct object_id *oid) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; char buf[BLOCKSIZE]; ssize_t readlen; - st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { diff --git a/archive-zip.c b/archive-zip.c index a0bdc2fe3b..97ea8d60d6 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = odb_read_stream_open(args->repo->objects, oid, - &type, &size, NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 581023495f..b01cb77f4a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,11 +805,10 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, 
sizeof(data)); data.entry = entry; - data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f109e26786..0d1d6995bf 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -521,9 +521,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL)) != NULL) + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); diff --git a/object-file.c b/object-file.c index 9601fdb12d..12177a7dd7 100644 --- a/object-file.c +++ b/object-file.c @@ -132,19 +132,17 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. 
*/ r->hash_algo->init_fn(&c); diff --git a/odb/streaming.c b/odb/streaming.c index 7ef58adaa2..745cd486fb 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -214,8 +214,6 @@ ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter) { struct odb_read_stream *st; @@ -235,8 +233,6 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, st = nst; } - *size = st->size; - *type = st->type; return st; } @@ -247,18 +243,16 @@ int odb_stream_blob_to_fd(struct object_database *odb, int can_seek) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; ssize_t kept = 0; int result = -1; - st = odb_read_stream_open(odb, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, filter); if (!st) { if (filter) free_stream_filter(filter); return result; } - if (type != OBJ_BLOB) + if (st->type != OBJ_BLOB) goto close_and_exit; for (;;) { char buf[1024 * 16]; diff --git a/odb/streaming.h b/odb/streaming.h index 7cb55213b7..c7861f7e13 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -25,16 +25,13 @@ struct odb_read_stream { }; /* - * Create a new object stream for the given object database. Populates the type - * and size pointers with the object's info. An optional filter can be used to - * transform the object's content. + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. * * Returns the stream on success, a `NULL` pointer otherwise. 
*/ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter); /* -- GitLab From a70be5043d8fa47fbc1d559369f954446cfb2d87 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 10:54:54 +0100 Subject: [PATCH 020/110] Start tracking packfiles per object database source Hi, the `struct packfile_store` tracks packfiles we have in the repository so that we can look up objects stored therein. Right now, the packfile store is tracked on the object database level -- each object database has exactly one packfile store. Consequently, we track packfiles that are part of different object database sources via the same packfile store. This patch series refactors this so that we instead have one packfile store per ODB source. This means that access to any object, regardless of whether it is stored in a packfile or in a loose object, is always done via its owning source. This is the last step required for pluggable object databases: all object access is routed through sources, and we can thus now abstract these sources and then plug in a different implementation. Of course, these abstractions are still very leaky, and we still reach into the implementation details in a bunch of files. But this is something that will be addressed over subsequent steps. Thanks! Patrick To: git@vger.kernel.org --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 1, "change-id": "20251201-b4-pks-pack-store-via-source-fd43dc0765a7", "prefixes": [] } } -- GitLab From 75fc7fc70c9cbdb28e48122e30d834c733aa0017 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 7 Nov 2025 10:53:52 +0100 Subject: [PATCH 021/110] packfile: create store via its owning source In subsequent patches we're about to move the packfile store from the object database layer into the object database source layer. 
Once done, we'll have one packfile store per source, where the source is owning the store. Prepare for this future and refactor `packfile_store_new()` to be initialized via an object database source instead of via the object database itself. This refactoring leads to a weird in-between state where the store is owned by the object database but created via the source. But this makes subsequent refactorings easier because we can now start to access the owning source of a given store. Signed-off-by: Patrick Steinhardt --- odb.c | 2 +- packfile.c | 20 ++++++++++---------- packfile.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index af13174425..3ab730f713 100644 --- a/odb.c +++ b/odb.c @@ -1056,7 +1056,6 @@ struct object_database *odb_new(struct repository *repo, memset(o, 0, sizeof(*o)); o->repo = repo; - o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); @@ -1065,6 +1064,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->packfiles = packfile_store_new(o->sources); free(to_free); diff --git a/packfile.c b/packfile.c index c88bd92619..0a05a10daa 100644 --- a/packfile.c +++ b/packfile.c @@ -876,7 +876,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, p = strmap_get(&store->packs_by_path, key.buf); if (!p) { - p = add_packed_git(store->odb->repo, idx_path, + p = add_packed_git(store->source->odb->repo, idx_path, strlen(idx_path), local); if (p) packfile_store_add_pack(store, p); @@ -1068,8 +1068,8 @@ void packfile_store_prepare(struct packfile_store *store) if (store->initialized) return; - odb_prepare_alternates(store->odb); - for (source = store->odb->sources; source; source = source->next) { + odb_prepare_alternates(store->source->odb); + for (source = 
store->source->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } @@ -1092,7 +1092,7 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->odb->sources; source; source = source->next) { + for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -2121,7 +2121,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2142,7 +2142,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 1; /* @@ -2152,7 +2152,7 @@ int packfile_store_read_object_info(struct packfile_store *store, if (oi == &blank_oi) return 0; - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, oid); return -1; @@ -2411,11 +2411,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -struct packfile_store *packfile_store_new(struct object_database *odb) +struct packfile_store *packfile_store_new(struct odb_source *source) { struct packfile_store *store; CALLOC_ARRAY(store, 1); - store->odb = odb; + store->source = source; strmap_init(&store->packs_by_path); return store; } @@ -2534,7 +2534,7 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, if (packfile_store_read_object_info(store, oid, &oi, 0) || oi.u.packed.is_delta || - 
repo_settings_get_big_file_threshold(store->odb->repo) >= size) + repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, diff --git a/packfile.h b/packfile.h index 59d162a3f4..33cc1c1654 100644 --- a/packfile.h +++ b/packfile.h @@ -77,7 +77,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, * A store that manages packfiles for a given object database. */ struct packfile_store { - struct object_database *odb; + struct odb_source *source; /* * The list of packfiles in the order in which they have been most @@ -129,9 +129,9 @@ struct packfile_store { /* * Allocate and initialize a new empty packfile store for the given object - * database. + * database source. */ -struct packfile_store *packfile_store_new(struct object_database *odb); +struct packfile_store *packfile_store_new(struct odb_source *source); /* * Free the packfile store and all its associated state. All packfiles -- GitLab From 322dc2d4bbe2521a9f1e5f1cddd30c7e247597f1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 09:48:32 +0200 Subject: [PATCH 022/110] packfile: pass source to `prepare_pack()` When preparing a packfile we pass various pieces attached to the pack's object database source via the `struct prepare_pack_data`. Refactor this code to instead pass in the source directly. This reduces the number of variables we need to pass and allows for a subsequent refactoring where we start to prepare the pack via the source. 
Signed-off-by: Patrick Steinhardt --- packfile.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packfile.c b/packfile.c index 0a05a10daa..ab86afa01d 100644 --- a/packfile.c +++ b/packfile.c @@ -975,10 +975,8 @@ void for_each_file_in_pack_dir(const char *objdir, } struct prepare_pack_data { - struct repository *r; + struct odb_source *source; struct string_list *garbage; - int local; - struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -988,10 +986,10 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->m && midx_contains_pack(data->m, file_name))) { + !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->r->objects->packfiles, - trimmed_path, data->local); + packfile_store_load_pack(data->source->odb->packfiles, + trimmed_path, data->source->local); free(trimmed_path); } @@ -1020,10 +1018,8 @@ static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { - .m = source->midx, - .r = source->odb->repo, + .source = source, .garbage = &garbage, - .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); -- GitLab From add7a2fc4253b41f5ec5d0476d4d73e37016ef06 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:23:43 +0200 Subject: [PATCH 023/110] packfile: refactor kept-pack cache to work with packfile stores The kept pack cache is a cache of packfiles that are marked as kept either via an accompanying ".kept" file or via an in-memory flag. The cache can be retrieved via `kept_pack_cache()`, where one needs to pass in a repository. 
Ultimately though the kept-pack cache is a property of the packfile store, and this causes problems in a subsequent commit where we want to move down the packfile store to be a per-object-source entity. Prepare for this and refactor the kept-pack cache to work on top of a packfile store instead. Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 12 ++++++------ packfile.c | 37 ++++++++++++++++++++----------------- packfile.h | 18 +++++++++++++----- reachable.c | 2 +- revision.c | 8 ++++---- 5 files changed, 44 insertions(+), 33 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1ce8d6ee21..e86b8f387a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,9 +1529,9 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; off_t ofs; uint32_t candidate_mtime; @@ -1624,9 +1624,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= KEPT_PACK_IN_CORE; /* * If the object is in a pack that we want to ignore, *and* we @@ -3931,7 +3931,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) * an optimization during delta selection. 
*/ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; @@ -4030,7 +4030,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS); + return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); } static int cruft_include_check(struct commit *commit, void *data) diff --git a/packfile.c b/packfile.c index ab86afa01d..191344eb1c 100644 --- a/packfile.c +++ b/packfile.c @@ -2164,25 +2164,26 @@ int packfile_store_read_object_info(struct packfile_store *store, return 0; } -static void maybe_invalidate_kept_pack_cache(struct repository *r, +static void maybe_invalidate_kept_pack_cache(struct packfile_store *store, unsigned flags) { - if (!r->objects->packfiles->kept_cache.packs) + if (!store->kept_cache.packs) return; - if (r->objects->packfiles->kept_cache.flags == flags) + if (store->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); - r->objects->packfiles->kept_cache.flags = 0; + FREE_AND_NULL(store->kept_cache.packs); + store->kept_cache.flags = 0; } -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags) { - maybe_invalidate_kept_pack_cache(r, flags); + maybe_invalidate_kept_pack_cache(store, flags); - if (!r->objects->packfiles->kept_cache.packs) { + if (!store->kept_cache.packs) { struct packed_git **packs = NULL; + struct packfile_list_entry *e; size_t nr = 0, alloc = 0; - struct packed_git *p; /* * We want "all" packs here, because we need to cover ones that @@ -2192,9 +2193,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not 
kept, but the midx returns only * the non-kept version. */ - repo_for_each_pack(r, p) { - if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || - (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } @@ -2202,11 +2205,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->packfiles->kept_cache.packs = packs; - r->objects->packfiles->kept_cache.flags = flags; + store->kept_cache.packs = packs; + store->kept_cache.flags = flags; } - return r->objects->packfiles->kept_cache.packs; + return store->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, @@ -2214,9 +2217,9 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; if (fill_pack_entry(oid, e, p)) return 1; diff --git a/packfile.h b/packfile.h index 33cc1c1654..701a3b4946 100644 --- a/packfile.h +++ b/packfile.h @@ -210,6 +210,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid); +enum kept_pack_type { + KEPT_PACK_ON_DISK = (1 << 0), + KEPT_PACK_IN_CORE = (1 << 1), +}; + +/* + * Retrieve the cache of kept packs from the given packfile store. Accepts a + * combination of `kept_pack_type` flags. The cache is computed on demand and + * will be recomputed whenever the flags change. 
+ */ +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -385,9 +398,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 - /* * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. @@ -398,8 +408,6 @@ int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. diff --git a/reachable.c b/reachable.c index b753c39553..4b532039d5 100644 --- a/reachable.c +++ b/reachable.c @@ -242,7 +242,7 @@ static int want_recent_object(struct recent_data *data, const struct object_id *oid) { if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS)) + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) return 0; return 1; } diff --git a/revision.c b/revision.c index 5f0850ae5c..64d223a7c6 100644 --- a/revision.c +++ b/revision.c @@ -2541,14 +2541,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked= no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 
1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; -- GitLab From 89be7e014d0f2e6e94dd7155323839cfd0e52b87 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 19 Oct 2025 17:19:31 +0200 Subject: [PATCH 024/110] packfile: refactor misleading code when unusing pack windows The function `unuse_one_window()` is responsible for unmapping one of the packfile windows, which is done when we have exceeded the allowed number of windows. The function receives a `struct packed_git` as input, which serves as an additional packfile that should be considered to be closed. If not given, we seemingly skip that and instead go through all of the repository's packfiles. The conditional that checks whether we have a packfile though does not make much sense anymore, as we dereference the packfile regardless of whether or not it is a `NULL` pointer to derive the repository's packfile store. The function was originally introduced via f0e17e86e1 (pack: move release_pack_memory(), 2017-08-18), and here we indeed had a caller that passed a `NULL` pointer. That caller was later removed via 9827d4c185 (packfile: drop release_pack_memory(), 2019-08-12), so starting with that commit we always pass a `struct packed_git`. In 9c5ce06d74 (packfile: use `repository` from `packed_git` directly, 2024-12-03) we then inadvertently started to rely on the fact that the pointer is never `NULL` because we use it now to identify the repository. Arguably, it didn't really make sense in the first place that the caller provides a packfile, as the selected window would have been overridden anyway by the subsequent loop over all packfiles if there was an older window. 
So the overall logic is quite misleading. The only case where it _could_ make a difference is when there were two packfiles with the same `last_used` value, but that case doesn't ever happen because the `pack_used_ctr` is strictly increasing. Refactor the code so that we instead pass in the object database to help make the code less misleading. Signed-off-by: Patrick Steinhardt --- packfile.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packfile.c b/packfile.c index 191344eb1c..3700612465 100644 --- a/packfile.c +++ b/packfile.c @@ -355,16 +355,15 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct object_database *odb) { struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + for (e = odb->packfiles->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -740,8 +739,8 @@ unsigned char *use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; - while (settings->packed_git_limit < pack_mapped - && unuse_one_window(p)) + while (settings->packed_git_limit < pack_mapped && + unuse_one_window(p->repo->objects)) ; /* nothing */ win->base = xmmap_gently(NULL, win->len, PROT_READ, MAP_PRIVATE, -- GitLab From 2b33521aaec9819147bd210da65452ff3f101234 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 12:41:29 +0100 Subject: [PATCH 025/110] odb: properly close sources before freeing them In the next commit we are about to move the packfile store into the ODB source so that we have one store per source. 
This will lead to a memory leak in the following commit when reading data from a submodule via git-grep(1): Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x55555562e726 in calloc (git+0xda726) #1 0x555555963244 in xcalloc ../wrapper.c:154:8 #2 0x55555586b09b in use_pack ../packfile.c:739:4 #3 0x55555586c6bf in unpack_object_header ../packfile.c:1235:9 #4 0x55555586d44b in unpack_entry ../packfile.c:1789:10 #5 0x55555586cd6c in cache_or_unpack_entry ../packfile.c:1520:10 #6 0x55555586cacf in packed_object_info ../packfile.c:1600:19 #7 0x55555586e60a in packfile_store_read_object_info ../packfile.c:2165:10 #8 0x5555558525eb in do_oid_object_info_extended ../odb.c:720:10 #9 0x555555851fb1 in odb_read_object_info_extended ../odb.c:847:8 #10 0x555555852c0d in odb_read_object ../odb.c:905:6 #11 0x5555558089e0 in grep_source_load_oid ../grep.c:1934:12 #12 0x5555558087ea in grep_source_load ../grep.c:1986:10 #13 0x5555558077b3 in grep_source_is_binary ../grep.c:2014:7 #14 0x555555805c24 in grep_source_1 ../grep.c:1625:8 #15 0x5555558059d2 in grep_source ../grep.c:1837:10 #16 0x5555556a5ed8 in run ../builtin/grep.c:208:10 #17 0x55555562bb42 in void* ThreadStartFunc(void*) lsan_interceptors.cpp.o #18 0x7ffff7a9a979 in start_thread (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x9a979) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) #19 0x7ffff7b22d2b in __GI___clone3 (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x122d2b) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) The root cause of this leak is the way we set up and release the submodule: 1. We use `repo_submodule_init()` to initialize a new repository. This repository is stored in `repos_to_free`. 2. We now read data from the submodule repository. 3. We then call `repo_clear()` on the submodule repositories. 4. `repo_clear()` calls `odb_free()`. 5. `odb_free()` calls `odb_free_sources()` followed by `odb_close()`. 
The issue here is the 5th step: we call `odb_free_sources()` _before_ we call `odb_close()`. But `odb_free_sources()` already frees all sources, so the logic that closes them in `odb_close()` now becomes a no-op. As a consequence, we never explicitly close sources at all. This isn't a problem at the current point in time: the loose sources don't have any state to release, and the packfile store is not yet part of the sources. But once the packfile store is owned by the source we won't close it anymore, and this causes us to leak the packfile windows. Fix the upcoming leak by closing the store before we free the sources. Signed-off-by: Patrick Steinhardt --- odb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/odb.c b/odb.c index 3ab730f713..94144a69f5 100644 --- a/odb.c +++ b/odb.c @@ -1111,13 +1111,13 @@ void odb_free(struct object_database *o) oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); + odb_close(o); odb_free_sources(o); for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - odb_close(o); packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); -- GitLab From 66356daea6c6ac94b4ff358eb6fbfcb5bc0ad9b1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 12:56:45 +0100 Subject: [PATCH 026/110] packfile: move packfile store into object source The packfile store is a member of `struct object_database`, which means that we have a single store per database. This doesn't really make much sense though: each source connected to the database has its own set of packfiles, so there is a conceptual mismatch here. This hasn't really caused much of a problem in the past, but with the advent of pluggable object databases this is becoming more of a problem because some of the sources may not even use packfiles in the first place. 
Move the packfile store down by one level from the object database into the object database source. This ensures that each source now has its own packfile store, and we can eventually start to abstract it away entirely so that the caller doesn't even know what kind of store it uses. Note that we only need to adjust a relatively small number of callers, way less than one might expect. This is because most callers are using `repo_for_each_pack()`, which handles enumeration of all packfiles that exist in the repository. So for now, none of these callers need to be adapted. The remaining callers that iterate through the packfiles directly and that need adjustment are those that are a bit more tangled with packfiles. These will be adjusted over time. Note that this patch only moves the packfile store, and there is still a bunch of functions that seemingly operate on a packfile store but that end up iterating over all sources. These will be adjusted in subsequent commits. Signed-off-by: Patrick Steinhardt --- builtin/fast-import.c | 37 +++++++----- builtin/grep.c | 6 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 96 ++++++++++++++++--------------- http.c | 2 +- midx.c | 5 +- odb.c | 36 ++++++------ odb.h | 6 +- odb/streaming.c | 9 ++- packfile.c | 127 ++++++++++++++++++++++++++--------------- packfile.h | 62 +++++++++++++++++--- 11 files changed, 243 insertions(+), 145 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 4cd0b079b6..d788726def 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -900,7 +900,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. 
*/ - new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + new_p = packfile_store_load_pack(pack_data->repo->objects->sources->packfiles, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); @@ -955,7 +955,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct odb_source *source; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -979,7 +979,11 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + } + + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1096,10 +1100,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); + struct odb_source *source; struct object_entry *e; struct object_id oid; unsigned long hdrlen; @@ -1179,24 +1183,29 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); + goto out; + } - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; + goto out; } + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + +out: free(in_buf); free(out_buf); } diff --git a/builtin/grep.c b/builtin/grep.c index 53cccf2d25..4855b871dd 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,8 +1213,12 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); + /* + * Note: `packfile_store_prepare()` prepares stores from all + * sources. This will be fixed in a subsequent commit. + */ if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->packfiles); + packfile_store_prepare(the_repository->objects->sources->packfiles); start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a7e901e49c..b67fb0256c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1638,7 +1638,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, hash, "idx", 1); if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->packfiles, + packfile_store_load_pack(the_repository->objects->sources->packfiles, final_index_name, 0); if (!from_stdin) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e86b8f387a..7fd90a9996 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,49 +1529,53 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) 
{ - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); - ofs = find_pack_entry_one(oid, p); - if (!ofs) - continue; + for (; *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. - */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) + ofs = find_pack_entry_one(oid, p); + if (!ofs) continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. 
+ */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. + */ + if (mtime <= candidate_mtime) + return 0; + } } return -1; @@ -1749,13 +1753,15 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->packs, p); - if (want != -1) - return want; + for (source = the_repository->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); + if (!exclude && want > 0) + packfile_list_prepend(&source->packfiles->packs, p); + if (want != -1) + return want; + } } if (uri_protocols.nr) { diff --git a/http.c b/http.c index 41f850db16..7815f144de 100644 --- a/http.c +++ b/http.c @@ -2544,7 +2544,7 @@ void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->packfiles, p); + packfile_store_add_pack(the_repository->objects->sources->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/midx.c b/midx.c index 24e1e72175..dbb2aa68ba 100644 --- a/midx.c +++ b/midx.c @@ -95,7 +95,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source 
*source) { - packfile_store_prepare(source->odb->packfiles); + packfile_store_prepare(source->packfiles); return source->midx; } @@ -447,7 +447,6 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -460,7 +459,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(r->objects->packfiles, + p = packfile_store_load_pack(m->source->packfiles, pack_name.buf, m->source->local); strbuf_release(&pack_name); diff --git a/odb.c b/odb.c index 94144a69f5..f159fbdd99 100644 --- a/odb.c +++ b/odb.c @@ -155,6 +155,7 @@ static struct odb_source *odb_source_new(struct object_database *odb, source->local = local; source->path = xstrdup(path); source->loose = odb_source_loose_new(source); + source->packfiles = packfile_store_new(source); return source; } @@ -373,6 +374,7 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); + packfile_store_free(source->packfiles); free(source); } @@ -704,19 +706,19 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; - /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags) || + !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; + } /* Not a loose object; someone else may have just packed it. 
*/ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags)) + return 0; } /* @@ -975,13 +977,14 @@ int odb_freshen_object(struct object_database *odb, { struct odb_source *source; - if (packfile_store_freshen_object(odb->packfiles, oid)) - return 1; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) + for (source = odb->sources; source; source = source->next) { + if (packfile_store_freshen_object(source->packfiles, oid)) + return 1; + if (odb_source_loose_freshen_object(source, oid)) return 1; + } return 0; } @@ -1064,7 +1067,6 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->packfiles = packfile_store_new(o->sources); free(to_free); @@ -1077,9 +1079,8 @@ void odb_close(struct object_database *o) { struct odb_source *source; - packfile_store_close(o->packfiles); - for (source = o->sources; source; source = source->next) { + packfile_store_close(source->packfiles); if (source->midx) close_midx(source->midx); source->midx = NULL; @@ -1118,7 +1119,6 @@ void odb_free(struct object_database *o) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); chdir_notify_unregister(NULL, odb_update_commondir, o); @@ -1141,13 +1141,13 @@ void odb_reprepare(struct object_database *o) o->loaded_alternates = 0; odb_prepare_alternates(o); - for (source = o->sources; source; source = source->next) + for (source = o->sources; source; source = source->next) { odb_source_loose_reprepare(source); + packfile_store_reprepare(source->packfiles); + } 
o->approximate_object_count_valid = 0; - packfile_store_reprepare(o->packfiles); - obj_read_unlock(); } diff --git a/odb.h b/odb.h index 014cd9585a..c97b41c58c 100644 --- a/odb.h +++ b/odb.h @@ -51,6 +51,9 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; + /* Should only be accessed directly by packfile.c and midx.c. */ + struct packfile_store *packfiles; + /* * private data * @@ -128,9 +131,6 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - /* * This is meant to hold a *small* number of objects that you would * want odb_read_object() to be able to return, but yet you do not want diff --git a/odb/streaming.c b/odb/streaming.c index 745cd486fb..4a4474f891 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -185,13 +185,12 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) - return 0; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_stream(out, source, oid)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_stream(out, source->packfiles, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) return 0; + } return open_istream_incore(out, odb, oid); } diff --git a/packfile.c b/packfile.c index 3700612465..a0225cb2cb 100644 --- a/packfile.c +++ b/packfile.c @@ -357,12 +357,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct object_database *odb) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - for (e = odb->packfiles->packs.head; e; e = e->next) - 
scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + for (source = odb->sources; source; source = source->next) + for (e = source->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -528,15 +530,18 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (e = r->objects->packfiles->packs.head; e; e = e->next) { - if (e->pack->pack_fd == -1) - continue; - find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + for (source = r->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) + continue; + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + } } if (lru_p) @@ -987,7 +992,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->source->odb->packfiles, + packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); free(trimmed_path); } @@ -1245,11 +1250,15 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packfile_list_entry *e; + struct odb_source *source; + + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + for (e = source->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; + } - for (e = r->objects->packfiles->packs.head; e; e = 
e->next) - if (oidset_contains(&e->pack->bad_objects, oid)) - return e->pack; return NULL; } @@ -2089,26 +2098,32 @@ static int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct packfile_list_entry *l; + struct odb_source *source; - packfile_store_prepare(r->objects->packfiles); + /* + * Note: `packfile_store_prepare()` prepares stores from all sources. + * This will be fixed in a subsequent commit. + */ + packfile_store_prepare(r->objects->sources->packfiles); - for (struct odb_source *source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs.head) - return 0; + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *l; - for (l = r->objects->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = source->packfiles->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!r->objects->packfiles->skip_mru_updates) - packfile_list_prepend(&r->objects->packfiles->packs, p); - return 1; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!source->packfiles->skip_mru_updates) + packfile_list_prepend(&source->packfiles->packs, p); + return 1; + } } } + return 0; } @@ -2216,12 +2231,18 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + + for (; *cache; 
cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, e, p)) + return 1; + } } return 0; @@ -2287,32 +2308,46 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) { - struct packed_git *p; + struct odb_source *source; int r = 0; int pack_errors = 0; - repo->objects->packfiles->skip_mru_updates = true; - repo_for_each_pack(repo, p) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + + source->packfiles->skip_mru_updates = true; + + for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + r = for_each_object_in_pack(p, cb, data, flags); + if (r) + break; } - r = for_each_object_in_pack(p, cb, data, flags); + + source->packfiles->skip_mru_updates = false; + if (r) break; } - repo->objects->packfiles->skip_mru_updates = false; return r ? 
r : pack_errors; } diff --git a/packfile.h b/packfile.h index 701a3b4946..6872b16755 100644 --- a/packfile.h +++ b/packfile.h @@ -5,6 +5,7 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "repository.h" #include "strmap.h" /* in odb.h */ @@ -169,14 +170,65 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); + +struct repo_for_each_pack_data { + struct odb_source *source; + struct packfile_list_entry *entry; +}; + +static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct repository *repo) +{ + struct repo_for_each_pack_data data = { 0 }; + + odb_prepare_alternates(repo->objects); + + for (struct odb_source *source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data.source = source; + data.entry = entry; + break; + } + + return data; +} + +static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data) +{ + struct odb_source *source; + + data->entry = data->entry->next; + if (data->entry) + return; + + for (source = data->source->next; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data->source = source; + data->entry = entry; + return; + } + + data->source = NULL; + data->entry = NULL; +} + /* * Load and iterate through all packs of the given repository. This helper * function will yield packfiles from all object sources connected to the * repository. 
*/ #define repo_for_each_pack(repo, p) \ - for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ - ((p) = (e ? e->pack : NULL)); e = e->next) + for (struct repo_for_each_pack_data eack_pack_data = repo_for_eack_pack_data_init(repo); \ + ((p) = (eack_pack_data.entry ? eack_pack_data.entry->pack : NULL)); \ + repo_for_each_pack_data_next(&eack_pack_data)) int packfile_store_read_object_stream(struct odb_read_stream **out, struct packfile_store *store, @@ -193,12 +245,6 @@ int packfile_store_read_object_info(struct packfile_store *store, struct object_info *oi, unsigned flags); -/* - * Get all packs managed by the given store, including packfiles that are - * referenced by multi-pack indices. - */ -struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a -- GitLab From bd826ae5c895ff3c6f7420875ed7db3c32e10be4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 12:56:26 +0200 Subject: [PATCH 027/110] packfile: only prepare owning store in `packfile_store_get_packs()` When calling `packfile_store_get_packs()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Adapt the code so that we only prepare the MIDX of the provided store. All callers only work in the context of a single store or call the function in a loop over all sources, so this change shouldn't have any practical effects.
Signed-off-by: Patrick Steinhardt --- packfile.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index a0225cb2cb..c46d53b75d 100644 --- a/packfile.c +++ b/packfile.c @@ -1092,10 +1092,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { - struct multi_pack_index *m = source->midx; - if (!m) - continue; + if (store->source->midx) { + struct multi_pack_index *m = store->source->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } -- GitLab From 4b33828ee2f7bc18e225290d1514e0a493b93c4b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 12:59:02 +0200 Subject: [PATCH 028/110] packfile: only prepare owning store in `packfile_store_prepare()` When calling `packfile_store_prepare()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Refactor the code so that we only prepare the single packfile store passed by the caller. Adapt callers accordingly. Signed-off-by: Patrick Steinhardt --- builtin/grep.c | 14 ++++++++------ packfile.c | 19 +++++-------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 4855b871dd..5b8b87b1ac 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,12 +1213,14 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); - /* - * Note: `packfile_store_prepare()` prepares stores from all - * sources. This will be fixed in a subsequent commit. 
- */ - if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->sources->packfiles); + + if (startup_info->have_repository) { + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) + packfile_store_prepare(source->packfiles); + } start_threads(&opt); } else { diff --git a/packfile.c b/packfile.c index c46d53b75d..23d8f7cb93 100644 --- a/packfile.c +++ b/packfile.c @@ -1063,16 +1063,11 @@ static int sort_pack(const struct packfile_list_entry *a, void packfile_store_prepare(struct packfile_store *store) { - struct odb_source *source; - if (store->initialized) return; - odb_prepare_alternates(store->source->odb); - for (source = store->source->odb->sources; source; source = source->next) { - prepare_multi_pack_index_one(source); - prepare_packed_git_one(source); - } + prepare_multi_pack_index_one(store->source); + prepare_packed_git_one(store->source); sort_packs(&store->packs.head, sort_pack); for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) @@ -2098,15 +2093,11 @@ static int find_pack_entry(struct repository *r, { struct odb_source *source; - /* - * Note: `packfile_store_prepare()` prepares stores from all sources. - * This will be fixed in a subsequent commit. 
- */ - packfile_store_prepare(r->objects->sources->packfiles); - - for (source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) { + packfile_store_prepare(r->objects->sources->packfiles); if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; + } for (source = r->objects->sources; source; source = source->next) { struct packfile_list_entry *l; -- GitLab From 416fdad14809fb43746fa4cbb6b4560a49cdf372 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:26:46 +0200 Subject: [PATCH 029/110] packfile: inline `find_kept_pack_entry()` The `find_kept_pack_entry()` function is only used in `has_object_kept_pack()`, which is only a trivial wrapper itself. Inline the former into the latter. Furthermore, reorder the code so that we can drop the declaration of the function in "packfile.h". This allows us to make the function file-local. Signed-off-by: Patrick Steinhardt --- packfile.c | 28 ++++++++++------------------ packfile.h | 6 ------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/packfile.c b/packfile.c index 23d8f7cb93..3bce1b150d 100644 --- a/packfile.c +++ b/packfile.c @@ -2215,12 +2215,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st -int find_kept_pack_entry(struct repository *r, - const struct object_id *oid, - unsigned flags, - struct pack_entry *e) +int has_object_pack(struct repository *r, const struct object_id *oid) +{ + struct pack_entry e; + return find_pack_entry(r, oid, &e); +} + +int has_object_kept_pack(struct repository *r, const struct object_id *oid, + unsigned flags) { struct odb_source *source; + struct pack_entry e; for (source = r->objects->sources; source; source = source->next) { struct packed_git **cache; @@ -2229,7 +2234,7 @@ for (; *cache; cache++) { struct packed_git *p = *cache; - if
(fill_pack_entry(oid, e, p)) + if (fill_pack_entry(oid, &e, p)) return 1; } } @@ -2237,19 +2242,6 @@ int find_kept_pack_entry(struct repository *r, return 0; } -int has_object_pack(struct repository *r, const struct object_id *oid) -{ - struct pack_entry e; - return find_pack_entry(r, oid, &e); -} - -int has_object_kept_pack(struct repository *r, const struct object_id *oid, - unsigned flags) -{ - struct pack_entry e; - return find_kept_pack_entry(r, oid, flags, &e); -} - int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) diff --git a/packfile.h b/packfile.h index 6872b16755..2fb87a26d6 100644 --- a/packfile.h +++ b/packfile.h @@ -444,12 +444,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -/* - * Iff a pack file in the given repository contains the object named by sha1, - * return true and store its location to e. - */ -int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); - int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -- GitLab From 56d468bcedb615e54c304336883103d6069491b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:29:38 +0200 Subject: [PATCH 030/110] packfile: refactor `find_pack_entry()` to work on the packfile store The function `find_pack_entry()` doesn't work on a specific packfile store, but instead works on the whole repository. This causes a bit of a conceptual mismatch in its callers: - `packfile_store_freshen_object()` supposedly acts on a store, and its callers know to iterate through all sources already. - `packfile_store_read_object_info()` behaves likewise. 
The only exception that doesn't know to handle iteration through sources is `has_object_pack()`, but that function is trivial to adapt. Refactor the code so that `find_pack_entry()` works on the packfile store level instead. Signed-off-by: Patrick Steinhardt --- packfile.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/packfile.c b/packfile.c index 3bce1b150d..0e4c63e11d 100644 --- a/packfile.c +++ b/packfile.c @@ -2087,29 +2087,23 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -static int find_pack_entry(struct repository *r, +static int find_pack_entry(struct packfile_store *store, const struct object_id *oid, struct pack_entry *e) { - struct odb_source *source; - - for (source = r->objects->sources; source; source = source->next) { - packfile_store_prepare(r->objects->sources->packfiles); - if (source->midx && fill_midx_entry(source->midx, oid, e)) - return 1; - } + struct packfile_list_entry *l; - for (source = r->objects->sources; source; source = source->next) { - struct packfile_list_entry *l; + packfile_store_prepare(store); + if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + return 1; - for (l = source->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = store->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!source->packfiles->skip_mru_updates) - packfile_list_prepend(&source->packfiles->packs, p); - return 1; - } + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!store->skip_mru_updates) + packfile_list_prepend(&store->packs, p); + return 1; } } @@ -2120,7 +2114,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 0; if (e.p->is_cruft) return 
0; @@ -2141,7 +2135,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 1; /* @@ -2217,8 +2211,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st int has_object_pack(struct repository *r, const struct object_id *oid) { + struct odb_source *source; struct pack_entry e; - return find_pack_entry(r, oid, &e); + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + int ret = find_pack_entry(source->packfiles, oid, &e); + if (ret) + return ret; + } + + return 0; } int has_object_kept_pack(struct repository *r, const struct object_id *oid, -- GitLab From e6bed6b495c1530bed381085b7ab667bbbcbdcd8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 19 Oct 2025 16:44:30 +0200 Subject: [PATCH 031/110] packfile: move MIDX into packfile store The multi-pack index still is tracked as a member of the object database source, but ultimately the MIDX is always tied to one specific packfile store. Move the structure into `struct packfile_store` accordingly. This ensures that the packfile store now keeps track of all data related to packfiles. 
Signed-off-by: Patrick Steinhardt --- midx.c | 14 +++++++------- odb.c | 8 +------- odb.h | 7 ------- packfile.c | 12 ++++++++---- packfile.h | 3 +++ 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/midx.c b/midx.c index dbb2aa68ba..fa7a7e5d13 100644 --- a/midx.c +++ b/midx.c @@ -96,7 +96,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { packfile_store_prepare(source->packfiles); - return source->midx; + return source->packfiles->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -709,12 +709,12 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (!r->settings.core_multi_pack_index) return 0; - if (source->midx) + if (source->packfiles->midx) return 1; - source->midx = load_multi_pack_index(source); + source->packfiles->midx = load_multi_pack_index(source); - return !!source->midx; + return !!source->packfiles->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -803,9 +803,9 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; + if (source->packfiles->midx) + close_midx(source->packfiles->midx); + source->packfiles->midx = NULL; } } diff --git a/odb.c b/odb.c index f159fbdd99..902251f9ed 100644 --- a/odb.c +++ b/odb.c @@ -1078,14 +1078,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - - for (source = o->sources; source; source = source->next) { + for (source = o->sources; source; source = source->next) packfile_store_close(source->packfiles); - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - close_commit_graph(o); } diff --git a/odb.h b/odb.h index c97b41c58c..300c3c0c46 100644 --- a/odb.h +++ b/odb.h @@ -54,13 +54,6 
@@ struct odb_source { /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* - * private data - * - * should only be accessed directly by packfile.c and midx.c - */ - struct multi_pack_index *midx; - /* * Figure out whether this is the local source of the owning * repository, which would typically be its ".git/objects" directory. diff --git a/packfile.c b/packfile.c index 0e4c63e11d..097dd8d85d 100644 --- a/packfile.c +++ b/packfile.c @@ -990,7 +990,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { + !(data->source->packfiles->midx && + midx_contains_pack(data->source->packfiles->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); @@ -1087,8 +1088,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - if (store->source->midx) { - struct multi_pack_index *m = store->source->midx; + if (store->midx) { + struct multi_pack_index *m = store->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } @@ -2094,7 +2095,7 @@ static int find_pack_entry(struct packfile_store *store, struct packfile_list_entry *l; packfile_store_prepare(store); - if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + if (store->midx && fill_midx_entry(store->midx, oid, e)) return 1; for (l = store->packs.head; l; l = l->next) { @@ -2454,6 +2455,9 @@ void packfile_store_close(struct packfile_store *store) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } + if (store->midx) + close_midx(store->midx); + store->midx = NULL; } struct odb_packed_read_stream { diff --git a/packfile.h b/packfile.h index 
2fb87a26d6..fb832a33e3 100644 --- a/packfile.h +++ b/packfile.h @@ -100,6 +100,9 @@ struct packfile_store { unsigned flags; } kept_cache; + /* The multi-pack index that belongs to this specific packfile store. */ + struct multi_pack_index *midx; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. -- GitLab From 3eea3c80238c8cd8911fc5a85d16d391f70d07fd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:01 -0500 Subject: [PATCH 032/110] midx: mark `get_midx_checksum()` arguments as const To make clear that the function `get_midx_checksum()` does not do anything to modify its argument, mark the MIDX pointer as const. The following commit will rename this function altogether to make clear that it returns the raw bytes of the checksum, not a hex-encoded copy of it. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 2 +- midx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 24e1e72175..6c01f0fa52 100644 --- a/midx.c +++ b/midx.c @@ -24,7 +24,7 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); -const unsigned char *get_midx_checksum(struct multi_pack_index *m) +const unsigned char *get_midx_checksum(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } diff --git a/midx.h b/midx.h index 6e54d73503..7c7e0b5912 100644 --- a/midx.h +++ b/midx.h @@ -85,7 +85,7 @@ struct multi_pack_index { #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" -const unsigned char *get_midx_checksum(struct multi_pack_index *m); +const unsigned char *get_midx_checksum(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext);
-- GitLab From fcf3d688d20804dbf5be2eea6210101fd6092b12 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:04 -0500 Subject: [PATCH 033/110] midx: split `get_midx_checksum()` by adding `get_midx_hash()` When trying to print out, say, the hexadecimal representation of a MIDX's hash, our code will do something like: hash_to_hex_algop(get_midx_checksum(m), m->source->odb->repo->hash_algo); , which is both cumbersome and repetitive. In fact, all but a handful of callers to `get_midx_checksum()` do exactly the above. Reduce the repetitive nature of calling `get_midx_checksum()` by having it return a pointer into a static buffer containing the above result. For the handful of callers that do need to compare the raw bytes and don't want to deal with an encoded copy (e.g., because they are passing it to hasheq() or similar), introduce `get_midx_hash()` which returns the raw bytes. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 8 +++----- midx.c | 8 +++++++- midx.h | 3 ++- pack-bitmap.c | 9 ++++----- pack-revindex.c | 4 ++-- t/helper/test-read-midx.c | 4 ++-- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/midx-write.c b/midx-write.c index 23e61cb000..73d24fabbc 100644 --- a/midx-write.c +++ b/midx-write.c @@ -955,7 +955,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) } for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { - const unsigned char *hash = get_midx_checksum(m); + const unsigned char *hash = get_midx_hash(m); get_midx_filename_ext(m->source, &from, hash, midx_exts[i].non_split); @@ -1086,8 +1086,7 @@ static int write_midx_internal(struct odb_source *source, while (m) { if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), - hash_to_hex_algop(get_midx_checksum(m), - m->source->odb->repo->hash_algo)); + get_midx_checksum(m)); goto cleanup; } ctx.num_multi_pack_indexes_before++; @@ -1445,8 +1444,7 @@ static int write_midx_internal(struct 
odb_source *source, for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; - keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), - r->hash_algo)); + keep_hashes[j] = xstrdup(get_midx_checksum(m)); m = m->base_midx; } diff --git a/midx.c b/midx.c index 6c01f0fa52..f9b11de9ca 100644 --- a/midx.c +++ b/midx.c @@ -24,7 +24,13 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); -const unsigned char *get_midx_checksum(const struct multi_pack_index *m) +const char *get_midx_checksum(const struct multi_pack_index *m) +{ + return hash_to_hex_algop(get_midx_hash(m), + m->source->odb->repo->hash_algo); +} + +const unsigned char *get_midx_hash(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } diff --git a/midx.h b/midx.h index 7c7e0b5912..e188ffeb57 100644 --- a/midx.h +++ b/midx.h @@ -85,7 +85,8 @@ struct multi_pack_index { #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" -const unsigned char *get_midx_checksum(const struct multi_pack_index *m); +const char *get_midx_checksum(const struct multi_pack_index *m) /* static buffer */; +const unsigned char *get_midx_hash(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext); diff --git a/pack-bitmap.c b/pack-bitmap.c index 8ca79725b1..f466ed2ddc 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -441,11 +441,11 @@ char *midx_bitmap_filename(struct multi_pack_index *midx) struct strbuf buf = STRBUF_INIT; if (midx->has_chain) get_split_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + get_midx_hash(midx), MIDX_EXT_BITMAP); else get_midx_filename_ext(midx->source, &buf, - 
get_midx_checksum(midx), + get_midx_hash(midx), MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); @@ -502,7 +502,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, + if (!hasheq(get_midx_hash(bitmap_git->midx), bitmap_git->checksum, bitmap_repo(bitmap_git)->hash_algo)) { error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; @@ -2820,8 +2820,7 @@ void test_bitmap_walk(struct rev_info *revs) if (bitmap_is_midx(found)) fprintf_ln(stderr, "Located via MIDX '%s'.", - hash_to_hex_algop(get_midx_checksum(found->midx), - revs->repo->hash_algo)); + get_midx_checksum(found->midx)); else fprintf_ln(stderr, "Located via pack '%s'.", hash_to_hex_algop(found->pack->hash, diff --git a/pack-revindex.c b/pack-revindex.c index d0791cc493..016195ceb9 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -390,11 +390,11 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->has_chain) get_split_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + get_midx_hash(m), MIDX_EXT_REV); else get_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + get_midx_hash(m), MIDX_EXT_REV); ret = load_revindex_from_disk(m->source->odb->repo->hash_algo, diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 6de5d1665a..dee603b3cd 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -34,7 +34,7 @@ static int read_midx_file(const char *object_dir, const char *checksum, return 1; if (checksum) { - while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum)) + while (m && strcmp(get_midx_checksum(m), checksum)) m = m->base_midx; if (!m) return 1; @@ -94,7 +94,7 @@ static int read_midx_checksum(const char *object_dir) m = setup_midx(object_dir); if (!m) return 1; - printf("%s\n", hash_to_hex(get_midx_checksum(m))); + printf("%s\n", get_midx_checksum(m)); 
close_midx(m); return 0; -- GitLab From 13980c822518a55e2afbdfe01dfc8849531442bb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:07 -0500 Subject: [PATCH 034/110] builtin/multi-pack-index.c: make '--progress' a common option All multi-pack-index sub-commands (write, verify, repack, and expire) support a '--progress' command-line option, despite not listing it as one of the common options in `common_opts`. As a result each sub-command declares its own `OPT_BIT()` for a "--progress" command-line option. Centralize this within the `common_opts` to avoid re-declaring it in each sub-command. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 2 ++ builtin/multi-pack-index.c | 10 ++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 2f642697e9..a4550e28be 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -18,6 +18,8 @@ Write or verify a multi-pack-index (MIDX) file. OPTIONS ------- +The following command-line options are applicable to all sub-commands: + --object-dir=:: Use given directory for the location of Git objects. 
We check `/packs/multi-pack-index` for the current MIDX file, and diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5f364aa816..ca98d4c3ba 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -84,6 +84,8 @@ static struct option common_opts[] = { N_("directory"), N_("object directory containing set of packfile and pack-index pairs"), parse_object_dir), + OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), + MIDX_PROGRESS), OPT_END(), }; @@ -138,8 +140,6 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, @@ -200,8 +200,6 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv, { struct option *options; static struct option builtin_multi_pack_index_verify_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -231,8 +229,6 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, { struct option *options; static struct option builtin_multi_pack_index_expire_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -264,8 +260,6 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, static struct option builtin_multi_pack_index_repack_options[] = { OPT_UNSIGNED(0, "batch-size", &opts.batch_size, N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), 
MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; -- GitLab From 10292d39ace4dbabfe7fe71e919bb2d56178084b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:10 -0500 Subject: [PATCH 035/110] git-multi-pack-index(1): remove non-existent incompatibility Since fcb2205b774 (midx: implement support for writing incremental MIDX chains, 2024-08-06), the command-line options '--incremental' and '--bitmap' were declared to be incompatible with one another when running 'git multi-pack-index write'. However, since 27afc272c49 (midx: implement writing incremental MIDX bitmaps, 2025-03-20), that incompatibility no longer exists, despite the documentation saying so. Correct this by removing the stale reference to their incompatibility. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index a4550e28be..a502819fc3 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -75,7 +75,7 @@ marker). Write an incremental MIDX file containing only objects and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when - necessary. Incompatible with `--bitmap`. + necessary. -- verify:: -- GitLab From ae3770a76e23a12a8ea36b7413c3845c515d521c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:13 -0500 Subject: [PATCH 036/110] git-multi-pack-index(1): align SYNOPSIS with 'git multi-pack-index -h' Since c39fffc1c90 (tests: start asserting that *.txt SYNOPSIS matches -h output, 2022-10-13), the manual page for 'git multi-pack-index' has a SYNOPSIS section which differs from 'git multi-pack-index -h'. Correct this while also documenting additional options accepted by the 'write' sub-command. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 7 ++++++- builtin/multi-pack-index.c | 5 +++-- t/t0450/adoc-help-mismatches | 1 - 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index a502819fc3..164cf1f229 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -9,7 +9,12 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=] [--[no-]bitmap] +'git multi-pack-index' [] write [--preferred-pack=] + [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] + [--refs-snapshot=] +'git multi-pack-index' [] verify +'git multi-pack-index' [] expire +'git multi-pack-index' [] repack [--batch-size=] DESCRIPTION ----------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index ca98d4c3ba..c0c6c1760c 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -13,8 +13,9 @@ #include "repository.h" #define BUILTIN_MIDX_WRITE_USAGE \ - N_("git multi-pack-index [] write [--preferred-pack=]" \ - "[--refs-snapshot=]") + N_("git multi-pack-index [] write [--preferred-pack=]\n" \ + " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ + " [--refs-snapshot=]") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") diff --git a/t/t0450/adoc-help-mismatches b/t/t0450/adoc-help-mismatches index 8ee2d3f7c8..e8d6c13ccd 100644 --- a/t/t0450/adoc-help-mismatches +++ b/t/t0450/adoc-help-mismatches @@ -33,7 +33,6 @@ merge merge-file merge-index merge-one-file -multi-pack-index name-rev notes push -- GitLab From 3dfc5b5c83bc875a0b8e7ad9af26cc1d96521980 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:16 -0500 Subject: [PATCH 037/110] t/t5319-multi-pack-index.sh: fix copy-and-paste error in t5319.39 Commit d4bf1d88b90 (multi-pack-index: verify 
missing pack, 2018-09-13) adds a new test to the MIDX test script to test how we handle missing packs. While the commit itself describes the test as "verify missing pack[s]", the test itself is actually called "verify packnames out of order", despite that not being what it tests. Likely this was a copy-and-paste of the test immediately above it of the same name. Correct this by renaming the test to match the commit message. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 93f319a4b2..ca020091dd 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -455,7 +455,7 @@ test_expect_success 'verify packnames out of order' ' "pack names out of order" ' -test_expect_success 'verify packnames out of order' ' +test_expect_success 'verify missing pack' ' corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ "failed to load pack" ' -- GitLab From 9df4325f15043cb917cde0afa5f15eb07cd58195 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:19 -0500 Subject: [PATCH 038/110] midx-write.c: don't use `pack_perm` when assigning `bitmap_pos` In midx_pack_order(), we compute for each bitmapped pack the first bit to correspond to an object in that pack, along with how many bits were assigned to object(s) in that pack. Initially, each bitmap_nr value is set to zero, and each bitmap_pos value is set to the sentinel BITMAP_POS_UNKNOWN. This is done to ensure that there are no packs that have an unknown bit position but a somehow non-zero number of objects (cf. `write_midx_bitmapped_packs()` in midx-write.c). Once the pack order is fully determined, midx_pack_order() sets the bitmap_pos field for any bitmapped packs to zero if they are still listed as BITMAP_POS_UNKNOWN. However, we enumerate the bitmapped packs in order of `ctx->pack_perm`.
This is fine for existing cases, since the only time the `ctx->pack_perm` array holds a value outside of the addressable range of `ctx->info` is when there are expired packs, which only occurs via 'git multi-pack-index expire', which does not support writing MIDX bitmaps. As a result, the range of ctx->pack_perm covers all values in [0, `ctx->nr`), so enumerating in this order isn't an issue. A future change necessary for compaction will complicate this further by introducing a wrapper around the `ctx->pack_perm` array, which turns the given `pack_int_id` into one that is relative to the lower end of the compaction range. As a result, indexing into `ctx->pack_perm` through this helper, say, with "0" will produce a crash when the lower end of the compaction range has >0 pack(s) in its base layer, since the subtraction will wrap around the 32-bit unsigned range, resulting in an uninitialized read. But the process is completely unnecessary in the first place: we are enumerating all values of `ctx->info`, and there is no reason to process them in a different order than they appear in memory. Index `ctx->info` directly to reflect that. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx-write.c b/midx-write.c index 73d24fabbc..c30f6a70d3 100644 --- a/midx-write.c +++ b/midx-write.c @@ -637,7 +637,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) pack_order[i] = data[i].nr; } for (i = 0; i < ctx->nr; i++) { - struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; + struct pack_info *pack = &ctx->info[i]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = 0; } -- GitLab From a7c1d30f29bae4966ec09394a7a1cdf547d11c3b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:22 -0500 Subject: [PATCH 039/110] midx-write.c: introduce `struct write_midx_opts` In the MIDX writing code, there are four functions which perform some sort of MIDX write operation. They are: - write_midx_file() - write_midx_file_only() - expire_midx_packs() - midx_repack() All of these functions are thin wrappers over `write_midx_internal()`, which implements the bulk of these routines. As a result, the `write_midx_internal()` function takes six arguments. Future commits in this series will want to add additional arguments, and in general this function's signature will be the union of parameters among *all* possible ways to write a MIDX. Instead of adding yet more arguments to this function to support MIDX compaction, introduce a `struct write_midx_opts`, which has the same struct members as `write_midx_internal()`'s arguments. Adding additional fields to the `write_midx_opts` struct is preferable to adding additional arguments to `write_midx_internal()`. This is because the callers below all zero-initialize the struct, so each time we add a new piece of information, we do not have to pass the zero value for it in all other call-sites that do not care about it. For now, no functional changes are included in this patch. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 129 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 52 deletions(-) diff --git a/midx-write.c b/midx-write.c index c30f6a70d3..b262631ae4 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1014,14 +1014,20 @@ static void clear_midx_files(struct odb_source *source, strbuf_release(&buf); } -static int write_midx_internal(struct odb_source *source, - struct string_list *packs_to_include, - struct string_list *packs_to_drop, - const char *preferred_pack_name, - const char *refs_snapshot, - unsigned flags) +struct write_midx_opts { + struct odb_source *source; + + struct string_list *packs_to_include; + struct string_list *packs_to_drop; + + const char *preferred_pack_name; + const char *refs_snapshot; + unsigned flags; +}; + +static int write_midx_internal(struct write_midx_opts *opts) { - struct repository *r = source->odb->repo; + struct repository *r = opts->source->odb->repo; struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; @@ -1041,22 +1047,22 @@ static int write_midx_internal(struct odb_source *source, trace2_region_enter("midx", "write_midx_internal", r); ctx.repo = r; - ctx.source = source; + ctx.source = opts->source; - ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); + ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); if (ctx.incremental) strbuf_addf(&midx_name, "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", - source->path); + opts->source->path); else - get_midx_filename(source, &midx_name); + get_midx_filename(opts->source, &midx_name); if (safe_create_leading_directories(r, midx_name.buf)) die_errno(_("unable to create leading directories of %s"), midx_name.buf); - if (!packs_to_include || ctx.incremental) { - struct multi_pack_index *m = get_multi_pack_index(source); + if (!opts->packs_to_include || ctx.incremental) { + struct multi_pack_index *m = 
get_multi_pack_index(opts->source); if (m && !midx_checksum_valid(m)) { warning(_("ignoring existing multi-pack-index; checksum mismatch")); m = NULL; @@ -1071,7 +1077,7 @@ static int write_midx_internal(struct odb_source *source, */ if (ctx.incremental) ctx.base_midx = m; - else if (!packs_to_include) + else if (!opts->packs_to_include) ctx.m = m; } } @@ -1084,7 +1090,7 @@ static int write_midx_internal(struct odb_source *source, if (ctx.incremental) { struct multi_pack_index *m = ctx.base_midx; while (m) { - if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { + if (opts->flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), get_midx_checksum(m)); goto cleanup; @@ -1099,23 +1105,23 @@ static int write_midx_internal(struct odb_source *source, start_pack = ctx.nr; ctx.pack_paths_checked = 0; - if (flags & MIDX_PROGRESS) + if (opts->flags & MIDX_PROGRESS) ctx.progress = start_delayed_progress(r, _("Adding packfiles to multi-pack-index"), 0); else ctx.progress = NULL; - ctx.to_include = packs_to_include; + ctx.to_include = opts->packs_to_include; - for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); + for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); stop_progress(&ctx.progress); if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) && !ctx.incremental && - !(packs_to_include || packs_to_drop)) { + !(opts->packs_to_include || opts->packs_to_drop)) { struct bitmap_index *bitmap_git; int bitmap_exists; - int want_bitmap = flags & MIDX_WRITE_BITMAP; + int want_bitmap = opts->flags & MIDX_WRITE_BITMAP; bitmap_git = prepare_midx_bitmap_git(ctx.m); bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); @@ -1127,7 +1133,8 @@ static int write_midx_internal(struct odb_source *source, * corresponding bitmap (or one wasn't requested). 
*/ if (!want_bitmap) - clear_midx_files_ext(source, "bitmap", NULL); + clear_midx_files_ext(opts->source, "bitmap", + NULL); result = 0; goto cleanup; } @@ -1138,11 +1145,11 @@ static int write_midx_internal(struct odb_source *source, goto cleanup; /* nothing to do */ } - if (preferred_pack_name) { + if (opts->preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; for (size_t i = 0; i < ctx.nr; i++) { - if (!cmp_idx_or_pack_name(preferred_pack_name, + if (!cmp_idx_or_pack_name(opts->preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; break; @@ -1151,9 +1158,9 @@ static int write_midx_internal(struct odb_source *source, if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) warning(_("unknown preferred pack: '%s'"), - preferred_pack_name); + opts->preferred_pack_name); } else if (ctx.nr && - (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { + (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { struct packed_git *oldest = ctx.info[0].p; ctx.preferred_pack_idx = 0; @@ -1164,7 +1171,7 @@ static int write_midx_internal(struct odb_source *source, */ open_pack_index(oldest); - if (packs_to_drop && packs_to_drop->nr) + if (opts->packs_to_drop && opts->packs_to_drop->nr) BUG("cannot write a MIDX bitmap during expiration"); /* @@ -1226,20 +1233,21 @@ static int write_midx_internal(struct odb_source *source, QSORT(ctx.info, ctx.nr, pack_info_compare); - if (packs_to_drop && packs_to_drop->nr) { + if (opts->packs_to_drop && opts->packs_to_drop->nr) { size_t drop_index = 0; int missing_drops = 0; - for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { + for (size_t i = 0; + i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); if (!cmp) { drop_index++; ctx.info[i].expired = 1; } else if (cmp > 0) { error(_("did not see pack-file %s to drop"), - 
packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); drop_index++; missing_drops++; i--; @@ -1276,8 +1284,8 @@ static int write_midx_internal(struct odb_source *source, } /* Check that the preferred pack wasn't expired (if given). */ - if (preferred_pack_name) { - struct pack_info *preferred = bsearch(preferred_pack_name, + if (opts->preferred_pack_name) { + struct pack_info *preferred = bsearch(opts->preferred_pack_name, ctx.info, ctx.nr, sizeof(*ctx.info), idx_or_pack_name_cmp); @@ -1285,7 +1293,7 @@ static int write_midx_internal(struct odb_source *source, uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), - preferred_pack_name); + opts->preferred_pack_name); } } @@ -1299,15 +1307,15 @@ static int write_midx_internal(struct odb_source *source, } if (!ctx.entries_nr) { - if (flags & MIDX_WRITE_BITMAP) + if (opts->flags & MIDX_WRITE_BITMAP) warning(_("refusing to write multi-pack .bitmap without any objects")); - flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); + opts->flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); } if (ctx.incremental) { struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(source, &lock_name); + get_midx_chain_filename(opts->source, &lock_name); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); strbuf_release(&lock_name); @@ -1350,7 +1358,7 @@ static int write_midx_internal(struct odb_source *source, MIDX_CHUNK_LARGE_OFFSET_WIDTH), write_midx_large_offsets); - if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { + if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { ctx.pack_order = midx_pack_order(&ctx); add_chunk(cf, MIDX_CHUNKID_REVINDEX, st_mult(ctx.entries_nr, sizeof(uint32_t)), @@ -1368,11 +1376,11 @@ static int write_midx_internal(struct odb_source *source, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX && + if (opts->flags 
& MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(&ctx, midx_hash); - if (flags & MIDX_WRITE_BITMAP) { + if (opts->flags & MIDX_WRITE_BITMAP) { struct packing_data pdata; struct commit **commits; uint32_t commits_nr; @@ -1382,7 +1390,7 @@ static int write_midx_internal(struct odb_source *source, prepare_midx_packing_data(&pdata, &ctx); - commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx); + commits = find_commits_for_midx_bitmap(&commits_nr, opts->refs_snapshot, &ctx); /* * The previous steps translated the information from @@ -1395,7 +1403,7 @@ static int write_midx_internal(struct odb_source *source, if (write_midx_bitmap(&ctx, midx_hash, &pdata, commits, commits_nr, - flags) < 0) { + opts->flags) < 0) { error(_("could not write multi-pack bitmap")); clear_packing_data(&pdata); free(commits); @@ -1428,7 +1436,7 @@ static int write_midx_internal(struct odb_source *source, if (link_midx_to_chain(ctx.base_midx) < 0) goto cleanup; - get_split_midx_filename_ext(source, &final_midx_name, + get_split_midx_filename_ext(opts->source, &final_midx_name, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { @@ -1461,7 +1469,7 @@ static int write_midx_internal(struct odb_source *source, if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(source, keep_hashes, + clear_midx_files(opts->source, keep_hashes, ctx.num_multi_pack_indexes_before + 1, ctx.incremental); result = 0; @@ -1495,9 +1503,14 @@ int write_midx_file(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, NULL, NULL, - preferred_pack_name, refs_snapshot, - flags); + struct write_midx_opts opts = { + .source = source, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); } int 
write_midx_file_only(struct odb_source *source, @@ -1505,8 +1518,15 @@ int write_midx_file_only(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, packs_to_include, NULL, - preferred_pack_name, refs_snapshot, flags); + struct write_midx_opts opts = { + .source = source, + .packs_to_include = packs_to_include, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); } int expire_midx_packs(struct odb_source *source, unsigned flags) @@ -1566,8 +1586,11 @@ int expire_midx_packs(struct odb_source *source, unsigned flags) free(count); if (packs_to_drop.nr) - result = write_midx_internal(source, NULL, - &packs_to_drop, NULL, NULL, flags); + result = write_midx_internal(&(struct write_midx_opts) { + .source = source, + .packs_to_drop = &packs_to_drop, + .flags = flags & MIDX_PROGRESS, + }); string_list_clear(&packs_to_drop, 0); @@ -1774,8 +1797,10 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) goto cleanup; } - result = write_midx_internal(source, NULL, NULL, NULL, NULL, - flags); + result = write_midx_internal(&(struct write_midx_opts) { + .source = source, + .flags = flags, + }); cleanup: free(include_pack); -- GitLab From b40e7cbca25bed9715d29e8d9130bd14bf165225 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:25 -0500 Subject: [PATCH 040/110] midx: do not require packs to be sorted in lexicographic order The MIDX file format currently requires that pack files be identified by the lexicographic ordering of their names (that is, a pack having a checksum beginning with "abc" would have a numeric pack_int_id which is smaller than the same value for a pack beginning with "bcd"). As a result, it is impossible to combine adjacent MIDX layers together without permuting bits from bitmaps that are in more recent layer(s). 
To see why, consider the following example: | packs | preferred pack --------+-------------+--------------- MIDX #0 | { X, Y, Z } | Y MIDX #1 | { A, B, C } | B MIDX #2 | { D, E, F } | D , where MIDX #2's base MIDX is MIDX #1, and so on. Suppose that we want to combine MIDX layers #0 and #1, to create a new layer #0' containing the packs from both layers. With the original three MIDX layers, objects are laid out in the bitmap in the order they appear in their source pack, and the packs themselves are arranged according to the pseudo-pack order. In this case, that ordering is Y, X, Z, B, A, C. But recall that the pseudo-pack ordering is defined by the order that packs appear in the MIDX, with the exception of the preferred pack, which sorts ahead of all other packs regardless of its position within the MIDX. In the above example, that means that pack 'Y' could be placed anywhere (so long as it is designated as preferred), however, all other packs must be placed in the location listed above. Because that ordering isn't sorted lexicographically, it is impossible to compact MIDX layers in the above configuration without permuting the object-to-bit-position mapping. Changing this mapping would affect all bitmaps belonging to newer layers, rendering the bitmaps associated with MIDX #2 unreadable. One of the goals of MIDX compaction is that we are able to shrink the length of the MIDX chain *without* invalidating bitmaps that belong to newer layers, and the lexicographic ordering constraint is at odds with this goal. However, packs do not *need* to be lexicographically ordered within the MIDX. As far as I can gather, the only reason they are sorted lexically is to make it possible to perform a binary search over the pack names in a MIDX, necessary to make `midx_contains_pack()`'s performance logarithmic in the number of packs rather than linear. Relax this constraint by allowing MIDX writes to proceed with packs that are not arranged in lexicographic order. 
`midx_contains_pack()` will lazily instantiate a `pack_names_sorted` array on the MIDX, which will be used to implement the binary search over pack names. Note that this produces MIDXs which may be incompatible with earlier versions of Git that have stricter requirements on the layout of packs within a MIDX. This patch does *not* modify the version number of the MIDX format, since existing versions of Git already know to gracefully ignore a MIDX with packs that appear out-of-order. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 5 ----- midx.c | 28 ++++++++++++++++++++++------ midx.h | 1 + t/t5319-multi-pack-index.sh | 5 ----- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/midx-write.c b/midx-write.c index b262631ae4..55342fcb6d 100644 --- a/midx-write.c +++ b/midx-write.c @@ -410,11 +410,6 @@ static int write_midx_pack_names(struct hashfile *f, void *data) if (ctx->info[i].expired) continue; - if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0) - BUG("incorrect pack-file order: %s before %s", - ctx->info[i - 1].pack_name, - ctx->info[i].pack_name); - writelen = strlen(ctx->info[i].pack_name) + 1; hashwrite(f, ctx->info[i].pack_name, writelen); written += writelen; diff --git a/midx.c b/midx.c index f9b11de9ca..4d5fe88064 100644 --- a/midx.c +++ b/midx.c @@ -209,11 +209,6 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou if (!end) die(_("multi-pack-index pack-name chunk is too short")); cur_pack_name = end + 1; - - if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) - die(_("multi-pack-index pack names out of order: '%s' before '%s'"), - m->pack_names[i - 1], - m->pack_names[i]); } trace2_data_intmax("midx", r, "load/num_packs", m->num_packs); @@ -411,6 +406,7 @@ void close_midx(struct multi_pack_index *m) } FREE_AND_NULL(m->packs); FREE_AND_NULL(m->pack_names); + FREE_AND_NULL(m->pack_names_sorted); free(m); } @@ -656,17 +652,37 @@ int 
cmp_idx_or_pack_name(const char *idx_or_pack_name, return strcmp(idx_or_pack_name, idx_name); } + +static int midx_pack_names_cmp(const void *a, const void *b, void *m_) +{ + struct multi_pack_index *m = m_; + return strcmp(m->pack_names[*(const size_t *)a], + m->pack_names[*(const size_t *)b]); +} + static int midx_contains_pack_1(struct multi_pack_index *m, const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; + if (!m->pack_names_sorted) { + uint32_t i; + + ALLOC_ARRAY(m->pack_names_sorted, m->num_packs); + + for (i = 0; i < m->num_packs; i++) + m->pack_names_sorted[i] = i; + + QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp, + m); + } + while (first < last) { uint32_t mid = first + (last - first) / 2; const char *current; int cmp; - current = m->pack_names[mid]; + current = m->pack_names[m->pack_names_sorted[mid]]; cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); if (!cmp) return 1; diff --git a/midx.h b/midx.h index e188ffeb57..39bf04b18e 100644 --- a/midx.h +++ b/midx.h @@ -71,6 +71,7 @@ struct multi_pack_index { uint32_t num_packs_in_base; const char **pack_names; + size_t *pack_names_sorted; struct packed_git **packs; }; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index ca020091dd..03676d37b9 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -450,11 +450,6 @@ test_expect_success 'verify invalid chunk offset' ' "improper chunk offset(s)" ' -test_expect_success 'verify packnames out of order' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ - "pack names out of order" -' - test_expect_success 'verify missing pack' ' corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ "failed to load pack" -- GitLab From 35364d7abaf9bd707f978c6efc6bf0eb005cc573 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:28 -0500 Subject: [PATCH 041/110] git-compat-util.h: introduce `u32_add()` A future commit will want to add two 32-bit 
unsigned values together while checking for overflow. Introduce a variant of the u64_add() function for operating on 32-bit inputs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- git-compat-util.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index 398e0fac4f..a7aa5f05fc 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -670,6 +670,14 @@ static inline int cast_size_t_to_int(size_t a) return (int)a; } +static inline uint32_t u32_add(uint32_t a, uint32_t b) +{ + if (unsigned_add_overflows(a, b)) + die("uint32_t overflow: %"PRIuMAX" + %"PRIuMAX, + (uintmax_t)a, (uintmax_t)b); + return a + b; +} + static inline uint64_t u64_mult(uint64_t a, uint64_t b) { if (unsigned_mult_overflows(a, b)) -- GitLab From 363485a3111e025135c552b877d7b0f58bde8c6d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:31 -0500 Subject: [PATCH 042/110] midx-write.c: introduce `midx_pack_perm()` helper The `ctx->pack_perm` array can be considered as a permutation between the original `pack_int_id` of some given pack to its position in the `ctx->info` array containing all packs. Today we can always index into this array with any known `pack_int_id`, since there is never a `pack_int_id` which is greater than or equal to the value `ctx->nr`. That is not necessarily the case with MIDX compaction. For example, suppose we have a MIDX chain with three layers, each containing three packs. The base of the MIDX chain will have packs with IDs 0, 1, and 2, the next layer 3, 4, and 5, and so on. If we are compacting the topmost two layers, we'll have input `pack_int_id` values between [3, 8], but `ctx->nr` will only be 6. In that example, if we want to know where the pack whose original `pack_int_id` value was, say, 7, we would compute `ctx->pack_perm[7]`, leading to an uninitialized read, since there are only 6 entries allocated in that array. 
To address this, there are a couple of options: - We could allocate enough entries in `ctx->pack_perm` to accommodate the largest `orig_pack_int_id` value. - Or, we could internally shift the input values by the number of packs in the base layer of the lower end of the MIDX compaction range. This patch prepares us to take the latter approach, since it does not allocate more memory than strictly necessary. (In our above example, the base of the lower end of the compaction range is the first MIDX layer (having three packs), so we would end up indexing `ctx->pack_perm[7-3]`, which is a valid read.) Note that this patch does not actually implement that approach yet, but merely performs a behavior-preserving refactoring which will make the change easier to carry out in the future. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/midx-write.c b/midx-write.c index 55342fcb6d..4a1a16431a 100644 --- a/midx-write.c +++ b/midx-write.c @@ -114,6 +114,12 @@ struct write_midx_context { struct odb_source *source; }; +static uint32_t midx_pack_perm(struct write_midx_context *ctx, + uint32_t orig_pack_int_id) +{ + return ctx->pack_perm[orig_pack_int_id]; +} + static int should_include_pack(const struct write_midx_context *ctx, const char *file_name) { @@ -509,12 +515,12 @@ static int write_midx_object_offsets(struct hashfile *f, for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *obj = list++; - if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED) + if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED) BUG("object %s is in an expired pack with int-id %d", oid_to_hex(&obj->oid), obj->pack_int_id); - hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]); + hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id)); if (ctx->large_offsets_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -615,7 +621,7 @@ static
uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[i]; data[i].nr = i; - data[i].pack = ctx->pack_perm[e->pack_int_id]; + data[i].pack = midx_pack_perm(ctx, e->pack_int_id); if (!e->preferred) data[i].pack |= (1U << 31); data[i].offset = e->offset; @@ -625,7 +631,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[data[i].nr]; - struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; + struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = i + base_objects; pack->bitmap_nr++; @@ -686,7 +692,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata, struct object_entry *to = packlist_alloc(pdata, &from->oid); oe_set_in_pack(pdata, to, - ctx->info[ctx->pack_perm[from->pack_int_id]].p); + ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p); } trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); @@ -1285,7 +1291,7 @@ static int write_midx_internal(struct write_midx_opts *opts) sizeof(*ctx.info), idx_or_pack_name_cmp); if (preferred) { - uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; + uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id); if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), opts->preferred_pack_name); -- GitLab From 9ced91c83b8effe8d8d73961c46b43f5399598bb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:34 -0500 Subject: [PATCH 043/110] midx-write.c: extract `fill_pack_from_midx()` When filling packs from an existing MIDX, `fill_packs_from_midx()` handles preparing a MIDX'd pack, and reading out its pack name from the existing MIDX. MIDX compaction will want to perform an identical operation, though the caller will look quite different than `fill_packs_from_midx()`. 
To reduce any future code duplication, extract `fill_pack_from_midx()` from `fill_packs_from_midx()` to prepare to call our new helper function in a future change. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/midx-write.c b/midx-write.c index 4a1a16431a..5927691f6a 100644 --- a/midx-write.c +++ b/midx-write.c @@ -910,6 +910,21 @@ static int write_midx_bitmap(struct write_midx_context *ctx, return ret; } +static int fill_pack_from_midx(struct pack_info *info, + struct multi_pack_index *m, + uint32_t pack_int_id) +{ + if (prepare_midx_pack(m, pack_int_id)) + return error(_("could not load pack %d"), pack_int_id); + + fill_pack_info(info, + m->packs[pack_int_id - m->num_packs_in_base], + m->pack_names[pack_int_id - m->num_packs_in_base], + pack_int_id); + + return 0; +} + static int fill_packs_from_midx(struct write_midx_context *ctx) { struct multi_pack_index *m; @@ -918,13 +933,13 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) uint32_t i; for (i = 0; i < m->num_packs; i++) { - if (prepare_midx_pack(m, m->num_packs_in_base + i)) - return error(_("could not load pack")); - ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], - m->pack_names[i], - m->num_packs_in_base + i); + + if (fill_pack_from_midx(&ctx->info[ctx->nr], m, + m->num_packs_in_base + i) < 0) + return -1; + + ctx->nr++; } } return 0; -- GitLab From cad0632e1334bd5c73b1d9e4e17f541ebc1b430b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:37 -0500 Subject: [PATCH 044/110] midx-write.c: enumerate `pack_int_id` values directly Our `midx-write.c::fill_packs_from_midx()` function currently enumerates the range [0, m->num_packs), and then shifts its index variable up by `m->num_packs_in_base` to produce a valid `pack_int_id`. 
Instead, directly enumerate the range: [m->num_packs_in_base, m->num_packs_in_base + m->num_packs) , which are the original pack_int_ids themselves as opposed to the indexes of those packs relative to the MIDX layer they are contained within. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/midx-write.c b/midx-write.c index 5927691f6a..d3644276aa 100644 --- a/midx-write.c +++ b/midx-write.c @@ -932,11 +932,11 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) for (m = ctx->m; m; m = m->base_midx) { uint32_t i; - for (i = 0; i < m->num_packs; i++) { + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - if (fill_pack_from_midx(&ctx->info[ctx->nr], m, - m->num_packs_in_base + i) < 0) + if (fill_pack_from_midx(&ctx->info[ctx->nr], m, i) < 0) return -1; ctx->nr++; -- GitLab From 477f9c1b2128bce41c85e4bacf5e9000ba565ea8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:40 -0500 Subject: [PATCH 045/110] midx-write.c: factor fanout layering from `compute_sorted_entries()` When computing the set of objects to appear in a MIDX, we use compute_sorted_entries(), which handles objects from various existing sources one fanout layer at a time. The process for computing this set is slightly different during MIDX compaction, so factor out the existing functionality into its own routine to prevent `compute_sorted_entries()` from becoming too difficult to read. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/midx-write.c b/midx-write.c index d3644276aa..7854561359 100644 --- a/midx-write.c +++ b/midx-write.c @@ -323,6 +323,30 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, } } +static void midx_fanout_add(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t start_pack, + uint32_t cur_fanout) +{ + uint32_t cur_pack; + + if (ctx->m && !ctx->incremental) + midx_fanout_add_midx_fanout(fanout, ctx->m, cur_fanout, + ctx->preferred_pack_idx); + + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { + int preferred = cur_pack == ctx->preferred_pack_idx; + midx_fanout_add_pack_fanout(fanout, ctx->info, cur_pack, + preferred, cur_fanout); + } + + if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && + ctx->preferred_pack_idx < start_pack) + midx_fanout_add_pack_fanout(fanout, ctx->info, + ctx->preferred_pack_idx, 1, + cur_fanout); +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. 
That can create high memory pressure if @@ -359,23 +383,7 @@ static void compute_sorted_entries(struct write_midx_context *ctx, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { fanout.nr = 0; - if (ctx->m && !ctx->incremental) - midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout, - ctx->preferred_pack_idx); - - for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { - int preferred = cur_pack == ctx->preferred_pack_idx; - midx_fanout_add_pack_fanout(&fanout, - ctx->info, cur_pack, - preferred, cur_fanout); - } - - if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && - ctx->preferred_pack_idx < start_pack) - midx_fanout_add_pack_fanout(&fanout, ctx->info, - ctx->preferred_pack_idx, 1, - cur_fanout); - + midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); midx_fanout_sort(&fanout); /* -- GitLab From 6b7d81269685064c5e98417bdf195552ac58f428 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:43 -0500 Subject: [PATCH 046/110] t/helper/test-read-midx.c: plug memory leak when selecting layer Though our 'read-midx' test tool is capable of printing information about a single MIDX layer identified by its checksum, no caller in our test suite exercises this path. Unfortunately, there is a memory leak lurking in this (currently) unused path that would otherwise be exposed by the following commit. This occurs when providing a MIDX layer checksum other than the tip. As we walk over the MIDX chain trying to find the matching layer, we drop our reference to the top-most MIDX layer. Thus, our call to 'close_midx()' later on leaks memory between the top-most MIDX layer and the MIDX layer immediately following the specified one. Plug this leak by holding a reference to the tip of the MIDX chain, and ensure that we call `close_midx()` before terminating the test tool. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-read-midx.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index dee603b3cd..6e03aabca7 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -26,9 +26,10 @@ static int read_midx_file(const char *object_dir, const char *checksum, int show_objects) { uint32_t i; - struct multi_pack_index *m; + struct multi_pack_index *m, *tip; + int ret = 0; - m = setup_midx(object_dir); + m = tip = setup_midx(object_dir); if (!m) return 1; @@ -36,8 +37,11 @@ static int read_midx_file(const char *object_dir, const char *checksum, if (checksum) { while (m && strcmp(get_midx_checksum(m), checksum)) m = m->base_midx; - if (!m) - return 1; + if (!m) { + ret = error(_("could not find MIDX with checksum %s"), + checksum); + goto out; + } } printf("header: %08x %d %d %d %d\n", @@ -82,9 +86,10 @@ static int read_midx_file(const char *object_dir, const char *checksum, } } - close_midx(m); +out: + close_midx(tip); - return 0; + return ret; } static int read_midx_checksum(const char *object_dir) -- GitLab From 98ce832ee6cec3cd48a963140fa59c18c3a61051 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:47 -0500 Subject: [PATCH 047/110] midx: implement MIDX compaction When managing a MIDX chain with many layers, it is convenient to combine a sequence of adjacent layers into a single layer to prevent the chain from growing too long. While it is conceptually possible to "compact" a sequence of MIDX layers together by running "git multi-pack-index write --stdin-packs", there are a few drawbacks that make this less than desirable: - Preserving the MIDX chain is impossible, since there is no way to write a MIDX layer that contains objects or packs found in an earlier MIDX layer already part of the chain. 
So callers would have to write an entirely new (non-incremental) MIDX containing only the compacted layers, discarding all other objects/packs from the MIDX. - There is (currently) no way to write a MIDX layer outside of the MIDX chain to work around the above, such that the MIDX chain could be reassembled substituting the compacted layers with the MIDX that was written. - The `--stdin-packs` command-line option does not allow us to specify the order of packs as they appear in the MIDX. Therefore, even if there were workarounds for the previous two challenges, any bitmaps belonging to layers which come after the compacted layer(s) would no longer be valid. This commit introduces a way to compact a sequence of adjacent MIDX layers into a single layer while preserving the MIDX chain, as well as any bitmap(s) in layers which are newer than the compacted ones. Implementing MIDX compaction does not require a significant number of changes to how MIDX layers are written. The main changes are as follows: - Instead of calling `fill_packs_from_midx()`, we call a new function `fill_packs_from_midx_range()`, which walks backwards along the portion of the MIDX chain which we are compacting, and adds packs one layer at a time. In order to preserve the pseudo-pack order, the concatenated pack order is preserved, with the exception of preferred packs which are always added first. - After adding entries from the set of packs in the compaction range, `compute_sorted_entries()` must adjust the `pack_int_id`'s for all objects added in each fanout layer to match their original `pack_int_id`'s (as opposed to the index at which each pack appears in `ctx.info`). - When writing out the new 'multi-pack-index-chain' file, discard any layers in the compaction range, replacing them with the newly written layer, instead of keeping them and placing the new layer at the end of the chain. 
This ends up being sufficient to implement MIDX compaction in such a way that preserves bitmaps corresponding to more recent layers in the MIDX chain. The tests for MIDX compaction are so far fairly spartan, since the main interesting behavior here is ensuring that the right packs/objects are selected from each layer, and that the pack order is preserved despite whether or not they are sorted in lexicographic order in the original MIDX chain. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 13 ++ builtin/multi-pack-index.c | 67 +++++++ midx-write.c | 242 ++++++++++++++++++++++-- midx.h | 5 + t/meson.build | 1 + t/t5335-compact-multi-pack-index.sh | 102 ++++++++++ 6 files changed, 411 insertions(+), 19 deletions(-) create mode 100755 t/t5335-compact-multi-pack-index.sh diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 164cf1f229..a9664e7741 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -12,6 +12,8 @@ SYNOPSIS 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] +'git multi-pack-index' [] compact [--[no-]incremental] + 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -83,6 +85,17 @@ marker). necessary. -- +compact:: + Write a new MIDX layer containing only objects and packs present + in the range `` to ``, where both arguments are + checksums of existing layers in the MIDX chain. ++ +-- + --incremental:: + Write the result to a MIDX chain instead of writing a + stand-alone MIDX. Incompatible with `--bitmap`. +-- + verify:: Verify the contents of the MIDX file. 
diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index c0c6c1760c..9b0c2082cb 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -17,6 +17,10 @@ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ " [--refs-snapshot=]") +#define BUILTIN_MIDX_COMPACT_USAGE \ + N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ + " ") + #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -30,6 +34,10 @@ static char const * const builtin_multi_pack_index_write_usage[] = { BUILTIN_MIDX_WRITE_USAGE, NULL }; +static char const * const builtin_multi_pack_index_compact_usage[] = { + BUILTIN_MIDX_COMPACT_USAGE, + NULL +}; static char const * const builtin_multi_pack_index_verify_usage[] = { BUILTIN_MIDX_VERIFY_USAGE, NULL @@ -44,6 +52,7 @@ static char const * const builtin_multi_pack_index_repack_usage[] = { }; static char const * const builtin_multi_pack_index_usage[] = { BUILTIN_MIDX_WRITE_USAGE, + BUILTIN_MIDX_COMPACT_USAGE, BUILTIN_MIDX_VERIFY_USAGE, BUILTIN_MIDX_EXPIRE_USAGE, BUILTIN_MIDX_REPACK_USAGE, @@ -195,6 +204,63 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, return ret; } +static int cmd_multi_pack_index_compact(int argc, const char **argv, + const char *prefix, + struct repository *repo) +{ + struct multi_pack_index *m, *cur; + struct multi_pack_index *from_midx = NULL; + struct multi_pack_index *to_midx = NULL; + struct odb_source *source; + int ret; + + struct option *options; + static struct option builtin_multi_pack_index_compact_options[] = { + OPT_BIT(0, "incremental", &opts.flags, + N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_END(), + }; + + repo_config(repo, git_multi_pack_index_write_config, NULL); + + options = add_common_options(builtin_multi_pack_index_compact_options); + + trace2_cmd_mode(argv[0]); + + if (isatty(2)) + opts.flags |= MIDX_PROGRESS; + argc = parse_options(argc, argv, prefix, + options, 
builtin_multi_pack_index_compact_usage, + 0); + + if (argc != 2) + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + source = handle_object_dir_option(the_repository); + + FREE_AND_NULL(options); + + m = get_multi_pack_index(source); + + for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) { + const char *midx_csum = get_midx_checksum(cur); + + if (!from_midx && !strcmp(midx_csum, argv[0])) + from_midx = cur; + if (!to_midx && !strcmp(midx_csum, argv[1])) + to_midx = cur; + } + + if (!from_midx) + die(_("could not find MIDX 'from': %s"), argv[0]); + if (!to_midx) + die(_("could not find MIDX 'to': %s"), argv[1]); + + ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + + return ret; +} + static int cmd_multi_pack_index_verify(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) @@ -295,6 +361,7 @@ int cmd_multi_pack_index(int argc, struct option builtin_multi_pack_index_options[] = { OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack), OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write), + OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact), OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify), OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire), OPT_END(), diff --git a/midx-write.c b/midx-write.c index 7854561359..fcbfedcd91 100644 --- a/midx-write.c +++ b/midx-write.c @@ -108,6 +108,10 @@ struct write_midx_context { int incremental; uint32_t num_multi_pack_indexes_before; + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + int compact; + struct string_list *to_include; struct repository *repo; @@ -117,6 +121,8 @@ struct write_midx_context { static uint32_t midx_pack_perm(struct write_midx_context *ctx, uint32_t orig_pack_int_id) { + if (ctx->compact) + orig_pack_int_id -= ctx->compact_from->num_packs_in_base; return ctx->pack_perm[orig_pack_int_id]; } @@ -347,6 +353,21 @@ static void midx_fanout_add(struct 
midx_fanout *fanout, cur_fanout); } +static void midx_fanout_add_compact(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t cur_fanout) +{ + struct multi_pack_index *m = ctx->compact_to; + + ASSERT(ctx->compact); + + while (m && m != ctx->compact_from->base_midx) { + midx_fanout_add_midx_fanout(fanout, m, cur_fanout, + NO_PREFERRED_PACK); + m = m->base_midx; + } +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. That can create high memory pressure if @@ -365,6 +386,9 @@ static void compute_sorted_entries(struct write_midx_context *ctx, size_t alloc_objects, total_objects = 0; struct midx_fanout fanout = { 0 }; + if (ctx->compact) + ASSERT(!start_pack); + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) total_objects = st_add(total_objects, ctx->info[cur_pack].p->num_objects); @@ -383,7 +407,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { fanout.nr = 0; - midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); + if (ctx->compact) + midx_fanout_add_compact(&fanout, ctx, cur_fanout); + else + midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); midx_fanout_sort(&fanout); /* @@ -953,6 +980,72 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) return 0; } +static uint32_t compactible_packs_between(const struct multi_pack_index *from, + const struct multi_pack_index *to) +{ + uint32_t nr; + + ASSERT(from && to); + + nr = u32_add(to->num_packs, to->num_packs_in_base); + if (nr < from->num_packs_in_base) + BUG("unexpected number of packs in base during compaction: " + "%"PRIu32" < %"PRIu32, nr, from->num_packs_in_base); + + return nr - from->num_packs_in_base; +} + +static int fill_packs_from_midx_range(struct write_midx_context *ctx, + int bitmap_order) +{ + struct multi_pack_index *m = ctx->compact_to; + uint32_t packs_nr; + + ASSERT(ctx->compact && !ctx->nr); + 
ASSERT(ctx->compact_from); + ASSERT(ctx->compact_to); + + packs_nr = compactible_packs_between(ctx->compact_from, + ctx->compact_to); + + ALLOC_GROW(ctx->info, packs_nr, ctx->alloc); + + while (m != ctx->compact_from->base_midx) { + uint32_t pack_int_id, preferred_pack_id; + uint32_t i; + + if (bitmap_order) { + if (midx_preferred_pack(m, &preferred_pack_id) < 0) + die(_("could not determine preferred pack")); + } else { + preferred_pack_id = m->num_packs_in_base; + } + + pack_int_id = m->num_packs_in_base - ctx->compact_from->num_packs_in_base; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + preferred_pack_id) < 0) + return -1; + + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { + if (preferred_pack_id == i) + continue; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + i) < 0) + return -1; + } + + ctx->nr += m->num_packs; + m = m->base_midx; + } + + ASSERT(ctx->nr == packs_nr); + + return 0; +} + static struct { const char *non_split; const char *split; @@ -1038,12 +1131,22 @@ static void clear_midx_files(struct odb_source *source, strbuf_release(&buf); } +static int midx_hashcmp(const struct multi_pack_index *a, + const struct multi_pack_index *b, + const struct git_hash_algo *algop) +{ + return hashcmp(get_midx_hash(a), get_midx_hash(b), algop); +} + struct write_midx_opts { struct odb_source *source; struct string_list *packs_to_include; struct string_list *packs_to_drop; + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + const char *preferred_pack_name; const char *refs_snapshot; unsigned flags; @@ -1066,6 +1169,7 @@ static int write_midx_internal(struct write_midx_opts *opts) int dropped_packs = 0; int result = -1; const char **keep_hashes = NULL; + size_t keep_hashes_nr = 0; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); @@ -1074,6 +1178,17 @@ static int write_midx_internal(struct write_midx_opts *opts) ctx.source = opts->source; 
ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); + ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT); + + if (ctx.compact) { + if (!opts->compact_from) + BUG("expected non-NULL 'from' MIDX during compaction"); + if (!opts->compact_to) + BUG("expected non-NULL 'to' MIDX during compaction"); + + ctx.compact_from = opts->compact_from; + ctx.compact_to = opts->compact_to; + } if (ctx.incremental) strbuf_addf(&midx_name, @@ -1101,11 +1216,18 @@ static int write_midx_internal(struct write_midx_opts *opts) */ if (ctx.incremental) ctx.base_midx = m; - else if (!opts->packs_to_include) + if (!opts->packs_to_include) ctx.m = m; } } + /* + * If compacting MIDX layer(s) in the range [from, to], then the + * compacted MIDX will share the same base MIDX as 'from'. + */ + if (ctx.compact) + ctx.base_midx = ctx.compact_from->base_midx; + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1122,7 +1244,7 @@ static int write_midx_internal(struct write_midx_opts *opts) ctx.num_multi_pack_indexes_before++; m = m->base_midx; } - } else if (ctx.m && fill_packs_from_midx(&ctx)) { + } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) { goto cleanup; } @@ -1135,13 +1257,23 @@ static int write_midx_internal(struct write_midx_opts *opts) else ctx.progress = NULL; - ctx.to_include = opts->packs_to_include; + if (ctx.compact) { + int bitmap_order = 0; + if (opts->preferred_pack_name) + bitmap_order |= 1; + else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) + bitmap_order |= 1; - for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); + fill_packs_from_midx_range(&ctx, bitmap_order); + } else { + ctx.to_include = opts->packs_to_include; + for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); + } stop_progress(&ctx.progress); if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) && !ctx.incremental && + !ctx.compact && !(opts->packs_to_include || 
opts->packs_to_drop)) { struct bitmap_index *bitmap_git; int bitmap_exists; @@ -1255,12 +1387,15 @@ static int write_midx_internal(struct write_midx_opts *opts) ctx.large_offsets_needed = 1; } - QSORT(ctx.info, ctx.nr, pack_info_compare); + if (!ctx.compact) + QSORT(ctx.info, ctx.nr, pack_info_compare); if (opts->packs_to_drop && opts->packs_to_drop->nr) { size_t drop_index = 0; int missing_drops = 0; + ASSERT(!ctx.compact); + for (size_t i = 0; i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, @@ -1292,12 +1427,20 @@ static int write_midx_internal(struct write_midx_opts *opts) */ ALLOC_ARRAY(ctx.pack_perm, ctx.nr); for (size_t i = 0; i < ctx.nr; i++) { + uint32_t from = ctx.info[i].orig_pack_int_id; + uint32_t to; + if (ctx.info[i].expired) { + to = PACK_EXPIRED; dropped_packs++; - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED; } else { - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs; + to = i - dropped_packs; } + + if (ctx.compact) + from -= ctx.compact_from->num_packs_in_base; + + ctx.pack_perm[from] = to; } for (size_t i = 0; i < ctx.nr; i++) { @@ -1445,7 +1588,24 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); + if (ctx.compact) { + struct multi_pack_index *m; + + /* + * Keep all MIDX layers excluding those in the range [from, to]. 
+ */ + for (m = ctx.base_midx; m; m = m->base_midx) + keep_hashes_nr++; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) + keep_hashes_nr++; + + keep_hashes_nr++; /* include the compacted layer */ + } else { + keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; + } + CALLOC_ARRAY(keep_hashes, keep_hashes_nr); if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); @@ -1470,17 +1630,47 @@ static int write_midx_internal(struct write_midx_opts *opts) strbuf_release(&final_midx_name); - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + if (ctx.compact) { + struct multi_pack_index *m; + uint32_t num_layers_before_from = 0; + uint32_t i; - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + for (m = ctx.base_midx; m; m = m->base_midx) + num_layers_before_from++; - keep_hashes[j] = xstrdup(get_midx_checksum(m)); - m = m->base_midx; + m = ctx.base_midx; + for (i = 0; i < num_layers_before_from; i++) { + uint32_t j = num_layers_before_from - i - 1; + + keep_hashes[j] = xstrdup(get_midx_checksum(m)); + m = m->base_midx; + } + + keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); + + i = 0; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) { + keep_hashes[keep_hashes_nr - i - 1] = + xstrdup(get_midx_checksum(m)); + i++; + } + } else { + keep_hashes[ctx.num_multi_pack_indexes_before] = + xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); + + for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { + uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + + keep_hashes[j] = xstrdup(get_midx_checksum(m)); + m = m->base_midx; + } } - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { 
keep_hashes[ctx.num_multi_pack_indexes_before] = @@ -1493,8 +1683,7 @@ static int write_midx_internal(struct write_midx_opts *opts) if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, keep_hashes, - ctx.num_multi_pack_indexes_before + 1, + clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, ctx.incremental); result = 0; @@ -1512,7 +1701,7 @@ static int write_midx_internal(struct write_midx_opts *opts) free(ctx.pack_perm); free(ctx.pack_order); if (keep_hashes) { - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) free((char *)keep_hashes[i]); free(keep_hashes); } @@ -1553,6 +1742,21 @@ int write_midx_file_only(struct odb_source *source, return write_midx_internal(&opts); } +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags) +{ + struct write_midx_opts opts = { + .source = source, + .compact_from = from, + .compact_to = to, + .flags = flags | MIDX_WRITE_COMPACT, + }; + + return write_midx_internal(&opts); +} + int expire_midx_packs(struct odb_source *source, unsigned flags) { uint32_t i, *count, result = 0; diff --git a/midx.h b/midx.h index 39bf04b18e..61f9809b8c 100644 --- a/midx.h +++ b/midx.h @@ -81,6 +81,7 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) +#define MIDX_WRITE_COMPACT (1 << 6) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" @@ -130,6 +131,10 @@ int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags); void clear_midx_file(struct repository *r); int 
verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); diff --git a/t/meson.build b/t/meson.build index 7c994d4643..2d1926faaf 100644 --- a/t/meson.build +++ b/t/meson.build @@ -613,6 +613,7 @@ integration_tests = [ 't5332-multi-pack-reuse.sh', 't5333-pseudo-merge-bitmaps.sh', 't5334-incremental-multi-pack-index.sh', + 't5335-compact-multi-pack-index.sh', 't5351-unpack-large-objects.sh', 't5400-send-pack.sh', 't5401-update-hooks.sh', diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh new file mode 100755 index 0000000000..f889af7fb1 --- /dev/null +++ b/t/t5335-compact-multi-pack-index.sh @@ -0,0 +1,102 @@ +#!/bin/sh + +test_description='multi-pack-index compaction' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +nth_line() { + local n="$1" + shift + awk "NR==$n" "$@" +} + +write_packs () { + for c in "$@" + do + test_commit "$c" && + + git pack-objects --all --unpacked $packdir/pack-$c && + git prune-packed && + + git multi-pack-index write --incremental --bitmap || return 1 + done +} + +test_midx_layer_packs () { + local checksum="$1" && + shift && + + test-tool read-midx $objdir "$checksum" >out && + + printf "%s\n" "$@" >expect && + # NOTE: do *not* pipe through sort here, we want to ensure the + # order of packs is preserved during compaction. 
+ grep "^pack-" out | cut -d"-" -f2 >actual && + + test_cmp expect actual +} + +test_midx_layer_object_uniqueness () { + : >objs.all + while read layer + do + test-tool read-midx --show-objects $objdir "$layer" >out && + grep "\.pack$" out | cut -d" " -f1 | sort >objs.layer && + test_stdout_line_count = 0 comm -12 objs.all objs.layer && + cat objs.all objs.layer | sort >objs.tmp && + mv objs.tmp objs.all || return 1 + done <$midx_chain +} + +test_expect_success 'MIDX compaction with lex-ordered pack names' ' + git init midx-compact-lex-order && + ( + cd midx-compact-lex-order && + + write_packs A B C D E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' + git init midx-compact-non-lex-order && + ( + cd midx-compact-non-lex-order && + + write_packs D C A B E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_done -- GitLab From 1e06a72a0a43d1d84e7d0dde47a47b3982638d6e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sat, 6 Dec 2025 15:31:50 -0500 Subject: [PATCH 048/110] midx: enable reachability bitmaps during MIDX compaction Enable callers to generate reachability bitmaps when performing MIDX layer compaction by combining all existing bitmaps from the 
compacted layers. Note that because of the object/pack ordering described by the previous commit, the pseudo-pack order for the compacted MIDX is the same as concatenating the individual pseudo-pack orderings for each layer in the compaction range. As a result, the only non-test or documentation change necessary is to treat all objects as non-preferred during compaction so as not to disturb the object ordering. In the future, we may want to adjust which commit(s) receive reachability bitmaps when compacting multiple .bitmap files into one, or even generate new bitmaps (e.g., if the references have moved significantly since the .bitmap was generated). This commit only implements combining all existing bitmaps in range together in order to demonstrate and lay the groundwork for more exotic strategies. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 2 +- builtin/multi-pack-index.c | 4 +- midx-write.c | 2 +- t/t5335-compact-multi-pack-index.sh | 120 +++++++++++++++++++++++- 4 files changed, 123 insertions(+), 5 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index a9664e7741..458bb87363 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -13,7 +13,7 @@ SYNOPSIS [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] 'git multi-pack-index' [] compact [--[no-]incremental] - + [--[no-]bitmap] 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 9b0c2082cb..40afa8f1ed 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -19,7 +19,7 @@ #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " ") + " [--[no-]bitmap] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] 
verify") @@ -216,6 +216,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, struct option *options; static struct option builtin_multi_pack_index_compact_options[] = { + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_END(), diff --git a/midx-write.c b/midx-write.c index fcbfedcd91..f2dbacef4c 100644 --- a/midx-write.c +++ b/midx-write.c @@ -657,7 +657,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) struct pack_midx_entry *e = &ctx->entries[i]; data[i].nr = i; data[i].pack = midx_pack_perm(ctx, e->pack_int_id); - if (!e->preferred) + if (!e->preferred || ctx->compact) data[i].pack |= (1U << 31); data[i].offset = e->offset; } diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index f889af7fb1..a306f50430 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -65,7 +65,7 @@ test_expect_success 'MIDX compaction with lex-ordered pack names' ' write_packs A B C D E && test_line_count = 5 $midx_chain && - git multi-pack-index compact --incremental \ + git multi-pack-index compact --incremental --bitmap \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 4 "$midx_chain")" && test_line_count = 3 $midx_chain && @@ -86,7 +86,7 @@ test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' write_packs D C A B E && test_line_count = 5 $midx_chain && - git multi-pack-index compact --incremental \ + git multi-pack-index compact --incremental --bitmap \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 4 "$midx_chain")" && test_line_count = 3 $midx_chain && @@ -99,4 +99,120 @@ test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' ) ' +midx_objs_by_pack () { + awk '/\.pack$/ { split($3, a, "-"); print a[2], $1 }' | sort +} + +tag_objs_from_pack () { + objs="$(git rev-list --objects 
--no-object-names "$2")" && + printf "$1 %s\n" $objs | sort +} + +test_expect_success 'MIDX compaction preserves pack object selection' ' + git init midx-compact-preserve-selection && + ( + cd midx-compact-preserve-selection && + + test_commit A && + test_commit B && + + # Create two packs, one containing just the objects from + # A, and another containing all objects from the + # repository. + p1="$(echo A | git pack-objects --revs --delta-base-offset \ + $packdir/pack-1)" && + p0="$(echo B | git pack-objects --revs --delta-base-offset \ + $packdir/pack-0)" && + + echo "pack-1-$p1.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + echo "pack-0-$p0.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + + write_packs C && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + + test-tool read-midx --show-objects $objdir \ + "$(nth_line 1 "$midx_chain")" >AB.info && + test-tool read-midx --show-objects $objdir \ + "$(nth_line 2 "$midx_chain")" >C.info && + + midx_objs_by_pack AB.actual && + midx_objs_by_pack C.actual && + + { + tag_objs_from_pack 1 A && + tag_objs_from_pack 0 A..B + } | sort >AB.expect && + tag_objs_from_pack C B..C >C.expect && + + test_cmp AB.expect AB.actual && + test_cmp C.expect C.actual + ) +' + +test_expect_success 'MIDX compaction with bitmaps' ' + git init midx-compact-with-bitmaps && + ( + cd midx-compact-with-bitmaps && + + write_packs foo bar baz quux woot && + + test-tool read-midx --bitmap $objdir >bitmap.expect && + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test-tool read-midx --bitmap $objdir >bitmap.actual && + + test_cmp bitmap.expect bitmap.actual && + + true + ) +' + +test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' + git init midx-compact-with-bitmaps-non-trivial && + ( + cd 
midx-compact-with-bitmaps-non-trivial && + + git branch -m main && + + # D(4) + # / + # A(1) --- B(2) --- C(3) --- G(7) + # \ + # E(5) --- F(6) + write_packs A B C && + git checkout -b side && + write_packs D && + git checkout -b other B && + write_packs E F && + git checkout main && + write_packs G && + + cat $midx_chain && + + # Compact layers 2-4, leaving us with: + # + # [A, [B, C, D], E, F, G] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + + # Then compact the top two layers, condensing the above + # such that the new 4th layer contains F and G. + # + # [A, [B, C, D], E, [F, G]] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 4 "$midx_chain")" \ + "$(nth_line 5 "$midx_chain")" && + + cat $midx_chain + ) +' + test_done -- GitLab From 6df1bc0aacb9c06b7ce1b60ea59e5623fc2f0674 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 10:54:54 +0100 Subject: [PATCH 049/110] Start tracking packfiles per object database source Hi, the `struct packfile_store` tracks packfiles we have in the repository so that we can look up objects stored therein. Right now, the packfile store is tracked on the object database level -- each object database has exactly one packfile store. Consequently, we track packfiles that are part of different object database sources via the same packfile store. This patch series refactors this so that we instead have one packfile store per ODB source. This means that access to any object, regardless of whether it is stored in a packfile or in a loose object, is always done via its owning source. This is the last step required for pluggable object databases: all object access is routed through sources, and we can thus now abstract these sources and then plug in a different implementation. Of course, these abstractions are still very leaky, and we still reach into the implementation details in a bunch of files. 
But this is something that will be addressed over subsequent steps. This series is based on top of e85ae279b0 (The seventh batch, 2025-12-09) with ps/object-read-stream at 7b94028652 (streaming: drop redundant type and size pointers, 2025-11-23) merged into it. There is a minor conflict with ps/odb-misc-fixes that can be resolved by simply deleting the conflicting lines. Thanks! Patrick To: git@vger.kernel.org --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 1, "change-id": "20251201-b4-pks-pack-store-via-source-fd43dc0765a7", "prefixes": [] } } -- GitLab From c9d1c194cf30b823b994896988fdf53e1876581d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 7 Nov 2025 10:53:52 +0100 Subject: [PATCH 050/110] packfile: create store via its owning source In subsequent patches we're about to move the packfile store from the object database layer into the object database source layer. Once done, we'll have one packfile store per source, where the source is owning the store. Prepare for this future and refactor `packfile_store_new()` to be initialized via an object database source instead of via the object database itself. This refactoring leads to a weird in-between state where the store is owned by the object database but created via the source. But this makes subsequent refactorings easier because we can now start to access the owning source of a given store. 
Signed-off-by: Patrick Steinhardt --- odb.c | 2 +- packfile.c | 20 ++++++++++---------- packfile.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index af13174425..3ab730f713 100644 --- a/odb.c +++ b/odb.c @@ -1056,7 +1056,6 @@ struct object_database *odb_new(struct repository *repo, memset(o, 0, sizeof(*o)); o->repo = repo; - o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); @@ -1065,6 +1064,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->packfiles = packfile_store_new(o->sources); free(to_free); diff --git a/packfile.c b/packfile.c index c88bd92619..0a05a10daa 100644 --- a/packfile.c +++ b/packfile.c @@ -876,7 +876,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, p = strmap_get(&store->packs_by_path, key.buf); if (!p) { - p = add_packed_git(store->odb->repo, idx_path, + p = add_packed_git(store->source->odb->repo, idx_path, strlen(idx_path), local); if (p) packfile_store_add_pack(store, p); @@ -1068,8 +1068,8 @@ void packfile_store_prepare(struct packfile_store *store) if (store->initialized) return; - odb_prepare_alternates(store->odb); - for (source = store->odb->sources; source; source = source->next) { + odb_prepare_alternates(store->source->odb); + for (source = store->source->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } @@ -1092,7 +1092,7 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->odb->sources; source; source = source->next) { + for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { struct multi_pack_index *m = 
source->midx; if (!m) continue; @@ -2121,7 +2121,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 0; if (e.p->is_cruft) return 0; @@ -2142,7 +2142,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->odb->repo, oid, &e)) + if (!find_pack_entry(store->source->odb->repo, oid, &e)) return 1; /* @@ -2152,7 +2152,7 @@ int packfile_store_read_object_info(struct packfile_store *store, if (oi == &blank_oi) return 0; - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, oid); return -1; @@ -2411,11 +2411,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -struct packfile_store *packfile_store_new(struct object_database *odb) +struct packfile_store *packfile_store_new(struct odb_source *source) { struct packfile_store *store; CALLOC_ARRAY(store, 1); - store->odb = odb; + store->source = source; strmap_init(&store->packs_by_path); return store; } @@ -2534,7 +2534,7 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, if (packfile_store_read_object_info(store, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(store->odb->repo) >= size) + repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, diff --git a/packfile.h b/packfile.h index 59d162a3f4..33cc1c1654 100644 --- a/packfile.h +++ b/packfile.h @@ -77,7 +77,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, * A store that manages packfiles for a given object database. 
*/ struct packfile_store { - struct object_database *odb; + struct odb_source *source; /* * The list of packfiles in the order in which they have been most @@ -129,9 +129,9 @@ struct packfile_store { /* * Allocate and initialize a new empty packfile store for the given object - * database. + * database source. */ -struct packfile_store *packfile_store_new(struct object_database *odb); +struct packfile_store *packfile_store_new(struct odb_source *source); /* * Free the packfile store and all its associated state. All packfiles -- GitLab From ea28699f7237931861b6394ce627852bf8a46586 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 09:48:32 +0200 Subject: [PATCH 051/110] packfile: pass source to `prepare_pack()` When preparing a packfile we pass various pieces attached to the pack's object database source via the `struct prepare_pack_data`. Refactor this code to instead pass in the source directly. This reduces the number of variables we need to pass and allows for a subsequent refactoring where we start to prepare the pack via the source. 
Signed-off-by: Patrick Steinhardt --- packfile.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packfile.c b/packfile.c index 0a05a10daa..ab86afa01d 100644 --- a/packfile.c +++ b/packfile.c @@ -975,10 +975,8 @@ void for_each_file_in_pack_dir(const char *objdir, } struct prepare_pack_data { - struct repository *r; + struct odb_source *source; struct string_list *garbage; - int local; - struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -988,10 +986,10 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->m && midx_contains_pack(data->m, file_name))) { + !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->r->objects->packfiles, - trimmed_path, data->local); + packfile_store_load_pack(data->source->odb->packfiles, + trimmed_path, data->source->local); free(trimmed_path); } @@ -1020,10 +1018,8 @@ static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { - .m = source->midx, - .r = source->odb->repo, + .source = source, .garbage = &garbage, - .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); -- GitLab From 8a77907578d00113c4430e66fb7b6a112248d346 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:23:43 +0200 Subject: [PATCH 052/110] packfile: refactor kept-pack cache to work with packfile stores The kept pack cache is a cache of packfiles that are marked as kept either via an accompanying ".kept" file or via an in-memory flag. The cache can be retrieved via `kept_pack_cache()`, where one needs to pass in a repository. 
Ultimately though the kept-pack cache is a property of the packfile store, and this causes problems in a subsequent commit where we want to move down the packfile store to be a per-object-source entity. Prepare for this and refactor the kept-pack cache to work on top of a packfile store instead. Signed-off-by: Patrick Steinhardt --- builtin/pack-objects.c | 12 ++++++------ packfile.c | 37 ++++++++++++++++++++----------------- packfile.h | 18 +++++++++++++----- reachable.c | 2 +- revision.c | 8 ++++---- 5 files changed, 44 insertions(+), 33 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1ce8d6ee21..e86b8f387a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,9 +1529,9 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; off_t ofs; uint32_t candidate_mtime; @@ -1624,9 +1624,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= KEPT_PACK_IN_CORE; /* * If the object is in a pack that we want to ignore, *and* we @@ -3931,7 +3931,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) * an optimization during delta selection. 
*/ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; @@ -4030,7 +4030,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS); + return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); } static int cruft_include_check(struct commit *commit, void *data) diff --git a/packfile.c b/packfile.c index ab86afa01d..191344eb1c 100644 --- a/packfile.c +++ b/packfile.c @@ -2164,25 +2164,26 @@ int packfile_store_read_object_info(struct packfile_store *store, return 0; } -static void maybe_invalidate_kept_pack_cache(struct repository *r, +static void maybe_invalidate_kept_pack_cache(struct packfile_store *store, unsigned flags) { - if (!r->objects->packfiles->kept_cache.packs) + if (!store->kept_cache.packs) return; - if (r->objects->packfiles->kept_cache.flags == flags) + if (store->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); - r->objects->packfiles->kept_cache.flags = 0; + FREE_AND_NULL(store->kept_cache.packs); + store->kept_cache.flags = 0; } -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags) { - maybe_invalidate_kept_pack_cache(r, flags); + maybe_invalidate_kept_pack_cache(store, flags); - if (!r->objects->packfiles->kept_cache.packs) { + if (!store->kept_cache.packs) { struct packed_git **packs = NULL; + struct packfile_list_entry *e; size_t nr = 0, alloc = 0; - struct packed_git *p; /* * We want "all" packs here, because we need to cover ones that @@ -2192,9 +2193,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not 
kept, but the midx returns only * the non-kept version. */ - repo_for_each_pack(r, p) { - if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || - (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } @@ -2202,11 +2205,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->packfiles->kept_cache.packs = packs; - r->objects->packfiles->kept_cache.flags = flags; + store->kept_cache.packs = packs; + store->kept_cache.flags = flags; } - return r->objects->packfiles->kept_cache.packs; + return store->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, @@ -2214,9 +2217,9 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache; + struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); - for (cache = kept_pack_cache(r, flags); *cache; cache++) { + for (; *cache; cache++) { struct packed_git *p = *cache; if (fill_pack_entry(oid, e, p)) return 1; diff --git a/packfile.h b/packfile.h index 33cc1c1654..701a3b4946 100644 --- a/packfile.h +++ b/packfile.h @@ -210,6 +210,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid); +enum kept_pack_type { + KEPT_PACK_ON_DISK = (1 << 0), + KEPT_PACK_IN_CORE = (1 << 1), +}; + +/* + * Retrieve the cache of kept packs from the given packfile store. Accepts a + * combination of `kept_pack_type` flags. The cache is computed on demand and + * will be recomputed whenever the flags change. 
+ */ +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -385,9 +398,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 - /* * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. @@ -398,8 +408,6 @@ int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. diff --git a/reachable.c b/reachable.c index b753c39553..4b532039d5 100644 --- a/reachable.c +++ b/reachable.c @@ -242,7 +242,7 @@ static int want_recent_object(struct recent_data *data, const struct object_id *oid) { if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS)) + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) return 0; return 1; } diff --git a/revision.c b/revision.c index 5f0850ae5c..64d223a7c6 100644 --- a/revision.c +++ b/revision.c @@ -2541,14 +2541,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked= no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 
1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; -- GitLab From 517feacc319b9cb20841891a1768903e3d4ec56c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 19 Oct 2025 17:19:31 +0200 Subject: [PATCH 053/110] packfile: refactor misleading code when unusing pack windows The function `unuse_one_window()` is responsible for unmapping one of the packfile windows, which is done when we have exceeded the allowed number of windows. The function receives a `struct packed_git` as input, which serves as an additional packfile that should be considered to be closed. If not given, we seemingly skip that and instead go through all of the repository's packfiles. The conditional that checks whether we have a packfile though does not make much sense anymore, as we dereference the packfile regardless of whether or not it is a `NULL` pointer to derive the repository's packfile store. The function was originally introduced via f0e17e86e1 (pack: move release_pack_memory(), 2017-08-18), and here we indeed had a caller that passed a `NULL` pointer. That caller was later removed via 9827d4c185 (packfile: drop release_pack_memory(), 2019-08-12), so starting with that commit we always pass a `struct packed_git`. In 9c5ce06d74 (packfile: use `repository` from `packed_git` directly, 2024-12-03) we then inadvertently started to rely on the fact that the pointer is never `NULL` because we use it now to identify the repository. Arguably, it didn't really make sense in the first place that the caller provides a packfile, as the selected window would have been overridden anyway by the subsequent loop over all packfiles if there was an older window. 
So the overall logic is quite misleading overall. The only case where it _could_ make a difference is when there were two packfiles with the same `last_used` value, but that case doesn't ever happen because the `pack_used_ctr` is strictly increasing. Refactor the code so that we instead pass in the object database to help make the code less misleading. Signed-off-by: Patrick Steinhardt --- packfile.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packfile.c b/packfile.c index 191344eb1c..3700612465 100644 --- a/packfile.c +++ b/packfile.c @@ -355,16 +355,15 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct object_database *odb) { struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + for (e = odb->packfiles->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -740,8 +739,8 @@ unsigned char *use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; - while (settings->packed_git_limit < pack_mapped - && unuse_one_window(p)) + while (settings->packed_git_limit < pack_mapped && + unuse_one_window(p->repo->objects)) ; /* nothing */ win->base = xmmap_gently(NULL, win->len, PROT_READ, MAP_PRIVATE, -- GitLab From 340d0f8bc31df1b4f401b7a83a77d3a41c2dbf18 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 12:41:29 +0100 Subject: [PATCH 054/110] odb: properly close sources before freeing them In the next commit we are about to move the packfile store into the ODB source so that we have one store per source. 
This will lead to a memory leak in the following commit when reading data from a submodule via git-grep(1): Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x55555562e726 in calloc (git+0xda726) #1 0x555555963244 in xcalloc ../wrapper.c:154:8 #2 0x55555586b09b in use_pack ../packfile.c:739:4 #3 0x55555586c6bf in unpack_object_header ../packfile.c:1235:9 #4 0x55555586d44b in unpack_entry ../packfile.c:1789:10 #5 0x55555586cd6c in cache_or_unpack_entry ../packfile.c:1520:10 #6 0x55555586cacf in packed_object_info ../packfile.c:1600:19 #7 0x55555586e60a in packfile_store_read_object_info ../packfile.c:2165:10 #8 0x5555558525eb in do_oid_object_info_extended ../odb.c:720:10 #9 0x555555851fb1 in odb_read_object_info_extended ../odb.c:847:8 #10 0x555555852c0d in odb_read_object ../odb.c:905:6 #11 0x5555558089e0 in grep_source_load_oid ../grep.c:1934:12 #12 0x5555558087ea in grep_source_load ../grep.c:1986:10 #13 0x5555558077b3 in grep_source_is_binary ../grep.c:2014:7 #14 0x555555805c24 in grep_source_1 ../grep.c:1625:8 #15 0x5555558059d2 in grep_source ../grep.c:1837:10 #16 0x5555556a5ed8 in run ../builtin/grep.c:208:10 #17 0x55555562bb42 in void* ThreadStartFunc(void*) lsan_interceptors.cpp.o #18 0x7ffff7a9a979 in start_thread (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x9a979) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) #19 0x7ffff7b22d2b in __GI___clone3 (/nix/store/xx7cm72qy2c0643cm1ipngd87aqwkcdp-glibc-2.40-66/lib/libc.so.6+0x122d2b) (BuildId: cddea92d6cba8333be952b5a02fd47d61054c5ab) The root cause of this leak is the way we set up and release the submodule: 1. We use `repo_submodule_init()` to initialize a new repository. This repository is stored in `repos_to_free`. 2. We now read data from the submodule repository. 3. We then call `repo_clear()` on the submodule repositories. 4. `repo_clear()` calls `odb_free()`. 5. `odb_free()` calls `odb_free_sources()` followed by `odb_close()`. 
The issue here is the 5th step: we call `odb_free_sources()` _before_ we call `odb_close()`. But `odb_free_sources()` already frees all sources, so the logic that closes them in `odb_close()` now becomes a no-op. As a consequence, we never explicitly close sources at all. This isn't a problem at the current point in time: the loose sources don't have any state to release, and the packfile store is not yet part of the sources. But once the packfile store is owned by the source we won't close it anymore, and this causes us to leak the packfile windows. Fix the upcoming leak by closing the store before we free the sources. Signed-off-by: Patrick Steinhardt --- odb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/odb.c b/odb.c index 3ab730f713..94144a69f5 100644 --- a/odb.c +++ b/odb.c @@ -1111,13 +1111,13 @@ void odb_free(struct object_database *o) oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); + odb_close(o); odb_free_sources(o); for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - odb_close(o); packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); -- GitLab From 53c69491634658478f4dc121d8683738a611686c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 12:56:45 +0100 Subject: [PATCH 055/110] packfile: move packfile store into object source The packfile store is a member of `struct object_database`, which means that we have a single store per database. This doesn't really make much sense though: each source connected to the database has its own set of packfiles, so there is a conceptual mismatch here. This hasn't really caused much of a problem in the past, but with the advent of pluggable object databases this is becoming more of a problem because some of the sources may not even use packfiles in the first place. 
Move the packfile store down by one level from the object database into the object database source. This ensures that each source now has its own packfile store, and we can eventually start to abstract it away entirely so that the caller doesn't even know what kind of store it uses. Note that we only need to adjust a relatively small number of callers, way less than one might expect. This is because most callers are using `repo_for_each_pack()`, which handles enumeration of all packfiles that exist in the repository. So for now, none of these callers need to be adapted. The remaining callers that iterate through the packfiles directly and that need adjustment are those that are a bit more tangled with packfiles. These will be adjusted over time. Note that this patch only moves the packfile store, and there is still a bunch of functions that seemingly operate on a packfile store but that end up iterating over all sources. These will be adjusted in subsequent commits. Signed-off-by: Patrick Steinhardt --- builtin/fast-import.c | 37 +++++++----- builtin/grep.c | 6 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 96 ++++++++++++++++--------------- http.c | 2 +- midx.c | 5 +- odb.c | 36 ++++++------ odb.h | 6 +- odb/streaming.c | 9 ++- packfile.c | 127 ++++++++++++++++++++++++++--------------- packfile.h | 62 +++++++++++++++++--- 11 files changed, 243 insertions(+), 145 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 7849005ccb..b8a7757cfd 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -900,7 +900,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. 
*/ - new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + new_p = packfile_store_load_pack(pack_data->repo->objects->sources->packfiles, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); @@ -955,7 +955,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct odb_source *source; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -979,7 +979,11 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + } + + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1096,10 +1100,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); + struct odb_source *source; struct object_entry *e; struct object_id oid; unsigned long hdrlen; @@ -1179,24 +1183,29 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); + goto out; + } - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + for (source = the_repository->objects->sources; source; source = source->next) { + if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; + goto out; } + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + +out: free(in_buf); free(out_buf); } diff --git a/builtin/grep.c b/builtin/grep.c index 53cccf2d25..4855b871dd 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,8 +1213,12 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); + /* + * Note: `packfile_store_prepare()` prepares stores from all + * sources. This will be fixed in a subsequent commit. + */ if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->packfiles); + packfile_store_prepare(the_repository->objects->sources->packfiles); start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a7e901e49c..b67fb0256c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1638,7 +1638,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, hash, "idx", 1); if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->packfiles, + packfile_store_load_pack(the_repository->objects->sources->packfiles, final_index_name, 0); if (!from_stdin) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e86b8f387a..7fd90a9996 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1529,49 +1529,53 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) 
{ - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); - ofs = find_pack_entry_one(oid, p); - if (!ofs) - continue; + for (; *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. - */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) + ofs = find_pack_entry_one(oid, p); + if (!ofs) continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. 
+ */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. + */ + if (mtime <= candidate_mtime) + return 0; + } } return -1; @@ -1749,13 +1753,15 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->packs, p); - if (want != -1) - return want; + for (source = the_repository->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); + if (!exclude && want > 0) + packfile_list_prepend(&source->packfiles->packs, p); + if (want != -1) + return want; + } } if (uri_protocols.nr) { diff --git a/http.c b/http.c index 41f850db16..7815f144de 100644 --- a/http.c +++ b/http.c @@ -2544,7 +2544,7 @@ void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->packfiles, p); + packfile_store_add_pack(the_repository->objects->sources->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/midx.c b/midx.c index 24e1e72175..dbb2aa68ba 100644 --- a/midx.c +++ b/midx.c @@ -95,7 +95,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source 
*source) { - packfile_store_prepare(source->odb->packfiles); + packfile_store_prepare(source->packfiles); return source->midx; } @@ -447,7 +447,6 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -460,7 +459,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(r->objects->packfiles, + p = packfile_store_load_pack(m->source->packfiles, pack_name.buf, m->source->local); strbuf_release(&pack_name); diff --git a/odb.c b/odb.c index 94144a69f5..f159fbdd99 100644 --- a/odb.c +++ b/odb.c @@ -155,6 +155,7 @@ static struct odb_source *odb_source_new(struct object_database *odb, source->local = local; source->path = xstrdup(path); source->loose = odb_source_loose_new(source); + source->packfiles = packfile_store_new(source); return source; } @@ -373,6 +374,7 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); + packfile_store_free(source->packfiles); free(source); } @@ -704,19 +706,19 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; - /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags) || + !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; + } /* Not a loose object; someone else may have just packed it. 
*/ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!packfile_store_read_object_info(source->packfiles, real, oi, flags)) + return 0; } /* @@ -975,13 +977,14 @@ int odb_freshen_object(struct object_database *odb, { struct odb_source *source; - if (packfile_store_freshen_object(odb->packfiles, oid)) - return 1; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) + for (source = odb->sources; source; source = source->next) { + if (packfile_store_freshen_object(source->packfiles, oid)) + return 1; + if (odb_source_loose_freshen_object(source, oid)) return 1; + } return 0; } @@ -1064,7 +1067,6 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->packfiles = packfile_store_new(o->sources); free(to_free); @@ -1077,9 +1079,8 @@ void odb_close(struct object_database *o) { struct odb_source *source; - packfile_store_close(o->packfiles); - for (source = o->sources; source; source = source->next) { + packfile_store_close(source->packfiles); if (source->midx) close_midx(source->midx); source->midx = NULL; @@ -1118,7 +1119,6 @@ void odb_free(struct object_database *o) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); chdir_notify_unregister(NULL, odb_update_commondir, o); @@ -1141,13 +1141,13 @@ void odb_reprepare(struct object_database *o) o->loaded_alternates = 0; odb_prepare_alternates(o); - for (source = o->sources; source; source = source->next) + for (source = o->sources; source; source = source->next) { odb_source_loose_reprepare(source); + packfile_store_reprepare(source->packfiles); + } 
o->approximate_object_count_valid = 0; - packfile_store_reprepare(o->packfiles); - obj_read_unlock(); } diff --git a/odb.h b/odb.h index 014cd9585a..c97b41c58c 100644 --- a/odb.h +++ b/odb.h @@ -51,6 +51,9 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; + /* Should only be accessed directly by packfile.c and midx.c. */ + struct packfile_store *packfiles; + /* * private data * @@ -128,9 +131,6 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - /* * This is meant to hold a *small* number of objects that you would * want odb_read_object() to be able to return, but yet you do not want diff --git a/odb/streaming.c b/odb/streaming.c index 745cd486fb..4a4474f891 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -185,13 +185,12 @@ static int istream_source(struct odb_read_stream **out, { struct odb_source *source; - if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) - return 0; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_stream(out, source, oid)) + for (source = odb->sources; source; source = source->next) { + if (!packfile_store_read_object_stream(out, source->packfiles, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) return 0; + } return open_istream_incore(out, odb, oid); } diff --git a/packfile.c b/packfile.c index 3700612465..a0225cb2cb 100644 --- a/packfile.c +++ b/packfile.c @@ -357,12 +357,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct object_database *odb) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - for (e = odb->packfiles->packs.head; e; e = e->next) - 
scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + for (source = odb->sources; source; source = source->next) + for (e = source->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -528,15 +530,18 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (e = r->objects->packfiles->packs.head; e; e = e->next) { - if (e->pack->pack_fd == -1) - continue; - find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + for (source = r->objects->sources; source; source = source->next) { + for (e = source->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) + continue; + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + } } if (lru_p) @@ -987,7 +992,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->source->odb->packfiles, + packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); free(trimmed_path); } @@ -1245,11 +1250,15 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packfile_list_entry *e; + struct odb_source *source; + + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + for (e = source->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; + } - for (e = r->objects->packfiles->packs.head; e; e = 
e->next) - if (oidset_contains(&e->pack->bad_objects, oid)) - return e->pack; return NULL; } @@ -2089,26 +2098,32 @@ static int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct packfile_list_entry *l; + struct odb_source *source; - packfile_store_prepare(r->objects->packfiles); + /* + * Note: `packfile_store_prepare()` prepares stores from all sources. + * This will be fixed in a subsequent commit. + */ + packfile_store_prepare(r->objects->sources->packfiles); - for (struct odb_source *source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs.head) - return 0; + for (source = r->objects->sources; source; source = source->next) { + struct packfile_list_entry *l; - for (l = r->objects->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = source->packfiles->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!r->objects->packfiles->skip_mru_updates) - packfile_list_prepend(&r->objects->packfiles->packs, p); - return 1; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!source->packfiles->skip_mru_updates) + packfile_list_prepend(&source->packfiles->packs, p); + return 1; + } } } + return 0; } @@ -2216,12 +2231,18 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(r->objects->packfiles, flags); + struct odb_source *source; - for (; *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; + for (source = r->objects->sources; source; source = source->next) { + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + + for (; *cache; 
cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, e, p)) + return 1; + } } return 0; @@ -2287,32 +2308,46 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) { - struct packed_git *p; + struct odb_source *source; int r = 0; int pack_errors = 0; - repo->objects->packfiles->skip_mru_updates = true; - repo_for_each_pack(repo, p) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *e; + + source->packfiles->skip_mru_updates = true; + + for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + pack_errors = 1; + continue; + } + + r = for_each_object_in_pack(p, cb, data, flags); + if (r) + break; } - r = for_each_object_in_pack(p, cb, data, flags); + + source->packfiles->skip_mru_updates = false; + if (r) break; } - repo->objects->packfiles->skip_mru_updates = false; return r ? 
r : pack_errors; } diff --git a/packfile.h b/packfile.h index 701a3b4946..6872b16755 100644 --- a/packfile.h +++ b/packfile.h @@ -5,6 +5,7 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "repository.h" #include "strmap.h" /* in odb.h */ @@ -169,14 +170,65 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); + +struct repo_for_each_pack_data { + struct odb_source *source; + struct packfile_list_entry *entry; +}; + +static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct repository *repo) +{ + struct repo_for_each_pack_data data = { 0 }; + + odb_prepare_alternates(repo->objects); + + for (struct odb_source *source = repo->objects->sources; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data.source = source; + data.entry = entry; + break; + } + + return data; +} + +static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data) +{ + struct odb_source *source; + + data->entry = data->entry->next; + if (data->entry) + return; + + for (source = data->source->next; source; source = source->next) { + struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + if (!entry) + continue; + data->source = source; + data->entry = entry; + return; + } + + data->source = NULL; + data->entry = NULL; +} + /* * Load and iterate through all packs of the given repository. This helper * function will yield packfiles from all object sources connected to the * repository. 
*/ #define repo_for_each_pack(repo, p) \ - for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ - ((p) = (e ? e->pack : NULL)); e = e->next) + for (struct repo_for_each_pack_data eack_pack_data = repo_for_eack_pack_data_init(repo); \ + ((p) = (eack_pack_data.entry ? eack_pack_data.entry->pack : NULL)); \ + repo_for_each_pack_data_next(&eack_pack_data)) int packfile_store_read_object_stream(struct odb_read_stream **out, struct packfile_store *store, @@ -193,12 +245,6 @@ int packfile_store_read_object_info(struct packfile_store *store, struct object_info *oi, unsigned flags); -/* - * Get all packs managed by the given store, including packfiles that are - * referenced by multi-pack indices. - */ -struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a -- GitLab From 79b3470c563ec74f7162b37ab6b698b0fbaead05 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 12:56:26 +0200 Subject: [PATCH 056/110] packfile: only prepare owning store in `packfile_store_get_packs()` When calling `packfile_store_get_packs()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Adapt the code so that we only prepare the MIDX of the provided store. All callers only work in the context of a single store or call the function in a loop over all sources, so this change shouldn't have any practical effects. 
Signed-off-by: Patrick Steinhardt --- packfile.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index a0225cb2cb..c46d53b75d 100644 --- a/packfile.c +++ b/packfile.c @@ -1092,10 +1092,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->source->odb->sources; source; source = source->next) { - struct multi_pack_index *m = source->midx; - if (!m) - continue; + if (store->source->midx) { + struct multi_pack_index *m = store->source->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } -- GitLab From fe8507982f6457e3bff5d7b3b83686e6827efeed Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 12:59:02 +0200 Subject: [PATCH 057/110] packfile: only prepare owning store in `packfile_store_prepare()` When calling `packfile_store_prepare()` we prepare not only the provided packfile store, but also all those of all other sources part of the same object database. This was required when the store was still sitting on the object database level. But now that it sits on the source level it's not anymore. Refactor the code so that we only prepare the single packfile store passed by the caller. Adapt callers accordingly. Signed-off-by: Patrick Steinhardt --- builtin/grep.c | 14 ++++++++------ packfile.c | 19 +++++-------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 4855b871dd..5b8b87b1ac 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,12 +1213,14 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); - /* - * Note: `packfile_store_prepare()` prepares stores from all - * sources. This will be fixed in a subsequent commit. 
- */ - if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->sources->packfiles); + + if (startup_info->have_repository) { + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) + packfile_store_prepare(source->packfiles); + } start_threads(&opt); } else { diff --git a/packfile.c b/packfile.c index c46d53b75d..23d8f7cb93 100644 --- a/packfile.c +++ b/packfile.c @@ -1063,16 +1063,11 @@ static int sort_pack(const struct packfile_list_entry *a, void packfile_store_prepare(struct packfile_store *store) { - struct odb_source *source; - if (store->initialized) return; - odb_prepare_alternates(store->source->odb); - for (source = store->source->odb->sources; source; source = source->next) { - prepare_multi_pack_index_one(source); - prepare_packed_git_one(source); - } + prepare_multi_pack_index_one(store->source); + prepare_packed_git_one(store->source); sort_packs(&store->packs.head, sort_pack); for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) @@ -2098,15 +2093,11 @@ static int find_pack_entry(struct repository *r, { struct odb_source *source; - /* - * Note: `packfile_store_prepare()` prepares stores from all sources. - * This will be fixed in a subsequent commit. 
- */ - packfile_store_prepare(r->objects->sources->packfiles); - - for (source = r->objects->sources; source; source = source->next) + for (source = r->objects->sources; source; source = source->next) { + packfile_store_prepare(r->objects->sources->packfiles); if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; + } for (source = r->objects->sources; source; source = source->next) { struct packfile_list_entry *l; -- GitLab From 60b9dd1d0b95ec7153b4a1d98205c4056a0dd41b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:26:46 +0200 Subject: [PATCH 058/110] packfile: inline `find_kept_pack_entry()` The `find_kept_pack_entry()` function is only used in `has_object_kept_pack()`, which is only a trivial wrapper itself. Inline the former into the latter. Furthermore, reorder the code so that we can drop the declaration of the function in "packfile.h". This allows us to make the function file-local. Signed-off-by: Patrick Steinhardt --- packfile.c | 28 ++++++++++------------------ packfile.h | 6 ------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/packfile.c b/packfile.c index 23d8f7cb93..3bce1b150d 100644 --- a/packfile.c +++ b/packfile.c @@ -2215,12 +2215,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st return store->kept_cache.packs; } -int find_kept_pack_entry(struct repository *r, - const struct object_id *oid, - unsigned flags, - struct pack_entry *e) +int has_object_pack(struct repository *r, const struct object_id *oid) +{ + struct pack_entry e; + return find_pack_entry(r, oid, &e); +} + +int has_object_kept_pack(struct repository *r, const struct object_id *oid, + unsigned flags) { struct odb_source *source; + struct pack_entry e; for (source = r->objects->sources; source; source = source->next) { struct packed_git **cache; @@ -2229,7 +2234,7 @@ int find_kept_pack_entry(struct repository *r, for (; *cache; cache++) { struct packed_git *p = *cache; - if 
(fill_pack_entry(oid, e, p)) + if (fill_pack_entry(oid, &e, p)) return 1; } } @@ -2237,19 +2242,6 @@ int find_kept_pack_entry(struct repository *r, return 0; } -int has_object_pack(struct repository *r, const struct object_id *oid) -{ - struct pack_entry e; - return find_pack_entry(r, oid, &e); -} - -int has_object_kept_pack(struct repository *r, const struct object_id *oid, - unsigned flags) -{ - struct pack_entry e; - return find_kept_pack_entry(r, oid, flags, &e); -} - int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, enum for_each_object_flags flags) diff --git a/packfile.h b/packfile.h index 6872b16755..2fb87a26d6 100644 --- a/packfile.h +++ b/packfile.h @@ -444,12 +444,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -/* - * Iff a pack file in the given repository contains the object named by sha1, - * return true and store its location to e. - */ -int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); - int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -- GitLab From ef0b72561481dc26601579e911f7c1e4c4b1f914 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 24 Oct 2025 15:29:38 +0200 Subject: [PATCH 059/110] packfile: refactor `find_pack_entry()` to work on the packfile store The function `find_pack_entry()` doesn't work on a specific packfile store, but instead works on the whole repository. This causes a bit of a conceptual mismatch in its callers: - `packfile_store_freshen_object()` supposedly acts on a store, and its callers know to iterate through all sources already. - `packfile_store_read_object_info()` behaves likewise. 
The only exception that doesn't know to handle iteration through sources is `has_object_pack()`, but that function is trivial to adapt. Refactor the code so that `find_pack_entry()` works on the packfile store level instead. Signed-off-by: Patrick Steinhardt --- packfile.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/packfile.c b/packfile.c index 3bce1b150d..0e4c63e11d 100644 --- a/packfile.c +++ b/packfile.c @@ -2087,29 +2087,23 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -static int find_pack_entry(struct repository *r, +static int find_pack_entry(struct packfile_store *store, const struct object_id *oid, struct pack_entry *e) { - struct odb_source *source; - - for (source = r->objects->sources; source; source = source->next) { - packfile_store_prepare(r->objects->sources->packfiles); - if (source->midx && fill_midx_entry(source->midx, oid, e)) - return 1; - } + struct packfile_list_entry *l; - for (source = r->objects->sources; source; source = source->next) { - struct packfile_list_entry *l; + packfile_store_prepare(store); + if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + return 1; - for (l = source->packfiles->packs.head; l; l = l->next) { - struct packed_git *p = l->pack; + for (l = store->packs.head; l; l = l->next) { + struct packed_git *p = l->pack; - if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!source->packfiles->skip_mru_updates) - packfile_list_prepend(&source->packfiles->packs, p); - return 1; - } + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + if (!store->skip_mru_updates) + packfile_list_prepend(&store->packs, p); + return 1; } } @@ -2120,7 +2114,7 @@ int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid) { struct pack_entry e; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 0; if (e.p->is_cruft) return 
0; @@ -2141,7 +2135,7 @@ int packfile_store_read_object_info(struct packfile_store *store, struct pack_entry e; int rtype; - if (!find_pack_entry(store->source->odb->repo, oid, &e)) + if (!find_pack_entry(store, oid, &e)) return 1; /* @@ -2217,8 +2211,17 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st int has_object_pack(struct repository *r, const struct object_id *oid) { + struct odb_source *source; struct pack_entry e; - return find_pack_entry(r, oid, &e); + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + int ret = find_pack_entry(source->packfiles, oid, &e); + if (ret) + return ret; + } + + return 0; } int has_object_kept_pack(struct repository *r, const struct object_id *oid, -- GitLab From ed22cd57e4639f28d73c6f2bfaf9a9a15dcd986e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 19 Oct 2025 16:44:30 +0200 Subject: [PATCH 060/110] packfile: move MIDX into packfile store The multi-pack index still is tracked as a member of the object database source, but ultimately the MIDX is always tied to one specific packfile store. Move the structure into `struct packfile_store` accordingly. This ensures that the packfile store now keeps track of all data related to packfiles. 
Signed-off-by: Patrick Steinhardt --- midx.c | 14 +++++++------- odb.c | 8 +------- odb.h | 7 ------- packfile.c | 12 ++++++++---- packfile.h | 3 +++ 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/midx.c b/midx.c index dbb2aa68ba..fa7a7e5d13 100644 --- a/midx.c +++ b/midx.c @@ -96,7 +96,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { packfile_store_prepare(source->packfiles); - return source->midx; + return source->packfiles->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -709,12 +709,12 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (!r->settings.core_multi_pack_index) return 0; - if (source->midx) + if (source->packfiles->midx) return 1; - source->midx = load_multi_pack_index(source); + source->packfiles->midx = load_multi_pack_index(source); - return !!source->midx; + return !!source->packfiles->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -803,9 +803,9 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; + if (source->packfiles->midx) + close_midx(source->packfiles->midx); + source->packfiles->midx = NULL; } } diff --git a/odb.c b/odb.c index f159fbdd99..902251f9ed 100644 --- a/odb.c +++ b/odb.c @@ -1078,14 +1078,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - - for (source = o->sources; source; source = source->next) { + for (source = o->sources; source; source = source->next) packfile_store_close(source->packfiles); - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - close_commit_graph(o); } diff --git a/odb.h b/odb.h index c97b41c58c..300c3c0c46 100644 --- a/odb.h +++ b/odb.h @@ -54,13 +54,6 
@@ struct odb_source { /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* - * private data - * - * should only be accessed directly by packfile.c and midx.c - */ - struct multi_pack_index *midx; - /* * Figure out whether this is the local source of the owning * repository, which would typically be its ".git/objects" directory. diff --git a/packfile.c b/packfile.c index 0e4c63e11d..097dd8d85d 100644 --- a/packfile.c +++ b/packfile.c @@ -990,7 +990,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->source->midx && midx_contains_pack(data->source->midx, file_name))) { + !(data->source->packfiles->midx && + midx_contains_pack(data->source->packfiles->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); packfile_store_load_pack(data->source->packfiles, trimmed_path, data->source->local); @@ -1087,8 +1088,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - if (store->source->midx) { - struct multi_pack_index *m = store->source->midx; + if (store->midx) { + struct multi_pack_index *m = store->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } @@ -2094,7 +2095,7 @@ static int find_pack_entry(struct packfile_store *store, struct packfile_list_entry *l; packfile_store_prepare(store); - if (store->source->midx && fill_midx_entry(store->source->midx, oid, e)) + if (store->midx && fill_midx_entry(store->midx, oid, e)) return 1; for (l = store->packs.head; l; l = l->next) { @@ -2454,6 +2455,9 @@ void packfile_store_close(struct packfile_store *store) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } + if (store->midx) + close_midx(store->midx); + store->midx = NULL; } struct odb_packed_read_stream { diff --git a/packfile.h b/packfile.h index 
2fb87a26d6..fb832a33e3 100644 --- a/packfile.h +++ b/packfile.h @@ -100,6 +100,9 @@ struct packfile_store { unsigned flags; } kept_cache; + /* The multi-pack index that belongs to this specific packfile store. */ + struct multi_pack_index *midx; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. -- GitLab From 1660496fc400b3956b4abe7bfc40351c9eddc168 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:10 +0100 Subject: [PATCH 061/110] odb: refactor parsing of alternates to be self-contained Parsing of the alternates file and environment variable is currently split up across multiple different functions and is entangled with `link_alt_odb_entries()`, which is responsible for linking the parsed object database sources. This results in two downsides: - We have mutual recursion between parsing alternates and linking them into the object database. This is because we also parse alternates that the newly added sources may have. - We mix up the actual logic to parse the data and to link them into place. Refactor the logic so that parsing of the alternates file is entirely self-contained. Note that this doesn't yet fix the above two issues, but it is a necessary step to get there. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 70 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/odb.c b/odb.c index dc8f292f3d..9785f62cb6 100644 --- a/odb.c +++ b/odb.c @@ -216,39 +216,50 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, return alternate; } -static const char *parse_alt_odb_entry(const char *string, - int sep, - struct strbuf *out) +static void parse_alternates(const char *string, + int sep, + struct strvec *out) { - const char *end; + struct strbuf buf = STRBUF_INIT; - strbuf_reset(out); + while (*string) { + const char *end; + + strbuf_reset(&buf); + + if (*string == '#') { + /* comment; consume up to next separator */ + end = strchrnul(string, sep); + } else if (*string == '"' && !unquote_c_style(&buf, string, &end)) { + /* + * quoted path; unquote_c_style has copied the + * data for us and set "end". Broken quoting (e.g., + * an entry that doesn't end with a quote) falls + * back to the unquoted case below. + */ + } else { + /* normal, unquoted path */ + end = strchrnul(string, sep); + strbuf_add(&buf, string, end - string); + } - if (*string == '#') { - /* comment; consume up to next separator */ - end = strchrnul(string, sep); - } else if (*string == '"' && !unquote_c_style(out, string, &end)) { - /* - * quoted path; unquote_c_style has copied the - * data for us and set "end". Broken quoting (e.g., - * an entry that doesn't end with a quote) falls - * back to the unquoted case below. 
- */ - } else { - /* normal, unquoted path */ - end = strchrnul(string, sep); - strbuf_add(out, string, end - string); + if (*end) + end++; + string = end; + + if (!buf.len) + continue; + + strvec_push(out, buf.buf); } - if (*end) - end++; - return end; + strbuf_release(&buf); } static void link_alt_odb_entries(struct object_database *odb, const char *alt, int sep, const char *relative_base, int depth) { - struct strbuf dir = STRBUF_INIT; + struct strvec alternates = STRVEC_INIT; if (!alt || !*alt) return; @@ -259,13 +270,12 @@ static void link_alt_odb_entries(struct object_database *odb, const char *alt, return; } - while (*alt) { - alt = parse_alt_odb_entry(alt, sep, &dir); - if (!dir.len) - continue; - link_alt_odb_entry(odb, dir.buf, relative_base, depth); - } - strbuf_release(&dir); + parse_alternates(alt, sep, &alternates); + + for (size_t i = 0; i < alternates.nr; i++) + link_alt_odb_entry(odb, alternates.v[i], relative_base, depth); + + strvec_clear(&alternates); } static void read_info_alternates(struct object_database *odb, -- GitLab From 84cec5276e70bdabd651a3d0a250d006434d639f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:11 +0100 Subject: [PATCH 062/110] odb: resolve relative alternative paths when parsing Parsing alternates and resolving potential relative paths is currently handled in two separate steps. This has the effect that the logic to retrieve alternates is not entirely self-contained. We want it to be just that though so that we can eventually move the logic to list alternates into the `struct odb_source`. Move the logic to resolve relative alternative paths into `parse_alternates()`. Besides bringing us a step closer towards the above goal, it also neatly separates concerns of generating the list of alternatives and linking them into the object database. Note that we ignore any errors when the relative path cannot be resolved. 
This isn't really a change in behaviour though: if the path cannot be resolved to a directory then `alt_odb_usable()` still knows to bail out. While at it, rename the function to `odb_add_alternate_recursively()` to more clearly indicate what its intent is and to align it with modern terminology. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 64 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/odb.c b/odb.c index 9785f62cb6..699bdbffd1 100644 --- a/odb.c +++ b/odb.c @@ -159,44 +159,21 @@ static struct odb_source *odb_source_new(struct object_database *odb, return source; } -static struct odb_source *link_alt_odb_entry(struct object_database *odb, - const char *dir, - const char *relative_base, - int depth) +static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, + const char *source, + int depth) { struct odb_source *alternate = NULL; - struct strbuf pathbuf = STRBUF_INIT; struct strbuf tmp = STRBUF_INIT; khiter_t pos; int ret; - if (!is_absolute_path(dir) && relative_base) { - strbuf_realpath(&pathbuf, relative_base, 1); - strbuf_addch(&pathbuf, '/'); - } - strbuf_addstr(&pathbuf, dir); - - if (!strbuf_realpath(&tmp, pathbuf.buf, 0)) { - error(_("unable to normalize alternate object path: %s"), - pathbuf.buf); - goto error; - } - strbuf_swap(&pathbuf, &tmp); - - /* - * The trailing slash after the directory name is given by - * this function at the end. Remove duplicates. 
- */ - while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/') - strbuf_setlen(&pathbuf, pathbuf.len - 1); - - strbuf_reset(&tmp); strbuf_realpath(&tmp, odb->sources->path, 1); - if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) + if (!alt_odb_usable(odb, source, tmp.buf)) goto error; - alternate = odb_source_new(odb, pathbuf.buf, false); + alternate = odb_source_new(odb, source, false); /* add the alternate entry */ *odb->sources_tail = alternate; @@ -212,20 +189,22 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, error: strbuf_release(&tmp); - strbuf_release(&pathbuf); return alternate; } static void parse_alternates(const char *string, int sep, + const char *relative_base, struct strvec *out) { + struct strbuf pathbuf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT; while (*string) { const char *end; strbuf_reset(&buf); + strbuf_reset(&pathbuf); if (*string == '#') { /* comment; consume up to next separator */ @@ -250,9 +229,30 @@ static void parse_alternates(const char *string, if (!buf.len) continue; + if (!is_absolute_path(buf.buf) && relative_base) { + strbuf_realpath(&pathbuf, relative_base, 1); + strbuf_addch(&pathbuf, '/'); + } + strbuf_addbuf(&pathbuf, &buf); + + strbuf_reset(&buf); + if (!strbuf_realpath(&buf, pathbuf.buf, 0)) { + error(_("unable to normalize alternate object path: %s"), + pathbuf.buf); + continue; + } + + /* + * The trailing slash after the directory name is given by + * this function at the end. Remove duplicates. 
+ */ + while (buf.len && buf.buf[buf.len - 1] == '/') + strbuf_setlen(&buf, buf.len - 1); + strvec_push(out, buf.buf); } + strbuf_release(&pathbuf); strbuf_release(&buf); } @@ -270,10 +270,10 @@ static void link_alt_odb_entries(struct object_database *odb, const char *alt, return; } - parse_alternates(alt, sep, &alternates); + parse_alternates(alt, sep, relative_base, &alternates); for (size_t i = 0; i < alternates.nr; i++) - link_alt_odb_entry(odb, alternates.v[i], relative_base, depth); + odb_add_alternate_recursively(odb, alternates.v[i], depth); strvec_clear(&alternates); } @@ -348,7 +348,7 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, * overwritten when they are. */ odb_prepare_alternates(odb); - return link_alt_odb_entry(odb, dir, NULL, 0); + return odb_add_alternate_recursively(odb, dir, 0); } struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, -- GitLab From d17673ef4285d3d5f70909136f1ffe2745bcb71c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:12 +0100 Subject: [PATCH 063/110] odb: move computation of normalized objdir into `alt_odb_usable()` The function `alt_odb_usable()` receives as input the object database, the path it's supposed to determine usability for as well as the normalized path of the main object directory of the repository. The last part is derived by the function's caller from the object database. As we already pass the object database to `alt_odb_usable()` it is redundant information. Drop the extra parameter and compute the normalized object directory in the function itself. While at it, rename the function to `odb_is_source_usable()` to align it with modern terminology. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/odb.c b/odb.c index 699bdbffd1..e314f86c3b 100644 --- a/odb.c +++ b/odb.c @@ -89,17 +89,20 @@ int odb_mkstemp(struct object_database *odb, /* * Return non-zero iff the path is usable as an alternate object database. */ -static int alt_odb_usable(struct object_database *o, const char *path, - const char *normalized_objdir) +static bool odb_is_source_usable(struct object_database *o, const char *path) { int r; + struct strbuf normalized_objdir = STRBUF_INIT; + bool usable = false; + + strbuf_realpath(&normalized_objdir, o->sources->path, 1); /* Detect cases where alternate disappeared */ if (!is_directory(path)) { error(_("object directory %s does not exist; " "check .git/objects/info/alternates"), path); - return 0; + goto out; } /* @@ -116,13 +119,17 @@ static int alt_odb_usable(struct object_database *o, const char *path, kh_value(o->source_by_path, p) = o->sources; } - if (fspatheq(path, normalized_objdir)) - return 0; + if (fspatheq(path, normalized_objdir.buf)) + goto out; if (kh_get_odb_path_map(o->source_by_path, path) < kh_end(o->source_by_path)) - return 0; + goto out; + + usable = true; - return 1; +out: + strbuf_release(&normalized_objdir); + return usable; } /* @@ -164,13 +171,10 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * int depth) { struct odb_source *alternate = NULL; - struct strbuf tmp = STRBUF_INIT; khiter_t pos; int ret; - strbuf_realpath(&tmp, odb->sources->path, 1); - - if (!alt_odb_usable(odb, source, tmp.buf)) + if (!odb_is_source_usable(odb, source)) goto error; alternate = odb_source_new(odb, source, false); @@ -188,7 +192,6 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * read_info_alternates(odb, alternate->path, depth + 1); error: - strbuf_release(&tmp); return alternate; } -- GitLab From 
dccfb39cdb68e47a4c7103b3c465cde91c5f9f56 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:13 +0100 Subject: [PATCH 064/110] odb: stop splitting alternate in `odb_add_to_alternates_file()` When calling `odb_add_to_alternates_file()` we know to add the newly added source to the object database in case we have already loaded alternates. This is done so that we can make its objects accessible immediately without having to fully reload all alternates. The way we do this though is to call `link_alt_odb_entries()`, which adds _multiple_ sources to the object database source in case we have newline-separated entries. This behaviour is not documented in the function documentation of `odb_add_to_alternates_file()`, and all callers only ever pass a single directory to it. It's thus entirely surprising and a conceptual mismatch. Fix this issue by directly calling `odb_add_alternate_recursively()` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/odb.c b/odb.c index e314f86c3b..3112eab5d0 100644 --- a/odb.c +++ b/odb.c @@ -338,7 +338,7 @@ void odb_add_to_alternates_file(struct object_database *odb, if (commit_lock_file(&lock)) die_errno(_("unable to move new alternates file into place")); if (odb->loaded_alternates) - link_alt_odb_entries(odb, dir, '\n', NULL, 0); + odb_add_alternate_recursively(odb, dir, 0); } free(alts); } -- GitLab From 430e0e0f2e75673206321f6f4942c0bc7856c8b7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:14 +0100 Subject: [PATCH 065/110] odb: remove mutual recursion when parsing alternates When adding an alternative object database source we not only have to consider the added source itself, but we also have to add _its_ sources to our database. We implement this via mutual recursion: 1. We first call `link_alt_odb_entries()`. 2. `link_alt_odb_entries()` calls `parse_alternates()`. 3. 
We then add each alternate via `odb_add_alternate_recursively()`. 4. `odb_add_alternate_recursively()` calls `link_alt_odb_entries()` again. This flow is somewhat hard to follow, but more importantly it means that parsing of alternates is somewhat tied to the recursive behaviour. Refactor the function to remove the mutual recursion between adding sources and parsing alternates. The parsing step thus becomes completely oblivious to the fact that there is recursive behaviour going on at all. The recursion is handled by `odb_add_alternate_recursively()` instead, which now recurses with itself. This refactoring allows us to move parsing of alternates into object database sources in a subsequent step. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 60 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/odb.c b/odb.c index 3112eab5d0..59944d4649 100644 --- a/odb.c +++ b/odb.c @@ -147,9 +147,8 @@ static bool odb_is_source_usable(struct object_database *o, const char *path) * of the object ID, an extra slash for the first level indirection, and * the terminating NUL. 
*/ -static void read_info_alternates(struct object_database *odb, - const char *relative_base, - int depth); +static void read_info_alternates(const char *relative_base, + struct strvec *out); static struct odb_source *odb_source_new(struct object_database *odb, const char *path, @@ -171,6 +170,7 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * int depth) { struct odb_source *alternate = NULL; + struct strvec sources = STRVEC_INIT; khiter_t pos; int ret; @@ -189,9 +189,17 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * kh_value(odb->source_by_path, pos) = alternate; /* recursively add alternates */ - read_info_alternates(odb, alternate->path, depth + 1); + read_info_alternates(alternate->path, &sources); + if (sources.nr && depth + 1 > 5) { + error(_("%s: ignoring alternate object stores, nesting too deep"), + source); + } else { + for (size_t i = 0; i < sources.nr; i++) + odb_add_alternate_recursively(odb, sources.v[i], depth + 1); + } error: + strvec_clear(&sources); return alternate; } @@ -203,6 +211,9 @@ static void parse_alternates(const char *string, struct strbuf pathbuf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT; + if (!string || !*string) + return; + while (*string) { const char *end; @@ -259,34 +270,11 @@ static void parse_alternates(const char *string, strbuf_release(&buf); } -static void link_alt_odb_entries(struct object_database *odb, const char *alt, - int sep, const char *relative_base, int depth) +static void read_info_alternates(const char *relative_base, + struct strvec *out) { - struct strvec alternates = STRVEC_INIT; - - if (!alt || !*alt) - return; - - if (depth > 5) { - error(_("%s: ignoring alternate object stores, nesting too deep"), - relative_base); - return; - } - - parse_alternates(alt, sep, relative_base, &alternates); - - for (size_t i = 0; i < alternates.nr; i++) - odb_add_alternate_recursively(odb, alternates.v[i], depth); - - strvec_clear(&alternates); -} - 
-static void read_info_alternates(struct object_database *odb, - const char *relative_base, - int depth) -{ - char *path; struct strbuf buf = STRBUF_INIT; + char *path; path = xstrfmt("%s/info/alternates", relative_base); if (strbuf_read_file(&buf, path, 1024) < 0) { @@ -294,8 +282,8 @@ static void read_info_alternates(struct object_database *odb, free(path); return; } + parse_alternates(buf.buf, '\n', relative_base, out); - link_alt_odb_entries(odb, buf.buf, '\n', relative_base, depth); strbuf_release(&buf); free(path); } @@ -622,13 +610,19 @@ int odb_for_each_alternate(struct object_database *odb, void odb_prepare_alternates(struct object_database *odb) { + struct strvec sources = STRVEC_INIT; + if (odb->loaded_alternates) return; - link_alt_odb_entries(odb, odb->alternate_db, PATH_SEP, NULL, 0); + parse_alternates(odb->alternate_db, PATH_SEP, NULL, &sources); + read_info_alternates(odb->sources->path, &sources); + for (size_t i = 0; i < sources.nr; i++) + odb_add_alternate_recursively(odb, sources.v[i], 0); - read_info_alternates(odb, odb->sources->path, 0); odb->loaded_alternates = 1; + + strvec_clear(&sources); } int odb_has_alternates(struct object_database *odb) -- GitLab From 3f42555322f86f17a2dac4f585edab1d84f3df57 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:15 +0100 Subject: [PATCH 066/110] odb: drop forward declaration of `read_info_alternates()` Now that we have removed the mutual recursion in the preceding commit it is not necessary anymore to have a forward declaration of the `read_info_alternates()` function. Move the function and its dependencies further up so that we can remove it. Note that this commit also removes the function documentation of `read_info_alternates()`. It's unclear what it's documenting, but it for sure isn't documenting the modern behaviour of the function anymore. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 125 +++++++++++++++++++++++++--------------------------------- 1 file changed, 54 insertions(+), 71 deletions(-) diff --git a/odb.c b/odb.c index 59944d4649..dcf4a62cd2 100644 --- a/odb.c +++ b/odb.c @@ -132,77 +132,6 @@ static bool odb_is_source_usable(struct object_database *o, const char *path) return usable; } -/* - * Prepare alternate object database registry. - * - * The variable alt_odb_list points at the list of struct - * odb_source. The elements on this list come from - * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT - * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates, - * whose contents is similar to that environment variable but can be - * LF separated. Its base points at a statically allocated buffer that - * contains "/the/directory/corresponding/to/.git/objects/...", while - * its name points just after the slash at the end of ".git/objects/" - * in the example above, and has enough space to hold all hex characters - * of the object ID, an extra slash for the first level indirection, and - * the terminating NUL. 
- */ -static void read_info_alternates(const char *relative_base, - struct strvec *out); - -static struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) -{ - struct odb_source *source; - - CALLOC_ARRAY(source, 1); - source->odb = odb; - source->local = local; - source->path = xstrdup(path); - source->loose = odb_source_loose_new(source); - - return source; -} - -static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, - const char *source, - int depth) -{ - struct odb_source *alternate = NULL; - struct strvec sources = STRVEC_INIT; - khiter_t pos; - int ret; - - if (!odb_is_source_usable(odb, source)) - goto error; - - alternate = odb_source_new(odb, source, false); - - /* add the alternate entry */ - *odb->sources_tail = alternate; - odb->sources_tail = &(alternate->next); - - pos = kh_put_odb_path_map(odb->source_by_path, alternate->path, &ret); - if (!ret) - BUG("source must not yet exist"); - kh_value(odb->source_by_path, pos) = alternate; - - /* recursively add alternates */ - read_info_alternates(alternate->path, &sources); - if (sources.nr && depth + 1 > 5) { - error(_("%s: ignoring alternate object stores, nesting too deep"), - source); - } else { - for (size_t i = 0; i < sources.nr; i++) - odb_add_alternate_recursively(odb, sources.v[i], depth + 1); - } - - error: - strvec_clear(&sources); - return alternate; -} - static void parse_alternates(const char *string, int sep, const char *relative_base, @@ -288,6 +217,60 @@ static void read_info_alternates(const char *relative_base, free(path); } + +static struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source *source; + + CALLOC_ARRAY(source, 1); + source->odb = odb; + source->local = local; + source->path = xstrdup(path); + source->loose = odb_source_loose_new(source); + + return source; +} + +static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, 
+ const char *source, + int depth) +{ + struct odb_source *alternate = NULL; + struct strvec sources = STRVEC_INIT; + khiter_t pos; + int ret; + + if (!odb_is_source_usable(odb, source)) + goto error; + + alternate = odb_source_new(odb, source, false); + + /* add the alternate entry */ + *odb->sources_tail = alternate; + odb->sources_tail = &(alternate->next); + + pos = kh_put_odb_path_map(odb->source_by_path, alternate->path, &ret); + if (!ret) + BUG("source must not yet exist"); + kh_value(odb->source_by_path, pos) = alternate; + + /* recursively add alternates */ + read_info_alternates(alternate->path, &sources); + if (sources.nr && depth + 1 > 5) { + error(_("%s: ignoring alternate object stores, nesting too deep"), + source); + } else { + for (size_t i = 0; i < sources.nr; i++) + odb_add_alternate_recursively(odb, sources.v[i], depth + 1); + } + + error: + strvec_clear(&sources); + return alternate; +} + void odb_add_to_alternates_file(struct object_database *odb, const char *dir) { -- GitLab From f7dbd9fb2ea9b14b4df0949411205f4b5d284b41 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:16 +0100 Subject: [PATCH 067/110] odb: read alternates via sources Adapt how we read alternates so that the interface is structured around the object database source we're reading from. This will eventually allow us to abstract away this behaviour with pluggable object databases so that every format can have its own mechanism for listing alternates. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/odb.c b/odb.c index dcf4a62cd2..c5ba26b85f 100644 --- a/odb.c +++ b/odb.c @@ -199,19 +199,19 @@ static void parse_alternates(const char *string, strbuf_release(&buf); } -static void read_info_alternates(const char *relative_base, - struct strvec *out) +static void odb_source_read_alternates(struct odb_source *source, + struct strvec *out) { struct strbuf buf = STRBUF_INIT; char *path; - path = xstrfmt("%s/info/alternates", relative_base); + path = xstrfmt("%s/info/alternates", source->path); if (strbuf_read_file(&buf, path, 1024) < 0) { warn_on_fopen_errors(path); free(path); return; } - parse_alternates(buf.buf, '\n', relative_base, out); + parse_alternates(buf.buf, '\n', source->path, out); strbuf_release(&buf); free(path); @@ -257,7 +257,7 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * kh_value(odb->source_by_path, pos) = alternate; /* recursively add alternates */ - read_info_alternates(alternate->path, &sources); + odb_source_read_alternates(alternate, &sources); if (sources.nr && depth + 1 > 5) { error(_("%s: ignoring alternate object stores, nesting too deep"), source); @@ -599,7 +599,7 @@ void odb_prepare_alternates(struct object_database *odb) return; parse_alternates(odb->alternate_db, PATH_SEP, NULL, &sources); - read_info_alternates(odb->sources->path, &sources); + odb_source_read_alternates(odb->sources, &sources); for (size_t i = 0; i < sources.nr; i++) odb_add_alternate_recursively(odb, sources.v[i], 0); -- GitLab From 221a877d4785030e07d20977418609257fd606d8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 10:30:17 +0100 Subject: [PATCH 068/110] odb: write alternates via sources Refactor writing of alternates so that the actual business logic is structured around the object database source we want to write the alternate to. 
Same as with the preceding commit, this will eventually allow us to have different logic for writing alternates depending on the backend used. Note that after the refactoring we start to call `odb_add_alternate_recursively()` unconditionally. This is fine though as we know to skip adding sources that are tracked already. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 51 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/odb.c b/odb.c index c5ba26b85f..cc7f832465 100644 --- a/odb.c +++ b/odb.c @@ -271,25 +271,28 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * return alternate; } -void odb_add_to_alternates_file(struct object_database *odb, - const char *dir) +static int odb_source_write_alternate(struct odb_source *source, + const char *alternate) { struct lock_file lock = LOCK_INIT; - char *alts = repo_git_path(odb->repo, "objects/info/alternates"); + char *path = xstrfmt("%s/%s", source->path, "info/alternates"); FILE *in, *out; int found = 0; + int ret; - hold_lock_file_for_update(&lock, alts, LOCK_DIE_ON_ERROR); + hold_lock_file_for_update(&lock, path, LOCK_DIE_ON_ERROR); out = fdopen_lock_file(&lock, "w"); - if (!out) - die_errno(_("unable to fdopen alternates lockfile")); + if (!out) { + ret = error_errno(_("unable to fdopen alternates lockfile")); + goto out; + } - in = fopen(alts, "r"); + in = fopen(path, "r"); if (in) { struct strbuf line = STRBUF_INIT; while (strbuf_getline(&line, in) != EOF) { - if (!strcmp(dir, line.buf)) { + if (!strcmp(alternate, line.buf)) { found = 1; break; } @@ -298,20 +301,36 @@ void odb_add_to_alternates_file(struct object_database *odb, strbuf_release(&line); fclose(in); + } else if (errno != ENOENT) { + ret = error_errno(_("unable to read alternates file")); + goto out; } - else if (errno != ENOENT) - die_errno(_("unable to read alternates file")); if (found) { rollback_lock_file(&lock); } else { - 
fprintf_or_die(out, "%s\n", dir); - if (commit_lock_file(&lock)) - die_errno(_("unable to move new alternates file into place")); - if (odb->loaded_alternates) - odb_add_alternate_recursively(odb, dir, 0); + fprintf_or_die(out, "%s\n", alternate); + if (commit_lock_file(&lock)) { + ret = error_errno(_("unable to move new alternates file into place")); + goto out; + } } - free(alts); + + ret = 0; + +out: + free(path); + return ret; +} + +void odb_add_to_alternates_file(struct object_database *odb, + const char *dir) +{ + int ret = odb_source_write_alternate(odb->sources, dir); + if (ret < 0) + die(NULL); + if (odb->loaded_alternates) + odb_add_alternate_recursively(odb, dir, 0); } struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, -- GitLab From 4f30636198fc443f7364242244133d085e546486 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 09:59:27 +0100 Subject: [PATCH 069/110] odb: rename `FOR_EACH_OBJECT_*` flags Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This prepares us for a new upcoming `odb_for_each_object()` function and ensures that both the function and its flags have the same prefix. 
--- builtin/cat-file.c | 2 +- builtin/pack-objects.c | 10 +++++----- commit-graph.c | 4 ++-- object-file.c | 4 ++-- object-file.h | 2 +- odb.h | 13 +++++++------ packfile.c | 20 ++++++++++---------- packfile.h | 4 ++-- reachable.c | 8 ++++---- repack-promisor.c | 2 +- revision.c | 2 +- 11 files changed, 36 insertions(+), 35 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 505ddaa12f..c3de953dee 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -923,7 +923,7 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; batch_each_object(opt, batch_unordered_object, - FOR_EACH_OBJECT_PACK_ORDER, &cb); + ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7fd90a9996..4eb83c28d1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) for_each_object_in_pack(p, add_object_entry_from_pack, revs, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); } strbuf_release(&buf); @@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void) if (for_each_packed_object(to_pack.repo, add_object_in_unpacked_pack, NULL, - FOR_EACH_OBJECT_PACK_ORDER | - FOR_EACH_OBJECT_LOCAL_ONLY | - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) die(_("cannot open pack index")); } diff --git a/commit-graph.c b/commit-graph.c index 80be2ff2c3..181f2cc498 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1933,7 +1933,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, goto cleanup; } for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } @@ -1971,7 
+1971,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); for_each_packed_object(ctx->r, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); diff --git a/object-file.c b/object-file.c index af1c3f972d..414377b713 100644 --- a/object-file.c +++ b/object-file.c @@ -1762,7 +1762,7 @@ int for_each_loose_file_in_source(struct odb_source *source, int for_each_loose_object(struct object_database *odb, each_loose_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { struct odb_source *source; @@ -1773,7 +1773,7 @@ int for_each_loose_object(struct object_database *odb, if (r) return r; - if (flags & FOR_EACH_OBJECT_LOCAL_ONLY) + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) break; } diff --git a/object-file.h b/object-file.h index 1229d5f675..42bb50e10c 100644 --- a/object-file.h +++ b/object-file.h @@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source, */ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); /** diff --git a/odb.h b/odb.h index 300c3c0c46..e079d79c26 100644 --- a/odb.h +++ b/odb.h @@ -438,24 +438,25 @@ static inline void obj_read_unlock(void) if(obj_read_use_lock) pthread_mutex_unlock(&obj_read_mutex); } + /* Flags for for_each_*_object(). */ -enum for_each_object_flags { +enum odb_for_each_object_flags { /* Iterate only over local objects, not alternates. */ - FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), + ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), /* Only iterate over packs obtained from the promisor remote. 
*/ - FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), /* * Visit objects within a pack in packfile order rather than .idx order */ - FOR_EACH_OBJECT_PACK_ORDER = (1<<2), + ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2), /* Only iterate over packs that are not marked as kept in-core. */ - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), /* Only iterate over packs that do not have .keep files. */ - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; enum { diff --git a/packfile.c b/packfile.c index 097dd8d85d..ba9c005041 100644 --- a/packfile.c +++ b/packfile.c @@ -2248,12 +2248,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum for_each_object_flags flags) + enum odb_for_each_object_flags flags) { uint32_t i; int r = 0; - if (flags & FOR_EACH_OBJECT_PACK_ORDER) { + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) { if (load_pack_revindex(p->repo, p)) return -1; } @@ -2274,7 +2274,7 @@ int for_each_object_in_pack(struct packed_git *p, * - in pack-order, it is pack position, which we must * convert to an index position in order to get the oid. 
*/ - if (flags & FOR_EACH_OBJECT_PACK_ORDER) + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) index_pos = pack_pos_to_index(p, i); else index_pos = i; @@ -2291,7 +2291,7 @@ int for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags) + void *data, enum odb_for_each_object_flags flags) { struct odb_source *source; int r = 0; @@ -2307,15 +2307,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2401,8 +2401,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (repo_has_promisor_remote(r)) { for_each_packed_object(r, add_promisor_object, &promisor_objects, - FOR_EACH_OBJECT_PROMISOR_ONLY | - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | + ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } diff --git a/packfile.h b/packfile.h index fb832a33e3..b9e6051dbc 100644 --- a/packfile.h +++ b/packfile.h @@ -338,9 +338,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum for_each_object_flags flags); + enum odb_for_each_object_flags flags); int 
for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags); + void *data, enum odb_for_each_object_flags flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 diff --git a/reachable.c b/reachable.c index 4b532039d5..82676b2668 100644 --- a/reachable.c +++ b/reachable.c @@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum for_each_object_flags flags; + enum odb_for_each_object_flags flags; int r; data.revs = revs; @@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, data.extra_recent_oids_loaded = 0; r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - FOR_EACH_OBJECT_LOCAL_ONLY); + ODB_FOR_EACH_OBJECT_LOCAL_ONLY); if (r) goto done; - flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER; + flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) - flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; + flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); diff --git a/repack-promisor.c b/repack-promisor.c index ee6e0669f6..45c330b9a5 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo, ctx.cmd = &cmd; ctx.algop = repo->hash_algo; for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/revision.c b/revision.c index 64d223a7c6..a2b8cd178d 100644 --- a/revision.c +++ b/revision.c @@ -3961,7 +3961,7 @@ int prepare_revision_walk(struct rev_info *revs) if (revs->exclude_promisor_objects) { for_each_packed_object(revs->repo, mark_uninteresting, revs, - FOR_EACH_OBJECT_PROMISOR_ONLY); 
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); } if (!revs->reflog_info) -- GitLab From 81e38deea383cb19bf905bfe8c61e6966e626120 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 10:44:36 +0100 Subject: [PATCH 070/110] odb: fix flags parameter to be unsigned The `flags` parameter accepted by various `for_each_object()` functions is a bitfield of multiple flags. Such parameters are typically unsigned in the Git codebase, but we use `enum odb_for_each_object_flags` in some places. Adapt these function signatures to use the correct type. --- object-file.c | 3 ++- object-file.h | 3 ++- packfile.c | 4 ++-- packfile.h | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 414377b713..d8db3ff505 100644 --- a/object-file.c +++ b/object-file.c @@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags) + struct object_info *oi, + unsigned flags) { int status = 0; int fd; diff --git a/object-file.h b/object-file.h index 42bb50e10c..2acf19fb91 100644 --- a/object-file.h +++ b/object-file.h @@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source); int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags); + struct object_info *oi, + unsigned flags); int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct odb_source *source, diff --git a/packfile.c b/packfile.c index ba9c005041..ece1875424 100644 --- a/packfile.c +++ b/packfile.c @@ -2248,7 +2248,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum odb_for_each_object_flags flags) + unsigned flags) { uint32_t i; int r = 0; @@ -2291,7 +2291,7 @@ int 
for_each_object_in_pack(struct packed_git *p, } int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags) + void *data, unsigned flags) { struct odb_source *source; int r = 0; diff --git a/packfile.h b/packfile.h index b9e6051dbc..f034296c03 100644 --- a/packfile.h +++ b/packfile.h @@ -338,9 +338,9 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum odb_for_each_object_flags flags); + unsigned flags); int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum odb_for_each_object_flags flags); + void *data, unsigned flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 -- GitLab From 1806e852afcaa823c85811a633d59ae0dec384bb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 10:45:15 +0100 Subject: [PATCH 071/110] object-file: extract function to read object info from path Extract a new function that allows us to read object info for a specific loose object via a user-supplied path. This function will be used in a subsequent commit. Note that this also allows us to drop `stat_loose_object()`, which is a simple wrapper around `odb_loose_path()` plus lstat(3p). --- object-file.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index d8db3ff505..9b65697e41 100644 --- a/object-file.c +++ b/object-file.c @@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in given source. - * Returns 0 on success, negative on failure. + * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. Returns the file descriptor on success, negative on failure. 
* * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, - struct stat *st, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - - *path = odb_loose_path(loose->source, &buf, oid); - if (!lstat(*path, st)) - return 0; - - return -1; -} - -/* - * Like stat_loose_object(), but actually open the object and return the - * descriptor. See the caveats on the "path" parameter above. - */ static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { @@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int odb_source_loose_read_object_info(struct odb_source *source, +static int read_object_info_from_path(struct odb_source *source, + const char *path, const struct object_id *oid, struct object_info *oi, unsigned flags) @@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source, int status = 0; int fd; unsigned long mapsize; - const char *path; void *map; git_zstream stream; char hdr[MAX_HEADER_LEN]; @@ -442,14 +425,14 @@ int odb_source_loose_read_object_info(struct odb_source *source, struct stat st; if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) return quick_has_loose(source->loose, oid) ? 
0 : -1; - if (stat_loose_object(source->loose, oid, &st, &path) < 0) + if (lstat(path, &st)) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; return 0; } - fd = open_loose_object(source->loose, oid, &path); + fd = git_open(path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); @@ -507,6 +490,16 @@ int odb_source_loose_read_object_info(struct odb_source *source, return status; } +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + static struct strbuf buf = STRBUF_INIT; + odb_loose_path(source, &buf, oid); + return read_object_info_from_path(source, buf.buf, oid, oi, flags); +} + static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, -- GitLab From fd989e58f57d1585a7a4ebc978ba34fe6efe1b02 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 10 Dec 2025 15:51:15 +0100 Subject: [PATCH 072/110] object-file: always set OI_LOOSE when reading object info There are some early returns in `read_object_info_from_path()` in cases where we don't have to open the object itself. These return paths do not set `struct object_info::whence` to `OI_LOOSE` though, so it becomes impossible for the caller to tell the format of such an object. Nobody seems to care about this right now. But in a subsequent change we're going to introduce a new `odb_for_each_object()` function where we will reuse a single object info structure across multiple calls, and here it is important to always set the field. Prepare for this change by always setting `whence` on success. 
--- object-file.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/object-file.c b/object-file.c index 9b65697e41..c4c7a2e66f 100644 --- a/object-file.c +++ b/object-file.c @@ -423,12 +423,21 @@ static int read_object_info_from_path(struct odb_source *source, */ if (!oi->typep && !oi->sizep && !oi->contentp) { struct stat st; - if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(source->loose, oid) ? 0 : -1; + + if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) { + status = quick_has_loose(source->loose, oid) ? 0 : -1; + if (!status) + oi->whence = OI_LOOSE; + return status; + } + if (lstat(path, &st)) return -1; + if (oi->disk_sizep) *oi->disk_sizep = st.st_size; + + oi->whence = OI_LOOSE; return 0; } -- GitLab From 762ca6f45d7957df8760c1172742b42e0cfaede8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 10:30:10 +0100 Subject: [PATCH 073/110] object-file: introduce function to iterate through objects We have multiple divergent interfaces to iterate through objects of a specific backend: - `for_each_loose_object()` yields all loose objects. - `for_each_packed_object()` (somewhat obviously) yields all packed objects. These functions have different function signatures, which makes it hard to create a common abstraction layer that covers both of these. Introduce a new function `odb_source_loose_for_each_object()` to plug this gap. This function doesn't take any data specific to loose objects, but instead it accepts a `struct object_info` that will be populated the exact same as if `odb_source_loose_read_object()` was called. The benefit of this new interface is that we can continue to pass backend-specific data, as `struct object_info` contains a union for these exact use cases. This will allow us to unify how we iterate through objects across both loose and packed objects in a subsequent commit. 
The `for_each_loose_object()` function continues to exist for now, but it will be removed at the end of this patch series. --- object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++ object-file.h | 11 +++++++++++ odb.h | 12 ++++++++++++ 3 files changed, 64 insertions(+) diff --git a/object-file.c b/object-file.c index c4c7a2e66f..ebb5312696 100644 --- a/object-file.c +++ b/object-file.c @@ -1783,6 +1783,47 @@ int for_each_loose_object(struct object_database *odb, return 0; } +struct for_each_object_wrapper_data { + struct odb_source *source; + struct object_info *oi; + unsigned flags; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->oi && + read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0) + return -1; + return data->cb(oid, data->oi, data->cb_data); +} + +int odb_source_loose_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct for_each_object_wrapper_data data = { + .source = source, + .oi = oi, + .flags = flags, + .cb = cb, + .cb_data = cb_data, + }; + + /* There are no loose promisor objects, so we can return immediately. */ + if (flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) + return 0; + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index 2acf19fb91..048b778531 100644 --- a/object-file.h +++ b/object-file.h @@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb, each_loose_object_fn, void *, enum odb_for_each_object_flags flags); +/* + * Iterate through all loose objects in the given object database source and + * invoke the callback function for each of them. 
If given, the object info + * will be populated with the object's data as if you had called + * `odb_source_loose_read_object_info()` on the object. + */ +int odb_source_loose_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); /** * format_object_header() is a thin wrapper around s xsnprintf() that diff --git a/odb.h b/odb.h index e079d79c26..a3c1b29d01 100644 --- a/odb.h +++ b/odb.h @@ -459,6 +459,18 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + enum { /* * By default, `odb_write_object()` does not actually write anything -- GitLab From c58f01b86300391ddbb8ce3195f75be99f09fc54 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 08:08:03 +0100 Subject: [PATCH 074/110] packfile: extend `is_delta` field to allow for "unknown" state The `struct object_info::u::packed::is_delta` field determines whether or not a specific object is stored as a delta. It only stores whether or not the object is stored as delta, so it is treated as a boolean value. This boolean is insufficient though: when reading a packed object via `packfile_store_read_object_info()` we know to skip parsing the actual object when the user didn't request any object-specific data. In that case we won't read the object itself, but will only look up its position in the packfile. Consequently, we do not know whether it is a delta or not. This isn't really an issue right now, as the check for an empty request is broken. 
But a subsequent commit will fix it, and once we do we will have the need to also represent an "unknown" state. Prepare for this change by introducing a new enum that encodes the object type. We don't use the "unknown" state just yet, but will start to do so in a subsequent commit. --- odb.h | 7 ++++++- packfile.c | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/odb.h b/odb.h index a3c1b29d01..42648332bb 100644 --- a/odb.h +++ b/odb.h @@ -337,7 +337,12 @@ struct object_info { struct { struct packed_git *pack; off_t offset; - unsigned int is_delta; + enum packed_object_type { + PACKED_OBJECT_TYPE_UNKNOWN, + PACKED_OBJECT_TYPE_FULL, + PACKED_OBJECT_TYPE_OFS_DELTA, + PACKED_OBJECT_TYPE_REF_DELTA, + } type; } packed; } u; }; diff --git a/packfile.c b/packfile.c index ece1875424..c75f784361 100644 --- a/packfile.c +++ b/packfile.c @@ -2155,8 +2155,18 @@ int packfile_store_read_object_info(struct packfile_store *store, if (oi->whence == OI_PACKED) { oi->u.packed.offset = e.offset; oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); + + switch (rtype) { + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; + } } return 0; @@ -2554,7 +2564,8 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, oi.sizep = &size; if (packfile_store_read_object_info(store, oid, &oi, 0) || - oi.u.packed.is_delta || + oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA || + oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA || repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) return -1; -- GitLab From 47b3abf76ec07c8b11fd41d710cda7e0569ff462 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 08:09:32 +0100 Subject: [PATCH 075/110] packfile: always declare object info to be 
OI_PACKED When reading object info via a packfile we yield one of two types: - The object can either be OI_PACKED, which is what a caller would typically expect. - Or it can be OI_DBCACHED if it is stored in the delta base cache. The latter really is an implementation detail though, and callers typically don't care at all about the difference. Furthermore, the information whether or not it is part of the delta base cache can already be derived via the `type` field, so the fact that we discern between OI_PACKED and OI_DBCACHED only further complicates the interface. Drop the OI_DBCACHED enum completely. I could not find any callers that care about the distinction. --- odb.h | 1 - packfile.c | 29 +++++++++++++---------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/odb.h b/odb.h index 42648332bb..f97f249580 100644 --- a/odb.h +++ b/odb.h @@ -323,7 +323,6 @@ struct object_info { OI_CACHED, OI_LOOSE, OI_PACKED, - OI_DBCACHED } whence; union { /* diff --git a/packfile.c b/packfile.c index c75f784361..84668cbed3 100644 --- a/packfile.c +++ b/packfile.c @@ -1654,8 +1654,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, oidclr(oi->delta_base_oid, p->repo->hash_algo); } - oi->whence = in_delta_base_cache(p, obj_offset) ? 
OI_DBCACHED : - OI_PACKED; + oi->whence = OI_PACKED; out: unuse_pack(&w_curs); @@ -2152,21 +2151,19 @@ int packfile_store_read_object_info(struct packfile_store *store, return -1; } - if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; - switch (rtype) { - case OBJ_REF_DELTA: - oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; - break; - case OBJ_OFS_DELTA: - oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; - break; - default: - oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; - break; - } + switch (rtype) { + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; } return 0; -- GitLab From 96796e433cd92b3762cd1491352574786b60f168 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 08:11:57 +0100 Subject: [PATCH 076/110] packfile: always populate pack-specific info when reading object info When reading object information from a packfile we are not always populating the pack-specific information. This happens in two cases: - When calling `packed_object_info()` directly instead of `packfile_store_read_object_info()`. - When we've got the empty request. Fix both of these issues so that we can always assume the pack info to be populated when reading object info from a pack. Note that we don't really care about the second case right now, as the condition will always evaluate to false anyway. This will be fixed in the next commit. 
--- packfile.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/packfile.c b/packfile.c index 84668cbed3..4c0f256853 100644 --- a/packfile.c +++ b/packfile.c @@ -1655,6 +1655,20 @@ int packed_object_info(struct repository *r, struct packed_git *p, } oi->whence = OI_PACKED; + oi->u.packed.offset = obj_offset; + oi->u.packed.pack = p; + + switch (type) { + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; + } out: unuse_pack(&w_curs); @@ -2142,8 +2156,13 @@ int packfile_store_read_object_info(struct packfile_store *store, * We know that the caller doesn't actually need the * information below, so return early. */ - if (oi == &blank_oi) + if (oi == &blank_oi) { + oi->whence = OI_PACKED; + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; return 0; + } rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi); if (rtype < 0) { @@ -2151,21 +2170,6 @@ int packfile_store_read_object_info(struct packfile_store *store, return -1; } - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - - switch (rtype) { - case OBJ_REF_DELTA: - oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; - break; - case OBJ_OFS_DELTA: - oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; - break; - default: - oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; - break; - } - return 0; } -- GitLab From 649774fabacd0e28cf9cea8631b48809d2b14005 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 08:17:45 +0100 Subject: [PATCH 077/110] packfile: fix short-circuiting of empty requests When reading object information from the packfile store we have logic that tries to bail out early on empty requests. 
This is supposed to be a performance optimization so that we don't even have to unpack the object header stored in the packfile. This optimization doesn't work though: we compare the passed-in pointer with the pointer of an on-stack variable, which of course cannot ever become true. Fix this bug by introducing a new `object_info_is_blank_request()` helper, which simply verifies that none of the contained request pointers are populated. The helper will also be reused in a subsequent commit. --- odb.h | 10 ++++++++++ packfile.c | 3 +-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/odb.h b/odb.h index f97f249580..0a4c451670 100644 --- a/odb.h +++ b/odb.h @@ -346,6 +346,16 @@ struct object_info { } u; }; +/* + * Given an object info structure, figure out whether any of its request + * pointers are populated. + */ +static inline bool object_info_is_blank_request(struct object_info *oi) +{ + return !oi->typep && !oi->sizep && !oi->disk_sizep && + !oi->delta_base_oid && !oi->contentp; +} + /* * Initializer for a "struct object_info" that wants no items. You may * also memset() the memory to all-zeroes. diff --git a/packfile.c b/packfile.c index 4c0f256853..ba0a04b2fe 100644 --- a/packfile.c +++ b/packfile.c @@ -2145,7 +2145,6 @@ int packfile_store_read_object_info(struct packfile_store *store, struct object_info *oi, unsigned flags UNUSED) { - static struct object_info blank_oi = OBJECT_INFO_INIT; struct pack_entry e; int rtype; @@ -2156,7 +2155,7 @@ int packfile_store_read_object_info(struct packfile_store *store, * We know that the caller doesn't actually need the * information below, so return early. 
*/ - if (oi == &blank_oi) { + if (object_info_is_blank_request(oi)) { oi->whence = OI_PACKED; oi->u.packed.offset = e.offset; oi->u.packed.pack = e.p; -- GitLab From 529b1dcf11a2f1e685e303d3287520ce77bd59af Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 10:54:27 +0100 Subject: [PATCH 078/110] packfile: extract function to iterate through objects of a store In the next commit we're about to introduce a new function that knows to iterate through objects of a given packfile store. Same as with the equivalent function for loose objects, this new function will also be agnostic of backends by using a `struct object_info`. Prepare for this by extracting a new shared function to iterate through a single packfile store. --- packfile.c | 78 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/packfile.c b/packfile.c index ba0a04b2fe..e2f0ac009c 100644 --- a/packfile.c +++ b/packfile.c @@ -2300,51 +2300,63 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) +static int packfile_store_for_each_object_internal(struct packfile_store *store, + each_packed_object_fn cb, + void *data, + unsigned flags, + int *pack_errors) { - struct odb_source *source; - int r = 0; - int pack_errors = 0; + struct packfile_list_entry *e; + int ret = 0; - odb_prepare_alternates(repo->objects); + store->skip_mru_updates = true; - for (source = repo->objects->sources; source; source = source->next) { - struct packfile_list_entry *e; + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; - source->packfiles->skip_mru_updates = true; + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && 
+ p->pack_keep_in_core) + continue; + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; + if (open_pack_index(p)) { + *pack_errors = 1; + continue; + } - for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) { - struct packed_git *p = e->pack; + ret = for_each_object_in_pack(p, cb, data, flags); + if (ret) + break; + } - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && - !p->pack_promisor) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && - p->pack_keep_in_core) - continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && - p->pack_keep) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; - } + store->skip_mru_updates = false; - r = for_each_object_in_pack(p, cb, data, flags); - if (r) - break; - } + return ret; +} - source->packfiles->skip_mru_updates = false; +int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, + void *data, unsigned flags) +{ + struct odb_source *source; + int pack_errors = 0; + int ret = 0; - if (r) + odb_prepare_alternates(repo->objects); + + for (source = repo->objects->sources; source; source = source->next) { + ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, + flags, &pack_errors); + if (ret) break; } - return r ? r : pack_errors; + return ret ? ret : pack_errors; } static int add_promisor_object(const struct object_id *oid, -- GitLab From 5e798ebdd7f871d11768de5d95bc6dae0734e0c4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 10:30:10 +0100 Subject: [PATCH 079/110] packfile: introduce function to iterate through objects Introduce a new function `packfile_store_for_each_object()`. This function is the equivalent to `odb_source_loose_for_each_object()` in that it: - Works on a single packfile store and thus per object source. 
- Passes a `struct object_info` to the callback function. As such, it provides the same callback interface as we already provide for loose objects now. These functions will be used in a subsequent step to implement `odb_for_each_object()`. The `for_each_packed_object()` function continues to exist for now, but it will be removed at the end of this patch series. --- packfile.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 14 ++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/packfile.c b/packfile.c index e2f0ac009c..bb2561ba75 100644 --- a/packfile.c +++ b/packfile.c @@ -2359,6 +2359,62 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, return ret ? ret : pack_errors; } +struct packfile_store_for_each_object_wrapper_data { + struct packfile_store *store; + struct object_info *oi; + odb_for_each_object_cb cb; + void *cb_data; + unsigned flags; +}; + +static int packfile_store_for_each_object_wrapper(const struct object_id *oid, + struct packed_git *pack, + uint32_t index_pos, + void *cb_data) +{ + struct packfile_store_for_each_object_wrapper_data *data = cb_data; + + if (data->oi) { + off_t offset = nth_packed_object_offset(pack, index_pos); + + if (object_info_is_blank_request(data->oi)) { + data->oi->whence = OI_PACKED; + data->oi->u.packed.offset = offset; + data->oi->u.packed.pack = pack; + data->oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; + } else if (packed_object_info(data->store->source->odb->repo, + pack, offset, data->oi) < 0) { + mark_bad_packed_object(pack, oid); + return -1; + } + } + + return data->cb(oid, data->oi, data->cb_data); +} + +int packfile_store_for_each_object(struct packfile_store *store, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct packfile_store_for_each_object_wrapper_data data = { + .store = store, + .oi = oi, + .cb = cb, + .cb_data = cb_data, + .flags = flags, + }; + int pack_errors = 0, ret; + + ret = 
packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, + &data, flags, &pack_errors); + if (ret) + return ret; + + return pack_errors ? -1 : 0; +} + static int add_promisor_object(const struct object_id *oid, struct packed_git *pack, uint32_t pos UNUSED, diff --git a/packfile.h b/packfile.h index f034296c03..4c892e896b 100644 --- a/packfile.h +++ b/packfile.h @@ -342,6 +342,20 @@ int for_each_object_in_pack(struct packed_git *p, int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, void *data, unsigned flags); +/* + * Iterate through all packed objects in the given packfile store and invoke + * the callback function for each of them. If given, the object info will be + * populated with the object's data as if you had called + * `packfile_store_read_object_info()` on the object. + * + * The flags parameter is a combination of `odb_for_each_object_flags`. + */ +int packfile_store_for_each_object(struct packfile_store *store, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 -- GitLab From b189579f11d3a53788524fdc08cbc618d5d9b00b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:11:43 +0100 Subject: [PATCH 080/110] odb_for_each_object --- odb.c | 27 +++++++++++++++++++++++++++ odb.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/odb.c b/odb.c index 902251f9ed..80d1875d11 100644 --- a/odb.c +++ b/odb.c @@ -989,6 +989,33 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object(struct object_database *odb, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) 
+ continue; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags); + if (ret) + return ret; + } + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index 0a4c451670..0453c06c9f 100644 --- a/odb.h +++ b/odb.h @@ -485,6 +485,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Iterate through all objects contained in the object database. Note that + * objects may be iterated over multiple times in case they are either stored + * in different backends or in case they are stored in multiple sources. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + * + * Returns 0 on success, a negative error code in case a failure occurred, or + * an arbitrary non-zero error code returned by the callback itself. 
+ */ +int odb_for_each_object(struct object_database *odb, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + enum { /* * By default, `odb_write_object()` does not actually write anything -- GitLab From 0f5107e1f366cb026397a32ca63254724fdb7ba3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:33:05 +0100 Subject: [PATCH 081/110] fsck conversions --- builtin/fsck.c | 57 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 4979bc795e..96107695ae 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED, return 0; } -static void mark_unreachable_referents(const struct object_id *oid) +static int mark_unreachable_referents(const struct object_id *oid, + struct object_info *io UNUSED, + void *data UNUSED) { struct fsck_options options = FSCK_OPTIONS_DEFAULT; struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) - return; /* not part of our original set */ + return 0; /* not part of our original set */ if (obj->flags & REACHABLE) - return; /* reachable objects already traversed */ + return 0; /* reachable objects already traversed */ /* * Avoid passing OBJ_NONE to fsck_walk, which will parse the object @@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid) fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) free_tree_buffer((struct tree *)obj); -} -static int mark_loose_unreachable_referents(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); - return 0; -} - -static int mark_packed_unreachable_referents(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); return 0; } @@ -394,12 +381,8 @@ static 
void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. */ - for_each_loose_object(the_repository->objects, - mark_loose_unreachable_referents, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_unreachable_referents, - NULL, - 0); + odb_for_each_object(the_repository->objects, NULL, + mark_unreachable_referents, NULL, 0); } /* Look up all the requirements, warn about missing objects.. */ @@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path, fsck_resolve_undo(istate, index_path); } -static void mark_object_for_connectivity(const struct object_id *oid) +static int mark_object_for_connectivity(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data UNUSED) { struct object *obj = lookup_unknown_object(the_repository, oid); obj->flags |= HAS_OBJ; -} - -static int mark_loose_for_connectivity(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); - return 0; -} - -static int mark_packed_for_connectivity(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); return 0; } @@ -1001,10 +970,8 @@ int cmd_fsck(int argc, fsck_refs(the_repository); if (connectivity_only) { - for_each_loose_object(the_repository->objects, - mark_loose_for_connectivity, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_for_connectivity, NULL, 0); + odb_for_each_object(the_repository->objects, NULL, + mark_object_for_connectivity, NULL, 0); } else { odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) -- GitLab From 02db3f9d37c34b322e59188d5533798bf775cf44 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:48:14 +0100 Subject: [PATCH 082/110] packfile.c --- packfile.c | 38 +++++++++++++++++++++++--------------- 1 file 
changed, 23 insertions(+), 15 deletions(-) diff --git a/packfile.c b/packfile.c index bb2561ba75..1b6c6761cf 100644 --- a/packfile.c +++ b/packfile.c @@ -2415,27 +2415,31 @@ int packfile_store_for_each_object(struct packfile_store *store, return pack_errors ? -1 : 0; } +struct add_promisor_object_data { + struct repository *repo; + struct oidset *set; +}; + static int add_promisor_object(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos UNUSED, - void *set_) + struct object_info *oi UNUSED, + void *cb_data) { - struct oidset *set = set_; + struct add_promisor_object_data *data = cb_data; struct object *obj; int we_parsed_object; - obj = lookup_object(pack->repo, oid); + obj = lookup_object(data->repo, oid); if (obj && obj->parsed) { we_parsed_object = 0; } else { we_parsed_object = 1; - obj = parse_object(pack->repo, oid); + obj = parse_object(data->repo, oid); } if (!obj) return 1; - oidset_insert(set, oid); + oidset_insert(data->set, oid); /* * If this is a tree, commit, or tag, the objects it refers @@ -2453,19 +2457,19 @@ static int add_promisor_object(const struct object_id *oid, */ return 0; while (tree_entry_gently(&desc, &entry)) - oidset_insert(set, &entry.oid); + oidset_insert(data->set, &entry.oid); if (we_parsed_object) free_tree_buffer(tree); } else if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; struct commit_list *parents = commit->parents; - oidset_insert(set, get_commit_tree_oid(commit)); + oidset_insert(data->set, get_commit_tree_oid(commit)); for (; parents; parents = parents->next) - oidset_insert(set, &parents->item->object.oid); + oidset_insert(data->set, &parents->item->object.oid); } else if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) obj; - oidset_insert(set, get_tagged_oid(tag)); + oidset_insert(data->set, get_tagged_oid(tag)); } return 0; } @@ -2477,10 +2481,14 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (!promisor_objects_prepared) { if 
(repo_has_promisor_remote(r)) { - for_each_packed_object(r, add_promisor_object, - &promisor_objects, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | - ODB_FOR_EACH_OBJECT_PACK_ORDER); + struct add_promisor_object_data data = { + .repo = r, + .set = &promisor_objects, + }; + + odb_for_each_object(r->objects, NULL, add_promisor_object, &data, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | + ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } -- GitLab From 4b3ede33a6b83044af81ff2bee2793822f7905a8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:54:33 +0100 Subject: [PATCH 083/110] revision mark_uninteresting --- revision.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/revision.c b/revision.c index a2b8cd178d..d2b83d0f8b 100644 --- a/revision.c +++ b/revision.c @@ -3649,8 +3649,7 @@ void reset_revision_walk(void) } static int mark_uninteresting(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, + struct object_info *oi UNUSED, void *cb) { struct rev_info *revs = cb; @@ -3959,10 +3958,9 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; - if (revs->exclude_promisor_objects) { - for_each_packed_object(revs->repo, mark_uninteresting, revs, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); - } + if (revs->exclude_promisor_objects) + odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting, + revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (!revs->reflog_info) prepare_to_use_bloom_filter(revs); -- GitLab From ef6ab10c0e9fcad1e387bf5afab064d9b3278de5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:56:14 +0100 Subject: [PATCH 084/110] repack promisor --- repack-promisor.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/repack-promisor.c b/repack-promisor.c index 45c330b9a5..35c4073632 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -17,8 
+17,8 @@ struct write_oid_context { * necessary. */ static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) + struct object_info *oi UNUSED, + void *data) { struct write_oid_context *ctx = data; struct child_process *cmd = ctx->cmd; @@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo, */ ctx.cmd = &cmd; ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); + odb_for_each_object(repo->objects, NULL, write_oid, &ctx, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ -- GitLab From c724b3e0406ac9503523232045558b6c2c22159a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 12:03:57 +0100 Subject: [PATCH 085/110] commit-graph --- commit-graph.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 181f2cc498..238d5456e0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1485,24 +1485,16 @@ static int write_graph_chunk_bloom_data(struct hashfile *f, return 0; } -static int add_packed_commits(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) +static int add_packed_commits_oi(const struct object_id *oid, + struct object_info *oi, + void *data) { struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; - enum object_type type; - off_t offset = nth_packed_object_offset(pack, pos); - struct object_info oi = OBJECT_INFO_INIT; if (ctx->progress) display_progress(ctx->progress, ++ctx->progress_done); - oi.typep = &type; - if (packed_object_info(ctx->r, pack, offset, &oi) < 0) - die(_("unable to get type of object %s"), oid_to_hex(oid)); - - if (type != OBJ_COMMIT) + if (*oi->typep != OBJ_COMMIT) return 0; oid_array_append(&ctx->oids, oid); @@ -1511,6 +1503,22 @@ static int add_packed_commits(const struct 
object_id *oid, return 0; } +static int add_packed_commits(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data) +{ + enum object_type type; + off_t offset = nth_packed_object_offset(pack, pos); + struct object_info oi = OBJECT_INFO_INIT; + + oi.typep = &type; + if (packed_object_info(pack->repo, pack, offset, &oi) < 0) + die(_("unable to get type of object %s"), oid_to_hex(oid)); + + return add_packed_commits_oi(oid, &oi, data); +} + static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) { struct commit_list *parent; @@ -1965,13 +1973,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx, static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { + struct odb_source *source; + enum object_type type; + struct object_info oi = { + .typep = &type, + }; + if (ctx->report_progress) ctx->progress = start_delayed_progress( ctx->r, _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); - for_each_packed_object(ctx->r, add_packed_commits, ctx, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + + odb_prepare_alternates(ctx->r->objects); + for (source = ctx->r->objects->sources; source; source = source->next) + packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi, + ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); -- GitLab From 0921d293474ff72847d8870c2c3d3b1080b5ddd3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:52:12 +0100 Subject: [PATCH 086/110] cat-file --- builtin/cat-file.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index c3de953dee..b771a6c973 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -807,11 +807,14 @@ struct for_each_object_payload { void 
*payload; }; -static int batch_one_object_loose(const struct object_id *oid, - const char *path UNUSED, - void *_payload) +static int batch_one_object_oi(const struct object_id *oid, + struct object_info *oi, + void *_payload) { struct for_each_object_payload *payload = _payload; + if (oi && oi->whence == OI_PACKED) + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset, + payload->payload); return payload->callback(oid, NULL, 0, payload->payload); } @@ -847,8 +850,15 @@ static void batch_each_object(struct batch_options *opt, .payload = _payload, }; struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + struct odb_source *source; - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0); + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, + &payload, flags); + if (ret) + break; + } if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { @@ -862,8 +872,14 @@ static void batch_each_object(struct batch_options *opt, &payload, flags); } } else { - for_each_packed_object(the_repository, batch_one_object_packed, - &payload, flags); + struct object_info oi = { 0 }; + + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = packfile_store_for_each_object(source->packfiles, &oi, + batch_one_object_oi, &payload, flags); + if (ret) + break; + } } free_bitmap_index(bitmap); -- GitLab From 1ddbdf0059c9fe0e5d2bf400d10bf35223e60719 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 13:12:24 +0100 Subject: [PATCH 087/110] mtimep --- object-file.c | 25 ++++++++++++++++++++----- odb.c | 2 ++ odb.h | 3 ++- packfile.c | 42 ++++++++++++++++++++++++++++++++++-------- 4 files changed, 58 insertions(+), 14 deletions(-) diff --git a/object-file.c 
b/object-file.c index ebb5312696..ee9ab8d116 100644 --- a/object-file.c +++ b/object-file.c @@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source, char hdr[MAX_HEADER_LEN]; unsigned long size_scratch; enum object_type type_scratch; + struct stat st; if (oi->delta_base_oid) oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); @@ -422,9 +423,7 @@ static int read_object_info_from_path(struct odb_source *source, * object even exists. */ if (!oi->typep && !oi->sizep && !oi->contentp) { - struct stat st; - - if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) { + if (!oi->disk_sizep && !oi->mtimep && (flags & OBJECT_INFO_QUICK)) { status = quick_has_loose(source->loose, oid) ? 0 : -1; if (!status) oi->whence = OI_LOOSE; @@ -436,6 +435,8 @@ static int read_object_info_from_path(struct odb_source *source, if (oi->disk_sizep) *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; oi->whence = OI_LOOSE; return 0; @@ -447,7 +448,20 @@ static int read_object_info_from_path(struct odb_source *source, error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); return -1; } - map = map_fd(fd, path, &mapsize); + + if (fstat(fd, &st)) { + close(fd); + return -1; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + return error(_("object file %s is empty"), path); + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); if (!map) return -1; @@ -455,9 +469,10 @@ static int read_object_info_from_path(struct odb_source *source, oi->sizep = &size_scratch; if (!oi->typep) oi->typep = &type_scratch; - if (oi->disk_sizep) *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { case ULHR_OK: diff --git a/odb.c b/odb.c index 80d1875d11..2c9fb98e45 100644 --- a/odb.c +++ b/odb.c @@ -697,6 +697,8 @@ static int do_oid_object_info_extended(struct object_database *odb, oidclr(oi->delta_base_oid, 
odb->repo->hash_algo); if (oi->contentp) *oi->contentp = xmemdupz(co->buf, co->size); + if (oi->mtimep) + *oi->mtimep = 0; oi->whence = OI_CACHED; return 0; } diff --git a/odb.h b/odb.h index 0453c06c9f..5793fc0243 100644 --- a/odb.h +++ b/odb.h @@ -317,6 +317,7 @@ struct object_info { off_t *disk_sizep; struct object_id *delta_base_oid; void **contentp; + time_t *mtimep; /* Response */ enum { @@ -353,7 +354,7 @@ struct object_info { static inline bool object_info_is_blank_request(struct object_info *oi) { return !oi->typep && !oi->sizep && !oi->disk_sizep && - !oi->delta_base_oid && !oi->contentp; + !oi->delta_base_oid && !oi->contentp && !oi->mtimep; } /* diff --git a/packfile.c b/packfile.c index 1b6c6761cf..6031df8bae 100644 --- a/packfile.c +++ b/packfile.c @@ -1578,13 +1578,15 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct repository *r, struct packed_git *p, - off_t obj_offset, struct object_info *oi) +static int packed_object_info_with_index_pos(struct repository *r, struct packed_git *p, + off_t obj_offset, uint32_t *maybe_index_pos, + struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; enum object_type type; + uint32_t pack_pos; /* * We always get the representation type, but only convert it to @@ -1618,16 +1620,34 @@ int packed_object_info(struct repository *r, struct packed_git *p, } } - if (oi->disk_sizep) { - uint32_t pos; - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) { + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); type = OBJ_BAD; goto out; } + } + + if (oi->disk_sizep) + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; + + if (oi->mtimep) { + if (p->is_cruft) { + uint32_t index_pos; + + if 
(load_pack_mtimes(p) < 0) + die(_("could not load cruft pack .mtimes")); + + if (maybe_index_pos) + index_pos = *maybe_index_pos; + else + index_pos = pack_pos_to_index(p, pack_pos); - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; + *oi->mtimep = nth_packed_mtime(p, index_pos); + } else { + *oi->mtimep = p->mtime; + } } if (oi->typep) { @@ -1675,6 +1695,12 @@ int packed_object_info(struct repository *r, struct packed_git *p, return type; } +int packed_object_info(struct repository *r, struct packed_git *p, + off_t obj_offset, struct object_info *oi) +{ + return packed_object_info_with_index_pos(r, p, obj_offset, NULL, oi); +} + static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, off_t curpos, @@ -2382,8 +2408,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, data->oi->u.packed.offset = offset; data->oi->u.packed.pack = pack; data->oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; - } else if (packed_object_info(data->store->source->odb->repo, - pack, offset, data->oi) < 0) { + } else if (packed_object_info_with_index_pos(data->store->source->odb->repo, + pack, offset, &index_pos, data->oi) < 0) { mark_bad_packed_object(pack, oid); return -1; } -- GitLab From daefdd8766c2e802a7d2d6c04ca6b4a67100a37c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 12:12:43 +0100 Subject: [PATCH 088/110] pack_objects --- builtin/pack-objects.c | 45 ++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4eb83c28d1..df7a7cdd8c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit) } static int add_object_in_unpacked_pack(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, + struct object_info *oi, void *data UNUSED) { if (cruft) { - off_t offset; - time_t mtime; - - if 
(pack->is_cruft) { - if (load_pack_mtimes(pack) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(pack, pos); - } else { - mtime = pack->mtime; - } - offset = nth_packed_object_offset(pack, pos); - - add_cruft_object_entry(oid, OBJ_NONE, pack, offset, - NULL, mtime); + add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack, + oi->u.packed.offset, NULL, *oi->mtimep); } else { add_object_entry(oid, OBJ_NONE, "", 0); } @@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, static void add_objects_in_unpacked_packs(void) { - if (for_each_packed_object(to_pack.repo, - add_object_in_unpacked_pack, - NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) - die(_("cannot open pack index")); + struct odb_source *source; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + }; + + odb_prepare_alternates(to_pack.repo->objects); + for (source = to_pack.repo->objects->sources; source; source = source->next) { + if (!source->local) + continue; + + if (packfile_store_for_each_object(source->packfiles, &oi, + add_object_in_unpacked_pack, NULL, + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + die(_("cannot open pack index")); + } } static int add_loose_object(const struct object_id *oid, const char *path, -- GitLab From 01d0034a9a89f6ce046134445fd4c4520258a073 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 11 Dec 2025 12:20:05 +0100 Subject: [PATCH 089/110] reachable --- reachable.c | 123 +++++++++++++++------------------------------------- 1 file changed, 34 insertions(+), 89 deletions(-) diff --git a/reachable.c b/reachable.c index 82676b2668..4a26ccf399 100644 --- a/reachable.c +++ b/reachable.c @@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime, 
return oidset_contains(&data->extra_recent_oids, oid); } -static void add_recent_object(const struct object_id *oid, - struct packed_git *pack, - off_t offset, - timestamp_t mtime, - struct recent_data *data) +static int want_recent_object(struct recent_data *data, + const struct object_id *oid) { - struct object *obj; - enum object_type type; + if (data->ignore_in_core_kept_packs && + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + return 0; + return 1; +} - if (!obj_is_recent(oid, mtime, data)) - return; +static int add_recent_object(const struct object_id *oid, + struct object_info *oi, + void *cb_data) +{ + struct recent_data *data = cb_data; + struct object *obj; - /* - * We do not want to call parse_object here, because - * inflating blobs and trees could be very expensive. - * However, we do need to know the correct type for - * later processing, and the revision machinery expects - * commits and tags to have been parsed. - */ - type = odb_read_object_info(the_repository->objects, oid, NULL); - if (type < 0) - die("unable to get object info for %s", oid_to_hex(oid)); + if (!want_recent_object(data, oid) || + !obj_is_recent(oid, *oi->mtimep, data)) + return 0; - switch (type) { + switch (*oi->typep) { case OBJ_TAG: case OBJ_COMMIT: obj = parse_object_or_die(the_repository, oid, NULL); @@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid, break; default: die("unknown object type for %s: %s", - oid_to_hex(oid), type_name(type)); + oid_to_hex(oid), type_name(*oi->typep)); } if (!obj) die("unable to lookup %s", oid_to_hex(oid)); - - add_pending_object(data->revs, obj, ""); - if (data->cb) - data->cb(obj, pack, offset, mtime); -} - -static int want_recent_object(struct recent_data *data, - const struct object_id *oid) -{ - if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + if (obj->flags & SEEN) return 0; - return 1; -} -static int add_recent_loose(const struct 
object_id *oid, - const char *path, void *data) -{ - struct stat st; - struct object *obj; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - - if (stat(path, &st) < 0) { - /* - * It's OK if an object went away during our iteration; this - * could be due to a simultaneous repack. But anything else - * we should abort, since we might then fail to mark objects - * which should not be pruned. - */ - if (errno == ENOENT) - return 0; - return error_errno("unable to stat %s", oid_to_hex(oid)); + add_pending_object(data->revs, obj, ""); + if (data->cb) { + if (oi->whence == OI_PACKED) + data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep); + else + data->cb(obj, NULL, 0, *oi->mtimep); } - add_recent_object(oid, NULL, 0, st.st_mtime, data); - return 0; -} - -static int add_recent_packed(const struct object_id *oid, - struct packed_git *p, - uint32_t pos, - void *data) -{ - struct object *obj; - timestamp_t mtime = p->mtime; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - if (p->is_cruft) { - if (load_pack_mtimes(p) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(p, pos); - } - add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data); return 0; } @@ -308,6 +250,12 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, { struct recent_data data; enum odb_for_each_object_flags flags; + enum object_type type; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + .typep = &type, + }; int r; data.revs = revs; @@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, oidset_init(&data.extra_recent_oids, 0); data.extra_recent_oids_loaded = 0; - r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - ODB_FOR_EACH_OBJECT_LOCAL_ONLY); - if (r) - goto done; - 
flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; - r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); + r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags); + if (r) + goto done; done: oidset_clear(&data.extra_recent_oids); -- GitLab From 5f62950bedfb4eec45913b50c8330dfd71c0d5b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 11:53:31 +0100 Subject: [PATCH 090/110] drop for_each_loose_object and for_each_packed_object --- object-file.c | 20 -------------------- object-file.h | 11 ----------- packfile.c | 19 ------------------- packfile.h | 2 -- 4 files changed, 52 deletions(-) diff --git a/object-file.c b/object-file.c index ee9ab8d116..ecee055a61 100644 --- a/object-file.c +++ b/object-file.c @@ -1778,26 +1778,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn cb, void *data, - enum odb_for_each_object_flags flags) -{ - struct odb_source *source; - - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - int r = for_each_loose_file_in_source(source, cb, NULL, - NULL, data); - if (r) - return r; - - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) - break; - } - - return 0; -} - struct for_each_object_wrapper_data { struct odb_source *source; struct object_info *oi; diff --git a/object-file.h b/object-file.h index 048b778531..af7f57d2a1 100644 --- a/object-file.h +++ b/object-file.h @@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source, each_loose_subdir_fn subdir_cb, void *data); -/* - * Iterate over all accessible loose objects without respect to - * reachability. By default, this includes both local and alternate objects. - * The order in which objects are visited is unspecified. 
- * - * Any flags specific to packs are ignored. - */ -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn, void *, - enum odb_for_each_object_flags flags); - /* * Iterate through all loose objects in the given object database source and * invoke the callback function for each of them. If given, the object info diff --git a/packfile.c b/packfile.c index 6031df8bae..7cab2d29da 100644 --- a/packfile.c +++ b/packfile.c @@ -2366,25 +2366,6 @@ static int packfile_store_for_each_object_internal(struct packfile_store *store, return ret; } -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags) -{ - struct odb_source *source; - int pack_errors = 0; - int ret = 0; - - odb_prepare_alternates(repo->objects); - - for (source = repo->objects->sources; source; source = source->next) { - ret = packfile_store_for_each_object_internal(source->packfiles, cb, data, - flags, &pack_errors); - if (ret) - break; - } - - return ret ? 
ret : pack_errors; -} - struct packfile_store_for_each_object_wrapper_data { struct packfile_store *store; struct object_info *oi; diff --git a/packfile.h b/packfile.h index 4c892e896b..a70d9c5774 100644 --- a/packfile.h +++ b/packfile.h @@ -339,8 +339,6 @@ typedef int each_packed_object_fn(const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, unsigned flags); -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, unsigned flags); /* * Iterate through all packed objects in the given packfile store and invoke -- GitLab From da8e4b651a36aed1ed120351ce9c65ceb2dda45f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 09:49:46 +0100 Subject: [PATCH 091/110] odb object approximation --- builtin/gc.c | 3 ++- commit-graph.c | 2 +- object-name.c | 3 ++- odb.c | 13 +++++++++++++ odb.h | 18 ++++++++++++++++++ packfile.c | 41 ++++++++++++++--------------------------- packfile.h | 23 +++++++++++++++++------ 7 files changed, 67 insertions(+), 36 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 92c6e7b954..6034c10b15 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -592,7 +592,8 @@ static uint64_t total_ram(void) static uint64_t estimate_repack_memory(struct gc_config *cfg, struct packed_git *pack) { - unsigned long nr_objects = repo_approximate_object_count(the_repository); + unsigned long nr_objects = odb_count_objects(the_repository->objects, + ODB_COUNT_OBJECTS_APPROXIMATE); size_t os_cache, heap; if (!pack || !nr_objects) diff --git a/commit-graph.c b/commit-graph.c index 238d5456e0..e7f93c4b8b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2613,7 +2613,7 @@ int write_commit_graph(struct odb_source *source, replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE; } - ctx.approx_nr_objects = repo_approximate_object_count(r); + ctx.approx_nr_objects = odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE); if (ctx.append && g) { for 
(i = 0; i < g->num_commits; i++) { diff --git a/object-name.c b/object-name.c index fed5de5153..81269d241a 100644 --- a/object-name.c +++ b/object-name.c @@ -837,7 +837,8 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, const unsigned hexsz = algo->hexsz; if (len < 0) { - unsigned long count = repo_approximate_object_count(r); + unsigned long count = odb_count_objects(r->objects, + ODB_COUNT_OBJECTS_APPROXIMATE); /* * Add one because the MSB only tells us the highest bit set, * not including the value of all the _other_ bits (so "15" diff --git a/odb.c b/odb.c index 843f2e0408..66d01822d3 100644 --- a/odb.c +++ b/odb.c @@ -1027,6 +1027,19 @@ int odb_for_each_object(struct object_database *odb, return 0; } +unsigned long odb_count_objects(struct object_database *odb, + unsigned flags) +{ + struct odb_source *source; + unsigned long count = 0; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + count += packfile_store_count_objects(source->packfiles, flags); + + return count; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index 5793fc0243..ce2ad8e41f 100644 --- a/odb.h +++ b/odb.h @@ -418,6 +418,24 @@ int odb_has_object(struct object_database *odb, int odb_freshen_object(struct object_database *odb, const struct object_id *oid); +/* Flags that can be passed to `odb_count_objects()`. */ +enum odb_count_objects_flags { + /* + * Allow the number of objects to be estimated. This flags essentially + * asks the backend to trade accuracy for speed. The exact details of + * how these estimations happen is backend-specific. Some backends may + * not honor this flag at all. + */ + ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), +}; + +/* + * Count the nubber of objects in the object database. This function does not + * account for reachability and may count objects multiple times. 
+ */ +unsigned long odb_count_objects(struct object_database *odb, + unsigned flags); + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect); diff --git a/packfile.c b/packfile.c index 7cab2d29da..50a08a157d 100644 --- a/packfile.c +++ b/packfile.c @@ -1097,37 +1097,24 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) +unsigned long packfile_store_count_objects(struct packfile_store *store, + unsigned flags UNUSED) { - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; - - odb_prepare_alternates(r->objects); + struct packfile_list_entry *e; + struct multi_pack_index *m; + unsigned long count = 0; - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects + m->num_objects_in_base; - } + m = get_multi_pack_index(store->source); + if (m) + count += m->num_objects + m->num_objects_in_base; - repo_for_each_pack(r, p) { - if (p->multi_pack_index || open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index || open_pack_index(e->pack)) + continue; + count += e->pack->num_objects; } - return r->objects->approximate_object_count; + + return 
count; } unsigned long unpack_object_header_buffer(const unsigned char *buf, diff --git a/packfile.h b/packfile.h index a70d9c5774..684557ed05 100644 --- a/packfile.h +++ b/packfile.h @@ -173,6 +173,23 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + +/* + * Count the number of objects contained in the packfiles. + */ +unsigned long packfile_store_count_objects(struct packfile_store *store, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -360,12 +377,6 @@ int packfile_store_for_each_object(struct packfile_store *store, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * Give a rough count of objects in the repository. This sacrifices accuracy - * for speed. 
- */ -unsigned long repo_approximate_object_count(struct repository *r); - void pack_report(struct repository *repo); /* -- GitLab From 829c737805c223c72b86d9c99871015b6051c923 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Dec 2025 16:12:37 +0100 Subject: [PATCH 092/110] loose object approximation --- builtin/gc.c | 37 +++++++------------------------------ object-file.c | 39 +++++++++++++++++++++++++++++++++++++++ object-file.h | 8 ++++++++ odb.c | 5 ++++- odb.h | 7 +++++++ 5 files changed, 65 insertions(+), 31 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 6034c10b15..883eb19101 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -467,37 +467,14 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) static int too_many_loose_objects(int limit) { /* - * Quickly check if a "gc" is needed, by estimating how - * many loose objects there are. Because SHA-1 is evenly - * distributed, we can check only one and get a reasonable - * estimate. + * This is weird, but stems from legacy behaviour: the GC auto + * threshold was always essentially interpreted as if it was rounded up + * to the next multiple 256 of, so we retain this behaviour for now. 
*/ - DIR *dir; - struct dirent *ent; - int auto_threshold; - int num_loose = 0; - int needed = 0; - const unsigned hexsz_loose = the_hash_algo->hexsz - 2; - char *path; - - path = repo_git_path(the_repository, "objects/17"); - dir = opendir(path); - free(path); - if (!dir) - return 0; - - auto_threshold = DIV_ROUND_UP(limit, 256); - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || - ent->d_name[hexsz_loose] != '\0') - continue; - if (++num_loose > auto_threshold) { - needed = 1; - break; - } - } - closedir(dir); - return needed; + unsigned long auto_threshold = DIV_ROUND_UP(limit, 256) * 256; + return odb_source_loose_count_objects(the_repository->objects->sources, + ODB_COUNT_OBJECTS_APPROXIMATE) + > auto_threshold; } static struct packed_git *find_base_packs(struct string_list *packs, diff --git a/object-file.c b/object-file.c index ecee055a61..47a6ef7ab2 100644 --- a/object-file.c +++ b/object-file.c @@ -1015,6 +1015,45 @@ int odb_source_loose_freshen_object(struct odb_source *source, return !!check_and_freshen_source(source, oid, 1); } +static int count_loose(const struct object_id *oid UNUSED, + const char *path UNUSED, + void *data) +{ + unsigned long *count = data; + (*count)++; + return 0; +} + +unsigned long odb_source_loose_count_objects(struct odb_source *source, + unsigned flags) +{ + const unsigned hexsz_loose = source->odb->repo->hash_algo->hexsz - 2; + struct strbuf path = STRBUF_INIT; + unsigned long count = 0; + struct dirent *ent; + DIR *dir; + + if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) { + for_each_loose_file_in_source(source, count_loose, + NULL, NULL, &count); + return count; + } + + strbuf_addf(&path, "%s/17", source->path); + dir = opendir(path.buf); + strbuf_release(&path); + if (!dir) + return 0; + + while ((ent = readdir(dir)) != NULL) + if (strspn(ent->d_name, "0123456789abcdef") == hexsz_loose && + ent->d_name[hexsz_loose] == '\0') + count++; + closedir(dir); + + return count * 
256; +} + int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *in_stream, size_t len, struct object_id *oid) diff --git a/object-file.h b/object-file.h index af7f57d2a1..f1318de7ad 100644 --- a/object-file.h +++ b/object-file.h @@ -65,6 +65,14 @@ int odb_source_loose_has_object(struct odb_source *source, int odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid); +/* + * Because object hashes are cryptographic and thus evenly distributed, + * we can check only one and get a reasonable estimate via extrapolation. The + * shard used for this is "objects/17". + */ +unsigned long odb_source_loose_count_objects(struct odb_source *source, + unsigned flags); + int odb_source_loose_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, diff --git a/odb.c b/odb.c index 66d01822d3..51e28fac2c 100644 --- a/odb.c +++ b/odb.c @@ -1034,8 +1034,11 @@ unsigned long odb_count_objects(struct object_database *odb, unsigned long count = 0; odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) + for (source = odb->sources; source; source = source->next) { count += packfile_store_count_objects(source->packfiles, flags); + if (flags & ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED) + count += odb_source_loose_count_objects(source, flags); + } return count; } diff --git a/odb.h b/odb.h index ce2ad8e41f..17a5c66f16 100644 --- a/odb.h +++ b/odb.h @@ -427,6 +427,13 @@ enum odb_count_objects_flags { * not honor this flag at all. */ ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), + + /* + * Also estimate objects that are stored in an unoptimized format. This + * flag may be ignored in case a backend does not discern between + * unoptimized/optimized formats. 
+ */ + ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED = (1 << 1), }; /* -- GitLab From bbb37a4313eb8444d87012733a913441245e3bae Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 9 Dec 2025 09:07:04 +0100 Subject: [PATCH 093/110] odb source --- Makefile | 1 + meson.build | 1 + odb.c | 25 ---------------------- odb.h | 45 +-------------------------------------- odb/source.c | 28 ++++++++++++++++++++++++ odb/source.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+), 69 deletions(-) create mode 100644 odb/source.c create mode 100644 odb/source.h diff --git a/Makefile b/Makefile index 291e4a7219..25d6e8a5fc 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/source.o LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o diff --git a/meson.build b/meson.build index cd4114fa71..63dedbf141 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/source.c', 'odb/streaming.c', 'oid-array.c', 'oidmap.c', diff --git a/odb.c b/odb.c index 51e28fac2c..b8f769979e 100644 --- a/odb.c +++ b/odb.c @@ -217,23 +217,6 @@ static void odb_source_read_alternates(struct odb_source *source, free(path); } - -static struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) -{ - struct odb_source *source; - - CALLOC_ARRAY(source, 1); - source->odb = odb; - source->local = local; - source->path = xstrdup(path); - source->loose = odb_source_loose_new(source); - source->packfiles = packfile_store_new(source); - - return source; -} - static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, const char *source, int depth) @@ -373,14 +356,6 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, return source->next; } -static void odb_source_free(struct odb_source 
*source) -{ - free(source->path); - odb_source_loose_free(source->loose); - packfile_store_free(source->packfiles); - free(source); -} - void odb_restore_primary_source(struct object_database *odb, struct odb_source *restore_source, const char *old_path) diff --git a/odb.h b/odb.h index 17a5c66f16..93a999f310 100644 --- a/odb.h +++ b/odb.h @@ -3,6 +3,7 @@ #include "hashmap.h" #include "object.h" +#include "odb/source.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -30,50 +31,6 @@ extern int fetch_if_missing; */ char *compute_alternate_path(const char *path, struct strbuf *err); -/* - * The source is the part of the object database that stores the actual - * objects. It thus encapsulates the logic to read and write the specific - * on-disk format. An object database can have multiple sources: - * - * - The primary source, which is typically located in "$GIT_DIR/objects". - * This is where new objects are usually written to. - * - * - Alternate sources, which are configured via "objects/info/alternates" or - * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These - * alternate sources are only used to read objects. - */ -struct odb_source { - struct odb_source *next; - - /* Object database that owns this object source. */ - struct object_database *odb; - - /* Private state for loose objects. */ - struct odb_source_loose *loose; - - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - - /* - * Figure out whether this is the local source of the owning - * repository, which would typically be its ".git/objects" directory. - * This local object directory is usually where objects would be - * written to. - */ - bool local; - - /* - * This object store is ephemeral, so there is no need to fsync. - */ - int will_destroy; - - /* - * Path to the source. If this is a relative path, it is relative to - * the current working directory. 
- */ - char *path; -}; - struct packed_git; struct packfile_store; struct cached_object_entry; diff --git a/odb/source.c b/odb/source.c new file mode 100644 index 0000000000..7fc89806f9 --- /dev/null +++ b/odb/source.c @@ -0,0 +1,28 @@ +#include "git-compat-util.h" +#include "object-file.h" +#include "odb/source.h" +#include "packfile.h" + +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source *source; + + CALLOC_ARRAY(source, 1); + source->odb = odb; + source->local = local; + source->path = xstrdup(path); + source->loose = odb_source_loose_new(source); + source->packfiles = packfile_store_new(source); + + return source; +} + +void odb_source_free(struct odb_source *source) +{ + free(source->path); + odb_source_loose_free(source->loose); + packfile_store_free(source->packfiles); + free(source); +} diff --git a/odb/source.h b/odb/source.h new file mode 100644 index 0000000000..391d6d1e38 --- /dev/null +++ b/odb/source.h @@ -0,0 +1,60 @@ +#ifndef ODB_SOURCE_H +#define ODB_SOURCE_H + +/* + * The source is the part of the object database that stores the actual + * objects. It thus encapsulates the logic to read and write the specific + * on-disk format. An object database can have multiple sources: + * + * - The primary source, which is typically located in "$GIT_DIR/objects". + * This is where new objects are usually written to. + * + * - Alternate sources, which are configured via "objects/info/alternates" or + * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These + * alternate sources are only used to read objects. + */ +struct odb_source { + struct odb_source *next; + + /* Object database that owns this object source. */ + struct object_database *odb; + + /* Private state for loose objects. */ + struct odb_source_loose *loose; + + /* Should only be accessed directly by packfile.c and midx.c. 
*/ + struct packfile_store *packfiles; + + /* + * Figure out whether this is the local source of the owning + * repository, which would typically be its ".git/objects" directory. + * This local object directory is usually where objects would be + * written to. + */ + bool local; + + /* + * This object store is ephemeral, so there is no need to fsync. + */ + int will_destroy; + + /* + * Path to the source. If this is a relative path, it is relative to + * the current working directory. + */ + char *path; +}; + +/* + * Allocate and initialize a new source for the given object database located + * at `path`. `local` indicates whether or not the source is the local and thus + * primary object source of the object database. + */ +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local); + +/* Free the object database source, releasing all associated resources. */ +void odb_source_free(struct odb_source *source); + +#endif -- GitLab From 1eac14ad1ce69097ae47311204476fe484c3fef8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 09:52:26 +0100 Subject: [PATCH 094/110] files backend --- Makefile | 1 + builtin/cat-file.c | 2 +- builtin/fast-import.c | 6 +++--- builtin/grep.c | 2 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 8 ++++---- commit-graph.c | 2 +- http.c | 2 +- loose.c | 18 +++++++++--------- meson.build | 1 + midx.c | 18 +++++++++--------- object-file.c | 24 ++++++++++++------------ odb.c | 14 +++++++------- odb/source-files.c | 23 +++++++++++++++++++++++ odb/source-files.h | 24 ++++++++++++++++++++++++ odb/source.c | 6 ++---- odb/source.h | 9 ++++----- odb/streaming.c | 2 +- packfile.c | 16 ++++++++-------- packfile.h | 4 ++-- 20 files changed, 115 insertions(+), 69 deletions(-) create mode 100644 odb/source-files.c create mode 100644 odb/source-files.h diff --git a/Makefile b/Makefile index 25d6e8a5fc..f3fc3d99ea 100644 --- a/Makefile +++ b/Makefile @@ -1202,6 +1202,7 @@ LIB_OBJS += 
object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o LIB_OBJS += odb/source.o +LIB_OBJS += odb/source-files.o LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b771a6c973..3bdd6ed0e1 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -875,7 +875,7 @@ static void batch_each_object(struct batch_options *opt, struct object_info oi = { 0 }; for (source = the_repository->objects->sources; source; source = source->next) { - int ret = packfile_store_for_each_object(source->packfiles, &oi, + int ret = packfile_store_for_each_object(source->files->packed, &oi, batch_one_object_oi, &payload, flags); if (ret) break; diff --git a/builtin/fast-import.c b/builtin/fast-import.c index b8a7757cfd..627dcbf4f3 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -900,7 +900,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = packfile_store_load_pack(pack_data->repo->objects->sources->packfiles, + new_p = packfile_store_load_pack(pack_data->repo->objects->sources->files->packed, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); @@ -982,7 +982,7 @@ static int store_object( } for (source = the_repository->objects->sources; source; source = source->next) { - if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + if (!packfile_list_find_oid(packfile_store_get_packs(source->files->packed), &oid)) continue; e->type = type; e->pack_id = MAX_PACK_ID; @@ -1187,7 +1187,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) } for (source = the_repository->objects->sources; source; source = source->next) { - if (!packfile_list_find_oid(packfile_store_get_packs(source->packfiles), &oid)) + if (!packfile_list_find_oid(packfile_store_get_packs(source->files->packed), &oid)) continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; 
diff --git a/builtin/grep.c b/builtin/grep.c index 5b8b87b1ac..c8d0e51415 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1219,7 +1219,7 @@ int cmd_grep(int argc, odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) - packfile_store_prepare(source->packfiles); + packfile_store_prepare(source->files->packed); } start_threads(&opt); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index b67fb0256c..f0cce534b2 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1638,7 +1638,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, hash, "idx", 1); if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->sources->packfiles, + packfile_store_load_pack(the_repository->objects->sources->files->packed, final_index_name, 0); if (!from_stdin) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index df7a7cdd8c..fc9bf1ca6c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1532,7 +1532,7 @@ static int want_cruft_object_mtime(struct repository *r, struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + struct packed_git **cache = packfile_store_get_kept_pack_cache(source->files->packed, flags); for (; *cache; cache++) { struct packed_git *p = *cache; @@ -1754,11 +1754,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } for (source = the_repository->objects->sources; source; source = source->next) { - for (e = source->packfiles->packs.head; e; e = e->next) { + for (e = source->files->packed->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&source->packfiles->packs, p); + 
packfile_list_prepend(&source->files->packed->packs, p); if (want != -1) return want; } @@ -4339,7 +4339,7 @@ static void add_objects_in_unpacked_packs(void) if (!source->local) continue; - if (packfile_store_for_each_object(source->packfiles, &oi, + if (packfile_store_for_each_object(source->files->packed, &oi, add_object_in_unpacked_pack, NULL, ODB_FOR_EACH_OBJECT_PACK_ORDER | ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | diff --git a/commit-graph.c b/commit-graph.c index e7f93c4b8b..056dbc4ac0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1987,7 +1987,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) odb_prepare_alternates(ctx->r->objects); for (source = ctx->r->objects->sources; source; source = source->next) - packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi, + packfile_store_for_each_object(source->files->packed, &oi, add_packed_commits_oi, ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); if (ctx->progress_done < ctx->approx_nr_objects) diff --git a/http.c b/http.c index 7815f144de..b44f493919 100644 --- a/http.c +++ b/http.c @@ -2544,7 +2544,7 @@ void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->sources->packfiles, p); + packfile_store_add_pack(the_repository->objects->sources->files->packed, p); } struct http_pack_request *new_http_pack_request( diff --git a/loose.c b/loose.c index 56cf64b648..c921d46b94 100644 --- a/loose.c +++ b/loose.c @@ -49,13 +49,13 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->loose->map; + struct loose_object_map *map = source->files->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - 
oidtree_insert(source->loose->cache, compat_oid); + oidtree_insert(source->files->loose->cache, compat_oid); return inserted; } @@ -65,11 +65,11 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->loose->map) - loose_object_map_init(&source->loose->map); - if (!source->loose->cache) { - ALLOC_ARRAY(source->loose->cache, 1); - oidtree_init(source->loose->cache); + if (!source->files->loose->map) + loose_object_map_init(&source->files->loose->map); + if (!source->files->loose->cache) { + ALLOC_ARRAY(source->files->loose->cache, 1); + oidtree_init(source->files->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); @@ -125,7 +125,7 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat; + kh_oid_map_t *map = repo->objects->sources->files->loose->map->to_compat; struct lock_file lock; int fd; khiter_t iter; @@ -231,7 +231,7 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->loose->map; + struct loose_object_map *loose_map = source->files->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? 
diff --git a/meson.build b/meson.build index 63dedbf141..c8a8b3882c 100644 --- a/meson.build +++ b/meson.build @@ -398,6 +398,7 @@ libgit_sources = [ 'object.c', 'odb.c', 'odb/source.c', + 'odb/source-files.c', 'odb/streaming.c', 'oid-array.c', 'oidmap.c', diff --git a/midx.c b/midx.c index 59f6228360..bb726ee0c8 100644 --- a/midx.c +++ b/midx.c @@ -101,8 +101,8 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - packfile_store_prepare(source->packfiles); - return source->packfiles->midx; + packfile_store_prepare(source->files->packed); + return source->files->packed->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -461,7 +461,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(m->source->packfiles, + p = packfile_store_load_pack(m->source->files->packed, pack_name.buf, m->source->local); strbuf_release(&pack_name); @@ -731,12 +731,12 @@ int prepare_multi_pack_index_one(struct odb_source *source) if (!r->settings.core_multi_pack_index) return 0; - if (source->packfiles->midx) + if (source->files->packed->midx) return 1; - source->packfiles->midx = load_multi_pack_index(source); + source->files->packed->midx = load_multi_pack_index(source); - return !!source->packfiles->midx; + return !!source->files->packed->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -825,9 +825,9 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->packfiles->midx) - close_midx(source->packfiles->midx); - source->packfiles->midx = NULL; + if (source->files->packed->midx) + close_midx(source->files->packed->midx); + source->files->packed->midx = NULL; } } diff --git a/object-file.c b/object-file.c index 
47a6ef7ab2..471fc85758 100644 --- a/object-file.c +++ b/object-file.c @@ -220,7 +220,7 @@ static void *odb_source_loose_map_object(struct odb_source *source, unsigned long *size) { const char *p; - int fd = open_loose_object(source->loose, oid, &p); + int fd = open_loose_object(source->files->loose, oid, &p); if (fd < 0) return NULL; @@ -424,7 +424,7 @@ static int read_object_info_from_path(struct odb_source *source, */ if (!oi->typep && !oi->sizep && !oi->contentp) { if (!oi->disk_sizep && !oi->mtimep && (flags & OBJECT_INFO_QUICK)) { - status = quick_has_loose(source->loose, oid) ? 0 : -1; + status = quick_has_loose(source->files->loose, oid) ? 0 : -1; if (!status) oi->whence = OI_LOOSE; return status; @@ -1871,31 +1871,31 @@ struct oidtree *odb_source_loose_cache(struct odb_source *source, { int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->loose->subdir_seen[0]); + size_t word_bits = bitsizeof(source->files->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen)) + (size_t) subdir_nr >= bitsizeof(source->files->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->loose->subdir_seen[word_index]; + bitmap = &source->files->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->loose->cache; - if (!source->loose->cache) { - ALLOC_ARRAY(source->loose->cache, 1); - oidtree_init(source->loose->cache); + return source->files->loose->cache; + if (!source->files->loose->cache) { + ALLOC_ARRAY(source->files->loose->cache, 1); + oidtree_init(source->files->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - source->loose->cache); + source->files->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return 
source->loose->cache; + return source->files->loose->cache; } static void odb_source_loose_clear_cache(struct odb_source_loose *loose) @@ -1908,7 +1908,7 @@ static void odb_source_loose_clear_cache(struct odb_source_loose *loose) void odb_source_loose_reprepare(struct odb_source *source) { - odb_source_loose_clear_cache(source->loose); + odb_source_loose_clear_cache(source->files->loose); } static int check_stream_oid(git_zstream *stream, diff --git a/odb.c b/odb.c index b8f769979e..35898a9316 100644 --- a/odb.c +++ b/odb.c @@ -694,7 +694,7 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) { - if (!packfile_store_read_object_info(source->packfiles, real, oi, flags) || + if (!packfile_store_read_object_info(source->files->packed, real, oi, flags) || !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; } @@ -703,7 +703,7 @@ static int do_oid_object_info_extended(struct object_database *odb, if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); for (source = odb->sources; source; source = source->next) - if (!packfile_store_read_object_info(source->packfiles, real, oi, flags)) + if (!packfile_store_read_object_info(source->files->packed, real, oi, flags)) return 0; } @@ -965,7 +965,7 @@ int odb_freshen_object(struct object_database *odb, odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) { - if (packfile_store_freshen_object(source->packfiles, oid)) + if (packfile_store_freshen_object(source->files->packed, oid)) return 1; if (odb_source_loose_freshen_object(source, oid)) @@ -994,7 +994,7 @@ int odb_for_each_object(struct object_database *odb, return ret; } - ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags); + ret = packfile_store_for_each_object(source->files->packed, oi, cb, cb_data, flags); if (ret) return ret; } @@ -1010,7 +1010,7 @@ unsigned 
long odb_count_objects(struct object_database *odb, odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) { - count += packfile_store_count_objects(source->packfiles, flags); + count += packfile_store_count_objects(source->files->packed, flags); if (flags & ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED) count += odb_source_loose_count_objects(source, flags); } @@ -1108,7 +1108,7 @@ void odb_close(struct object_database *o) { struct odb_source *source; for (source = o->sources; source; source = source->next) - packfile_store_close(source->packfiles); + packfile_store_close(source->files->packed); close_commit_graph(o); } @@ -1166,7 +1166,7 @@ void odb_reprepare(struct object_database *o) for (source = o->sources; source; source = source->next) { odb_source_loose_reprepare(source); - packfile_store_reprepare(source->packfiles); + packfile_store_reprepare(source->files->packed); } o->approximate_object_count_valid = 0; diff --git a/odb/source-files.c b/odb/source-files.c new file mode 100644 index 0000000000..cbdaa6850f --- /dev/null +++ b/odb/source-files.c @@ -0,0 +1,23 @@ +#include "git-compat-util.h" +#include "object-file.h" +#include "odb/source-files.h" +#include "packfile.h" + +void odb_source_files_free(struct odb_source_files *files) +{ + if (!files) + return; + odb_source_loose_free(files->loose); + packfile_store_free(files->packed); + free(files); +} + +struct odb_source_files *odb_source_files_new(struct odb_source *source) +{ + struct odb_source_files *files; + CALLOC_ARRAY(files, 1); + files->source = source; + files->loose = odb_source_loose_new(source); + files->packed = packfile_store_new(source); + return files; +} diff --git a/odb/source-files.h b/odb/source-files.h new file mode 100644 index 0000000000..2f015dae84 --- /dev/null +++ b/odb/source-files.h @@ -0,0 +1,24 @@ +#ifndef ODB_FILES_H +#define ODB_FILES_H + +struct odb_source_loose; +struct odb_source; +struct packfile_store; + +/* + * The files object database 
source uses a combination of loose objects and + * packfiles. It is the default backend used by Git to store objects. + */ +struct odb_source_files { + struct odb_source *source; + struct odb_source_loose *loose; + struct packfile_store *packed; +}; + +/* Allocate and initialize a new object source. */ +struct odb_source_files *odb_source_files_new(struct odb_source *source); + +/* Free the object source and release all associated resources. */ +void odb_source_files_free(struct odb_source_files *files); + +#endif diff --git a/odb/source.c b/odb/source.c index 7fc89806f9..9d7fd19f45 100644 --- a/odb/source.c +++ b/odb/source.c @@ -13,8 +13,7 @@ struct odb_source *odb_source_new(struct object_database *odb, source->odb = odb; source->local = local; source->path = xstrdup(path); - source->loose = odb_source_loose_new(source); - source->packfiles = packfile_store_new(source); + source->files = odb_source_files_new(source); return source; } @@ -22,7 +21,6 @@ struct odb_source *odb_source_new(struct object_database *odb, void odb_source_free(struct odb_source *source) { free(source->path); - odb_source_loose_free(source->loose); - packfile_store_free(source->packfiles); + odb_source_files_free(source->files); free(source); } diff --git a/odb/source.h b/odb/source.h index 391d6d1e38..1c34265189 100644 --- a/odb/source.h +++ b/odb/source.h @@ -1,6 +1,8 @@ #ifndef ODB_SOURCE_H #define ODB_SOURCE_H +#include "odb/source-files.h" + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -19,11 +21,8 @@ struct odb_source { /* Object database that owns this object source. */ struct object_database *odb; - /* Private state for loose objects. */ - struct odb_source_loose *loose; - - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; + /* The backend used to store objects. 
*/ + struct odb_source_files *files; /* * Figure out whether this is the local source of the owning diff --git a/odb/streaming.c b/odb/streaming.c index 4a4474f891..26b0a1a0f5 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -187,7 +187,7 @@ static int istream_source(struct odb_read_stream **out, odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) { - if (!packfile_store_read_object_stream(out, source->packfiles, oid) || + if (!packfile_store_read_object_stream(out, source->files->packed, oid) || !odb_source_loose_read_object_stream(out, source, oid)) return 0; } diff --git a/packfile.c b/packfile.c index 50a08a157d..ae6b6fb9df 100644 --- a/packfile.c +++ b/packfile.c @@ -363,7 +363,7 @@ static int unuse_one_window(struct object_database *odb) struct pack_window *lru_w = NULL, *lru_l = NULL; for (source = odb->sources; source; source = source->next) - for (e = source->packfiles->packs.head; e; e = e->next) + for (e = source->files->packed->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { @@ -537,7 +537,7 @@ static int close_one_pack(struct repository *r) int accept_windows_inuse = 1; for (source = r->objects->sources; source; source = source->next) { - for (e = source->packfiles->packs.head; e; e = e->next) { + for (e = source->files->packed->packs.head; e; e = e->next) { if (e->pack->pack_fd == -1) continue; find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); @@ -990,10 +990,10 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->source->packfiles->midx && - midx_contains_pack(data->source->packfiles->midx, file_name))) { + !(data->source->files->packed->midx && + midx_contains_pack(data->source->files->packed->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->source->packfiles, + 
packfile_store_load_pack(data->source->files->packed, trimmed_path, data->source->local); free(trimmed_path); } @@ -1235,7 +1235,7 @@ const struct packed_git *has_packed_and_bad(struct repository *r, for (source = r->objects->sources; source; source = source->next) { struct packfile_list_entry *e; - for (e = source->packfiles->packs.head; e; e = e->next) + for (e = source->files->packed->packs.head; e; e = e->next) if (oidset_contains(&e->pack->bad_objects, oid)) return e->pack; } @@ -2240,7 +2240,7 @@ int has_object_pack(struct repository *r, const struct object_id *oid) odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - int ret = find_pack_entry(source->packfiles, oid, &e); + int ret = find_pack_entry(source->files->packed, oid, &e); if (ret) return ret; } @@ -2257,7 +2257,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, for (source = r->objects->sources; source; source = source->next) { struct packed_git **cache; - cache = packfile_store_get_kept_pack_cache(source->packfiles, flags); + cache = packfile_store_get_kept_pack_cache(source->files->packed, flags); for (; *cache; cache++) { struct packed_git *p = *cache; diff --git a/packfile.h b/packfile.h index 684557ed05..a14f02dde6 100644 --- a/packfile.h +++ b/packfile.h @@ -208,7 +208,7 @@ static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct odb_prepare_alternates(repo->objects); for (struct odb_source *source = repo->objects->sources; source; source = source->next) { - struct packfile_list_entry *entry = packfile_store_get_packs(source->packfiles); + struct packfile_list_entry *entry = packfile_store_get_packs(source->files->packed); if (!entry) continue; data.source = source; @@ -228,7 +228,7 @@ static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data * return; for (source = data->source->next; source; source = source->next) { - struct packfile_list_entry *entry = 
packfile_store_get_packs(source->packfiles); + struct packfile_list_entry *entry = packfile_store_get_packs(source->files->packed); if (!entry) continue; data->source = source; -- GitLab From 405d8a3b1619448dccc62c8b90622d63d1357c51 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 09:55:12 +0100 Subject: [PATCH 095/110] reverse source creation --- builtin/cat-file.c | 3 ++- builtin/fast-import.c | 12 ++++++++---- builtin/grep.c | 6 ++++-- builtin/index-pack.c | 8 +++++--- builtin/pack-objects.c | 13 +++++++++---- commit-graph.c | 6 ++++-- http.c | 3 ++- loose.c | 23 ++++++++++++++--------- midx.c | 26 +++++++++++++++----------- object-file.c | 28 ++++++++++++++++------------ odb.c | 30 +++++++++++++++++++++--------- odb/source-files.c | 14 ++++++++++---- odb/source-files.h | 18 +++++++++++++++--- odb/source.c | 26 +++++++++++++++++++------- odb/source.h | 31 +++++++++++++++++++++++++------ odb/streaming.c | 3 ++- packfile.c | 26 +++++++++++++++++--------- packfile.h | 7 +++++-- 18 files changed, 193 insertions(+), 90 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 3bdd6ed0e1..d8bb8923bc 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -875,7 +875,8 @@ static void batch_each_object(struct batch_options *opt, struct object_info oi = { 0 }; for (source = the_repository->objects->sources; source; source = source->next) { - int ret = packfile_store_for_each_object(source->files->packed, &oi, + struct odb_source_files *files = odb_source_files_downcast(source); + int ret = packfile_store_for_each_object(files->packed, &oi, batch_one_object_oi, &payload, flags); if (ret) break; diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 627dcbf4f3..a41f95191e 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -875,6 +875,7 @@ static void end_packfile(void) running = 1; clear_delta_base_cache(); if (object_count) { + struct odb_source_files *files = 
odb_source_files_downcast(pack_data->repo->objects->sources); struct packed_git *new_p; struct object_id cur_pack_oid; char *idx_name; @@ -900,8 +901,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = packfile_store_load_pack(pack_data->repo->objects->sources->files->packed, - idx_name, 1); + new_p = packfile_store_load_pack(files->packed, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); all_packs[pack_id] = new_p; @@ -982,7 +982,9 @@ static int store_object( } for (source = the_repository->objects->sources; source; source = source->next) { - if (!packfile_list_find_oid(packfile_store_get_packs(source->files->packed), &oid)) + struct odb_source_files *files = odb_source_files_downcast(source); + + if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid)) continue; e->type = type; e->pack_id = MAX_PACK_ID; @@ -1187,7 +1189,9 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) } for (source = the_repository->objects->sources; source; source = source->next) { - if (!packfile_list_find_oid(packfile_store_get_packs(source->files->packed), &oid)) + struct odb_source_files *files = odb_source_files_downcast(source); + + if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid)) continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; diff --git a/builtin/grep.c b/builtin/grep.c index c8d0e51415..61379909b8 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1218,8 +1218,10 @@ int cmd_grep(int argc, struct odb_source *source; odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) - packfile_store_prepare(source->files->packed); + for (source = the_repository->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_prepare(files->packed); + } 
} start_threads(&opt); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f0cce534b2..d1e47279a8 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1637,9 +1637,11 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->sources->files->packed, - final_index_name, 0); + if (do_fsck_object && startup_info->have_repository) { + struct odb_source_files *files = + odb_source_files_downcast(the_repository->objects->sources); + packfile_store_load_pack(files->packed, final_index_name, 0); + } if (!from_stdin) { printf("%s\n", hash_to_hex(hash)); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index fc9bf1ca6c..4522a0eb4e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1532,7 +1532,8 @@ static int want_cruft_object_mtime(struct repository *r, struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - struct packed_git **cache = packfile_store_get_kept_pack_cache(source->files->packed, flags); + struct odb_source_files *files = odb_source_files_downcast(source); + struct packed_git **cache = packfile_store_get_kept_pack_cache(files->packed, flags); for (; *cache; cache++) { struct packed_git *p = *cache; @@ -1754,11 +1755,13 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } for (source = the_repository->objects->sources; source; source = source->next) { - for (e = source->files->packed->packs.head; e; e = e->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + + for (e = files->packed->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&source->files->packed->packs, p); + 
packfile_list_prepend(&files->packed->packs, p); if (want != -1) return want; } @@ -4336,10 +4339,12 @@ static void add_objects_in_unpacked_packs(void) odb_prepare_alternates(to_pack.repo->objects); for (source = to_pack.repo->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (!source->local) continue; - if (packfile_store_for_each_object(source->files->packed, &oi, + if (packfile_store_for_each_object(files->packed, &oi, add_object_in_unpacked_pack, NULL, ODB_FOR_EACH_OBJECT_PACK_ORDER | ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | diff --git a/commit-graph.c b/commit-graph.c index 056dbc4ac0..c5c9ce2824 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1986,9 +1986,11 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) ctx->approx_nr_objects); odb_prepare_alternates(ctx->r->objects); - for (source = ctx->r->objects->sources; source; source = source->next) - packfile_store_for_each_object(source->files->packed, &oi, add_packed_commits_oi, + for (source = ctx->r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi, ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + } if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); diff --git a/http.c b/http.c index b44f493919..8ea1b9d1f6 100644 --- a/http.c +++ b/http.c @@ -2543,8 +2543,9 @@ int finish_http_pack_request(struct http_pack_request *preq) void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->sources->files->packed, p); + packfile_store_add_pack(files->packed, p); } struct http_pack_request 
*new_http_pack_request( diff --git a/loose.c b/loose.c index c921d46b94..07333be696 100644 --- a/loose.c +++ b/loose.c @@ -3,6 +3,7 @@ #include "path.h" #include "object-file.h" #include "odb.h" +#include "odb/source-files.h" #include "hex.h" #include "repository.h" #include "wrapper.h" @@ -49,27 +50,29 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->files->loose->map; + struct odb_source_files *files = odb_source_files_downcast(source); + struct loose_object_map *map = files->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(source->files->loose->cache, compat_oid); + oidtree_insert(files->loose->cache, compat_oid); return inserted; } static int load_one_loose_object_map(struct repository *repo, struct odb_source *source) { + struct odb_source_files *files = odb_source_files_downcast(source); struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->files->loose->map) - loose_object_map_init(&source->files->loose->map); - if (!source->files->loose->cache) { - ALLOC_ARRAY(source->files->loose->cache, 1); - oidtree_init(source->files->loose->cache); + if (!files->loose->map) + loose_object_map_init(&files->loose->map); + if (!files->loose->cache) { + ALLOC_ARRAY(files->loose->cache, 1); + oidtree_init(files->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); @@ -125,7 +128,8 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->files->loose->map->to_compat; + struct odb_source_files *files = odb_source_files_downcast(repo->objects->sources); + kh_oid_map_t *map = files->loose->map->to_compat; struct lock_file lock; int fd; khiter_t 
iter; @@ -231,7 +235,8 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->files->loose->map; + struct odb_source_files *files = odb_source_files_downcast(source); + struct loose_object_map *loose_map = files->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? diff --git a/midx.c b/midx.c index bb726ee0c8..aecdaebd36 100644 --- a/midx.c +++ b/midx.c @@ -101,8 +101,9 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - packfile_store_prepare(source->files->packed); - return source->files->packed->midx; + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_prepare(files->packed); + return files->packed->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -449,6 +450,7 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { + struct odb_source_files *files = odb_source_files_downcast(m->source); struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -459,10 +461,10 @@ int prepare_midx_pack(struct multi_pack_index *m, if (m->packs[pack_int_id]) return 0; - strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, + strbuf_addf(&pack_name, "%s/pack/%s", files->base.path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(m->source->files->packed, - pack_name.buf, m->source->local); + p = packfile_store_load_pack(files->packed, + pack_name.buf, files->base.local); strbuf_release(&pack_name); if (!p) { @@ -725,18 +727,19 @@ int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) int prepare_multi_pack_index_one(struct odb_source *source) { + struct odb_source_files *files = odb_source_files_downcast(source); struct repository 
*r = source->odb->repo; prepare_repo_settings(r); if (!r->settings.core_multi_pack_index) return 0; - if (source->files->packed->midx) + if (files->packed->midx) return 1; - source->files->packed->midx = load_multi_pack_index(source); + files->packed->midx = load_multi_pack_index(source); - return !!source->files->packed->midx; + return !!files->packed->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -825,9 +828,10 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->files->packed->midx) - close_midx(source->files->packed->midx); - source->files->packed->midx = NULL; + struct odb_source_files *files = odb_source_files_downcast(source); + if (files->packed->midx) + close_midx(files->packed->midx); + files->packed->midx = NULL; } } diff --git a/object-file.c b/object-file.c index 471fc85758..ce8450a68e 100644 --- a/object-file.c +++ b/object-file.c @@ -219,8 +219,9 @@ static void *odb_source_loose_map_object(struct odb_source *source, const struct object_id *oid, unsigned long *size) { + struct odb_source_files *files = odb_source_files_downcast(source); const char *p; - int fd = open_loose_object(source->files->loose, oid, &p); + int fd = open_loose_object(files->loose, oid, &p); if (fd < 0) return NULL; @@ -401,6 +402,7 @@ static int read_object_info_from_path(struct odb_source *source, struct object_info *oi, unsigned flags) { + struct odb_source_files *files = odb_source_files_downcast(source); int status = 0; int fd; unsigned long mapsize; @@ -424,7 +426,7 @@ static int read_object_info_from_path(struct odb_source *source, */ if (!oi->typep && !oi->sizep && !oi->contentp) { if (!oi->disk_sizep && !oi->mtimep && (flags & OBJECT_INFO_QUICK)) { - status = quick_has_loose(source->files->loose, oid) ? 0 : -1; + status = quick_has_loose(files->loose, oid) ? 
0 : -1; if (!status) oi->whence = OI_LOOSE; return status; @@ -1869,33 +1871,34 @@ static int append_loose_object(const struct object_id *oid, struct oidtree *odb_source_loose_cache(struct odb_source *source, const struct object_id *oid) { + struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->files->loose->subdir_seen[0]); + size_t word_bits = bitsizeof(files->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->files->loose->subdir_seen)) + (size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->files->loose->subdir_seen[word_index]; + bitmap = &files->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->files->loose->cache; - if (!source->files->loose->cache) { - ALLOC_ARRAY(source->files->loose->cache, 1); - oidtree_init(source->files->loose->cache); + return files->loose->cache; + if (!files->loose->cache) { + ALLOC_ARRAY(files->loose->cache, 1); + oidtree_init(files->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - source->files->loose->cache); + files->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return source->files->loose->cache; + return files->loose->cache; } static void odb_source_loose_clear_cache(struct odb_source_loose *loose) @@ -1908,7 +1911,8 @@ static void odb_source_loose_clear_cache(struct odb_source_loose *loose) void odb_source_loose_reprepare(struct odb_source *source) { - odb_source_loose_clear_cache(source->files->loose); + struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_loose_clear_cache(files->loose); } static int check_stream_oid(git_zstream *stream, 
diff --git a/odb.c b/odb.c index 35898a9316..64f3f8d5b2 100644 --- a/odb.c +++ b/odb.c @@ -694,7 +694,8 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) { - if (!packfile_store_read_object_info(source->files->packed, real, oi, flags) || + struct odb_source_files *files = odb_source_files_downcast(source); + if (!packfile_store_read_object_info(files->packed, real, oi, flags) || !odb_source_loose_read_object_info(source, real, oi, flags)) return 0; } @@ -702,9 +703,11 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - for (source = odb->sources; source; source = source->next) - if (!packfile_store_read_object_info(source->files->packed, real, oi, flags)) + for (source = odb->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (!packfile_store_read_object_info(files->packed, real, oi, flags)) return 0; + } } /* @@ -965,7 +968,9 @@ int odb_freshen_object(struct object_database *odb, odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) { - if (packfile_store_freshen_object(source->files->packed, oid)) + struct odb_source_files *files = odb_source_files_downcast(source); + + if (packfile_store_freshen_object(files->packed, oid)) return 1; if (odb_source_loose_freshen_object(source, oid)) @@ -985,6 +990,8 @@ int odb_for_each_object(struct object_database *odb, odb_prepare_alternates(odb); for (struct odb_source *source = odb->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) continue; @@ -994,7 +1001,7 @@ int odb_for_each_object(struct object_database *odb, return ret; } - ret 
= packfile_store_for_each_object(source->files->packed, oi, cb, cb_data, flags); + ret = packfile_store_for_each_object(files->packed, oi, cb, cb_data, flags); if (ret) return ret; } @@ -1010,7 +1017,9 @@ unsigned long odb_count_objects(struct object_database *odb, odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) { - count += packfile_store_count_objects(source->files->packed, flags); + struct odb_source_files *files = odb_source_files_downcast(source); + + count += packfile_store_count_objects(files->packed, flags); if (flags & ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED) count += odb_source_loose_count_objects(source, flags); } @@ -1107,8 +1116,10 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - for (source = o->sources; source; source = source->next) - packfile_store_close(source->files->packed); + for (source = o->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_close(files->packed); + } close_commit_graph(o); } @@ -1165,8 +1176,9 @@ void odb_reprepare(struct object_database *o) odb_prepare_alternates(o); for (source = o->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); odb_source_loose_reprepare(source); - packfile_store_reprepare(source->files->packed); + packfile_store_reprepare(files->packed); } o->approximate_object_count_valid = 0; diff --git a/odb/source-files.c b/odb/source-files.c index cbdaa6850f..a43a197157 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "object-file.h" +#include "odb/source.h" #include "odb/source-files.h" #include "packfile.h" @@ -9,15 +10,20 @@ void odb_source_files_free(struct odb_source_files *files) return; odb_source_loose_free(files->loose); packfile_store_free(files->packed); + 
odb_source_release(&files->base); free(files); } -struct odb_source_files *odb_source_files_new(struct odb_source *source) +struct odb_source_files *odb_source_files_new(struct object_database *odb, + const char *path, + bool local) { struct odb_source_files *files; + CALLOC_ARRAY(files, 1); - files->source = source; - files->loose = odb_source_loose_new(source); - files->packed = packfile_store_new(source); + odb_source_init(&files->base, odb, path, local); + files->loose = odb_source_loose_new(&files->base); + files->packed = packfile_store_new(&files->base); + return files; } diff --git a/odb/source-files.h b/odb/source-files.h index 2f015dae84..672d2b31ca 100644 --- a/odb/source-files.h +++ b/odb/source-files.h @@ -1,8 +1,9 @@ #ifndef ODB_FILES_H #define ODB_FILES_H +#include "odb/source.h" + struct odb_source_loose; -struct odb_source; struct packfile_store; /* @@ -10,15 +11,26 @@ struct packfile_store; * packfiles. It is the default backend used by Git to store objects. */ struct odb_source_files { - struct odb_source *source; + struct odb_source base; struct odb_source_loose *loose; struct packfile_store *packed; }; /* Allocate and initialize a new object source. */ -struct odb_source_files *odb_source_files_new(struct odb_source *source); +struct odb_source_files *odb_source_files_new(struct object_database *odb, + const char *path, + bool local); /* Free the object source and release all associated resources. */ void odb_source_files_free(struct odb_source_files *files); +/* + * Cast the given object database source to the files backend. This will cause + * a BUG in case the source doesn't use this backend.
+ */ +static inline struct odb_source_files *odb_source_files_downcast(struct odb_source *source) +{ + return (struct odb_source_files *) source; +} + #endif diff --git a/odb/source.c b/odb/source.c index 9d7fd19f45..d8b2176a94 100644 --- a/odb/source.c +++ b/odb/source.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "object-file.h" +#include "odb/source-files.h" #include "odb/source.h" #include "packfile.h" @@ -7,20 +8,31 @@ struct odb_source *odb_source_new(struct object_database *odb, const char *path, bool local) { - struct odb_source *source; + return &odb_source_files_new(odb, path, local)->base; +} - CALLOC_ARRAY(source, 1); +void odb_source_init(struct odb_source *source, + struct object_database *odb, + const char *path, + bool local) +{ source->odb = odb; source->local = local; source->path = xstrdup(path); - source->files = odb_source_files_new(source); - - return source; } void odb_source_free(struct odb_source *source) { + struct odb_source_files *files; + if (!source) + return; + files = odb_source_files_downcast(source); + odb_source_files_free(files); +} + +void odb_source_release(struct odb_source *source) +{ + if (!source) + return; free(source->path); - odb_source_files_free(source->files); - free(source); } diff --git a/odb/source.h b/odb/source.h index 1c34265189..e6698b73a3 100644 --- a/odb/source.h +++ b/odb/source.h @@ -1,8 +1,6 @@ #ifndef ODB_SOURCE_H #define ODB_SOURCE_H -#include "odb/source-files.h" - /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -21,9 +19,6 @@ struct odb_source { /* Object database that owns this object source. */ struct object_database *odb; - /* The backend used to store objects. */ - struct odb_source_files *files; - /* * Figure out whether this is the local source of the owning * repository, which would typically be its ".git/objects" directory. 
@@ -53,7 +48,31 @@ struct odb_source *odb_source_new(struct object_database *odb, const char *path, bool local); -/* Free the object database source, releasing all associated resources. */ +/* + * Initialize the source for the given object database located at `path`. + * `local` indicates whether or not the source is the local and thus primary + * object source of the object database. + * + * This function is only supposed to be called by specific object source + * implementations. + */ +void odb_source_init(struct odb_source *source, + struct object_database *odb, + const char *path, + bool local); + +/* + * Free the object database source, releasing all associated resources and + * freeing the structure itself. + */ void odb_source_free(struct odb_source *source); +/* + * Release the object database source, releasing all associated resources. + * + * This function is only supposed to be called by specific object source + * implementations. + */ +void odb_source_release(struct odb_source *source); + #endif diff --git a/odb/streaming.c b/odb/streaming.c index 26b0a1a0f5..19cda9407d 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -187,7 +187,8 @@ static int istream_source(struct odb_read_stream **out, odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) { - if (!packfile_store_read_object_stream(out, source->files->packed, oid) || + struct odb_source_files *files = odb_source_files_downcast(source); + if (!packfile_store_read_object_stream(out, files->packed, oid) || !odb_source_loose_read_object_stream(out, source, oid)) return 0; } diff --git a/packfile.c b/packfile.c index ae6b6fb9df..3167ab2298 100644 --- a/packfile.c +++ b/packfile.c @@ -362,9 +362,11 @@ static int unuse_one_window(struct object_database *odb) struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - for (source = odb->sources; source; source = source->next) - for (e = source->files->packed->packs.head; e; e = e->next) + for 
(source = odb->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + for (e = files->packed->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + } if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -537,7 +539,8 @@ static int close_one_pack(struct repository *r) int accept_windows_inuse = 1; for (source = r->objects->sources; source; source = source->next) { - for (e = source->files->packed->packs.head; e; e = e->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + for (e = files->packed->packs.head; e; e = e->next) { if (e->pack->pack_fd == -1) continue; find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); @@ -987,13 +990,14 @@ static void prepare_pack(const char *full_name, size_t full_name_len, const char *file_name, void *_data) { struct prepare_pack_data *data = (struct prepare_pack_data *)_data; + struct odb_source_files *files = odb_source_files_downcast(data->source); size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->source->files->packed->midx && - midx_contains_pack(data->source->files->packed->midx, file_name))) { + !(files->packed->midx && + midx_contains_pack(files->packed->midx, file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->source->files->packed, + packfile_store_load_pack(files->packed, trimmed_path, data->source->local); free(trimmed_path); } @@ -1234,8 +1238,10 @@ const struct packed_git *has_packed_and_bad(struct repository *r, struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); struct packfile_list_entry *e; - for (e = source->files->packed->packs.head; e; e = e->next) + + for (e = files->packed->packs.head; e; e = e->next) if (oidset_contains(&e->pack->bad_objects, oid)) return e->pack; } @@ -2240,7 +2246,8 @@ 
int has_object_pack(struct repository *r, const struct object_id *oid) odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - int ret = find_pack_entry(source->files->packed, oid, &e); + struct odb_source_files *files = odb_source_files_downcast(source); + int ret = find_pack_entry(files->packed, oid, &e); if (ret) return ret; } @@ -2255,9 +2262,10 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, struct pack_entry e; for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); struct packed_git **cache; - cache = packfile_store_get_kept_pack_cache(source->files->packed, flags); + cache = packfile_store_get_kept_pack_cache(files->packed, flags); for (; *cache; cache++) { struct packed_git *p = *cache; diff --git a/packfile.h b/packfile.h index a14f02dde6..78d5bf1794 100644 --- a/packfile.h +++ b/packfile.h @@ -4,6 +4,7 @@ #include "list.h" #include "object.h" #include "odb.h" +#include "odb/source-files.h" #include "oidset.h" #include "repository.h" #include "strmap.h" @@ -208,7 +209,8 @@ static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct odb_prepare_alternates(repo->objects); for (struct odb_source *source = repo->objects->sources; source; source = source->next) { - struct packfile_list_entry *entry = packfile_store_get_packs(source->files->packed); + struct odb_source_files *files = odb_source_files_downcast(source); + struct packfile_list_entry *entry = packfile_store_get_packs(files->packed); if (!entry) continue; data.source = source; @@ -228,7 +230,8 @@ static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data * return; for (source = data->source->next; source; source = source->next) { - struct packfile_list_entry *entry = packfile_store_get_packs(source->files->packed); + struct odb_source_files *files = odb_source_files_downcast(source); + struct 
packfile_list_entry *entry = packfile_store_get_packs(files->packed); if (!entry) continue; data->source = source; -- GitLab From 5b2b675c0354deea5ade887f862d9c9900ef55b4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 15:25:58 +0100 Subject: [PATCH 096/110] pluggable free function --- odb/source-files.c | 7 ++++--- odb/source-files.h | 3 --- odb/source.c | 4 +--- odb/source.h | 8 ++++++++ 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/odb/source-files.c b/odb/source-files.c index a43a197157..342a52d7d3 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -4,10 +4,9 @@ #include "odb/source-files.h" #include "packfile.h" -void odb_source_files_free(struct odb_source_files *files) +static void odb_source_files_free(struct odb_source *source) { - if (!files) - return; + struct odb_source_files *files = odb_source_files_downcast(source); odb_source_loose_free(files->loose); packfile_store_free(files->packed); odb_source_release(&files->base); @@ -25,5 +24,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->loose = odb_source_loose_new(&files->base); files->packed = packfile_store_new(&files->base); + files->base.free = odb_source_files_free; + return files; } diff --git a/odb/source-files.h b/odb/source-files.h index 672d2b31ca..e641870735 100644 --- a/odb/source-files.h +++ b/odb/source-files.h @@ -21,9 +21,6 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local); -/* Free the object source and release all associated resources. */ -void odb_source_files_free(struct odb_source_files *files); - /* * Cast the given object database source to the files backend. This will cause * a BUG in case the source doesn't use this backend.
diff --git a/odb/source.c b/odb/source.c index d8b2176a94..dbdc6cee9c 100644 --- a/odb/source.c +++ b/odb/source.c @@ -23,11 +23,9 @@ void odb_source_init(struct odb_source *source, void odb_source_free(struct odb_source *source) { - struct odb_source_files *files; if (!source) return; - files = odb_source_files_downcast(source); - odb_source_files_free(files); + source->free(source); } void odb_source_release(struct odb_source *source) diff --git a/odb/source.h b/odb/source.h index e6698b73a3..0ee3dd6b40 100644 --- a/odb/source.h +++ b/odb/source.h @@ -1,6 +1,8 @@ #ifndef ODB_SOURCE_H #define ODB_SOURCE_H +struct odb_source; + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -37,6 +39,12 @@ struct odb_source { * the current working directory. */ char *path; + + /* + * This callback is expected to free the underlying object database source and + * all associated resources. The function will never be called with a NULL pointer. 
+ */ + void (*free)(struct odb_source *source); }; /* -- GitLab From 2b9676fb7e93b033379a639f08b04077d09e05d0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 15:27:50 +0100 Subject: [PATCH 097/110] reprepare --- odb.c | 7 ++----- odb/source-files.c | 8 ++++++++ odb/source.h | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 64f3f8d5b2..e3f2a41249 100644 --- a/odb.c +++ b/odb.c @@ -1175,11 +1175,8 @@ void odb_reprepare(struct object_database *o) o->loaded_alternates = 0; odb_prepare_alternates(o); - for (source = o->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - odb_source_loose_reprepare(source); - packfile_store_reprepare(files->packed); - } + for (source = o->sources; source; source = source->next) + odb_source_reprepare(source); o->approximate_object_count_valid = 0; diff --git a/odb/source-files.c b/odb/source-files.c index 342a52d7d3..5af6808582 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -13,6 +13,13 @@ static void odb_source_files_free(struct odb_source *source) free(files); } +static void odb_source_files_reprepare(struct odb_source *source) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_loose_reprepare(&files->base); + packfile_store_reprepare(files->packed); +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -25,6 +32,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->packed = packfile_store_new(&files->base); files->base.free = odb_source_files_free; + files->base.reprepare = odb_source_files_reprepare; return files; } diff --git a/odb/source.h b/odb/source.h index 0ee3dd6b40..775aba6229 100644 --- a/odb/source.h +++ b/odb/source.h @@ -45,6 +45,13 @@ struct odb_source { * all associated resources. The function will never be called with a NULL pointer. 
*/ void (*free)(struct odb_source *source); + + /* + * This callback is expected to clear underlying caches of the object + * database source. The function is called when the repository has for + * example just been repacked so that new objects will become visible. + */ + void (*reprepare)(struct odb_source *source); }; /* @@ -83,4 +90,14 @@ void odb_source_free(struct odb_source *source); */ void odb_source_release(struct odb_source *source); +/* + * Reprepare the object database source and clear any caches. Depending on the + * backend used this may have the effect that concurrently-written objects + * become visible. + */ +static inline void odb_source_reprepare(struct odb_source *source) +{ + source->reprepare(source); +} + #endif -- GitLab From 59c71f3f365712a7d940441b1f06bfd478df92dc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 15:29:30 +0100 Subject: [PATCH 098/110] close source --- odb.c | 6 ++---- odb/source-files.c | 7 +++++++ odb/source.h | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/odb.c b/odb.c index e3f2a41249..b4b15b1697 100644 --- a/odb.c +++ b/odb.c @@ -1116,10 +1116,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - for (source = o->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - packfile_store_close(files->packed); - } + for (source = o->sources; source; source = source->next) + odb_source_close(source); close_commit_graph(o); } diff --git a/odb/source-files.c b/odb/source-files.c index 5af6808582..77b4219b61 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -13,6 +13,12 @@ static void odb_source_files_free(struct odb_source *source) free(files); } +static void odb_source_files_close(struct odb_source *source) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_close(files->packed); 
+} + static void odb_source_files_reprepare(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); @@ -32,6 +38,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->packed = packfile_store_new(&files->base); files->base.free = odb_source_files_free; + files->base.close = odb_source_files_close; files->base.reprepare = odb_source_files_reprepare; return files; diff --git a/odb/source.h b/odb/source.h index 775aba6229..99d9c92323 100644 --- a/odb/source.h +++ b/odb/source.h @@ -46,6 +46,14 @@ struct odb_source { */ void (*free)(struct odb_source *source); + /* + * This callback is expected to close any open resources, like for + * example file descriptors or connections. The source is expected to + * still be usable after it has been closed. Closed resources may need + * to be reopened in that case. + */ + void (*close)(struct odb_source *source); + /* * This callback is expected to clear underlying caches of the object * database source. The function is called when the repository has for @@ -90,6 +98,16 @@ void odb_source_free(struct odb_source *source); */ void odb_source_release(struct odb_source *source); +/* + * Close the object database source without releasing the underlying data. The + * source can still be used going forward, but it first needs to be reopened. + * This can be useful to reduce resource usage. + */ +static inline void odb_source_close(struct odb_source *source) +{ + source->close(source); +} + /* * Reprepare the object database source and clear any caches.
Depending on the * backend used this may have the effect that concurrently-written objects -- GitLab From b5ce24431a9b9b2e84dc506f91f0212336c450ad Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 29 Oct 2025 08:57:31 +0100 Subject: [PATCH 099/110] object info flags --- odb.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/odb.h b/odb.h index 93a999f310..e974eb2dcf 100644 --- a/odb.h +++ b/odb.h @@ -320,23 +320,29 @@ static inline bool object_info_is_blank_request(struct object_info *oi) */ #define OBJECT_INFO_INIT { 0 } -/* Invoke lookup_replace_object() on the given hash */ -#define OBJECT_INFO_LOOKUP_REPLACE 1 -/* Do not retry packed storage after checking packed and loose storage */ -#define OBJECT_INFO_QUICK 8 -/* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ -#define OBJECT_INFO_SKIP_FETCH_OBJECT 16 -/* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK - */ -#define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 3), + + /* + * Do not attempt to fetch the object if missing (even if fetch_is_missing is + * nonzero). + */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 4), + + /* Die if object corruption (not just an object being missing) was detected. */ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 5), -/* Die if object corruption (not just an object being missing) was detected. */ -#define OBJECT_INFO_DIE_IF_CORRUPT 32 + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. 
Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; /* * Read object info from the object database and populate the `object_info` -- GitLab From f602ce08973c44592261c5d9f24d69108dc19223 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 15:30:53 +0100 Subject: [PATCH 100/110] read object info --- odb.c | 14 +++++--------- odb.h | 3 +++ odb/source-files.c | 22 ++++++++++++++++++++++ odb/source.h | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 9 deletions(-) diff --git a/odb.c b/odb.c index b4b15b1697..a01921d19c 100644 --- a/odb.c +++ b/odb.c @@ -693,21 +693,17 @@ static int do_oid_object_info_extended(struct object_database *odb, struct odb_source *source; /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_info(files->packed, real, oi, flags) || - !odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, flags)) return 0; - } /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_info(files->packed, real, oi, flags)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, + flags | OBJECT_INFO_AFTER_REPREPARE)) return 0; - } } /* diff --git a/odb.h b/odb.h index e974eb2dcf..0b66c96338 100644 --- a/odb.h +++ b/odb.h @@ -328,6 +328,9 @@ enum object_info_flags { /* Do not reprepare object sources when the first lookup has failed. 
*/ OBJECT_INFO_QUICK = (1 << 3), + /* The initial lookup failed and the object sources have just been reprepared. */ + OBJECT_INFO_AFTER_REPREPARE = (1 << 2), + /* * Do not attempt to fetch the object if missing (even if fetch_is_missing is * nonzero). diff --git a/odb/source-files.c b/odb/source-files.c index 77b4219b61..1c97538f64 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -26,6 +26,27 @@ static void odb_source_files_reprepare(struct odb_source *source) packfile_store_reprepare(files->packed); } +static int odb_source_files_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + + if (!packfile_store_read_object_info(files->packed, oid, oi, flags)) + return 0; + + /* + * A reprepare doesn't cause new loose objects to show up, so we skip + * reading loose objects in that case. + */ + if (!(flags & OBJECT_INFO_AFTER_REPREPARE) && + !odb_source_loose_read_object_info(source, oid, oi, flags)) + return 0; + + return -1; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -40,6 +61,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.free = odb_source_files_free; files->base.close = odb_source_files_close; files->base.reprepare = odb_source_files_reprepare; + files->base.read_object_info = odb_source_files_read_object_info; return files; } diff --git a/odb/source.h b/odb/source.h index 99d9c92323..a5ff825d8b 100644 --- a/odb/source.h +++ b/odb/source.h @@ -1,6 +1,7 @@ #ifndef ODB_SOURCE_H #define ODB_SOURCE_H +struct object_info; struct odb_source; /* @@ -60,6 +61,31 @@ struct odb_source { * example just been repacked so that new objects will become visible. */ void (*reprepare)(struct odb_source *source); + + /* + * This callback is expected to read object information from the object + * database source.
The object info will be partially populated with + * pointers for each bit of information that was requested by the + * caller. + * + * The flags field is a combination of `OBJECT_INFO` flags. Only the + * following fields need to be handled by the backend: + * + * - `OBJECT_INFO_QUICK` indicates it is fine to use caches without + * re-verifying the data. + * + * - `OBJECT_INFO_AFTER_REPREPARE` indicates that the initial object + * lookup has failed and that the object sources have just been + * reloaded. The source should only look up objects via sources + * that may have been changed due to the reload. + * + * The callback is expected to return a negative error code in case + * reading the object has failed, 0 otherwise. + */ + int (*read_object_info)(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); }; /* @@ -118,4 +144,16 @@ static inline void odb_source_reprepare(struct odb_source *source) source->reprepare(source); } +/* + * Read an object from the object database source identified by its object ID. + * Returns 0 on success, a negative error code otherwise. 
+ */ +static inline int odb_source_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + return source->read_object_info(source, oid, oi, flags); +} + #endif -- GitLab From 41598091f8f044cd99aa41f46698a3648473357b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 15:32:00 +0100 Subject: [PATCH 101/110] read object stream --- odb/source-files.c | 12 ++++++++++++ odb/source.h | 23 +++++++++++++++++++++++ odb/streaming.c | 7 ++----- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/odb/source-files.c b/odb/source-files.c index 1c97538f64..698558ea00 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -47,6 +47,17 @@ static int odb_source_files_read_object_info(struct odb_source *source, return -1; } +static int odb_source_files_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + if (!packfile_store_read_object_stream(out, files->packed, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) + return 0; + return -1; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -62,6 +73,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.close = odb_source_files_close; files->base.reprepare = odb_source_files_reprepare; files->base.read_object_info = odb_source_files_read_object_info; + files->base.read_object_stream = odb_source_files_read_object_stream; return files; } diff --git a/odb/source.h b/odb/source.h index a5ff825d8b..e7351b2681 100644 --- a/odb/source.h +++ b/odb/source.h @@ -2,6 +2,7 @@ #define ODB_SOURCE_H struct object_info; +struct odb_read_stream; struct odb_source; /* @@ -86,6 +87,17 @@ struct odb_source { const struct object_id *oid, struct object_info *oi, unsigned flags); + + /* + * This callback is expected 
to create a new read stream that can be + * used to stream the object identified by the given ID. + * + * The callback is expected to return a negative error code in case + * creating the object stream has failed, 0 otherwise. + */ + int (*read_object_stream)(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); }; /* @@ -156,4 +168,15 @@ static inline int odb_source_read_object_info(struct odb_source *source, return source->read_object_info(source, oid, oi, flags); } +/* + * Create a new read stream for the given object ID. Returns 0 on success, a + * negative error code otherwise. + */ +static inline int odb_source_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + return source->read_object_stream(out, source, oid); +} + #endif diff --git a/odb/streaming.c b/odb/streaming.c index 19cda9407d..14a586292d 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -186,12 +186,9 @@ static int istream_source(struct odb_read_stream **out, struct odb_source *source; odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_stream(out, files->packed, oid) || - !odb_source_loose_read_object_stream(out, source, oid)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_stream(out, source, oid)) return 0; - } return open_istream_incore(out, odb, oid); } -- GitLab From bd07ac56fe5a2a25add4faa2d5a6065fbf02a575 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 10:19:59 +0100 Subject: [PATCH 102/110] for_each_object --- odb.c | 10 +------- odb.h | 12 ---------- odb/source-files.c | 23 ++++++++++++++++++ odb/source.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 21 deletions(-) diff --git a/odb.c b/odb.c index a01921d19c..4f50804459 100644 --- 
a/odb.c +++ b/odb.c @@ -986,18 +986,10 @@ int odb_for_each_object(struct object_database *odb, odb_prepare_alternates(odb); for (struct odb_source *source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) continue; - if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags); - if (ret) - return ret; - } - - ret = packfile_store_for_each_object(files->packed, oi, cb, cb_data, flags); + ret = odb_source_for_each_object(source, oi, cb, cb_data, flags); if (ret) return ret; } diff --git a/odb.h b/odb.h index 0b66c96338..6e455b2519 100644 --- a/odb.h +++ b/odb.h @@ -465,18 +465,6 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; -/* - * A callback function that can be used to iterate through objects. If given, - * the optional `oi` parameter will be populated the same as if you would call - * `odb_read_object_info()`. - * - * Returning a non-zero error code will cause iteration to abort. The error - * code will be propagated. - */ -typedef int (*odb_for_each_object_cb)(const struct object_id *oid, - struct object_info *oi, - void *cb_data); - /* * Iterate through all objects contained in the object database. 
Note that * objects may be iterated over multiple times in case they are either stored diff --git a/odb/source-files.c b/odb/source-files.c index 698558ea00..eee13cf2a6 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -58,6 +58,28 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out, return -1; } +static int odb_source_files_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + int ret; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(files->packed, oi, cb, cb_data, flags); + if (ret) + return ret; + + return 0; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -74,6 +96,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.reprepare = odb_source_files_reprepare; files->base.read_object_info = odb_source_files_read_object_info; files->base.read_object_stream = odb_source_files_read_object_stream; + files->base.for_each_object = odb_source_files_for_each_object; return files; } diff --git a/odb/source.h b/odb/source.h index e7351b2681..070f803139 100644 --- a/odb/source.h +++ b/odb/source.h @@ -5,6 +5,18 @@ struct object_info; struct odb_read_stream; struct odb_source; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. 
+ */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -98,6 +110,27 @@ struct odb_source { int (*read_object_stream)(struct odb_read_stream **out, struct odb_source *source, const struct object_id *oid); + + /* + * This callback is expected to iterate over all objects stored in this + * source and invoke the callback function for each of them. It is + * valid to yield the same object multiple time. A non-zero exit code + * from the object callback shall abort iteration. + * + * The optional `oi` structure shall be populated similar to how an individual + * call to `odb_source_read_object_info()` would have behaved. If the caller + * passes a `NULL` pointer then the object itself shall not be read. + * + * The callback is expected to return a negative error code in case the + * iteration has failed to read all objects, 0 otherwise. When the + * callback function returns a non-zero error code then that error code + * should be returned. + */ + int (*for_each_object)(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); }; /* @@ -179,4 +212,30 @@ static inline int odb_source_read_object_stream(struct odb_read_stream **out, return source->read_object_stream(out, source, oid); } +/* + * Iterate through all objects contained in the given source and invoke the + * callback function for each of them. Returning a non-zero code from the + * callback function aborts iteration. There is no guarantee that objects + * are only iterated over once. + * + * The optional `oi` structure shall be populated similar to how an individual + * call to `odb_source_read_object_info()` would have behaved. If the caller + * passes a `NULL` pointer then the object itself shall not be read. 
The flags argument is a bitfield
odb_source_files_downcast(source); + unsigned long count = 0; + + count += packfile_store_count_objects(files->packed, flags); + if (flags & ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED) + count += odb_source_loose_count_objects(source, flags); + + return count; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -97,6 +110,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_info = odb_source_files_read_object_info; files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; + files->base.count_objects = odb_source_files_count_objects; return files; } diff --git a/odb/source.h b/odb/source.h index 070f803139..b798e8c16f 100644 --- a/odb/source.h +++ b/odb/source.h @@ -131,6 +131,19 @@ struct odb_source { odb_for_each_object_cb cb, void *cb_data, unsigned flags); + + /* + * This callback is expected to count the number of objects that exist + * in the given source. It is fine to both under- and overcount the + * objects. + * + * The flags field is a combination of `enum odb_count_objects_flags` + * flags. + * + * The callback is expected to return the number of objects. + */ + unsigned long (*count_objects)(struct odb_source *source, + unsigned flags); }; /* @@ -238,4 +251,13 @@ static inline int odb_source_for_each_object(struct odb_source *source, return source->for_each_object(source, oi, cb, cb_data, flags); } +/* + * Count the number of objects that exist in the given object database source. 
+ */ +static inline unsigned long odb_source_count_objects(struct odb_source *source, + unsigned flags) +{ + return source->count_objects(source, flags); +} + #endif -- GitLab From 9b6a66e61159d80a468b9aeb767626507dc71ad6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 09:58:56 +0100 Subject: [PATCH 104/110] freshen object --- odb.c | 12 ++---------- odb/source-files.c | 11 +++++++++++ odb/source.h | 23 +++++++++++++++++++++++ 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/odb.c b/odb.c index 7154af4590..3f4818869a 100644 --- a/odb.c +++ b/odb.c @@ -961,18 +961,10 @@ int odb_freshen_object(struct object_database *odb, const struct object_id *oid) { struct odb_source *source; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - - if (packfile_store_freshen_object(files->packed, oid)) - return 1; - - if (odb_source_loose_freshen_object(source, oid)) + for (source = odb->sources; source; source = source->next) + if (odb_source_freshen_object(source, oid)) return 1; - } - return 0; } diff --git a/odb/source-files.c b/odb/source-files.c index d7959fb8b9..e088de5b8a 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -93,6 +93,16 @@ static unsigned long odb_source_files_count_objects(struct odb_source *source, return count; } +static int odb_source_files_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + if (packfile_store_freshen_object(files->packed, oid) || + odb_source_loose_freshen_object(source, oid)) + return 1; + return 0; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -111,6 +121,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_stream = odb_source_files_read_object_stream; 
files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; + files->base.freshen_object = odb_source_files_freshen_object; return files; } diff --git a/odb/source.h b/odb/source.h index b798e8c16f..386b898a2e 100644 --- a/odb/source.h +++ b/odb/source.h @@ -144,6 +144,18 @@ struct odb_source { */ unsigned long (*count_objects)(struct odb_source *source, unsigned flags); + + /* + * This callback is expected to freshen the given object so that its + * last access time is set to the current time. This is used to ensure + * that objects that are recent will not get garbage collected even if + * they were unreachable. + * + * Returns 0 in case the object does not exist, 1 in case the object + * has been freshened. + */ + int (*freshen_object)(struct odb_source *source, + const struct object_id *oid); }; /* @@ -260,4 +272,15 @@ static inline unsigned long odb_source_count_objects(struct odb_source *source, return source->count_objects(source, flags); } +/* + * Freshen an object in the object database by updating its timestamp. + * Returns 1 in case the object has been freshened, 0 in case the object does + * not exist. 
+ */ +static inline int odb_source_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + return source->freshen_object(source, oid); +} + #endif -- GitLab From 707736773c378869d228943988aed732ccded84f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 09:59:48 +0100 Subject: [PATCH 105/110] write object --- odb.c | 4 ++-- odb/source-files.c | 12 ++++++++++++ odb/source.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/odb.c b/odb.c index 3f4818869a..77b2255edf 100644 --- a/odb.c +++ b/odb.c @@ -1020,8 +1020,8 @@ int odb_write_object_ext(struct object_database *odb, struct object_id *compat_oid, unsigned flags) { - return odb_source_loose_write_object(odb->sources, buf, len, type, - oid, compat_oid, flags); + return odb_source_write_object(odb->sources, buf, len, type, + oid, compat_oid, flags); } int odb_write_object_stream(struct object_database *odb, diff --git a/odb/source-files.c b/odb/source-files.c index e088de5b8a..2829ed7b64 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -103,6 +103,17 @@ static int odb_source_files_freshen_object(struct odb_source *source, return 0; } +static int odb_source_files_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags) +{ + return odb_source_loose_write_object(source, buf, len, type, + oid, compat_oid, flags); +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -122,6 +133,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; files->base.freshen_object = odb_source_files_freshen_object; + files->base.write_object = odb_source_files_write_object; return files; } diff --git 
a/odb/source.h b/odb/source.h index 386b898a2e..5d8e2cb1ed 100644 --- a/odb/source.h +++ b/odb/source.h @@ -156,6 +156,24 @@ struct odb_source { */ int (*freshen_object)(struct odb_source *source, const struct object_id *oid); + + /* + * This callback is expected to persist the given object into the + * object source. In case the object already exists it shall be + * freshened. + * + * The flags field is a combination of `WRITE_OBJECT` flags. + * + * The resulting object ID (and optionally the compatibility object ID) + * shall be written into the out pointers. The callback is expected to + * return 0 on success, a negative error code otherwise. + */ + int (*write_object)(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags); }; /* @@ -283,4 +301,20 @@ static inline int odb_source_freshen_object(struct odb_source *source, return source->freshen_object(source, oid); } +/* + * Write an object into the object database source. Returns 0 on success, a + * negative error code otherwise. Populates the given out pointers for the + * object ID and the compatibility object ID, if non-NULL. 
+ */ +static inline int odb_source_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags) +{ + return source->write_object(source, buf, len, type, oid, + compat_oid, flags); +} + #endif -- GitLab From eb13cbd3926864307e6143edc38ffb3d1471b85c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 15:35:11 +0100 Subject: [PATCH 106/110] write object stream --- odb.c | 2 +- odb/source-files.c | 9 +++++++++ odb/source.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/odb.c b/odb.c index 77b2255edf..dc0d32ce31 100644 --- a/odb.c +++ b/odb.c @@ -1028,7 +1028,7 @@ int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, struct object_id *oid) { - return odb_source_loose_write_stream(odb->sources, stream, len, oid); + return odb_source_write_object_stream(odb->sources, stream, len, oid); } static void odb_update_commondir(const char *name UNUSED, diff --git a/odb/source-files.c b/odb/source-files.c index 2829ed7b64..abbda9a6c5 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -114,6 +114,14 @@ static int odb_source_files_write_object(struct odb_source *source, oid, compat_oid, flags); } +static int odb_source_files_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + return odb_source_loose_write_stream(source, stream, len, oid); +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -134,6 +142,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.count_objects = odb_source_files_count_objects; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; + files->base.write_object_stream = 
odb_source_files_write_object_stream; return files; } diff --git a/odb/source.h b/odb/source.h index 5d8e2cb1ed..7a77b6d931 100644 --- a/odb/source.h +++ b/odb/source.h @@ -4,6 +4,7 @@ struct object_info; struct odb_read_stream; struct odb_source; +struct odb_write_stream; /* * A callback function that can be used to iterate through objects. If given, @@ -174,6 +175,18 @@ struct odb_source { struct object_id *oid, struct object_id *compat_oid, unsigned flags); + + /* + * This callback is expected to persist the given object stream into + * the object source. + * + * The resulting object ID shall be written into the out pointer. The + * callback is expected to return 0 on success, a negative error code + * otherwise. + */ + int (*write_object_stream)(struct odb_source *source, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); }; /* @@ -317,4 +330,19 @@ static inline int odb_source_write_object(struct odb_source *source, compat_oid, flags); } +/* + * Write an object into the object database source via a stream. The overall + * length of the object must be known in advance. + * + * Return 0 on success, a negative error code otherwise. Populates the given + * out pointer for the object ID. 
+ */ +static inline int odb_source_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + return source->write_object_stream(source, stream, len, oid); +} + #endif -- GitLab From d87fdea31c5ccc6b85f70cd8334eb816eee69ec1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 09:14:15 +0100 Subject: [PATCH 107/110] read alternates --- odb.c | 26 ++++---------------------- odb.h | 5 +++++ odb/source-files.c | 22 ++++++++++++++++++++++ odb/source.h | 29 +++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 22 deletions(-) diff --git a/odb.c b/odb.c index dc0d32ce31..d6c30e2b4e 100644 --- a/odb.c +++ b/odb.c @@ -132,10 +132,10 @@ static bool odb_is_source_usable(struct object_database *o, const char *path) return usable; } -static void parse_alternates(const char *string, - int sep, - const char *relative_base, - struct strvec *out) +void parse_alternates(const char *string, + int sep, + const char *relative_base, + struct strvec *out) { struct strbuf pathbuf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT; @@ -199,24 +199,6 @@ static void parse_alternates(const char *string, strbuf_release(&buf); } -static void odb_source_read_alternates(struct odb_source *source, - struct strvec *out) -{ - struct strbuf buf = STRBUF_INIT; - char *path; - - path = xstrfmt("%s/info/alternates", source->path); - if (strbuf_read_file(&buf, path, 1024) < 0) { - warn_on_fopen_errors(path); - free(path); - return; - } - parse_alternates(buf.buf, '\n', source->path, out); - - strbuf_release(&buf); - free(path); -} - static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, const char *source, int depth) diff --git a/odb.h b/odb.h index 6e455b2519..ecdbd29e7d 100644 --- a/odb.h +++ b/odb.h @@ -530,4 +530,9 @@ int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, struct object_id *oid); +void parse_alternates(const char 
*string, + int sep, + const char *relative_base, + struct strvec *out); + #endif /* ODB_H */ diff --git a/odb/source-files.c b/odb/source-files.c index abbda9a6c5..7d82af738b 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -1,8 +1,10 @@ #include "git-compat-util.h" #include "object-file.h" +#include "odb.h" #include "odb/source.h" #include "odb/source-files.h" #include "packfile.h" +#include "strbuf.h" static void odb_source_files_free(struct odb_source *source) { @@ -122,6 +124,25 @@ static int odb_source_files_write_object_stream(struct odb_source *source, return odb_source_loose_write_stream(source, stream, len, oid); } +static int odb_source_files_read_alternates(struct odb_source *source, + struct strvec *out) +{ + struct strbuf buf = STRBUF_INIT; + char *path; + + path = xstrfmt("%s/info/alternates", source->path); + if (strbuf_read_file(&buf, path, 1024) < 0) { + warn_on_fopen_errors(path); + free(path); + return 0; + } + parse_alternates(buf.buf, '\n', source->path, out); + + strbuf_release(&buf); + free(path); + return 0; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -143,6 +164,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; + files->base.read_alternates = odb_source_files_read_alternates; return files; } diff --git a/odb/source.h b/odb/source.h index 7a77b6d931..e3d2c87001 100644 --- a/odb/source.h +++ b/odb/source.h @@ -5,6 +5,7 @@ struct object_info; struct odb_read_stream; struct odb_source; struct odb_write_stream; +struct strvec; /* * A callback function that can be used to iterate through objects. 
If given, @@ -187,6 +188,20 @@ struct odb_source { int (*write_object_stream)(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); + + /* + * This callback is expected to read the list of alternate object + * database sources connected to it and write them into the `strvec`. + * + * The format is expected to follow the "objectStorage" extension + * format with `(backend://)?payload` syntax. If the payload contains + * paths, these paths must be resolved to absolute paths. + * + * The callback is expected to return 0 on success, a negative error + * code otherwise. + */ + int (*read_alternates)(struct odb_source *source, + struct strvec *out); }; /* @@ -345,4 +360,18 @@ static inline int odb_source_write_object_stream(struct odb_source *source, return source->write_object_stream(source, stream, len, oid); } +/* + * Read the list of alternative object database sources from the given backend + * and populate the `strvec` with them. The listing is not recursive -- that + * is, if any of the yielded alternate sources has alternates itself, those + * will not be yielded as part of this function call. + * + * Return 0 on success, a negative error code otherwise. 
+ */ +static inline int odb_source_read_alternates(struct odb_source *source, + struct strvec *out) +{ + return source->read_alternates(source, out); +} + #endif -- GitLab From 7c180603cc9e1825b3f4625d388f98fd8c2bffd9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 12 Dec 2025 08:50:39 +0100 Subject: [PATCH 108/110] write alternate --- odb.c | 52 ------------------------------------------ odb/source-files.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ odb/source.h | 26 +++++++++++++++++++++ 3 files changed, 82 insertions(+), 52 deletions(-) diff --git a/odb.c b/odb.c index d6c30e2b4e..66af7c935f 100644 --- a/odb.c +++ b/odb.c @@ -237,58 +237,6 @@ static struct odb_source *odb_add_alternate_recursively(struct object_database * return alternate; } -static int odb_source_write_alternate(struct odb_source *source, - const char *alternate) -{ - struct lock_file lock = LOCK_INIT; - char *path = xstrfmt("%s/%s", source->path, "info/alternates"); - FILE *in, *out; - int found = 0; - int ret; - - hold_lock_file_for_update(&lock, path, LOCK_DIE_ON_ERROR); - out = fdopen_lock_file(&lock, "w"); - if (!out) { - ret = error_errno(_("unable to fdopen alternates lockfile")); - goto out; - } - - in = fopen(path, "r"); - if (in) { - struct strbuf line = STRBUF_INIT; - - while (strbuf_getline(&line, in) != EOF) { - if (!strcmp(alternate, line.buf)) { - found = 1; - break; - } - fprintf_or_die(out, "%s\n", line.buf); - } - - strbuf_release(&line); - fclose(in); - } else if (errno != ENOENT) { - ret = error_errno(_("unable to read alternates file")); - goto out; - } - - if (found) { - rollback_lock_file(&lock); - } else { - fprintf_or_die(out, "%s\n", alternate); - if (commit_lock_file(&lock)) { - ret = error_errno(_("unable to move new alternates file into place")); - goto out; - } - } - - ret = 0; - -out: - free(path); - return ret; -} - void odb_add_to_alternates_file(struct object_database *odb, const char *dir) { diff --git a/odb/source-files.c 
b/odb/source-files.c index 7d82af738b..2a74106a10 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -1,10 +1,13 @@ #include "git-compat-util.h" +#include "gettext.h" +#include "lockfile.h" #include "object-file.h" #include "odb.h" #include "odb/source.h" #include "odb/source-files.h" #include "packfile.h" #include "strbuf.h" +#include "write-or-die.h" static void odb_source_files_free(struct odb_source *source) { @@ -143,6 +146,58 @@ static int odb_source_files_read_alternates(struct odb_source *source, return 0; } +static int odb_source_files_write_alternate(struct odb_source *source, + const char *alternate) +{ + struct lock_file lock = LOCK_INIT; + char *path = xstrfmt("%s/%s", source->path, "info/alternates"); + FILE *in, *out; + int found = 0; + int ret; + + hold_lock_file_for_update(&lock, path, LOCK_DIE_ON_ERROR); + out = fdopen_lock_file(&lock, "w"); + if (!out) { + ret = error_errno(_("unable to fdopen alternates lockfile")); + goto out; + } + + in = fopen(path, "r"); + if (in) { + struct strbuf line = STRBUF_INIT; + + while (strbuf_getline(&line, in) != EOF) { + if (!strcmp(alternate, line.buf)) { + found = 1; + break; + } + fprintf_or_die(out, "%s\n", line.buf); + } + + strbuf_release(&line); + fclose(in); + } else if (errno != ENOENT) { + ret = error_errno(_("unable to read alternates file")); + goto out; + } + + if (found) { + rollback_lock_file(&lock); + } else { + fprintf_or_die(out, "%s\n", alternate); + if (commit_lock_file(&lock)) { + ret = error_errno(_("unable to move new alternates file into place")); + goto out; + } + } + + ret = 0; + +out: + free(path); + return ret; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -165,6 +220,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; files->base.read_alternates = 
odb_source_files_read_alternates; + files->base.write_alternate = odb_source_files_write_alternate; return files; } diff --git a/odb/source.h b/odb/source.h index e3d2c87001..7581fb1962 100644 --- a/odb/source.h +++ b/odb/source.h @@ -202,6 +202,19 @@ struct odb_source { */ int (*read_alternates)(struct odb_source *source, struct strvec *out); + + /* + * This callback is expected to persist the singular alternate passed + * to it into its list of alternates. Any pre-existing alternates are + * expected to remain active. Subsequent calls to `read_alternates` are + * thus expected to yield the pre-existing list of alternates plus the + * newly added alternate appended to its end. + * + * The callback is expected to return 0 on success, a negative error + * code otherwise. + */ + int (*write_alternate)(struct odb_source *source, + const char *alternate); }; /* @@ -374,4 +387,17 @@ static inline int odb_source_read_alternates(struct odb_source *source, return source->read_alternates(source, out); } +/* + * Write and persist a new alternate object database source for the given + * source. Any preexisting alternates are expected to stay valid, and the new + * alternate shall be appended to the end of the list. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +static inline int odb_source_write_alternate(struct odb_source *source, + const char *alternate) +{ + return source->write_alternate(source, alternate); +} + #endif -- GitLab From 18f4cd5f3c08cebcf7f0dc08cb0583deeb6d6d8f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 12 Nov 2025 08:30:23 +0100 Subject: [PATCH 109/110] objetc storage extension --- odb.c | 3 +++ repository.h | 9 +++++++++ setup.c | 15 +++++++++++++++ setup.h | 1 + 4 files changed, 28 insertions(+) diff --git a/odb.c b/odb.c index 66af7c935f..3ba32d3dc3 100644 --- a/odb.c +++ b/odb.c @@ -1005,8 +1005,11 @@ struct object_database *odb_new(struct repository *repo, pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); + if (!primary_source) + primary_source = repo->object_storage; if (!primary_source) primary_source = to_free = xstrfmt("%s/objects", repo->commondir); + o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); diff --git a/repository.h b/repository.h index 6063c4b846..01322ca197 100644 --- a/repository.h +++ b/repository.h @@ -50,6 +50,15 @@ struct repository { */ char *commondir; + /* + * Location of the primary object database source. May be NULL, in + * which case the primary object database source will assumed to be + * "${commondir}/objects". + * + * This configuration can be set via "extensions.objectStorage". + */ + char *object_storage; + /* * Holds any information related to accessing the raw object content. 
*/ diff --git a/setup.c b/setup.c index 3a6a048620..8fcdd8d7c0 100644 --- a/setup.c +++ b/setup.c @@ -686,7 +686,14 @@ static enum extension_result handle_extension(const char *var, } else if (!strcmp(ext, "relativeworktrees")) { data->relative_worktrees = git_config_bool(var, value); return EXTENSION_OK; + } else if (!strcmp(ext, "objectstorage")) { + if (!value) + return config_error_nonbool(var); + free(data->object_storage); + data->object_storage = xstrdup(value); + return EXTENSION_OK; } + return EXTENSION_UNKNOWN; } @@ -1931,12 +1938,18 @@ const char *setup_git_directory_gently(int *nongit_ok) startup_info->have_repository || /* GIT_DIR_EXPLICIT */ getenv(GIT_DIR_ENVIRONMENT)) { + if (startup_info->have_repository) { + the_repository->object_storage = + xstrdup_or_null(repo_fmt.object_storage); + } + if (!the_repository->gitdir) { const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; setup_git_env(gitdir); } + if (startup_info->have_repository) { repo_set_hash_algo(the_repository, repo_fmt.hash_algo); repo_set_compat_hash_algo(the_repository, @@ -2039,6 +2052,8 @@ void check_repository_format(struct repository_format *fmt) fmt = &repo_fmt; check_repository_format_gently(repo_get_git_dir(the_repository), fmt, NULL); startup_info->have_repository = 1; + the_repository->object_storage = + xstrdup_or_null(repo_fmt.object_storage); repo_set_hash_algo(the_repository, fmt->hash_algo); repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo); repo_set_ref_storage_format(the_repository, diff --git a/setup.h b/setup.h index d55dcc6608..e1c1279d09 100644 --- a/setup.h +++ b/setup.h @@ -173,6 +173,7 @@ struct repository_format { enum ref_storage_format ref_storage_format; int sparse_index; char *work_tree; + char *object_storage; struct string_list unknown_extensions; struct string_list v1_only_extensions; }; -- GitLab From ac6821fcc1eafdb3aae4214bc876ce66317b27cc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt 
Date: Wed, 12 Nov 2025 09:05:10 +0100 Subject: [PATCH 110/110] odb source schema --- odb/source.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/odb/source.c b/odb/source.c index dbdc6cee9c..51bb1cde74 100644 --- a/odb/source.c +++ b/odb/source.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "gettext.h" #include "object-file.h" #include "odb/source-files.h" #include "odb/source.h" @@ -8,7 +9,27 @@ struct odb_source *odb_source_new(struct object_database *odb, const char *path, bool local) { - return &odb_source_files_new(odb, path, local)->base; + struct odb_source *source; + const char *schema_end; + char *schema; + + schema_end = strstr(path, "://"); + if (!schema_end) + return &odb_source_files_new(odb, path, local)->base; + + schema = xstrndup(path, schema_end - path); + path = schema_end + 3; + + if (!strcmp(schema, "files")) { + source = &odb_source_files_new(odb, path, local)->base; + goto out; + } + + die(_("unknown object database source schema: '%s'"), schema); + +out: + free(schema); + return source; } void odb_source_init(struct odb_source *source, -- GitLab