From f82e430b4e46120e0ef67959e0ef9d8ab9282c56 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:56 +0100 Subject: [PATCH 01/27] odb: fix subtle logic to check whether an alternate is usable When adding an alternate to the object database we first check whether or not the path is usable. A path is usable if: - It actually exists. - We don't have it in our object sources yet. While the former check is trivial enough, the latter part is somewhat subtle and prone for bugs. This is because the function doesn't only check whether or not the given path is usable. But if it _is_ usable, we also store that path in the map of object sources immediately. The tricky part here is that the path that gets stored in the map is _not_ copied. Instead, we rely on the fact that subsequent code uses `strbuf_detach()` to store the exact same allocated memory in the created object source. Consequently, the memory is owned by the source but _also_ stored in the map. This subtlety is easy to miss, so if one decides to refactor this code one can easily end up breaking this mechanism. Make the relationship more explicit by not storing the path as part of `alt_odb_usable()`. Instead, store the path after we have created the source so that we can use the source's path pointer directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/odb.c b/odb.c index 00a6e71568..57d85ed950 100644 --- a/odb.c +++ b/odb.c @@ -86,17 +86,16 @@ int odb_mkstemp(struct object_database *odb, /* * Return non-zero iff the path is usable as an alternate object database. */ -static int alt_odb_usable(struct object_database *o, - struct strbuf *path, - const char *normalized_objdir, khiter_t *pos) +static int alt_odb_usable(struct object_database *o, const char *path, + const char *normalized_objdir) { int r; /* Detect cases where alternate disappeared */ - if (!is_directory(path->buf)) { + if (!is_directory(path)) { error(_("object directory %s does not exist; " "check .git/objects/info/alternates"), - path->buf); + path); return 0; } @@ -113,11 +112,14 @@ static int alt_odb_usable(struct object_database *o, assert(r == 1); /* never used */ kh_value(o->source_by_path, p) = o->sources; } - if (fspatheq(path->buf, normalized_objdir)) + + if (fspatheq(path, normalized_objdir)) + return 0; + + if (kh_get_odb_path_map(o->source_by_path, path) < kh_end(o->source_by_path)) return 0; - *pos = kh_put_odb_path_map(o->source_by_path, path->buf, &r); - /* r: 0 = exists, 1 = never used, 2 = deleted */ - return r == 0 ? 0 : 1; + + return 1; } /* @@ -148,6 +150,7 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, struct strbuf pathbuf = STRBUF_INIT; struct strbuf tmp = STRBUF_INIT; khiter_t pos; + int ret; if (!is_absolute_path(dir) && relative_base) { strbuf_realpath(&pathbuf, relative_base, 1); @@ -172,20 +175,21 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, strbuf_reset(&tmp); strbuf_realpath(&tmp, odb->sources->path, 1); - if (!alt_odb_usable(odb, &pathbuf, tmp.buf, &pos)) + if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) goto error; CALLOC_ARRAY(alternate, 1); alternate->odb = odb; alternate->local = false; - /* pathbuf.buf is already in r->objects->source_by_path */ alternate->path = strbuf_detach(&pathbuf, NULL); /* add the alternate entry */ *odb->sources_tail = alternate; odb->sources_tail = &(alternate->next); - alternate->next = NULL; - assert(odb->source_by_path); + + pos = kh_put_odb_path_map(odb->source_by_path, alternate->path, &ret); + if (!ret) + BUG("source must not yet exist"); kh_value(odb->source_by_path, pos) = alternate; /* recursively add alternates */ -- GitLab From 0820a4b120f310d87ac8817ade63896a901c9267 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:57 +0100 Subject: [PATCH 02/27] odb: introduce `odb_source_new()` We have three different locations where we create a new ODB source. Deduplicate the logic via a new `odb_source_new()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 23 ++++++++++++++++------- odb.h | 4 ++++ repository.c | 14 +++++++++----- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/odb.c b/odb.c index 57d85ed950..d2d4c514ae 100644 --- a/odb.c +++ b/odb.c @@ -141,6 +141,20 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source *source; + + CALLOC_ARRAY(source, 1); + source->odb = odb; + source->local = local; + source->path = xstrdup(path); + + return source; +} + static struct odb_source *link_alt_odb_entry(struct object_database *odb, const char *dir, const char *relative_base, @@ -178,10 +192,7 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) goto error; - CALLOC_ARRAY(alternate, 1); - alternate->odb = odb; - alternate->local = false; - alternate->path = strbuf_detach(&pathbuf, NULL); + alternate = odb_source_new(odb, pathbuf.buf, false); /* add the alternate entry */ *odb->sources_tail = alternate; @@ -341,9 +352,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Make a new primary odb and link the old primary ODB in as an * alternate */ - source = xcalloc(1, sizeof(*source)); - source->odb = odb; - source->path = xstrdup(dir); + source = odb_source_new(odb, dir, false); /* * Disable ref updates while a temporary odb is active, since diff --git a/odb.h b/odb.h index e6602dd90c..2bec895d13 100644 --- a/odb.h +++ b/odb.h @@ -89,6 +89,10 @@ struct odb_source { char *path; }; +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local); + struct packed_git; struct packfile_store; struct cached_object_entry; diff --git a/repository.c b/repository.c index 6faf5c7398..6aaa7ba008 100644 --- a/repository.c +++ b/repository.c @@ -160,20 +160,24 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; + char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); + expand_base_dir(&objects_path, o->object_dir, + repo->commondir, "objects"); if (!repo->objects->sources) { - CALLOC_ARRAY(repo->objects->sources, 1); - repo->objects->sources->odb = repo->objects; - repo->objects->sources->local = true; + repo->objects->sources = odb_source_new(repo->objects, + objects_path, true); repo->objects->sources_tail = &repo->objects->sources->next; + free(objects_path); + } else { + free(repo->objects->sources->path); + repo->objects->sources->path = objects_path; } - expand_base_dir(&repo->objects->sources->path, o->object_dir, - repo->commondir, "objects"); repo->objects->sources->disable_ref_updates = o->disable_ref_updates; -- GitLab From c2da110411919387fef0f869181fb510d06295d8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:58 +0100 Subject: [PATCH 03/27] odb: adjust naming to free object sources The functions `free_object_directory()` and `free_object_directories()` are responsible for freeing a single object source or all object sources connected to an object database, respectively. The associated structure has been renamed from `struct object_directory` to `struct odb_source` in a1e2581a1e (object-store: rename `object_directory` to `odb_source`, 2025-07-01) though, so the names are somewhat stale nowadays. Rename them to mention the new struct name instead. Furthermore, while at it, adapt them to our modern naming schema where we first have the subject followed by a verb. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index d2d4c514ae..77490d7fdb 100644 --- a/odb.c +++ b/odb.c @@ -365,7 +365,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, return source->next; } -static void free_object_directory(struct odb_source *source) +static void odb_source_free(struct odb_source *source) { free(source->path); odb_clear_loose_cache(source); @@ -387,7 +387,7 @@ void odb_restore_primary_source(struct object_database *odb, BUG("we expect the old primary object store to be the first alternate"); odb->sources = restore_source; - free_object_directory(cur_source); + odb_source_free(cur_source); } char *compute_alternate_path(const char *path, struct strbuf *err) @@ -1015,13 +1015,13 @@ struct object_database *odb_new(struct repository *repo) return o; } -static void free_object_directories(struct object_database *o) +static void odb_free_sources(struct object_database *o) { while (o->sources) { struct odb_source *next; next = o->sources->next; - free_object_directory(o->sources); + odb_source_free(o->sources); o->sources = next; } kh_destroy_odb_path_map(o->source_by_path); @@ -1039,7 +1039,7 @@ void odb_clear(struct object_database *o) o->commit_graph = NULL; o->commit_graph_attempted = 0; - free_object_directories(o); + odb_free_sources(o); o->sources_tail = NULL; o->loaded_alternates = 0; -- GitLab From 0cc12dedef2885dba8cf2635697767d394baf91f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:59 +0100 Subject: [PATCH 04/27] object-file: move `fetch_if_missing` The `fetch_if_missing` global variable is declared in "object-file.h" but defined in "odb.c". The variable relates to the whole object database instead of only loose objects, so move the declaration into "odb.h" accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.h | 8 -------- odb.h | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/object-file.h b/object-file.h index 3fd48dcafb..097e9764be 100644 --- a/object-file.h +++ b/object-file.h @@ -7,14 +7,6 @@ struct index_state; -/* - * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing - * blobs. This has a difference only if extensions.partialClone is set. - * - * Its default value is 1. - */ -extern int fetch_if_missing; - enum { INDEX_WRITE_OBJECT = (1 << 0), INDEX_FORMAT_CHECK = (1 << 1), diff --git a/odb.h b/odb.h index 2bec895d13..2346ffeca8 100644 --- a/odb.h +++ b/odb.h @@ -14,6 +14,14 @@ struct strbuf; struct repository; struct multi_pack_index; +/* + * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing + * blobs. This has a difference only if extensions.partialClone is set. + * + * Its default value is 1. + */ +extern int fetch_if_missing; + /* * Compute the exact path an alternate is at and returns it. In case of * error NULL is returned and the human readable error is added to `err` -- GitLab From ece43d9dc70b1717484ee78b66aef4f9390c2b2b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:00 +0100 Subject: [PATCH 05/27] object-file: introduce `struct odb_source_loose` Currently, all state that relates to loose objects is held directly by the `struct odb_source`. Introduce a new `struct odb_source_loose` to hold the state instead so that it is entirely self-contained. This structure will eventually morph into the backend for accessing loose objects. As such, this is part of the refactorings to introduce pluggable object databases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 13 +++++++++++++ object-file.h | 7 +++++++ odb.c | 2 ++ odb.h | 3 +++ 4 files changed, 25 insertions(+) diff --git a/object-file.c b/object-file.c index 4675c8ed6b..cd6aa561fa 100644 --- a/object-file.c +++ b/object-file.c @@ -1995,3 +1995,16 @@ void object_file_transaction_commit(struct odb_transaction *transaction) transaction->odb->transaction = NULL; free(transaction); } + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source) +{ + struct odb_source_loose *loose; + CALLOC_ARRAY(loose, 1); + loose->source = source; + return loose; +} + +void odb_source_loose_free(struct odb_source_loose *loose) +{ + free(loose); +} diff --git a/object-file.h b/object-file.h index 097e9764be..695a7e8e7c 100644 --- a/object-file.h +++ b/object-file.h @@ -18,6 +18,13 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct odb_source; +struct odb_source_loose { + struct odb_source *source; +}; + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source); +void odb_source_loose_free(struct odb_source_loose *loose); + /* * Populate and return the loose object cache array corresponding to the * given object ID. diff --git a/odb.c b/odb.c index 77490d7fdb..2d06ab0bb8 100644 --- a/odb.c +++ b/odb.c @@ -151,6 +151,7 @@ struct odb_source *odb_source_new(struct object_database *odb, source->odb = odb; source->local = local; source->path = xstrdup(path); + source->loose = odb_source_loose_new(source); return source; } @@ -368,6 +369,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, static void odb_source_free(struct odb_source *source) { free(source->path); + odb_source_loose_free(source->loose); odb_clear_loose_cache(source); loose_object_map_clear(&source->loose_map); free(source); diff --git a/odb.h b/odb.h index 2346ffeca8..49b398beda 100644 --- a/odb.h +++ b/odb.h @@ -48,6 +48,9 @@ struct odb_source { /* Object database that owns this object source. */ struct object_database *odb; + /* Private state for loose objects. */ + struct odb_source_loose *loose; + /* * Used to store the results of readdir(3) calls when we are OK * sacrificing accuracy due to races for speed. That includes -- GitLab From 90a93f9dea88532623ef7422dbc21d8dc70a58dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:01 +0100 Subject: [PATCH 06/27] object-file: move loose object cache into loose source Our loose objects use a cache that (optionally) stores all objects for each of the opened sharding directories. This cache is located in the `struct odb_source`, but now that we have `struct odb_source_loose` it makes sense to move it into the latter structure so that all state that relates to loose objects is entirely self-contained. Do so. While at it, rename corresponding functions to have a prefix that relates to `struct odb_source_loose`. Note that despite this prefix, the functions still accept a `struct odb_source` as input. This is done intentionally: once we introduce pluggable object databases, we will continue to accept this struct but then do a cast inside these functions to `struct odb_source_loose`. This design is similar to how we do it for our ref backends. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 9 +++++---- object-file.c | 35 +++++++++++++++++++---------------- object-file.h | 16 ++++++++++++++-- object-name.c | 2 +- odb.c | 1 - odb.h | 12 ------------ 6 files changed, 39 insertions(+), 36 deletions(-) diff --git a/loose.c b/loose.c index e8ea6e7e24..8cc7573ff2 100644 --- a/loose.c +++ b/loose.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "hash.h" #include "path.h" +#include "object-file.h" #include "odb.h" #include "hex.h" #include "repository.h" @@ -54,7 +55,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(source->loose_objects_cache, compat_oid); + oidtree_insert(source->loose->cache, compat_oid); return inserted; } @@ -66,9 +67,9 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source if (!source->loose_map) loose_object_map_init(&source->loose_map); - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); diff --git a/object-file.c b/object-file.c index cd6aa561fa..fef00d6d3d 100644 --- a/object-file.c +++ b/object-file.c @@ -223,7 +223,7 @@ static int quick_has_loose(struct repository *r, odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_loose_cache(source, oid), oid)) + if (oidtree_contains(odb_source_loose_cache(source, oid), oid)) return 1; } return 0; @@ -1802,44 +1802,44 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid) +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->loose_objects_subdir_seen[0]); + size_t word_bits = bitsizeof(source->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->loose_objects_subdir_seen)) + (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->loose_objects_subdir_seen[word_index]; + bitmap = &source->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->loose_objects_cache; - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + return source->loose->cache; + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - source->loose_objects_cache); + source->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return source->loose_objects_cache; + return source->loose->cache; } void odb_clear_loose_cache(struct odb_source *source) { - oidtree_clear(source->loose_objects_cache); - FREE_AND_NULL(source->loose_objects_cache); - memset(&source->loose_objects_subdir_seen, 0, - sizeof(source->loose_objects_subdir_seen)); + oidtree_clear(source->loose->cache); + FREE_AND_NULL(source->loose->cache); + memset(&source->loose->subdir_seen, 0, + sizeof(source->loose->subdir_seen)); } static int check_stream_oid(git_zstream *stream, @@ -2006,5 +2006,8 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source *source) void odb_source_loose_free(struct odb_source_loose *loose) { + if (!loose) + return; + odb_clear_loose_cache(loose->source); free(loose); } diff --git a/object-file.h b/object-file.h index 695a7e8e7c..90da69cf5f 100644 --- a/object-file.h +++ b/object-file.h @@ -20,6 +20,18 @@ struct odb_source; struct odb_source_loose { struct odb_source *source; + + /* + * Used to store the results of readdir(3) calls when we are OK + * sacrificing accuracy due to races for speed. That includes + * object existence with OBJECT_INFO_QUICK, as well as + * our search for unique abbreviated hashes. Don't use it for tasks + * requiring greater accuracy! + * + * Be sure to call odb_load_loose_cache() before using. + */ + uint32_t subdir_seen[8]; /* 256 bits */ + struct oidtree *cache; }; struct odb_source_loose *odb_source_loose_new(struct odb_source *source); @@ -29,8 +41,8 @@ void odb_source_loose_free(struct odb_source_loose *loose); * Populate and return the loose object cache array corresponding to the * given object ID. */ -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid); +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); /* Empty the loose object cache for the specified object directory. */ void odb_clear_loose_cache(struct odb_source *source); diff --git a/object-name.c b/object-name.c index 766c757042..8ce0ef7c23 100644 --- a/object-name.c +++ b/object-name.c @@ -116,7 +116,7 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_loose_cache(source, &ds->bin_pfx), + oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), &ds->bin_pfx, ds->len, match_prefix, ds); } diff --git a/odb.c b/odb.c index 2d06ab0bb8..87d84688c6 100644 --- a/odb.c +++ b/odb.c @@ -370,7 +370,6 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); - odb_clear_loose_cache(source); loose_object_map_clear(&source->loose_map); free(source); } diff --git a/odb.h b/odb.h index 49b398beda..77104396af 100644 --- a/odb.h +++ b/odb.h @@ -51,18 +51,6 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; - /* - * Used to store the results of readdir(3) calls when we are OK - * sacrificing accuracy due to races for speed. That includes - * object existence with OBJECT_INFO_QUICK, as well as - * our search for unique abbreviated hashes. Don't use it for tasks - * requiring greater accuracy! - * - * Be sure to call odb_load_loose_cache() before using. - */ - uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ - struct oidtree *loose_objects_cache; - /* Map between object IDs for loose objects. */ struct loose_object_map *loose_map; -- GitLab From be659c97eae3b68e38b71f0a67067dede23903b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:02 +0100 Subject: [PATCH 07/27] object-file: hide internals when we need to reprepare loose sources There are two different situations where we have to clear the cache of loose objects: - When freeing the loose object source itself to avoid memory leaks. - When repreparing the loose object source so that any potentially- stale data is getting evicted from the cache. The former is already handled by `odb_source_loose_free()`. But the latter case is still done manually by in `odb_reprepare()`, so we are leaking internals into that code. Introduce a new `odb_source_loose_reprepare()` function as an equivalent to `packfile_store_prepare()` to hide these implementation details. Furthermore, while at it, rename the function `odb_clear_loose_cache()` to `odb_source_loose_clear()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 17 +++++++++++------ object-file.h | 6 +++--- odb.c | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index fef00d6d3d..20daa629a1 100644 --- a/object-file.c +++ b/object-file.c @@ -1834,12 +1834,17 @@ struct oidtree *odb_source_loose_cache(struct odb_source *source, return source->loose->cache; } -void odb_clear_loose_cache(struct odb_source *source) +static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { - oidtree_clear(source->loose->cache); - FREE_AND_NULL(source->loose->cache); - memset(&source->loose->subdir_seen, 0, - sizeof(source->loose->subdir_seen)); + oidtree_clear(loose->cache); + FREE_AND_NULL(loose->cache); + memset(&loose->subdir_seen, 0, + sizeof(loose->subdir_seen)); +} + +void odb_source_loose_reprepare(struct odb_source *source) +{ + odb_source_loose_clear_cache(source->loose); } static int check_stream_oid(git_zstream *stream, @@ -2008,6 +2013,6 @@ void odb_source_loose_free(struct odb_source_loose *loose) { if (!loose) return; - odb_clear_loose_cache(loose->source); + odb_source_loose_clear_cache(loose); free(loose); } diff --git a/object-file.h b/object-file.h index 90da69cf5f..bec855e8e5 100644 --- a/object-file.h +++ b/object-file.h @@ -37,6 +37,9 @@ struct odb_source_loose { struct odb_source_loose *odb_source_loose_new(struct odb_source *source); void odb_source_loose_free(struct odb_source_loose *loose); +/* Reprepare the loose source by emptying the loose object cache. */ +void odb_source_loose_reprepare(struct odb_source *source); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -44,9 +47,6 @@ void odb_source_loose_free(struct odb_source_loose *loose); struct oidtree *odb_source_loose_cache(struct odb_source *source, const struct object_id *oid); -/* Empty the loose object cache for the specified object directory. */ -void odb_clear_loose_cache(struct odb_source *source); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/odb.c b/odb.c index 87d84688c6..b3e8d4a49c 100644 --- a/odb.c +++ b/odb.c @@ -1071,7 +1071,7 @@ void odb_reprepare(struct object_database *o) odb_prepare_alternates(o); for (source = o->sources; source; source = source->next) - odb_clear_loose_cache(source); + odb_source_loose_reprepare(source); o->approximate_object_count_valid = 0; -- GitLab From 376016ec71c3a6c883f2ca77a3f1c0245fd60dc2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:03 +0100 Subject: [PATCH 08/27] object-file: move loose object map into loose source The loose object map is used to map from the repository's canonical object hash to the compatibility hash. As the name indicates, this map is only used for loose objects, and as such it is tied to a specific loose object source. Same as with preceding commits, move this map into the loose object source accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 10 +++++----- object-file.c | 1 + object-file.h | 3 +++ odb.c | 1 - odb.h | 3 --- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/loose.c b/loose.c index 8cc7573ff2..56cf64b648 100644 --- a/loose.c +++ b/loose.c @@ -49,7 +49,7 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->loose_map; + struct loose_object_map *map = source->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); @@ -65,8 +65,8 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->loose_map) - loose_object_map_init(&source->loose_map); + if (!source->loose->map) + loose_object_map_init(&source->loose->map); if (!source->loose->cache) { ALLOC_ARRAY(source->loose->cache, 1); oidtree_init(source->loose->cache); @@ -125,7 +125,7 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->loose_map->to_compat; + kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat; struct lock_file lock; int fd; khiter_t iter; @@ -231,7 +231,7 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->loose_map; + struct loose_object_map *loose_map = source->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? diff --git a/object-file.c b/object-file.c index 20daa629a1..ccc67713fa 100644 --- a/object-file.c +++ b/object-file.c @@ -2014,5 +2014,6 @@ void odb_source_loose_free(struct odb_source_loose *loose) if (!loose) return; odb_source_loose_clear_cache(loose); + loose_object_map_clear(&loose->map); free(loose); } diff --git a/object-file.h b/object-file.h index bec855e8e5..f8a96a45f5 100644 --- a/object-file.h +++ b/object-file.h @@ -32,6 +32,9 @@ struct odb_source_loose { */ uint32_t subdir_seen[8]; /* 256 bits */ struct oidtree *cache; + + /* Map between object IDs for loose objects. */ + struct loose_object_map *map; }; struct odb_source_loose *odb_source_loose_new(struct odb_source *source); diff --git a/odb.c b/odb.c index b3e8d4a49c..d1df9609e2 100644 --- a/odb.c +++ b/odb.c @@ -370,7 +370,6 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); - loose_object_map_clear(&source->loose_map); free(source); } diff --git a/odb.h b/odb.h index 77104396af..f9a3137a34 100644 --- a/odb.h +++ b/odb.h @@ -51,9 +51,6 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; - /* Map between object IDs for loose objects. */ - struct loose_object_map *loose_map; - /* * private data * -- GitLab From ff7ad5cb3936514ec0be32531ff6274b53dbe091 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:04 +0100 Subject: [PATCH 09/27] object-file: read objects via the loose object source When reading an object via `loose_object_info()` or `map_loose_object()` we hand in the whole repository. We then iterate through each of the object sources to figure out whether that source has the object in question. This logic is reversing responsibility though: a specific backend should only care about one specific source, where the object sources themselves are then managed by the object database. Refactor the code accordingly by passing an object source to both of these functions instead. The different sources are then handled by either `do_oid_object_info_extended()`, which sits on the object database level, and by `open_istream_loose()`. The latter function arguably is still at the wrong level, but this will be cleaned up at a later point in time. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 68 +++++++++++++++++++-------------------------------- object-file.h | 15 ++++++------ odb.c | 9 +++++-- streaming.c | 11 ++++++++- 4 files changed, 50 insertions(+), 53 deletions(-) diff --git a/object-file.c b/object-file.c index ccc67713fa..6d6e9a5a2a 100644 --- a/object-file.c +++ b/object-file.c @@ -167,25 +167,22 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in the local repository or in an alternate. + * Find "oid" as a loose object in given source. * Returns 0 on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct repository *r, const struct object_id *oid, +static int stat_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, struct stat *st, const char **path) { - struct odb_source *source; static struct strbuf buf = STRBUF_INIT; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - if (!lstat(*path, st)) - return 0; - } + *path = odb_loose_path(loose->source, &buf, oid); + if (!lstat(*path, st)) + return 0; return -1; } @@ -194,39 +191,24 @@ static int stat_loose_object(struct repository *r, const struct object_id *oid, * Like stat_loose_object(), but actually open the object and return the * descriptor. See the caveats on the "path" parameter above. */ -static int open_loose_object(struct repository *r, +static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { - int fd; - struct odb_source *source; - int most_interesting_errno = ENOENT; static struct strbuf buf = STRBUF_INIT; + int fd; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; + *path = odb_loose_path(loose->source, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; - if (most_interesting_errno == ENOENT) - most_interesting_errno = errno; - } - errno = most_interesting_errno; return -1; } -static int quick_has_loose(struct repository *r, +static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { - struct odb_source *source; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_source_loose_cache(source, oid), oid)) - return 1; - } - return 0; + return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); } /* @@ -252,12 +234,12 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *map_loose_object(struct repository *r, - const struct object_id *oid, - unsigned long *size) +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; - int fd = open_loose_object(r, oid, &p); + int fd = open_loose_object(source->loose, oid, &p); if (fd < 0) return NULL; @@ -407,9 +389,9 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags) +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags) { int status = 0; int fd; @@ -422,7 +404,7 @@ int loose_object_info(struct repository *r, enum object_type type_scratch; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, r->hash_algo); + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); /* * If we don't care about type or size, then we don't @@ -435,15 +417,15 @@ int loose_object_info(struct repository *r, if (!oi->typep && !oi->sizep && !oi->contentp) { struct stat st; if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(r, oid) ? 0 : -1; - if (stat_loose_object(r, oid, &st, &path) < 0) + return quick_has_loose(source->loose, oid) ? 0 : -1; + if (stat_loose_object(source->loose, oid, &st, &path) < 0) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; return 0; } - fd = open_loose_object(r, oid, &path); + fd = open_loose_object(source->loose, oid, &path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); diff --git a/object-file.h b/object-file.h index f8a96a45f5..ca13d3d64e 100644 --- a/object-file.h +++ b/object-file.h @@ -43,6 +43,14 @@ void odb_source_loose_free(struct odb_source_loose *loose); /* Reprepare the loose source by emptying the loose object cache. */ void odb_source_loose_reprepare(struct odb_source *source); +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags); + +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -66,9 +74,6 @@ const char *odb_loose_path(struct odb_source *source, int has_loose_object(struct odb_source *source, const struct object_id *oid); -void *map_loose_object(struct repository *r, const struct object_id *oid, - unsigned long *size); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: @@ -196,10 +201,6 @@ int check_object_signature(struct repository *r, const struct object_id *oid, */ int stream_object_signature(struct repository *r, const struct object_id *oid); -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags); - enum finalize_object_file_flags { FOF_SKIP_COLLISION_CHECK = 1, }; diff --git a/odb.c b/odb.c index d1df9609e2..4c0b4fdcd5 100644 --- a/odb.c +++ b/odb.c @@ -697,13 +697,18 @@ static int do_oid_object_info_extended(struct object_database *odb, return 0; } + odb_prepare_alternates(odb); + while (1) { + struct odb_source *source; + if (find_pack_entry(odb->repo, real, &e)) break; /* Most likely it's a loose object. */ - if (!loose_object_info(odb->repo, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!odb_source_loose_read_object_info(source, real, oi, flags)) + return 0; /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { diff --git a/streaming.c b/streaming.c index 4b13827668..00ad649ae3 100644 --- a/streaming.c +++ b/streaming.c @@ -230,12 +230,21 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_source *source; + oi.sizep = &st->size; oi.typep = type; - st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + st->u.loose.mapped = odb_source_loose_map_object(source, oid, + &st->u.loose.mapsize); + if (st->u.loose.mapped) + break; + } if (!st->u.loose.mapped) return -1; + switch (unpack_loose_header(&st->z, st->u.loose.mapped, st->u.loose.mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { -- GitLab From 05130c6c9eed9ff7450e9067d7215032eb914c10 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:05 +0100 Subject: [PATCH 10/27] object-file: rename `has_loose_object()` Rename `has_loose_object()` to `odb_source_loose_has_object()` so that it becomes clear that this is tied to a specific loose object source. This matches our modern naming schema for functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- object-file.c | 6 +++--- object-file.h | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df1..69e80b1443 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1716,7 +1716,7 @@ static int want_object_in_pack_mtime(const struct object_id *oid, */ struct odb_source *source = the_repository->objects->sources->next; for (; source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) return 0; } @@ -3978,7 +3978,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type int found = 0; for (; !found && source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) found = 1; /* diff --git a/object-file.c b/object-file.c index 6d6e9a5a2a..79e7ab8d2e 100644 --- a/object-file.c +++ b/object-file.c @@ -99,8 +99,8 @@ static int check_and_freshen_source(struct odb_source *source, return check_and_freshen_file(path.buf, freshen); } -int has_loose_object(struct odb_source *source, - const struct object_id *oid) +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid) { return check_and_freshen_source(source, oid, 0); } @@ -1161,7 +1161,7 @@ int force_object_loose(struct odb_source *source, int ret; for (struct odb_source *s = source->odb->sources; s; s = s->next) - if (has_loose_object(s, oid)) + if (odb_source_loose_has_object(s, oid)) return 0; oi.typep = &type; diff --git a/object-file.h b/object-file.h index ca13d3d64e..065a44bb8a 100644 --- a/object-file.h +++ b/object-file.h @@ -51,6 +51,14 @@ void *odb_source_loose_map_object(struct odb_source *source, const struct object_id *oid, unsigned long *size); +/* + * Return true iff an object database source has a loose object + * with the specified name. This function does not respect replace + * references. + */ +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -66,14 +74,6 @@ const char *odb_loose_path(struct odb_source *source, struct strbuf *buf, const struct object_id *oid); -/* - * Return true iff an object database source has a loose object - * with the specified name. This function does not respect replace - * references. - */ -int has_loose_object(struct odb_source *source, - const struct object_id *oid); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: -- GitLab From f2bd88a308a2754e727cb462e03102307cdfe004 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:06 +0100 Subject: [PATCH 11/27] object-file: refactor freshening of objects When writing an object that already exists in our object database we skip the write and instead only update mtimes of the object, either in its packed or loose object format. This logic is wholly contained in "object-file.c", but that file is really only concerned with loose objects. So it does not really make sense that it also contains the logic to freshen a packed object. Introduce a new `odb_freshen_object()` function that sits on the object database level and two functions `packfile_store_freshen_object()` and `odb_source_loose_freshen_object()`. Like this, the format-specific functions can be part of their respective subsystems, while the backend agnostic function to freshen an object sits at the object database layer. Note that this change also moves the logic that iterates through object sources from the object source layer into the object database layer. This change is intentional: object sources should ideally only have to worry about themselves, and coordination of different sources should be handled on the object database level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 33 +++++---------------------------- object-file.h | 3 +++ odb.c | 16 ++++++++++++++++ odb.h | 3 +++ packfile.c | 16 ++++++++++++++++ packfile.h | 3 +++ 6 files changed, 46 insertions(+), 28 deletions(-) diff --git a/object-file.c b/object-file.c index 79e7ab8d2e..893c32adcd 100644 --- a/object-file.c +++ b/object-file.c @@ -968,30 +968,10 @@ static int write_loose_object(struct odb_source *source, FOF_SKIP_COLLISION_CHECK); } -static int freshen_loose_object(struct object_database *odb, - const struct object_id *oid) +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid) { - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) - if (check_and_freshen_source(source, oid, 1)) - return 1; - return 0; -} - -static int freshen_packed_object(struct object_database *odb, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (!freshen_file(e.p->pack_name)) - return 0; - e.p->freshened = 1; - return 1; + return !!check_and_freshen_source(source, oid, 1); } int stream_loose_object(struct odb_source *source, @@ -1073,12 +1053,10 @@ int stream_loose_object(struct odb_source *source, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(source, fd, tmp_file.buf); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) { + if (odb_freshen_object(source->odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(source, &filename, oid); /* We finally know the object path, and create the missing dir. */ @@ -1137,8 +1115,7 @@ int write_object_file(struct odb_source *source, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) + if (odb_freshen_object(source->odb, oid)) return 0; if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; diff --git a/object-file.h b/object-file.h index 065a44bb8a..ee5b24cec6 100644 --- a/object-file.h +++ b/object-file.h @@ -59,6 +59,9 @@ void *odb_source_loose_map_object(struct odb_source *source, int odb_source_loose_has_object(struct odb_source *source, const struct object_id *oid); +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. diff --git a/odb.c b/odb.c index 4c0b4fdcd5..17734bdaff 100644 --- a/odb.c +++ b/odb.c @@ -987,6 +987,22 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, return odb_read_object_info_extended(odb, oid, NULL, object_info_flags) >= 0; } +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + if (packfile_store_freshen_object(odb->packfiles, oid)) + return 1; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (odb_source_loose_freshen_object(source, oid)) + return 1; + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index f9a3137a34..2653247e0c 100644 --- a/odb.h +++ b/odb.h @@ -396,6 +396,9 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, unsigned flags); +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid); + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect); diff --git a/packfile.c b/packfile.c index 1ae2b2fe1e..40f733dd23 100644 --- a/packfile.c +++ b/packfile.c @@ -819,6 +819,22 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, diff --git a/packfile.h b/packfile.h index c9d0b93446..58fcc88e20 100644 --- a/packfile.h +++ b/packfile.h @@ -163,6 +163,9 @@ struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); struct packed_git *packfile_store_load_pack(struct packfile_store *store, const char *idx_path, int local); +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid); + struct pack_window { struct pack_window *next; unsigned char *base; -- GitLab From bfb1b2b4ac5cfa99f7d2503b404d282714d84bdf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:07 +0100 Subject: [PATCH 12/27] object-file: rename `write_object_file()` Rename `write_object_file()` to `odb_source_loose_write_object()` so that it becomes clear that this is tied to a specific loose object source. This matches our modern naming schema for functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 8 ++++---- object-file.h | 10 +++++----- odb.c | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index 893c32adcd..fdc644a427 100644 --- a/object-file.c +++ b/object-file.c @@ -1084,10 +1084,10 @@ int stream_loose_object(struct odb_source *source, return err; } -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags) { const struct git_hash_algo *algo = source->odb->repo->hash_algo; const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; diff --git a/object-file.h b/object-file.h index ee5b24cec6..36a60e15c4 100644 --- a/object-file.h +++ b/object-file.h @@ -62,6 +62,11 @@ int odb_source_loose_has_object(struct odb_source *source, int odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid); +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -168,11 +173,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags); - struct input_stream { const void *(*read)(struct input_stream *, unsigned long *len); void *data; diff --git a/odb.c b/odb.c index 17734bdaff..da44f1d63b 100644 --- a/odb.c +++ b/odb.c @@ -1021,7 +1021,8 @@ int odb_write_object_ext(struct object_database *odb, struct object_id *compat_oid, unsigned flags) { - return write_object_file(odb->sources, buf, len, type, oid, compat_oid, flags); + return odb_source_loose_write_object(odb->sources, buf, len, type, + oid, compat_oid, flags); } struct object_database *odb_new(struct repository *repo) -- GitLab From 3e5e360888316ed1a44da69bf134bb6ec70aee1b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:08 +0100 Subject: [PATCH 13/27] object-file: refactor writing objects via a stream We have two different ways to write an object into the database: - We either provide the full buffer and write the object all at once. - Or we provide an input stream that has a `read()` function so that we can chunk the object. The latter is especially used for large objects, where it may be too expensive to hold the complete object in memory all at once. While we already have `odb_write_object()` at the ODB-layer, we don't have an equivalent for streaming an object. Introduce a new function `odb_write_object_stream()` to address this gap so that callers don't have to be aware of the inner workings of how to stream an object to disk with a specific object source. Rename `stream_loose_object()` to `odb_source_loose_write_stream()` to clarify its scope. This matches our modern best practices around how to name functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/unpack-objects.c | 7 +++---- object-file.c | 6 +++--- object-file.h | 14 ++++---------- odb.c | 7 +++++++ odb.h | 10 ++++++++++ 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index ef79e43715..6fc64e9e4b 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -363,7 +363,7 @@ struct input_zstream_data { int status; }; -static const void *feed_input_zstream(struct input_stream *in_stream, +static const void *feed_input_zstream(struct odb_write_stream *in_stream, unsigned long *readlen) { struct input_zstream_data *data = in_stream->data; @@ -393,7 +393,7 @@ static void stream_blob(unsigned long size, unsigned nr) { git_zstream zstream = { 0 }; struct input_zstream_data data = { 0 }; - struct input_stream in_stream = { + struct odb_write_stream in_stream = { .read = feed_input_zstream, .data = &data, }; @@ -402,8 +402,7 @@ static void stream_blob(unsigned long size, unsigned nr) data.zstream = &zstream; git_inflate_init(&zstream); - if (stream_loose_object(the_repository->objects->sources, - &in_stream, size, &info->oid)) + if (odb_write_object_stream(the_repository->objects, &in_stream, size, &info->oid)) die(_("failed to write object in stream")); if (data.status != Z_STREAM_END) diff --git a/object-file.c b/object-file.c index fdc644a427..811c569ed3 100644 --- a/object-file.c +++ b/object-file.c @@ -974,9 +974,9 @@ int odb_source_loose_freshen_object(struct odb_source *source, return !!check_and_freshen_source(source, oid, 1); } -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid) +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *in_stream, size_t len, + struct object_id *oid) { const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; diff --git a/object-file.h b/object-file.h index 36a60e15c4..eeffa67bbd 100644 --- a/object-file.h +++ b/object-file.h @@ -67,6 +67,10 @@ int odb_source_loose_write_object(struct odb_source *source, enum object_type type, struct object_id *oid, struct object_id *compat_oid_in, unsigned flags); +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -173,16 +177,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -struct input_stream { - const void *(*read)(struct input_stream *, unsigned long *len); - void *data; - int is_finished; -}; - -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/odb.c b/odb.c index da44f1d63b..3ec21ef24e 100644 --- a/odb.c +++ b/odb.c @@ -1025,6 +1025,13 @@ int odb_write_object_ext(struct object_database *odb, oid, compat_oid, flags); } +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid) +{ + return odb_source_loose_write_stream(odb->sources, stream, len, oid); +} + struct object_database *odb_new(struct repository *repo) { struct object_database *o = xmalloc(sizeof(*o)); diff --git a/odb.h b/odb.h index 2653247e0c..9bb28008b1 100644 --- a/odb.h +++ b/odb.h @@ -492,4 +492,14 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } +struct odb_write_stream { + const void *(*read)(struct odb_write_stream *, unsigned long *len); + void *data; + int is_finished; +}; + +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + #endif /* ODB_H */ -- GitLab From ad800599f494a73e2039a1ae9c5a019d927ee040 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 7 Nov 2025 08:43:19 +0100 Subject: [PATCH 14/27] Centralize management of object database sources Hi, currently, the creation of the object databases is handled both by "setup.c" and by "odb.c". While it's expected that "setup.c" is the one setting up the object database itself, what's less so is that there is a shared responsibility for managing its sources: - The primary object database source gets created in "setup.c". - The temporary source used during ODB transactions is handled by "setup.c" when changing the current working directory. - Relative paths stored in object database sources get updated by "setup.c" when changing the current working directory. This means that the management of ODB sources is somewhat cluttered and thus hard to understand. Furthermore, it has the consequence that "setup.c" reaches into internals of the ODB that really shouldn't be any of its concern. This patch series cleans that up and moves all handling of ODB sources into "odb.c". This hopefully makes the logic easier to understand and it will allow us to eventually handle the logic for different backends in a single central location. The series is structured as follows: - Patches 1 to 5 clean up some smaller nuisances in the vicinity. - Patches 6 to 9 refactor a couple of callsites that play weird games with the object database. These cause us to re-initialize the ODB multiple times, which will not be allowed anymore at the end of this series. - Patches 10 to 13 move the logic that manages object sources from "setup.c" into "odb.c". This series is built on top of v2.52.0 with ps/object-source-loose at 3e5e360888 (object-file: refactor writing objects via a stream, 2025-11-03) merged into it. Thanks! Patrick To: git@vger.kernel.org --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 1, "change-id": "20251107-b4-pks-odb-creation-96c18fdab1d2", "prefixes": [] } } -- GitLab From 34125b4659606529ed9397e6df556754c5d185c7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 31 Oct 2025 09:28:57 +0100 Subject: [PATCH 15/27] path: move `enter_repo()` into "setup.c" The function `enter_repo()` is used to enter a repository at a given path. As such it sits way closer to setting up a repository than it does with handling paths, but regardless of that it's located in "path.c" instead of in "setup.c". Move the function into "setup.c". Signed-off-by: Patrick Steinhardt --- builtin/receive-pack.c | 2 +- builtin/upload-archive.c | 2 +- builtin/upload-pack.c | 2 +- http-backend.c | 1 + path.c | 100 --------------------------------------- path.h | 15 ------ setup.c | 81 +++++++++++++++++++++++++++++++ setup.h | 38 +++++++++++++++ 8 files changed, 123 insertions(+), 118 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c9288a9c7e..79a0fd4756 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -34,7 +34,6 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" -#include "path.h" #include "protocol.h" #include "commit-reach.h" #include "server-info.h" @@ -42,6 +41,7 @@ #include "trace2.h" #include "worktree.h" #include "shallow.h" +#include "setup.h" #include "parse-options.h" static const char * const receive_pack_usage[] = { diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 97d7c9522f..25312bb2a5 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -4,8 +4,8 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "archive.h" -#include "path.h" #include "pkt-line.h" +#include "setup.h" #include "sideband.h" #include "run-command.h" #include "strvec.h" diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index c2bbc035ab..30498fafea 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -5,11 +5,11 @@ #include "gettext.h" #include "pkt-line.h" #include "parse-options.h" -#include "path.h" #include "protocol.h" #include "replace-object.h" #include "upload-pack.h" #include "serve.h" +#include "setup.h" #include "commit.h" #include "environment.h" diff --git a/http-backend.c b/http-backend.c index 52f0483dd3..e9d1ef92bd 100644 --- a/http-backend.c +++ b/http-backend.c @@ -16,6 +16,7 @@ #include "run-command.h" #include "string-list.h" #include "url.h" +#include "setup.h" #include "strvec.h" #include "packfile.h" #include "odb.h" diff --git a/path.c b/path.c index 7f56eaf993..d726537622 100644 --- a/path.c +++ b/path.c @@ -738,106 +738,6 @@ char *interpolate_path(const char *path, int real_home) return NULL; } -/* - * First, one directory to try is determined by the following algorithm. - * - * (0) If "strict" is given, the path is used as given and no DWIM is - * done. Otherwise: - * (1) "~/path" to mean path under the running user's home directory; - * (2) "~user/path" to mean path under named user's home directory; - * (3) "relative/path" to mean cwd relative directory; or - * (4) "/absolute/path" to mean absolute directory. - * - * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" - * in this order. We select the first one that is a valid git repository, and - * chdir() to it. If none match, or we fail to chdir, we return NULL. - * - * If all goes well, we return the directory we used to chdir() (but - * before ~user is expanded), avoiding getcwd() resolving symbolic - * links. User relative paths are also returned as they are given, - * except DWIM suffixing. - */ -const char *enter_repo(const char *path, unsigned flags) -{ - static struct strbuf validated_path = STRBUF_INIT; - static struct strbuf used_path = STRBUF_INIT; - - if (!path) - return NULL; - - if (!(flags & ENTER_REPO_STRICT)) { - static const char *suffix[] = { - "/.git", "", ".git/.git", ".git", NULL, - }; - const char *gitfile; - int len = strlen(path); - int i; - while ((1 < len) && (path[len-1] == '/')) - len--; - - /* - * We can handle arbitrary-sized buffers, but this remains as a - * sanity check on untrusted input. - */ - if (PATH_MAX <= len) - return NULL; - - strbuf_reset(&used_path); - strbuf_reset(&validated_path); - strbuf_add(&used_path, path, len); - strbuf_add(&validated_path, path, len); - - if (used_path.buf[0] == '~') { - char *newpath = interpolate_path(used_path.buf, 0); - if (!newpath) - return NULL; - strbuf_attach(&used_path, newpath, strlen(newpath), - strlen(newpath)); - } - for (i = 0; suffix[i]; i++) { - struct stat st; - size_t baselen = used_path.len; - strbuf_addstr(&used_path, suffix[i]); - if (!stat(used_path.buf, &st) && - (S_ISREG(st.st_mode) || - (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { - strbuf_addstr(&validated_path, suffix[i]); - break; - } - strbuf_setlen(&used_path, baselen); - } - if (!suffix[i]) - return NULL; - gitfile = read_gitfile(used_path.buf); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, used_path.buf); - if (gitfile) { - strbuf_reset(&used_path); - strbuf_addstr(&used_path, gitfile); - } - if (chdir(used_path.buf)) - return NULL; - path = validated_path.buf; - } - else { - const char *gitfile = read_gitfile(path); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, path); - if (gitfile) - path = gitfile; - if (chdir(path)) - return NULL; - } - - if (is_git_directory(".")) { - set_git_dir(".", 0); - check_repository_format(NULL); - return path; - } - - return NULL; -} - int calc_shared_perm(struct repository *repo, int mode) { diff --git a/path.h b/path.h index e67348f253..0ec95a0b07 100644 --- a/path.h +++ b/path.h @@ -146,21 +146,6 @@ int adjust_shared_perm(struct repository *repo, const char *path); char *interpolate_path(const char *path, int real_home); -/* The bits are as follows: - * - * - ENTER_REPO_STRICT: callers that require exact paths (as opposed - * to allowing known suffixes like ".git", ".git/.git" to be - * omitted) can set this bit. - * - * - ENTER_REPO_ANY_OWNER_OK: callers that are willing to run without - * ownership check can set this bit. - */ -enum { - ENTER_REPO_STRICT = (1<<0), - ENTER_REPO_ANY_OWNER_OK = (1<<1), -}; - -const char *enter_repo(const char *path, unsigned flags); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); int normalize_path_copy_len(char *dst, const char *src, int *prefix_len); diff --git a/setup.c b/setup.c index 7086741e6c..98c6fd8ee4 100644 --- a/setup.c +++ b/setup.c @@ -1703,6 +1703,87 @@ void set_git_dir(const char *path, int make_realpath) strbuf_release(&realpath); } +const char *enter_repo(const char *path, unsigned flags) +{ + static struct strbuf validated_path = STRBUF_INIT; + static struct strbuf used_path = STRBUF_INIT; + + if (!path) + return NULL; + + if (!(flags & ENTER_REPO_STRICT)) { + static const char *suffix[] = { + "/.git", "", ".git/.git", ".git", NULL, + }; + const char *gitfile; + int len = strlen(path); + int i; + while ((1 < len) && (path[len-1] == '/')) + len--; + + /* + * We can handle arbitrary-sized buffers, but this remains as a + * sanity check on untrusted input. + */ + if (PATH_MAX <= len) + return NULL; + + strbuf_reset(&used_path); + strbuf_reset(&validated_path); + strbuf_add(&used_path, path, len); + strbuf_add(&validated_path, path, len); + + if (used_path.buf[0] == '~') { + char *newpath = interpolate_path(used_path.buf, 0); + if (!newpath) + return NULL; + strbuf_attach(&used_path, newpath, strlen(newpath), + strlen(newpath)); + } + for (i = 0; suffix[i]; i++) { + struct stat st; + size_t baselen = used_path.len; + strbuf_addstr(&used_path, suffix[i]); + if (!stat(used_path.buf, &st) && + (S_ISREG(st.st_mode) || + (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { + strbuf_addstr(&validated_path, suffix[i]); + break; + } + strbuf_setlen(&used_path, baselen); + } + if (!suffix[i]) + return NULL; + gitfile = read_gitfile(used_path.buf); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, used_path.buf); + if (gitfile) { + strbuf_reset(&used_path); + strbuf_addstr(&used_path, gitfile); + } + if (chdir(used_path.buf)) + return NULL; + path = validated_path.buf; + } + else { + const char *gitfile = read_gitfile(path); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; + } + + if (is_git_directory(".")) { + set_git_dir(".", 0); + check_repository_format(NULL); + return path; + } + + return NULL; +} + static int git_work_tree_initialized; /* diff --git a/setup.h b/setup.h index 8522fa8575..bfea199bcd 100644 --- a/setup.h +++ b/setup.h @@ -97,6 +97,44 @@ static inline int discover_git_directory(struct strbuf *commondir, void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); +/* Flags that can be passed to `enter_repo()`. */ +enum { + /* + * Callers that require exact paths (as opposed to allowing known + * suffixes like ".git", ".git/.git" to be omitted) can set this bit. + */ + ENTER_REPO_STRICT = (1<<0), + + /* + * Callers that are willing to run without ownership check can set this + * bit. + */ + ENTER_REPO_ANY_OWNER_OK = (1<<1), +}; + +/* + * Discover and enter a repository. + * + * First, one directory to try is determined by the following algorithm. + * + * (0) If "strict" is given, the path is used as given and no DWIM is + * done. Otherwise: + * (1) "~/path" to mean path under the running user's home directory; + * (2) "~user/path" to mean path under named user's home directory; + * (3) "relative/path" to mean cwd relative directory; or + * (4) "/absolute/path" to mean absolute directory. + * + * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" + * in this order. We select the first one that is a valid git repository, and + * chdir() to it. If none match, or we fail to chdir, we return NULL. + * + * If all goes well, we return the directory we used to chdir() (but + * before ~user is expanded), avoiding getcwd() resolving symbolic + * links. User relative paths are also returned as they are given, + * except DWIM suffixing. + */ +const char *enter_repo(const char *path, unsigned flags); + const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); char *prefix_path(const char *prefix, int len, const char *path); -- GitLab From 64dfb6ae0844721acb626032afc97f9e97cfda24 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 07:32:24 +0100 Subject: [PATCH 16/27] setup: convert `set_git_dir()` to have file scope We don't have any external callers of `set_git_dir()` anymore now that `enter_repo()` has been moved into "setup.c". Remove the declaration and mark the function as static. Note that this change requires us to move the implementation around so that we can avoid adding any new forward declarations. Signed-off-by: Patrick Steinhardt --- setup.c | 80 ++++++++++++++++++++++++++++----------------------------- setup.h | 1 - 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/setup.c b/setup.c index 98c6fd8ee4..8bf52df716 100644 --- a/setup.c +++ b/setup.c @@ -1002,6 +1002,46 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? NULL : path; } +static void set_git_dir_1(const char *path) +{ + xsetenv(GIT_DIR_ENVIRONMENT, path, 1); + setup_git_env(path); +} + +static void update_relative_gitdir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *data UNUSED) +{ + char *path = reparent_relative_path(old_cwd, new_cwd, + repo_get_git_dir(the_repository)); + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); + + trace_printf_key(&trace_setup_key, + "setup: move $GIT_DIR to '%s'", + path); + set_git_dir_1(path); + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); + free(path); +} + +static void set_git_dir(const char *path, int make_realpath) +{ + struct strbuf realpath = STRBUF_INIT; + + if (make_realpath) { + strbuf_realpath(&realpath, path, 1); + path = realpath.buf; + } + + set_git_dir_1(path); + if (!is_absolute_path(path)) + chdir_notify_register(NULL, update_relative_gitdir, NULL); + + strbuf_release(&realpath); +} + static const char *setup_explicit_git_dir(const char *gitdirenv, struct strbuf *cwd, struct repository_format *repo_fmt, @@ -1663,46 +1703,6 @@ void setup_git_env(const char *git_dir) fetch_if_missing = 0; } -static void set_git_dir_1(const char *path) -{ - xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); -} - -static void update_relative_gitdir(const char *name UNUSED, - const char *old_cwd, - const char *new_cwd, - void *data UNUSED) -{ - char *path = reparent_relative_path(old_cwd, new_cwd, - repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); - - trace_printf_key(&trace_setup_key, - "setup: move $GIT_DIR to '%s'", - path); - set_git_dir_1(path); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); - free(path); -} - -void set_git_dir(const char *path, int make_realpath) -{ - struct strbuf realpath = STRBUF_INIT; - - if (make_realpath) { - strbuf_realpath(&realpath, path, 1); - path = realpath.buf; - } - - set_git_dir_1(path); - if (!is_absolute_path(path)) - chdir_notify_register(NULL, update_relative_gitdir, NULL); - - strbuf_release(&realpath); -} - const char *enter_repo(const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; diff --git a/setup.h b/setup.h index bfea199bcd..d55dcc6608 100644 --- a/setup.h +++ b/setup.h @@ -94,7 +94,6 @@ static inline int discover_git_directory(struct strbuf *commondir, return 0; } -void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); /* Flags that can be passed to `enter_repo()`. */ -- GitLab From 8058536dfa8bbc867129f8ce92c24b5aa1b80e65 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 Oct 2025 15:25:16 +0200 Subject: [PATCH 17/27] odb: adopt logic to close object databases The logic to close an object database is currently contained in the packfile subsystem. That choice is somewhat relatable, as most of the logic really is to close resources associated with the packfile store itself. But we also end up handling object sources and commit graphs, which certainly is not related to packfiles. Move the function into the object database subsystem and rename it to `odb_close()`. Signed-off-by: Patrick Steinhardt --- builtin/clone.c | 2 +- builtin/gc.c | 2 +- builtin/repack.c | 2 +- midx-write.c | 2 +- odb.c | 18 +++++++++++++++++- odb.h | 7 +++++++ packfile.c | 15 --------------- packfile.h | 1 - run-command.c | 2 +- scalar.c | 2 +- 10 files changed, 30 insertions(+), 23 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index c990f398ef..b19b302b06 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1617,7 +1617,7 @@ int cmd_clone(int argc, transport_disconnect(transport); if (option_dissociate) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); dissociate_from_references(); } diff --git a/builtin/gc.c b/builtin/gc.c index d212cbb9b8..961fa343c4 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1048,7 +1048,7 @@ int cmd_gc(int argc, report_garbage = report_pack_garbage; odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); clean_pack_garbage(); } diff --git a/builtin/repack.c b/builtin/repack.c index cfdb4c0920..d9012141f6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -488,7 +488,7 @@ int cmd_repack(int argc, string_list_sort(&names); - close_object_store(repo->objects); + odb_close(repo->objects); /* * Ok we have prepared all new packfiles. diff --git a/midx-write.c b/midx-write.c index c73010df6d..60497586fd 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1459,7 +1459,7 @@ static int write_midx_internal(struct odb_source *source, } if (ctx.m || ctx.base_midx) - close_object_store(ctx.repo->objects); + odb_close(ctx.repo->objects); if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); diff --git a/odb.c b/odb.c index 3ec21ef24e..bcefa5cede 100644 --- a/odb.c +++ b/odb.c @@ -9,6 +9,7 @@ #include "khash.h" #include "lockfile.h" #include "loose.h" +#include "midx.h" #include "object-file-convert.h" #include "object-file.h" #include "odb.h" @@ -1044,6 +1045,21 @@ struct object_database *odb_new(struct repository *repo) return o; } +void odb_close(struct object_database *o) +{ + struct odb_source *source; + + packfile_store_close(o->packfiles); + + for (source = o->sources; source; source = source->next) { + if (source->midx) + close_midx(source->midx); + source->midx = NULL; + } + + close_commit_graph(o); +} + static void odb_free_sources(struct object_database *o) { while (o->sources) { @@ -1076,7 +1092,7 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - close_object_store(o); + odb_close(o); packfile_store_free(o->packfiles); o->packfiles = NULL; diff --git a/odb.h b/odb.h index 9bb28008b1..71b4897c82 100644 --- a/odb.h +++ b/odb.h @@ -169,6 +169,13 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Close the object database and all of its sources so that any held resources + * will be released. The database can still be used after closing it, in which + * case these resources may be reallocated. + */ +void odb_close(struct object_database *o); + /* * Clear caches, reload alternates and then reload object sources so that new * objects may become accessible. diff --git a/packfile.c b/packfile.c index 40f733dd23..af71eaf7e3 100644 --- a/packfile.c +++ b/packfile.c @@ -359,21 +359,6 @@ void close_pack(struct packed_git *p) oidset_clear(&p->bad_objects); } -void close_object_store(struct object_database *o) -{ - struct odb_source *source; - - packfile_store_close(o->packfiles); - - for (source = o->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - - close_commit_graph(o); -} - void unlink_pack_path(const char *pack_name, int force_delete) { static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"}; diff --git a/packfile.h b/packfile.h index 58fcc88e20..d9226a072a 100644 --- a/packfile.h +++ b/packfile.h @@ -279,7 +279,6 @@ struct object_database; unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); void close_pack_windows(struct packed_git *); void close_pack(struct packed_git *); -void close_object_store(struct object_database *o); void unuse_pack(struct pack_window **); void clear_delta_base_cache(void); struct packed_git *add_packed_git(struct repository *r, const char *path, diff --git a/run-command.c b/run-command.c index ed9575bd6a..e3e02475cc 100644 --- a/run-command.c +++ b/run-command.c @@ -743,7 +743,7 @@ int start_command(struct child_process *cmd) fflush(NULL); if (cmd->close_object_store) - close_object_store(the_repository->objects); + odb_close(the_repository->objects); #ifndef GIT_WINDOWS_NATIVE { diff --git a/scalar.c b/scalar.c index f754311627..2aeb191cc8 100644 --- a/scalar.c +++ b/scalar.c @@ -931,7 +931,7 @@ static int cmd_delete(int argc, const char **argv) if (dir_inside_of(cwd, enlistment.buf) >= 0) res = error(_("refusing to delete current working directory")); else { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); res = delete_enlistment(&enlistment); } strbuf_release(&enlistment); -- GitLab From 99959388f61310c4726a7e9cb47fd78548c9ed68 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 07:40:33 +0100 Subject: [PATCH 18/27] odb: refactor `odb_clear()` to `odb_free()` The function `odb_clear()` releases all resources allocated to an object database and ensures that all fields become zero'd out. Despite its naming though it doesn't really clear the object database so that it becomes ready for reuse afterwards again -- the caller would first have to reinitialize it, and that contradicts the terminology of "clearing" as we have defined it in our coding guidelines. There isn't really only a reason to have "clearing" semantics, either. There's only a single caller of `odb_clear()`, and that caller also ends up freeing the object database structure itself. Refactor the function to have "freeing" semantics instead, so that the structure itself is also freed, which allows us to drop some useless boilerplate to zero out the structure's members. This refactoring reveals that we're trying to close the commit graph multiple times: once directly via `free_commit_graph()`, and once via `odb_close()`. Drop the former call. Signed-off-by: Patrick Steinhardt --- odb.c | 19 ++++++++----------- odb.h | 4 +++- repository.c | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index bcefa5cede..29cf6496c5 100644 --- a/odb.c +++ b/odb.c @@ -1073,30 +1073,27 @@ static void odb_free_sources(struct object_database *o) o->source_by_path = NULL; } -void odb_clear(struct object_database *o) +void odb_free(struct object_database *o) { - FREE_AND_NULL(o->alternate_db); + if (!o) + return; + + free(o->alternate_db); oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); - free_commit_graph(o->commit_graph); - o->commit_graph = NULL; - o->commit_graph_attempted = 0; - odb_free_sources(o); - o->sources_tail = NULL; - o->loaded_alternates = 0; for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); - FREE_AND_NULL(o->cached_objects); + free(o->cached_objects); odb_close(o); packfile_store_free(o->packfiles); - o->packfiles = NULL; - string_list_clear(&o->submodule_source_paths, 0); + + free(o); } void odb_reprepare(struct object_database *o) diff --git a/odb.h b/odb.h index 71b4897c82..77b313b784 100644 --- a/odb.h +++ b/odb.h @@ -167,7 +167,9 @@ struct object_database { }; struct object_database *odb_new(struct repository *repo); -void odb_clear(struct object_database *o); + +/* Free the object database and release all resources. */ +void odb_free(struct object_database *o); /* * Close the object database and all of its sources so that any held resources diff --git a/repository.c b/repository.c index 6aaa7ba008..3c8b3813b0 100644 --- a/repository.c +++ b/repository.c @@ -382,8 +382,8 @@ void repo_clear(struct repository *repo) FREE_AND_NULL(repo->worktree); FREE_AND_NULL(repo->submodule_prefix); - odb_clear(repo->objects); - FREE_AND_NULL(repo->objects); + odb_free(repo->objects); + repo->objects = NULL; parsed_object_pool_clear(repo->parsed_objects); FREE_AND_NULL(repo->parsed_objects); -- GitLab From bbd2b7bcee5cf1b3f95b22731fa45c933c01c99c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 31 Oct 2025 10:50:35 +0100 Subject: [PATCH 19/27] odb: move logic to disable ref updates into repo Our object database sources have a field `disable_ref_updates`. This field can obviously be set to disable reference updates, but it is somewhat curious that this logic is hosted by the object database. The reason for this is that it was primarily added to keep us from accidentally updating references while an ODB transaction is ongoing. Any objects part of the transaction have not yet been committed to disk, so new references that point to them might get corrupted in case we never end up committing the transaction. As such, whenever we create a new transaction we set up a new temporary ODB source and mark it as disabling reference updates. This has one (and only one?) upside: once we have committed the transaction, the temporary source will be dropped and thus we clean up the disabled reference updates automatically. But other than that, it's somewhat misdesigned: - We can have multiple ODB sources, but only the currently active source inhibits reference updates. - We're mixing concerns of the refbd with the ODB. Arguably, the decision of whether we can update references or not should be handled by the refdb. But that wouldn't be a great fit either, as there can be one refdb per worktree. So we'd again have the same problem that a "global" intent becomes localized to a specific instance. Instead, move the setting into the repository. While at it, convert it into a boolean. Signed-off-by: Patrick Steinhardt --- odb.c | 3 ++- odb.h | 7 ------- refs.c | 2 +- repository.c | 2 +- repository.h | 9 ++++++++- setup.c | 2 +- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/odb.c b/odb.c index 29cf6496c5..ccc6e999e7 100644 --- a/odb.c +++ b/odb.c @@ -360,7 +360,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Disable ref updates while a temporary odb is active, since * the objects in the database may roll back. */ - source->disable_ref_updates = 1; + odb->repo->disable_ref_updates = true; source->will_destroy = will_destroy; source->next = odb->sources; odb->sources = source; @@ -387,6 +387,7 @@ void odb_restore_primary_source(struct object_database *odb, if (cur_source->next != restore_source) BUG("we expect the old primary object store to be the first alternate"); + odb->repo->disable_ref_updates = false; odb->sources = restore_source; odb_source_free(cur_source); } diff --git a/odb.h b/odb.h index 77b313b784..99c4d48972 100644 --- a/odb.h +++ b/odb.h @@ -66,13 +66,6 @@ struct odb_source { */ bool local; - /* - * This is a temporary object store created by the tmp_objdir - * facility. Disable ref updates since the objects in the store - * might be discarded on rollback. - */ - int disable_ref_updates; - /* * This object store is ephemeral, so there is no need to fsync. */ diff --git a/refs.c b/refs.c index 965381367e..6c7283d9eb 100644 --- a/refs.c +++ b/refs.c @@ -2491,7 +2491,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction, break; } - if (refs->repo->objects->sources->disable_ref_updates) { + if (refs->repo->disable_ref_updates) { strbuf_addstr(err, _("ref updates forbidden inside quarantine environment")); return -1; diff --git a/repository.c b/repository.c index 3c8b3813b0..455c2d279f 100644 --- a/repository.c +++ b/repository.c @@ -179,7 +179,7 @@ void repo_set_gitdir(struct repository *repo, repo->objects->sources->path = objects_path; } - repo->objects->sources->disable_ref_updates = o->disable_ref_updates; + repo->disable_ref_updates = o->disable_ref_updates; free(repo->objects->alternate_db); repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); diff --git a/repository.h b/repository.h index 5808a5d610..614649413b 100644 --- a/repository.h +++ b/repository.h @@ -71,6 +71,13 @@ struct repository { */ struct ref_store *refs_private; + /* + * Disable ref updates. This is especially used in contexts where + * transactions may still be rolled back so that we don't start to + * reference objects that may vanish. + */ + bool disable_ref_updates; + /* * A strmap of ref_stores, stored by submodule name, accessible via * `repo_get_submodule_ref_store()`. @@ -187,7 +194,7 @@ struct set_gitdir_args { const char *graft_file; const char *index_file; const char *alternate_db; - int disable_ref_updates; + bool disable_ref_updates; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index 8bf52df716..a752e9fc84 100644 --- a/setup.c +++ b/setup.c @@ -1682,7 +1682,7 @@ void setup_git_env(const char *git_dir) args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = 1; + args.disable_ref_updates = true; } repo_set_gitdir(the_repository, git_dir, &args); -- GitLab From 299e2db42e63d1de3fed0aef043100932c16b0db Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 12:32:10 +0100 Subject: [PATCH 20/27] oidset: introduce `oidset_equal()` Introduce a new function that allows the caller to verify whether two oidsets contain the exact same object IDs. Note that this change requires us to change `oidset_iter_init()` to accept a `const struct oidset`. Signed-off-by: Patrick Steinhardt --- oidset.c | 16 ++++++++++++++++ oidset.h | 9 +++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/oidset.c b/oidset.c index 8d36aef8dc..c8ff0b385c 100644 --- a/oidset.c +++ b/oidset.c @@ -16,6 +16,22 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid) return pos != kh_end(&set->set); } +bool oidset_equal(const struct oidset *a, const struct oidset *b) +{ + struct oidset_iter iter; + struct object_id *a_oid; + + if (oidset_size(a) != oidset_size(b)) + return false; + + oidset_iter_init(a, &iter); + while ((a_oid = oidset_iter_next(&iter))) + if (!oidset_contains(b, a_oid)) + return false; + + return true; +} + int oidset_insert(struct oidset *set, const struct object_id *oid) { int added; diff --git a/oidset.h b/oidset.h index 0106b6f278..e0f1a6ff4f 100644 --- a/oidset.h +++ b/oidset.h @@ -38,6 +38,11 @@ void oidset_init(struct oidset *set, size_t initial_size); */ int oidset_contains(const struct oidset *set, const struct object_id *oid); +/** + * Returns true iff `a` and `b` contain the exact same OIDs. + */ +bool oidset_equal(const struct oidset *a, const struct oidset *b); + /** * Insert the oid into the set; a copy is made, so "oid" does not need * to persist after this function is called. @@ -94,11 +99,11 @@ void oidset_parse_file_carefully(struct oidset *set, const char *path, oidset_parse_tweak_fn fn, void *cbdata); struct oidset_iter { - kh_oid_set_t *set; + const kh_oid_set_t *set; khiter_t iter; }; -static inline void oidset_iter_init(struct oidset *set, +static inline void oidset_iter_init(const struct oidset *set, struct oidset_iter *iter) { iter->set = &set->set; -- GitLab From 0cd68ba9c289f8d49f7f0435cfd71eec1393ed72 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 12:12:26 +0100 Subject: [PATCH 21/27] builtin/index-pack: fix deferred fsck outside repos When asked to perform object consistency checks via the `--fsck-objects` flag we verify that each object part of the pack is valid. In general, this check can even be performed outside of a Git repository: we don't need an initialized object database as we simply read the object from the packfile directly. But there's one exception: a subset of the object checks may be deferred to a later point in time. For now, this only concerns ".gitmodules" and ".gitattributes" files: whenever we see a tree referencing these files we queue them for a deferred check. This is done because we need to do some extra checks for those files to ensure that they are well-formed, and these checks need to be done regardless of whether the corresponding blobs are part of the packfile or not. This works inside a repository, but unfortunately the logic leads to a segfault when running outside of one. This is because we eventually call `odb_read_object()`, which will crash because the object database has not been initialized. There's multiple options here: - We could in theory create a purely in-memory database with only a packfile store that contains the single packfile. We don't really have the infrastructure for this yet though, and it would end up being quite hacky. - We could refuse to perform consistency checks outside of a repository. But most of the checks work alright, so this would be a regression. - We can skip the finalizing consistency checks when running outside of a repository. This is not as invasive as skipping all checks, but it's not great to randomly skip a subset of tests, either. None of these options really feel perfect. The first one would be the obvious choice if easily possible. There's another option though: instead of skipping the final object checks, we can die if there are any queued object checks. With this change we now die exactly if and only if we would have previously segfaulted. Like this we ensure that objects that _may_ fail the consistency checks won't be silently skipped, and at the same time we give users a much better error message. Refactor the code accordingly and add a test that would have triggered the segfault. Note that we also move down the logic to add the packfile to the store. There is no point doing this any earlier than right before we execute `fsck_finish()`, and it ensures that the logic to set up and perform the consistency check is self-contained. Signed-off-by: Patrick Steinhardt --- builtin/index-pack.c | 21 ++++++++++++++++++--- fsck.c | 6 ++++++ fsck.h | 7 +++++++ t/t5302-pack-index.sh | 16 ++++++++++++++++ 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4..699fe678cd 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,7 +1640,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) + if (do_fsck_object && startup_info->have_repository) packfile_store_load_pack(the_repository->objects->packfiles, final_index_name, 0); @@ -2110,8 +2110,23 @@ int cmd_index_pack(int argc, else close(input_fd); - if (do_fsck_object && fsck_finish(&fsck_options)) - die(_("fsck error in pack objects")); + if (do_fsck_object) { + /* + * We cannot perform queued consistency checks when running + * outside of a repository because those require us to read + * from the object database, which is uninitialized. + * + * TODO: we may eventually set up an in-memory object database, + * which would allow us to perform these queued checks. + */ + if (!startup_info->have_repository && + fsck_has_queued_checks(&fsck_options)) + die(_("cannot perform queued object checks outside " + "of a repository")); + + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } free(opts.anomaly); free(objects); diff --git a/fsck.c b/fsck.c index 341e100d24..8e1565fe6d 100644 --- a/fsck.c +++ b/fsck.c @@ -1350,6 +1350,12 @@ int fsck_finish(struct fsck_options *options) return ret; } +bool fsck_has_queued_checks(struct fsck_options *options) +{ + return !oidset_equal(&options->gitmodules_found, &options->gitmodules_done) || + !oidset_equal(&options->gitattributes_found, &options->gitattributes_done); +} + void fsck_options_clear(struct fsck_options *options) { free(options->msg_type); diff --git a/fsck.h b/fsck.h index cb6ef32f4f..336917c045 100644 --- a/fsck.h +++ b/fsck.h @@ -248,6 +248,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, */ int fsck_finish(struct fsck_options *options); +/* + * Check whether there are any checks that have been queued up and that still + * need to be run. Returns `false` iff `fsck_finish()` wouldn't perform any + * actions, `true` otherwise. + */ +bool fsck_has_queued_checks(struct fsck_options *options); + /* * Clear the fsck_options struct, freeing any allocated memory. */ diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 413c99274c..9697448cb2 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -293,4 +293,20 @@ test_expect_success 'too-large packs report the breach' ' grep "maximum allowed size (20 bytes)" err ' +# git-index-pack(1) uses the default hash algorithm outside of the repository, +# and it has no way to tell it otherwise. So we can only run this test with the +# default hash algorithm, as it would otherwise fail to parse the tree. +test_expect_success DEFAULT_HASH_ALGORITHM 'index-pack --fsck-objects outside of a repo' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + printf "100644 blob $(test_oid 001)\t.gitattributes\n" >tree && + git mktree --missing tree-oid && + git pack-objects err && + test_grep "cannot perform queued object checks outside of a repository" err + ) +' + test_done -- GitLab From da09bb243ec045640fd9150d242fe70443f57e71 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 15:34:17 +0100 Subject: [PATCH 22/27] t/helper: stop setting up `the_repository` repeatedly The "repository" test helper sets up `the_repository` twice. In fact though, we don't even have to set it up even once: all we need is to set up its hash algorithm, because we still depend on some subsystems that aren't free of `the_repository`. Refactor the code accordingly. This prepares for a subsequent change, where setting up the repository repeatedly will lead to a `BUG()`. Signed-off-by: Patrick Steinhardt --- t/helper/test-repository.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 63c37de33d..9ba94cdffa 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -17,10 +17,6 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, struct commit *c; struct commit_list *parent; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -47,10 +43,6 @@ static void test_get_commit_tree_in_graph(const char *gitdir, struct commit *c; struct tree *tree; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -75,24 +67,20 @@ static void test_get_commit_tree_in_graph(const char *gitdir, int cmd__repository(int argc, const char **argv) { - int nongit_ok = 0; - - setup_git_directory_gently(&nongit_ok); - if (argc < 2) die("must have at least 2 arguments"); if (!strcmp(argv[1], "parse_commit_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_parse_commit_in_graph(argv[2], argv[3], &oid); } else if (!strcmp(argv[1], "get_commit_tree_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_get_commit_tree_in_graph(argv[2], argv[3], &oid); } else { -- GitLab From 630052c1267eecce82f5061fd353909a67f0fd4c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 15:44:32 +0100 Subject: [PATCH 23/27] http-push: stop setting up `the_repository` for each reference When pushing references via HTTP we call `repo_init_revisions()` in a loop for each reference that we're about to push. As third argument we pass the result of `setup_git_directory()`, which causes us to reinitialize the repository every single time. This is an obvious waste of compute, as the repository that we're working in will never change across any of the initializations. The only reason that we do this is to retrieve the directory of the repository. Furthermore, this is about to create issues in a subsequent commit, where reinitializing the repository will cause a `BUG()`. Address this by storing the Git directory in a variable instead so that we don't have to call the function repeatedly. Signed-off-by: Patrick Steinhardt --- http-push.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/http-push.c b/http-push.c index a1c01e3b9b..a48ca23799 100644 --- a/http-push.c +++ b/http-push.c @@ -1725,6 +1725,7 @@ int cmd_main(int argc, const char **argv) int i; int new_refs; struct ref *ref, *local_refs = NULL; + const char *gitdir; CALLOC_ARRAY(repo, 1); @@ -1787,7 +1788,7 @@ int cmd_main(int argc, const char **argv) if (delete_branch && rs.nr != 1) die("You must specify only one branch name when deleting a remote branch"); - setup_git_directory(); + gitdir = setup_git_directory(); memset(remote_dir_exists, -1, 256); @@ -1941,7 +1942,7 @@ int cmd_main(int argc, const char **argv) if (!push_all && !is_null_oid(&ref->old_oid)) strvec_pushf(&commit_argv, "^%s", oid_to_hex(&ref->old_oid)); - repo_init_revisions(the_repository, &revs, setup_git_directory()); + repo_init_revisions(the_repository, &revs, gitdir); setup_revisions_from_strvec(&commit_argv, &revs, NULL); revs.edge_hint = 0; /* just in case */ -- GitLab From 047c5d59e542320d9a8e2fee3824a234d7678292 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 07:54:36 +0100 Subject: [PATCH 24/27] odb: handle initialization of sources in `odb_new()` The logic to set up a new object database is currently distributed across two functions in "repository.c": - In `initialize_repository()` we initialize an empty object database. This object database is not fully initialized and doesn't have any sources attached to it. - The primary object database source is then created in `repo_set_gitdir()`. Ideally though, the logic should be entirely self-contained so that we can iterate more readily on how exactly the sources themselves get set up. Refactor `odb_new()` to handle both allocation and setup of the object database. This ensures that the object database is always initialized and ready for use, and it allows us to change how the sources get set up eventually. Note that `repo_set_gitdir()` still reaches into the sources when the function gets called with an already-initialized object database. This will be fixed in the next commit. Signed-off-by: Patrick Steinhardt --- odb.c | 14 +++++++++++++- odb.h | 15 ++++++++++++++- repository.c | 20 ++++++++------------ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index ccc6e999e7..88b40c81c0 100644 --- a/odb.c +++ b/odb.c @@ -1034,15 +1034,27 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } -struct object_database *odb_new(struct repository *repo) +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *secondary_sources) { struct object_database *o = xmalloc(sizeof(*o)); + char *to_free = NULL; memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); + + if (!primary_source) + primary_source = to_free = xstrfmt("%s/objects", repo->commondir); + o->sources = odb_source_new(o, primary_source, true); + o->sources_tail = &o->sources->next; + o->alternate_db = xstrdup_or_null(secondary_sources); + + free(to_free); + return o; } diff --git a/odb.h b/odb.h index 99c4d48972..41b3c03027 100644 --- a/odb.h +++ b/odb.h @@ -159,7 +159,20 @@ struct object_database { struct string_list submodule_source_paths; }; -struct object_database *odb_new(struct repository *repo); +/* + * Create a new object database for the given repository. + * + * If the primary source parameter is set it will override the usual primary + * object directory derived from the repository's common directory. The + * alternate sources are expected to be a PATH_SEP-separated list of secondary + * sources. Note that these alternate sources will be added in addition to, not + * instead of, the alternates identified by the primary source. + * + * Returns the newly created object database. + */ +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *alternate_sources); /* Free the object database and release all resources. */ void odb_free(struct object_database *o); diff --git a/repository.c b/repository.c index 455c2d279f..5975c8f341 100644 --- a/repository.c +++ b/repository.c @@ -52,7 +52,6 @@ static void set_default_hash_algo(struct repository *repo) void initialize_repository(struct repository *repo) { - repo->objects = odb_new(repo); repo->remote_state = remote_state_new(); repo->parsed_objects = parsed_object_pool_new(repo); ALLOC_ARRAY(repo->index, 1); @@ -160,29 +159,26 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; - char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - - if (!repo->objects->sources) { - repo->objects->sources = odb_source_new(repo->objects, - objects_path, true); - repo->objects->sources_tail = &repo->objects->sources->next; - free(objects_path); + + if (!repo->objects) { + repo->objects = odb_new(repo, o->object_dir, o->alternate_db); } else { + char *objects_path = NULL; + expand_base_dir(&objects_path, o->object_dir, + repo->commondir, "objects"); free(repo->objects->sources->path); repo->objects->sources->path = objects_path; + free(repo->objects->alternate_db); + repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); } repo->disable_ref_updates = o->disable_ref_updates; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); expand_base_dir(&repo->graft_file, o->graft_file, repo->commondir, "info/grafts"); expand_base_dir(&repo->index_file, o->index_file, -- GitLab From 019562827e97d90a4c5b89eb13d7cb843cdaad3f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 16:07:56 +0100 Subject: [PATCH 25/27] chdir-notify: add function to unregister listeners While we (obviously) have a way to register new listeners that get called whenever we chdir(3p), we don't have an equivalent that can be used to unregister such a listener again. Add one, as it will be required in a subsequent commit. Signed-off-by: Patrick Steinhardt --- chdir-notify.c | 18 ++++++++++++++++++ chdir-notify.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/chdir-notify.c b/chdir-notify.c index 0d7bc04607..f8bfe3cbef 100644 --- a/chdir-notify.c +++ b/chdir-notify.c @@ -25,6 +25,24 @@ void chdir_notify_register(const char *name, list_add_tail(&e->list, &chdir_notify_entries); } +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data) +{ + struct list_head *pos, *p; + + list_for_each_safe(pos, p, &chdir_notify_entries) { + struct chdir_notify_entry *e = + list_entry(pos, struct chdir_notify_entry, list); + + if (e->cb != cb || e->data != data || !e->name != !name || + (e->name && strcmp(e->name, name))) + continue; + + list_del(pos); + free(e); + } +} + static void reparent_cb(const char *name, const char *old_cwd, const char *new_cwd, diff --git a/chdir-notify.h b/chdir-notify.h index 366e4c1ee9..81eb69d846 100644 --- a/chdir-notify.h +++ b/chdir-notify.h @@ -41,6 +41,8 @@ typedef void (*chdir_notify_callback)(const char *name, const char *new_cwd, void *data); void chdir_notify_register(const char *name, chdir_notify_callback cb, void *data); +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data); void chdir_notify_reparent(const char *name, char **path); /* -- GitLab From a7b7084fea4cee617ec4c6d925a6290bc3aa7c80 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 07:59:11 +0100 Subject: [PATCH 26/27] odb: handle changing a repository's commondir The function `repo_set_gitdir()` is called in two situations: - To initialize the repository with its discovered location. As part of this we also set up the new object database. - To update the repository's discovered location in case the process changes its working directory so that we update relative paths. This means we also have to update any relative paths that are potentially used in the object database. In the context of the object database we ideally wouldn't ever have to worry about the second case: if all paths used by our object database sources were absolute, then we wouldn't have to update them. But unfortunately, the paths aren't only used to locate files owned by the given source, but we also use them for reporting purposes. One such example is `repo_get_object_directory()`, where we cannot just change semantics to always return absolute paths, as that is likely to break tooling out there. One solution to this would be to have both a "display path" and an "internal path". This would allow us to use internal paths for all internal matters, but continue to use the potentially-relative display paths so that we don't break compatibility. But converting the codebase to honor this split is quite a messy endeavour, and it wouldn't even help us with the goal to get rid of the need to update the display path on chdir(3p). Another solution would be to rework "setup.c" so that we never have to update paths in the first place. In that case, we'd only initialize the repository once we have figured out final locations for all directories. This would be a significant simplification of that subsystem indeed, but the current logic is so messy that it would take significant investments to get there. Meanwhile though, while object sources may still use relative paths, the best thing we can do is to handle the reparenting of the object source paths in the object database itself. This can be done by registering one callback for each object database so that we get notified whenever the current working directory changes, and we then perform the reparenting ourselves. Ideally, this wouldn't even happen on the object database level, but instead handled by each object database source. But we don't yet have proper pluggable object database sources, so this will need to be handled at a later point in time. The logic itself is rather simple: - We register the callback when creating the object database. - We unregister the callback when releasing it again. - We split up `set_git_dir_1()` so that it becomes possible to skip recreating the object database. This is required because the function is called both when the current working directory changes, but also when we set up the repository. Calling this function without skipping creation of the ODB will result in a bug in case it's already created. Signed-off-by: Patrick Steinhardt --- odb.c | 37 +++++++++++++++++++++-- odb.h | 4 --- repository.c | 13 ++------ repository.h | 1 + setup.c | 84 ++++++++++++++++++++++++++++------------------------ 5 files changed, 83 insertions(+), 56 deletions(-) diff --git a/odb.c b/odb.c index 88b40c81c0..70665fb7f4 100644 --- a/odb.c +++ b/odb.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "abspath.h" +#include "chdir-notify.h" #include "commit-graph.h" #include "config.h" #include "dir.h" @@ -142,9 +143,9 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) +static struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) { struct odb_source *source; @@ -1034,6 +1035,32 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } +static void odb_update_commondir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *cb_data) +{ + struct object_database *odb = cb_data; + struct odb_source *source; + + /* + * In theory, we only have to do this for the primary object source, as + * alternates' paths are always resolved to an absolute path. + */ + for (source = odb->sources; source; source = source->next) { + char *path; + + if (is_absolute_path(source->path)) + continue; + + path = reparent_relative_path(old_cwd, new_cwd, + source->path); + + free(source->path); + source->path = path; + } +} + struct object_database *odb_new(struct repository *repo, const char *primary_source, const char *secondary_sources) @@ -1055,6 +1082,8 @@ struct object_database *odb_new(struct repository *repo, free(to_free); + chdir_notify_register(NULL, odb_update_commondir, o); + return o; } @@ -1106,6 +1135,8 @@ void odb_free(struct object_database *o) packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); + chdir_notify_unregister(NULL, odb_update_commondir, o); + free(o); } diff --git a/odb.h b/odb.h index 41b3c03027..014cd9585a 100644 --- a/odb.h +++ b/odb.h @@ -78,10 +78,6 @@ struct odb_source { char *path; }; -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local); - struct packed_git; struct packfile_store; struct cached_object_entry; diff --git a/repository.c b/repository.c index 5975c8f341..863f24411b 100644 --- a/repository.c +++ b/repository.c @@ -165,17 +165,10 @@ void repo_set_gitdir(struct repository *repo, repo_set_commondir(repo, o->commondir); - if (!repo->objects) { + if (!repo->objects) repo->objects = odb_new(repo, o->object_dir, o->alternate_db); - } else { - char *objects_path = NULL; - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - free(repo->objects->sources->path); - repo->objects->sources->path = objects_path; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); - } + else if (!o->skip_initializing_odb) + BUG("cannot reinitialize an already-initialized object directory"); repo->disable_ref_updates = o->disable_ref_updates; diff --git a/repository.h b/repository.h index 614649413b..6063c4b846 100644 --- a/repository.h +++ b/repository.h @@ -195,6 +195,7 @@ struct set_gitdir_args { const char *index_file; const char *alternate_db; bool disable_ref_updates; + bool skip_initializing_odb; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index a752e9fc84..a625f9fbc8 100644 --- a/setup.c +++ b/setup.c @@ -1002,10 +1002,51 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? NULL : path; } -static void set_git_dir_1(const char *path) +static void setup_git_env_internal(const char *git_dir, + bool skip_initializing_odb) +{ + char *git_replace_ref_base; + const char *shallow_file; + const char *replace_ref_base; + struct set_gitdir_args args = { NULL }; + struct strvec to_free = STRVEC_INIT; + + args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); + args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); + args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); + args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); + args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); + if (getenv(GIT_QUARANTINE_ENVIRONMENT)) + args.disable_ref_updates = true; + args.skip_initializing_odb = skip_initializing_odb; + + repo_set_gitdir(the_repository, git_dir, &args); + strvec_clear(&to_free); + + if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) + disable_replace_refs(); + replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); + git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base + : "refs/replace/"); + update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); + + shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); + if (shallow_file) + set_alternate_shallow_file(the_repository, shallow_file, 0); + + if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) + fetch_if_missing = 0; +} + +void setup_git_env(const char *git_dir) +{ + setup_git_env_internal(git_dir, false); +} + +static void set_git_dir_1(const char *path, bool skip_initializing_odb) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); + setup_git_env_internal(path, skip_initializing_odb); } static void update_relative_gitdir(const char *name UNUSED, @@ -1020,7 +1061,7 @@ static void update_relative_gitdir(const char *name UNUSED, trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); - set_git_dir_1(path); + set_git_dir_1(path, true); if (tmp_objdir) tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); @@ -1035,7 +1076,7 @@ static void set_git_dir(const char *path, int make_realpath) path = realpath.buf; } - set_git_dir_1(path); + set_git_dir_1(path, false); if (!is_absolute_path(path)) chdir_notify_register(NULL, update_relative_gitdir, NULL); @@ -1668,41 +1709,6 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } -void setup_git_env(const char *git_dir) -{ - char *git_replace_ref_base; - const char *shallow_file; - const char *replace_ref_base; - struct set_gitdir_args args = { NULL }; - struct strvec to_free = STRVEC_INIT; - - args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); - args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); - args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); - args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); - args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); - if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = true; - } - - repo_set_gitdir(the_repository, git_dir, &args); - strvec_clear(&to_free); - - if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) - disable_replace_refs(); - replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); - git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base - : "refs/replace/"); - update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); - - shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); - if (shallow_file) - set_alternate_shallow_file(the_repository, shallow_file, 0); - - if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) - fetch_if_missing = 0; -} - const char *enter_repo(const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; -- GitLab From 0422b0412ca8bd4da4d446cf83985fa3b7c549d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 16:37:15 +0100 Subject: [PATCH 27/27] odb: handle recreation of quarantine directories In the preceding commit we have moved the logic that reparents object database sources on chdir(3p) from "setup.c" into "odb.c". Let's also do the same for any temporary quarantine directories so that the complete reparenting logic is self-contained in "odb.c". Signed-off-by: Patrick Steinhardt --- odb.c | 7 +++++++ setup.c | 5 ----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 70665fb7f4..dc8f292f3d 100644 --- a/odb.c +++ b/odb.c @@ -24,6 +24,7 @@ #include "strbuf.h" #include "strvec.h" #include "submodule.h" +#include "tmp-objdir.h" #include "trace2.h" #include "write-or-die.h" @@ -1041,8 +1042,11 @@ static void odb_update_commondir(const char *name UNUSED, void *cb_data) { struct object_database *odb = cb_data; + struct tmp_objdir *tmp_objdir; struct odb_source *source; + tmp_objdir = tmp_objdir_unapply_primary_odb(); + /* * In theory, we only have to do this for the primary object source, as * alternates' paths are always resolved to an absolute path. @@ -1059,6 +1063,9 @@ static void odb_update_commondir(const char *name UNUSED, free(source->path); source->path = path; } + + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); } struct object_database *odb_new(struct repository *repo, diff --git a/setup.c b/setup.c index a625f9fbc8..ae66188af3 100644 --- a/setup.c +++ b/setup.c @@ -22,7 +22,6 @@ #include "chdir-notify.h" #include "path.h" #include "quote.h" -#include "tmp-objdir.h" #include "trace.h" #include "trace2.h" #include "worktree.h" @@ -1056,14 +1055,10 @@ static void update_relative_gitdir(const char *name UNUSED, { char *path = reparent_relative_path(old_cwd, new_cwd, repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); - trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); set_git_dir_1(path, true); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); } -- GitLab