diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 2f642697e9e106c3c4c96f7c22d66aec6c7a27f4..458bb87363386f934431a1f1bdebc4e4231259b6 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -9,7 +9,14 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=<dir>] [--[no-]bitmap] <sub-command> +'git multi-pack-index' [<options>] write [--preferred-pack=<pack>] + [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] + [--refs-snapshot=<path>] +'git multi-pack-index' [<options>] compact [--[no-]incremental] + [--[no-]bitmap] <from> <to> +'git multi-pack-index' [<options>] verify +'git multi-pack-index' [<options>] expire +'git multi-pack-index' [<options>] repack [--batch-size=<size>] DESCRIPTION ----------- @@ -18,6 +25,8 @@ Write or verify a multi-pack-index (MIDX) file. OPTIONS ------- +The following command-line options are applicable to all sub-commands: + --object-dir=<dir>:: Use given directory for the location of Git objects. We check `<dir>/packs/multi-pack-index` for the current MIDX file, and @@ -73,7 +82,18 @@ marker). Write an incremental MIDX file containing only objects and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when - necessary. Incompatible with `--bitmap`. + necessary. +-- + +compact:: + Write a new MIDX layer containing only objects and packs present + in the range `<from>` to `<to>`, where both arguments are + checksums of existing layers in the MIDX chain. ++ +-- + --incremental:: + Write the result to a MIDX chain instead of writing a + stand-alone MIDX. Incompatible with `--bitmap`.
-- verify:: diff --git a/Makefile b/Makefile index 6fc322ff88184d596f70947a63606b95347b3e43..f3fc3d99ea199bab062a9b2b8de9e7f45472ce9d 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,9 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/source.o +LIB_OBJS += odb/source-files.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1297,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41bad6072aa68dcddb18f271791f1d8c..0fc70d13a8807eb51c9e44b494ec2aadbd2d69db 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" #include "run-command.h" #include "write-or-die.h" @@ -129,22 +129,20 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; - enum object_type type; - unsigned long sz; + struct odb_read_stream *st; char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43dc43e3c4bbae4efc1a09e110b4898c5..97ea8d60d6187b35de7f5fd6ea8bc5c529679bc3 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" 
-#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { @@ -429,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +438,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +485,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837b03beddeaf5b0825a183b37eb5a9f0..d8bb8923bce7c2bb02b8a3fed4aa39377664bb3f 100644 --- 
a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } @@ -807,11 +807,14 @@ struct for_each_object_payload { void *payload; }; -static int batch_one_object_loose(const struct object_id *oid, - const char *path UNUSED, - void *_payload) +static int batch_one_object_oi(const struct object_id *oid, + struct object_info *oi, + void *_payload) { struct for_each_object_payload *payload = _payload; + if (oi && oi->whence == OI_PACKED) + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset, + payload->payload); return payload->callback(oid, NULL, 0, payload->payload); } @@ -847,8 +850,15 @@ static void batch_each_object(struct batch_options *opt, .payload = _payload, }; struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + struct odb_source *source; - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0); + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; source; source = source->next) { + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, + &payload, flags); + if (ret) + break; + } if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { @@ -862,8 +872,15 @@ static void batch_each_object(struct batch_options *opt, 
&payload, flags); } } else { - for_each_packed_object(the_repository, batch_one_object_packed, - &payload, flags); + struct object_info oi = { 0 }; + + for (source = the_repository->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + int ret = packfile_store_for_each_object(files->packed, &oi, + batch_one_object_oi, &payload, flags); + if (ret) + break; + } } free_bitmap_index(bitmap); @@ -923,7 +940,7 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; batch_each_object(opt, batch_unordered_object, - FOR_EACH_OBJECT_PACK_ORDER, &cb); + ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 7849005ccb15ff40fbccc32059d6a04f75b8d44e..a41f95191e79aadc410ce401498b43265b6a6f49 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -875,6 +875,7 @@ static void end_packfile(void) running = 1; clear_delta_base_cache(); if (object_count) { + struct odb_source_files *files = odb_source_files_downcast(pack_data->repo->objects->sources); struct packed_git *new_p; struct object_id cur_pack_oid; char *idx_name; @@ -900,8 +901,7 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. 
*/ - new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, - idx_name, 1); + new_p = packfile_store_load_pack(files->packed, idx_name, 1); if (!new_p) die(_("core Git rejected index %s"), idx_name); all_packs[pack_id] = new_p; @@ -955,7 +955,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; + struct odb_source *source; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -979,7 +979,13 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + } + + for (source = the_repository->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + + if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid)) + continue; e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ @@ -1096,10 +1102,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { - struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); + struct odb_source *source; struct object_entry *e; struct object_id oid; unsigned long hdrlen; @@ -1179,24 +1185,31 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); + goto out; + } + + for (source = the_repository->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); - } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { + if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid)) + continue; e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - - } else { - e->depth = 0; - e->type = OBJ_BLOB; - e->pack_id = pack_id; - e->idx.offset = offset; - e->idx.crc32 = crc32_end(pack_file); - object_count++; - object_count_by_type[OBJ_BLOB]++; + goto out; } + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + +out: free(in_buf); free(out_buf); } diff --git a/builtin/fsck.c b/builtin/fsck.c index c489582faa6650501906984164799403a61fd896..96107695ae1ed111dcdb39abce7b8c52ed650ed1 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED, return 0; } -static void mark_unreachable_referents(const struct object_id *oid) +static int mark_unreachable_referents(const struct object_id *oid, + struct object_info *io UNUSED, + void *data UNUSED) { struct fsck_options options = FSCK_OPTIONS_DEFAULT; struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) - return; /* not part of our original set */ + return 0; /* not part of our original set */ if (obj->flags & REACHABLE) - return; /* reachable objects already traversed */ + return 0; /* reachable objects already traversed */ /* * Avoid passing OBJ_NONE to fsck_walk, which will parse the object @@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid) fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) free_tree_buffer((struct tree *)obj); -} -static int mark_loose_unreachable_referents(const struct object_id *oid, - const char 
*path UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); - return 0; -} - -static int mark_packed_unreachable_referents(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_unreachable_referents(oid); return 0; } @@ -340,7 +327,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1)) + if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); @@ -393,12 +381,8 @@ static void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. */ - for_each_loose_object(the_repository->objects, - mark_loose_unreachable_referents, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_unreachable_referents, - NULL, - 0); + odb_for_each_object(the_repository->objects, NULL, + mark_unreachable_referents, NULL, 0); } /* Look up all the requirements, warn about missing objects.. 
*/ @@ -847,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path, fsck_resolve_undo(istate, index_path); } -static void mark_object_for_connectivity(const struct object_id *oid) +static int mark_object_for_connectivity(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data UNUSED) { struct object *obj = lookup_unknown_object(the_repository, oid); obj->flags |= HAS_OBJ; -} - -static int mark_loose_for_connectivity(const struct object_id *oid, - const char *path UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); - return 0; -} - -static int mark_packed_for_connectivity(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data UNUSED) -{ - mark_object_for_connectivity(oid); return 0; } @@ -1000,10 +970,8 @@ int cmd_fsck(int argc, fsck_refs(the_repository); if (connectivity_only) { - for_each_loose_object(the_repository->objects, - mark_loose_for_connectivity, NULL, 0); - for_each_packed_object(the_repository, - mark_packed_for_connectivity, NULL, 0); + odb_for_each_object(the_repository->objects, NULL, + mark_object_for_connectivity, NULL, 0); } else { odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) diff --git a/builtin/gc.c b/builtin/gc.c index 92c6e7b954faffa06a6801c58052e602e4956421..883eb191018cdcf1217d4fbc08f42c56b815170b 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -467,37 +467,14 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) static int too_many_loose_objects(int limit) { /* - * Quickly check if a "gc" is needed, by estimating how - * many loose objects there are. Because SHA-1 is evenly - * distributed, we can check only one and get a reasonable - * estimate. 
+ * This is weird, but stems from legacy behaviour: the GC auto + * threshold was always essentially interpreted as if it was rounded up + * to the next multiple of 256, so we retain this behaviour for now. */ - DIR *dir; - struct dirent *ent; - int auto_threshold; - int num_loose = 0; - int needed = 0; - const unsigned hexsz_loose = the_hash_algo->hexsz - 2; - char *path; - - path = repo_git_path(the_repository, "objects/17"); - dir = opendir(path); - free(path); - if (!dir) - return 0; - - auto_threshold = DIV_ROUND_UP(limit, 256); - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || - ent->d_name[hexsz_loose] != '\0') - continue; - if (++num_loose > auto_threshold) { - needed = 1; - break; - } - } - closedir(dir); - return needed; + unsigned long auto_threshold = DIV_ROUND_UP(limit, 256) * 256; + return odb_source_loose_count_objects(the_repository->objects->sources, + ODB_COUNT_OBJECTS_APPROXIMATE) + > auto_threshold; } static struct packed_git *find_base_packs(struct string_list *packs, @@ -592,7 +569,8 @@ static uint64_t total_ram(void) static uint64_t estimate_repack_memory(struct gc_config *cfg, struct packed_git *pack) { - unsigned long nr_objects = repo_approximate_object_count(the_repository); + unsigned long nr_objects = odb_count_objects(the_repository->objects, + ODB_COUNT_OBJECTS_APPROXIMATE); size_t os_cache, heap; if (!pack || !nr_objects) diff --git a/builtin/grep.c b/builtin/grep.c index 53cccf2d25068c664c27643873b8c6d7fe99d848..61379909b885fa56f2e79123ea7fee98a83bc417 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1213,8 +1213,16 @@ int cmd_grep(int argc, */ if (recurse_submodules) repo_read_gitmodules(the_repository, 1); - if (startup_info->have_repository) - packfile_store_prepare(the_repository->objects->packfiles); + + if (startup_info->have_repository) { + struct odb_source *source; + + odb_prepare_alternates(the_repository->objects); + for (source = the_repository->objects->sources; 
source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_prepare(files->packed); + } + } start_threads(&opt); } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 699fe678cd60b0af8b7edf77f2204a064140d94a..d1e47279a8c7c94720d99fc85d1a2da1b0380a5b 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,15 +805,14 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type 
!= entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } @@ -1640,9 +1637,11 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object && startup_info->have_repository) - packfile_store_load_pack(the_repository->objects->packfiles, - final_index_name, 0); + if (do_fsck_object && startup_info->have_repository) { + struct odb_source_files *files = + odb_source_files_downcast(the_repository->objects->sources); + packfile_store_load_pack(files->packed, final_index_name, 0); + } if (!from_stdin) { printf("%s\n", hash_to_hex(hash)); diff --git a/builtin/log.c b/builtin/log.c index c8319b8af38c8c732ddf632a0d3ced68487f8cd3..d4cf9c59c81a8397bd1162e09844870a31bedd77 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include "version.h" #include "mailmap.h" #include "progress.h" @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return 
stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5f364aa816ba25d48c8f59ffd3a33c3344ce84a8..40afa8f1ed8d9ca3d47fc5db37ff98458661d3bd 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -13,8 +13,13 @@ #include "repository.h" #define BUILTIN_MIDX_WRITE_USAGE \ - N_("git multi-pack-index [] write [--preferred-pack=]" \ - "[--refs-snapshot=]") + N_("git multi-pack-index [] write [--preferred-pack=]\n" \ + " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ + " [--refs-snapshot=]") + +#define BUILTIN_MIDX_COMPACT_USAGE \ + N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ + " [--[no-]bitmap] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -29,6 +34,10 @@ static char const * const builtin_multi_pack_index_write_usage[] = { BUILTIN_MIDX_WRITE_USAGE, NULL }; +static char const * const builtin_multi_pack_index_compact_usage[] = { + BUILTIN_MIDX_COMPACT_USAGE, + NULL +}; static char const * const builtin_multi_pack_index_verify_usage[] = { BUILTIN_MIDX_VERIFY_USAGE, NULL @@ -43,6 +52,7 @@ static char const * const builtin_multi_pack_index_repack_usage[] = { }; static char const * const builtin_multi_pack_index_usage[] = { BUILTIN_MIDX_WRITE_USAGE, + BUILTIN_MIDX_COMPACT_USAGE, BUILTIN_MIDX_VERIFY_USAGE, BUILTIN_MIDX_EXPIRE_USAGE, BUILTIN_MIDX_REPACK_USAGE, @@ -84,6 +94,8 @@ static struct option common_opts[] = { N_("directory"), N_("object directory containing set of packfile and pack-index pairs"), parse_object_dir), + OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), + MIDX_PROGRESS), OPT_END(), }; @@ -138,8 +150,6 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | 
MIDX_WRITE_REV_INDEX), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, @@ -194,14 +204,71 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, return ret; } +static int cmd_multi_pack_index_compact(int argc, const char **argv, + const char *prefix, + struct repository *repo) +{ + struct multi_pack_index *m, *cur; + struct multi_pack_index *from_midx = NULL; + struct multi_pack_index *to_midx = NULL; + struct odb_source *source; + int ret; + + struct option *options; + static struct option builtin_multi_pack_index_compact_options[] = { + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), + OPT_BIT(0, "incremental", &opts.flags, + N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_END(), + }; + + repo_config(repo, git_multi_pack_index_write_config, NULL); + + options = add_common_options(builtin_multi_pack_index_compact_options); + + trace2_cmd_mode(argv[0]); + + if (isatty(2)) + opts.flags |= MIDX_PROGRESS; + argc = parse_options(argc, argv, prefix, + options, builtin_multi_pack_index_compact_usage, + 0); + + if (argc != 2) + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + source = handle_object_dir_option(the_repository); + + FREE_AND_NULL(options); + + m = get_multi_pack_index(source); + + for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) { + const char *midx_csum = get_midx_checksum(cur); + + if (!from_midx && !strcmp(midx_csum, argv[0])) + from_midx = cur; + if (!to_midx && !strcmp(midx_csum, argv[1])) + to_midx = cur; + } + + if (!from_midx) + die(_("could not find MIDX 'from': %s"), argv[0]); + if (!to_midx) + die(_("could not find MIDX 'to': %s"), argv[1]); + + ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + + return 
ret; +} + static int cmd_multi_pack_index_verify(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) { struct option *options; static struct option builtin_multi_pack_index_verify_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -231,8 +298,6 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, { struct option *options; static struct option builtin_multi_pack_index_expire_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -264,8 +329,6 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, static struct option builtin_multi_pack_index_repack_options[] = { OPT_UNSIGNED(0, "batch-size", &opts.batch_size, N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -300,6 +363,7 @@ int cmd_multi_pack_index(int argc, struct option builtin_multi_pack_index_options[] = { OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack), OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write), + OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact), OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify), OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire), OPT_END(), diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 7937106ec53555ef156337ebda3b09efa9d374dd..4522a0eb4e421d203c8ab6731362cab80044b21e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" 
+#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f, const struct object_id *oid) { git_zstream stream; @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct hashfi for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -513,17 +513,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); @@ -577,7 +579,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +593,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + 
odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +603,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +611,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); @@ -1527,49 +1529,54 @@ static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, unsigned flags, uint32_t mtime) { - struct packed_git **cache; + struct odb_source *source; - for (cache = kept_pack_cache(r, flags); *cache; cache++) { - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; + for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + struct packed_git **cache = packfile_store_get_kept_pack_cache(files->packed, flags); - ofs = find_pack_entry_one(oid, p); - if (!ofs) - continue; + for (; *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. 
- */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) + ofs = find_pack_entry_one(oid, p); + if (!ofs) continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. + */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. 
+ */ + if (mtime <= candidate_mtime) + return 0; + } } return -1; @@ -1622,9 +1629,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= KEPT_PACK_IN_CORE; /* * If the object is in a pack that we want to ignore, *and* we @@ -1747,13 +1754,17 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->packs, p); - if (want != -1) - return want; + for (source = the_repository->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + + for (e = files->packed->packs.head; e; e = e->next) { + struct packed_git *p = e->pack; + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); + if (!exclude && want > 0) + packfile_list_prepend(&files->packed->packs, p); + if (want != -1) + return want; + } } if (uri_protocols.nr) { @@ -3904,7 +3915,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) for_each_object_in_pack(p, add_object_entry_from_pack, revs, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); } strbuf_release(&buf); @@ -3929,7 +3940,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) * an optimization during delta selection. 
*/ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; @@ -4028,7 +4039,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS); + return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); } static int cruft_include_check(struct commit *commit, void *data) @@ -4306,25 +4317,12 @@ static void show_edge(struct commit *commit) } static int add_object_in_unpacked_pack(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, + struct object_info *oi, void *data UNUSED) { if (cruft) { - off_t offset; - time_t mtime; - - if (pack->is_cruft) { - if (load_pack_mtimes(pack) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(pack, pos); - } else { - mtime = pack->mtime; - } - offset = nth_packed_object_offset(pack, pos); - - add_cruft_object_entry(oid, OBJ_NONE, pack, offset, - NULL, mtime); + add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack, + oi->u.packed.offset, NULL, *oi->mtimep); } else { add_object_entry(oid, OBJ_NONE, "", 0); } @@ -4333,14 +4331,26 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, static void add_objects_in_unpacked_packs(void) { - if (for_each_packed_object(to_pack.repo, - add_object_in_unpacked_pack, - NULL, - FOR_EACH_OBJECT_PACK_ORDER | - FOR_EACH_OBJECT_LOCAL_ONLY | - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) - die(_("cannot open pack index")); + struct odb_source *source; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + }; + + odb_prepare_alternates(to_pack.repo->objects); + for (source = to_pack.repo->objects->sources; source; source = source->next) { + struct odb_source_files *files = 
odb_source_files_downcast(source); + + if (!source->local) + continue; + + if (packfile_store_for_each_object(files->packed, &oi, + add_object_in_unpacked_pack, NULL, + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + die(_("cannot open pack index")); + } } static int add_loose_object(const struct object_id *oid, const char *path, diff --git a/commit-graph.c b/commit-graph.c index 80be2ff2c39842675c962d262021a993e6b00cc5..c5c9ce282490a7922c576aeff527cc7ae58a10e9 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1485,24 +1485,16 @@ static int write_graph_chunk_bloom_data(struct hashfile *f, return 0; } -static int add_packed_commits(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) +static int add_packed_commits_oi(const struct object_id *oid, + struct object_info *oi, + void *data) { struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; - enum object_type type; - off_t offset = nth_packed_object_offset(pack, pos); - struct object_info oi = OBJECT_INFO_INIT; if (ctx->progress) display_progress(ctx->progress, ++ctx->progress_done); - oi.typep = &type; - if (packed_object_info(ctx->r, pack, offset, &oi) < 0) - die(_("unable to get type of object %s"), oid_to_hex(oid)); - - if (type != OBJ_COMMIT) + if (*oi->typep != OBJ_COMMIT) return 0; oid_array_append(&ctx->oids, oid); @@ -1511,6 +1503,22 @@ static int add_packed_commits(const struct object_id *oid, return 0; } +static int add_packed_commits(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *data) +{ + enum object_type type; + off_t offset = nth_packed_object_offset(pack, pos); + struct object_info oi = OBJECT_INFO_INIT; + + oi.typep = &type; + if (packed_object_info(pack->repo, pack, offset, &oi) < 0) + die(_("unable to get type of object %s"), oid_to_hex(oid)); + + return add_packed_commits_oi(oid, &oi, data); +} + static void 
add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) { struct commit_list *parent; @@ -1933,7 +1941,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, goto cleanup; } for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + ODB_FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } @@ -1965,13 +1973,25 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx, static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { + struct odb_source *source; + enum object_type type; + struct object_info oi = { + .typep = &type, + }; + if (ctx->report_progress) ctx->progress = start_delayed_progress( ctx->r, _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); - for_each_packed_object(ctx->r, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); + + odb_prepare_alternates(ctx->r->objects); + for (source = ctx->r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi, + ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + } + if (ctx->progress_done < ctx->approx_nr_objects) display_progress(ctx->progress, ctx->approx_nr_objects); stop_progress(&ctx->progress); @@ -2595,7 +2615,7 @@ int write_commit_graph(struct odb_source *source, replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE; } - ctx.approx_nr_objects = repo_approximate_object_count(r); + ctx.approx_nr_objects = odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE); if (ctx.append && g) { for (i = 0; i < g->num_commits; i++) { diff --git a/entry.c b/entry.c index cae02eb50398d7cfcdac8b4c4382e067e9de02d9..7817aee362ed9e7e14e3a2b92c9cc0ab5f013673 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include 
"gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/git-compat-util.h b/git-compat-util.h index 398e0fac4fab6007903fdd9fd31dcc28fe531a65..a7aa5f05fc9445551041607feba87a738910ca72 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -670,6 +670,14 @@ static inline int cast_size_t_to_int(size_t a) return (int)a; } +static inline uint32_t u32_add(uint32_t a, uint32_t b) +{ + if (unsigned_add_overflows(a, b)) + die("uint32_t overflow: %"PRIuMAX" + %"PRIuMAX, + (uintmax_t)a, (uintmax_t)b); + return a + b; +} + static inline uint64_t u64_mult(uint64_t a, uint64_t b) { if (unsigned_mult_overflows(a, b)) diff --git a/http.c b/http.c index 41f850db16d19f653e0ac7af2878ed1e34fee22f..8ea1b9d1f68c16143130aced426df3f224b6c429 100644 --- a/http.c +++ b/http.c @@ -2543,8 +2543,9 @@ int finish_http_pack_request(struct http_pack_request *preq) void http_install_packfile(struct packed_git *p, struct packfile_list *list_to_remove_from) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); packfile_list_remove(list_to_remove_from, p); - packfile_store_add_pack(the_repository->objects->packfiles, p); + packfile_store_add_pack(files->packed, p); } struct http_pack_request *new_http_pack_request( diff --git a/loose.c b/loose.c index 56cf64b648bf80c3d7d8e0649f87687138018743..07333be6969fcc370b052fa1ed7c684442ff20af 100644 --- a/loose.c +++ b/loose.c @@ -3,6 +3,7 @@ #include "path.h" #include "object-file.h" #include "odb.h" +#include "odb/source-files.h" #include "hex.h" #include 
"repository.h" #include "wrapper.h" @@ -49,27 +50,29 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->loose->map; + struct odb_source_files *files = odb_source_files_downcast(source); + struct loose_object_map *map = files->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(source->loose->cache, compat_oid); + oidtree_insert(files->loose->cache, compat_oid); return inserted; } static int load_one_loose_object_map(struct repository *repo, struct odb_source *source) { + struct odb_source_files *files = odb_source_files_downcast(source); struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->loose->map) - loose_object_map_init(&source->loose->map); - if (!source->loose->cache) { - ALLOC_ARRAY(source->loose->cache, 1); - oidtree_init(source->loose->cache); + if (!files->loose->map) + loose_object_map_init(&files->loose->map); + if (!files->loose->cache) { + ALLOC_ARRAY(files->loose->cache, 1); + oidtree_init(files->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); @@ -125,7 +128,8 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat; + struct odb_source_files *files = odb_source_files_downcast(repo->objects->sources); + kh_oid_map_t *map = files->loose->map->to_compat; struct lock_file lock; int fd; khiter_t iter; @@ -231,7 +235,8 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->loose->map; + struct odb_source_files *files = odb_source_files_downcast(source); + struct 
loose_object_map *loose_map = files->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? diff --git a/meson.build b/meson.build index f1b3615659e56a78a8b6db37c9ba9b2e8591dcc8..c8a8b3882c0f9631aa780e1b1e43449bd6b4d348 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,9 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/source.c', + 'odb/source-files.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +493,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/midx-write.c b/midx-write.c index e3e9be6d03cd6fd20e761eab3c4bd1ff4b4d6583..4e5de5a831c7cfbd20c52e98f6476844b7f5b549 100644 --- a/midx-write.c +++ b/midx-write.c @@ -108,12 +108,24 @@ struct write_midx_context { int incremental; uint32_t num_multi_pack_indexes_before; + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + int compact; + struct string_list *to_include; struct repository *repo; struct odb_source *source; }; +static uint32_t midx_pack_perm(struct write_midx_context *ctx, + uint32_t orig_pack_int_id) +{ + if (ctx->compact) + orig_pack_int_id -= ctx->compact_from->num_packs_in_base; + return ctx->pack_perm[orig_pack_int_id]; +} + static int should_include_pack(const struct write_midx_context *ctx, const char *file_name) { @@ -317,6 +329,45 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, } } +static void midx_fanout_add(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t start_pack, + uint32_t cur_fanout) +{ + uint32_t cur_pack; + + if (ctx->m && !ctx->incremental) + midx_fanout_add_midx_fanout(fanout, ctx->m, cur_fanout, + ctx->preferred_pack_idx); + + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { + int preferred = cur_pack == ctx->preferred_pack_idx; + midx_fanout_add_pack_fanout(fanout, ctx->info, cur_pack, + preferred, cur_fanout); + } + + if 
(ctx->preferred_pack_idx != NO_PREFERRED_PACK && + ctx->preferred_pack_idx < start_pack) + midx_fanout_add_pack_fanout(fanout, ctx->info, + ctx->preferred_pack_idx, 1, + cur_fanout); +} + +static void midx_fanout_add_compact(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t cur_fanout) +{ + struct multi_pack_index *m = ctx->compact_to; + + ASSERT(ctx->compact); + + while (m && m != ctx->compact_from->base_midx) { + midx_fanout_add_midx_fanout(fanout, m, cur_fanout, + NO_PREFERRED_PACK); + m = m->base_midx; + } +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. That can create high memory pressure if @@ -335,6 +386,9 @@ static void compute_sorted_entries(struct write_midx_context *ctx, size_t alloc_objects, total_objects = 0; struct midx_fanout fanout = { 0 }; + if (ctx->compact) + ASSERT(!start_pack); + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) total_objects = st_add(total_objects, ctx->info[cur_pack].p->num_objects); @@ -353,23 +407,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { fanout.nr = 0; - if (ctx->m && !ctx->incremental) - midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout, - ctx->preferred_pack_idx); - - for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { - int preferred = cur_pack == ctx->preferred_pack_idx; - midx_fanout_add_pack_fanout(&fanout, - ctx->info, cur_pack, - preferred, cur_fanout); - } - - if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && - ctx->preferred_pack_idx < start_pack) - midx_fanout_add_pack_fanout(&fanout, ctx->info, - ctx->preferred_pack_idx, 1, - cur_fanout); - + if (ctx->compact) + midx_fanout_add_compact(&fanout, ctx, cur_fanout); + else + midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); midx_fanout_sort(&fanout); /* @@ -410,11 +451,6 @@ static int write_midx_pack_names(struct hashfile *f, void *data) if (ctx->info[i].expired) 
continue; - if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0) - BUG("incorrect pack-file order: %s before %s", - ctx->info[i - 1].pack_name, - ctx->info[i].pack_name); - writelen = strlen(ctx->info[i].pack_name) + 1; hashwrite(f, ctx->info[i].pack_name, writelen); written += writelen; @@ -514,12 +550,12 @@ static int write_midx_object_offsets(struct hashfile *f, for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *obj = list++; - if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED) + if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED) BUG("object %s is in an expired pack with int-id %d", oid_to_hex(&obj->oid), obj->pack_int_id); - hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]); + hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id)); if (ctx->large_offsets_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -620,8 +656,8 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[i]; data[i].nr = i; - data[i].pack = ctx->pack_perm[e->pack_int_id]; - if (!e->preferred) + data[i].pack = midx_pack_perm(ctx, e->pack_int_id); + if (!e->preferred || ctx->compact) data[i].pack |= (1U << 31); data[i].offset = e->offset; } @@ -630,14 +666,14 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[data[i].nr]; - struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; + struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = i + base_objects; pack->bitmap_nr++; pack_order[i] = data[i].nr; } for (i = 0; i < ctx->nr; i++) { - struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; + struct pack_info *pack = &ctx->info[i]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = 0; } @@ -691,7 +727,7 @@ 
static void prepare_midx_packing_data(struct packing_data *pdata, struct object_entry *to = packlist_alloc(pdata, &from->oid); oe_set_in_pack(pdata, to, - ctx->info[ctx->pack_perm[from->pack_int_id]].p); + ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p); } trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); @@ -909,6 +945,21 @@ static int write_midx_bitmap(struct write_midx_context *ctx, return ret; } +static int fill_pack_from_midx(struct pack_info *info, + struct multi_pack_index *m, + uint32_t pack_int_id) +{ + if (prepare_midx_pack(m, pack_int_id)) + return error(_("could not load pack %d"), pack_int_id); + + fill_pack_info(info, + m->packs[pack_int_id - m->num_packs_in_base], + m->pack_names[pack_int_id - m->num_packs_in_base], + pack_int_id); + + return 0; +} + static int fill_packs_from_midx(struct write_midx_context *ctx) { struct multi_pack_index *m; @@ -916,19 +967,85 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) for (m = ctx->m; m; m = m->base_midx) { uint32_t i; - for (i = 0; i < m->num_packs; i++) { - if (prepare_midx_pack(m, m->num_packs_in_base + i)) - return error(_("could not load pack")); - + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], - m->pack_names[i], - m->num_packs_in_base + i); + + if (fill_pack_from_midx(&ctx->info[ctx->nr], m, i) < 0) + return -1; + + ctx->nr++; } } return 0; } +static uint32_t compactible_packs_between(const struct multi_pack_index *from, + const struct multi_pack_index *to) +{ + uint32_t nr; + + ASSERT(from && to); + + nr = u32_add(to->num_packs, to->num_packs_in_base); + if (nr < from->num_packs_in_base) + BUG("unexpected number of packs in base during compaction: " + "%"PRIu32" < %"PRIu32, nr, from->num_packs_in_base); + + return nr - from->num_packs_in_base; +} + +static int fill_packs_from_midx_range(struct write_midx_context *ctx, + int 
bitmap_order) +{ + struct multi_pack_index *m = ctx->compact_to; + uint32_t packs_nr; + + ASSERT(ctx->compact && !ctx->nr); + ASSERT(ctx->compact_from); + ASSERT(ctx->compact_to); + + packs_nr = compactible_packs_between(ctx->compact_from, + ctx->compact_to); + + ALLOC_GROW(ctx->info, packs_nr, ctx->alloc); + + while (m != ctx->compact_from->base_midx) { + uint32_t pack_int_id, preferred_pack_id; + uint32_t i; + + if (bitmap_order) { + if (midx_preferred_pack(m, &preferred_pack_id) < 0) + die(_("could not determine preferred pack")); + } else { + preferred_pack_id = m->num_packs_in_base; + } + + pack_int_id = m->num_packs_in_base - ctx->compact_from->num_packs_in_base; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + preferred_pack_id) < 0) + return -1; + + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { + if (preferred_pack_id == i) + continue; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + i) < 0) + return -1; + } + + ctx->nr += m->num_packs; + m = m->base_midx; + } + + ASSERT(ctx->nr == packs_nr); + + return 0; +} + static struct { const char *non_split; const char *split; @@ -955,7 +1072,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) } for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { - const unsigned char *hash = get_midx_checksum(m); + const unsigned char *hash = get_midx_hash(m); get_midx_filename_ext(m->source, &from, hash, midx_exts[i].non_split); @@ -1014,14 +1131,30 @@ static void clear_midx_files(struct odb_source *source, strbuf_release(&buf); } -static int write_midx_internal(struct odb_source *source, - struct string_list *packs_to_include, - struct string_list *packs_to_drop, - const char *preferred_pack_name, - const char *refs_snapshot, - unsigned flags) +static int midx_hashcmp(const struct multi_pack_index *a, + const struct multi_pack_index *b, + const struct git_hash_algo *algop) { - struct repository *r = source->odb->repo; + return hashcmp(get_midx_hash(a), 
get_midx_hash(b), algop); +} + +struct write_midx_opts { + struct odb_source *source; + + struct string_list *packs_to_include; + struct string_list *packs_to_drop; + + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + + const char *preferred_pack_name; + const char *refs_snapshot; + unsigned flags; +}; + +static int write_midx_internal(struct write_midx_opts *opts) +{ + struct repository *r = opts->source->odb->repo; struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; @@ -1036,27 +1169,39 @@ static int write_midx_internal(struct odb_source *source, int dropped_packs = 0; int result = -1; const char **keep_hashes = NULL; + size_t keep_hashes_nr = 0; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); ctx.repo = r; - ctx.source = source; + ctx.source = opts->source; + + ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); + ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT); - ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); + if (ctx.compact) { + if (!opts->compact_from) + BUG("expected non-NULL 'from' MIDX during compaction"); + if (!opts->compact_to) + BUG("expected non-NULL 'to' MIDX during compaction"); + + ctx.compact_from = opts->compact_from; + ctx.compact_to = opts->compact_to; + } if (ctx.incremental) strbuf_addf(&midx_name, "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", - source->path); + opts->source->path); else - get_midx_filename(source, &midx_name); + get_midx_filename(opts->source, &midx_name); if (safe_create_leading_directories(r, midx_name.buf)) die_errno(_("unable to create leading directories of %s"), midx_name.buf); - if (!packs_to_include || ctx.incremental) { - struct multi_pack_index *m = get_multi_pack_index(source); + if (!opts->packs_to_include || ctx.incremental) { + struct multi_pack_index *m = get_multi_pack_index(opts->source); if (m && !midx_checksum_valid(m)) { warning(_("ignoring existing multi-pack-index; checksum 
mismatch")); m = NULL; @@ -1071,11 +1216,18 @@ static int write_midx_internal(struct odb_source *source, */ if (ctx.incremental) ctx.base_midx = m; - else if (!packs_to_include) + if (!opts->packs_to_include) ctx.m = m; } } + /* + * If compacting MIDX layer(s) in the range [from, to], then the + * compacted MIDX will share the same base MIDX as 'from'. + */ + if (ctx.compact) + ctx.base_midx = ctx.compact_from->base_midx; + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1084,39 +1236,48 @@ static int write_midx_internal(struct odb_source *source, if (ctx.incremental) { struct multi_pack_index *m = ctx.base_midx; while (m) { - if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { + if (opts->flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), - hash_to_hex_algop(get_midx_checksum(m), - m->source->odb->repo->hash_algo)); + get_midx_checksum(m)); goto cleanup; } ctx.num_multi_pack_indexes_before++; m = m->base_midx; } - } else if (ctx.m && fill_packs_from_midx(&ctx)) { + } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) { goto cleanup; } start_pack = ctx.nr; ctx.pack_paths_checked = 0; - if (flags & MIDX_PROGRESS) + if (opts->flags & MIDX_PROGRESS) ctx.progress = start_delayed_progress(r, _("Adding packfiles to multi-pack-index"), 0); else ctx.progress = NULL; - ctx.to_include = packs_to_include; + if (ctx.compact) { + int bitmap_order = 0; + if (opts->preferred_pack_name) + bitmap_order |= 1; + else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) + bitmap_order |= 1; - for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); + fill_packs_from_midx_range(&ctx, bitmap_order); + } else { + ctx.to_include = opts->packs_to_include; + for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); + } stop_progress(&ctx.progress); if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) && !ctx.incremental 
&& - !(packs_to_include || packs_to_drop)) { + !ctx.compact && + !(opts->packs_to_include || opts->packs_to_drop)) { struct bitmap_index *bitmap_git; int bitmap_exists; - int want_bitmap = flags & MIDX_WRITE_BITMAP; + int want_bitmap = opts->flags & MIDX_WRITE_BITMAP; bitmap_git = prepare_midx_bitmap_git(ctx.m); bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); @@ -1128,7 +1289,8 @@ static int write_midx_internal(struct odb_source *source, * corresponding bitmap (or one wasn't requested). */ if (!want_bitmap) - clear_midx_files_ext(source, "bitmap", NULL); + clear_midx_files_ext(opts->source, "bitmap", + NULL); result = 0; goto cleanup; } @@ -1139,11 +1301,11 @@ static int write_midx_internal(struct odb_source *source, goto cleanup; /* nothing to do */ } - if (preferred_pack_name) { + if (opts->preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; for (size_t i = 0; i < ctx.nr; i++) { - if (!cmp_idx_or_pack_name(preferred_pack_name, + if (!cmp_idx_or_pack_name(opts->preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; break; @@ -1152,9 +1314,9 @@ static int write_midx_internal(struct odb_source *source, if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) warning(_("unknown preferred pack: '%s'"), - preferred_pack_name); + opts->preferred_pack_name); } else if (ctx.nr && - (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { + (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { struct packed_git *oldest = ctx.info[0].p; ctx.preferred_pack_idx = 0; @@ -1165,7 +1327,7 @@ static int write_midx_internal(struct odb_source *source, */ open_pack_index(oldest); - if (packs_to_drop && packs_to_drop->nr) + if (opts->packs_to_drop && opts->packs_to_drop->nr) BUG("cannot write a MIDX bitmap during expiration"); /* @@ -1225,22 +1387,26 @@ static int write_midx_internal(struct odb_source *source, ctx.large_offsets_needed = 1; } - QSORT(ctx.info, ctx.nr, pack_info_compare); + if (!ctx.compact) + QSORT(ctx.info, ctx.nr, 
pack_info_compare); - if (packs_to_drop && packs_to_drop->nr) { + if (opts->packs_to_drop && opts->packs_to_drop->nr) { size_t drop_index = 0; int missing_drops = 0; - for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { + ASSERT(!ctx.compact); + + for (size_t i = 0; + i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); if (!cmp) { drop_index++; ctx.info[i].expired = 1; } else if (cmp > 0) { error(_("did not see pack-file %s to drop"), - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); drop_index++; missing_drops++; i--; @@ -1261,12 +1427,20 @@ static int write_midx_internal(struct odb_source *source, */ ALLOC_ARRAY(ctx.pack_perm, ctx.nr); for (size_t i = 0; i < ctx.nr; i++) { + uint32_t from = ctx.info[i].orig_pack_int_id; + uint32_t to; + if (ctx.info[i].expired) { + to = PACK_EXPIRED; dropped_packs++; - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED; } else { - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs; + to = i - dropped_packs; } + + if (ctx.compact) + from -= ctx.compact_from->num_packs_in_base; + + ctx.pack_perm[from] = to; } for (size_t i = 0; i < ctx.nr; i++) { @@ -1277,16 +1451,16 @@ static int write_midx_internal(struct odb_source *source, } /* Check that the preferred pack wasn't expired (if given). 
*/ - if (preferred_pack_name) { - struct pack_info *preferred = bsearch(preferred_pack_name, + if (opts->preferred_pack_name) { + struct pack_info *preferred = bsearch(opts->preferred_pack_name, ctx.info, ctx.nr, sizeof(*ctx.info), idx_or_pack_name_cmp); if (preferred) { - uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; + uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id); if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), - preferred_pack_name); + opts->preferred_pack_name); } } @@ -1300,15 +1474,15 @@ static int write_midx_internal(struct odb_source *source, } if (!ctx.entries_nr) { - if (flags & MIDX_WRITE_BITMAP) + if (opts->flags & MIDX_WRITE_BITMAP) warning(_("refusing to write multi-pack .bitmap without any objects")); - flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); + opts->flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); } if (ctx.incremental) { struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(source, &lock_name); + get_midx_chain_filename(opts->source, &lock_name); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); strbuf_release(&lock_name); @@ -1351,7 +1525,7 @@ static int write_midx_internal(struct odb_source *source, MIDX_CHUNK_LARGE_OFFSET_WIDTH), write_midx_large_offsets); - if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { + if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { ctx.pack_order = midx_pack_order(&ctx); add_chunk(cf, MIDX_CHUNKID_REVINDEX, st_mult(ctx.entries_nr, sizeof(uint32_t)), @@ -1369,11 +1543,11 @@ static int write_midx_internal(struct odb_source *source, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX && + if (opts->flags & MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(&ctx, midx_hash); - if (flags & MIDX_WRITE_BITMAP) { + if (opts->flags & MIDX_WRITE_BITMAP) { struct packing_data pdata; struct commit **commits; uint32_t commits_nr; 
@@ -1383,7 +1557,7 @@ static int write_midx_internal(struct odb_source *source, prepare_midx_packing_data(&pdata, &ctx); - commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx); + commits = find_commits_for_midx_bitmap(&commits_nr, opts->refs_snapshot, &ctx); /* * The previous steps translated the information from @@ -1396,7 +1570,7 @@ static int write_midx_internal(struct odb_source *source, if (write_midx_bitmap(&ctx, midx_hash, &pdata, commits, commits_nr, - flags) < 0) { + opts->flags) < 0) { error(_("could not write multi-pack bitmap")); clear_packing_data(&pdata); free(commits); @@ -1414,7 +1588,24 @@ static int write_midx_internal(struct odb_source *source, if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); + if (ctx.compact) { + struct multi_pack_index *m; + + /* + * Keep all MIDX layers excluding those in the range [from, to]. + */ + for (m = ctx.base_midx; m; m = m->base_midx) + keep_hashes_nr++; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) + keep_hashes_nr++; + + keep_hashes_nr++; /* include the compacted layer */ + } else { + keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; + } + CALLOC_ARRAY(keep_hashes, keep_hashes_nr); if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); @@ -1429,7 +1620,7 @@ static int write_midx_internal(struct odb_source *source, if (link_midx_to_chain(ctx.base_midx) < 0) goto cleanup; - get_split_midx_filename_ext(source, &final_midx_name, + get_split_midx_filename_ext(opts->source, &final_midx_name, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { @@ -1439,18 +1630,47 @@ static int write_midx_internal(struct odb_source *source, strbuf_release(&final_midx_name); - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + if (ctx.compact) { + struct 
multi_pack_index *m; + uint32_t num_layers_before_from = 0; + uint32_t i; - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + for (m = ctx.base_midx; m; m = m->base_midx) + num_layers_before_from++; - keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), + m = ctx.base_midx; + for (i = 0; i < num_layers_before_from; i++) { + uint32_t j = num_layers_before_from - i - 1; + + keep_hashes[j] = xstrdup(get_midx_checksum(m)); + m = m->base_midx; + } + + keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); - m = m->base_midx; + + i = 0; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) { + keep_hashes[keep_hashes_nr - i - 1] = + xstrdup(get_midx_checksum(m)); + i++; + } + } else { + keep_hashes[ctx.num_multi_pack_indexes_before] = + xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); + + for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { + uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + + keep_hashes[j] = xstrdup(get_midx_checksum(m)); + m = m->base_midx; + } } - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = @@ -1463,8 +1683,7 @@ static int write_midx_internal(struct odb_source *source, if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(source, keep_hashes, - ctx.num_multi_pack_indexes_before + 1, + clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, ctx.incremental); result = 0; @@ -1482,7 +1701,7 @@ static int write_midx_internal(struct odb_source *source, free(ctx.pack_perm); free(ctx.pack_order); if (keep_hashes) { - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) free((char *)keep_hashes[i]); 
free(keep_hashes); } @@ -1497,9 +1716,14 @@ int write_midx_file(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, NULL, NULL, - preferred_pack_name, refs_snapshot, - flags); + struct write_midx_opts opts = { + .source = source, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); } int write_midx_file_only(struct odb_source *source, @@ -1507,8 +1731,30 @@ int write_midx_file_only(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, packs_to_include, NULL, - preferred_pack_name, refs_snapshot, flags); + struct write_midx_opts opts = { + .source = source, + .packs_to_include = packs_to_include, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); +} + +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags) +{ + struct write_midx_opts opts = { + .source = source, + .compact_from = from, + .compact_to = to, + .flags = flags | MIDX_WRITE_COMPACT, + }; + + return write_midx_internal(&opts); } int expire_midx_packs(struct odb_source *source, unsigned flags) @@ -1568,8 +1814,11 @@ int expire_midx_packs(struct odb_source *source, unsigned flags) free(count); if (packs_to_drop.nr) - result = write_midx_internal(source, NULL, - &packs_to_drop, NULL, NULL, flags); + result = write_midx_internal(&(struct write_midx_opts) { + .source = source, + .packs_to_drop = &packs_to_drop, + .flags = flags & MIDX_PROGRESS, + }); string_list_clear(&packs_to_drop, 0); @@ -1776,8 +2025,10 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) goto cleanup; } - result = write_midx_internal(source, NULL, NULL, NULL, NULL, - flags); + 
result = write_midx_internal(&(struct write_midx_opts) { + .source = source, + .flags = flags, + }); cleanup: free(include_pack); diff --git a/midx.c b/midx.c index 24e1e721754d0cbffed53d7545f12bca77d157bd..aecdaebd363ace01544de3d131664707f8b72f82 100644 --- a/midx.c +++ b/midx.c @@ -24,7 +24,13 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); -const unsigned char *get_midx_checksum(struct multi_pack_index *m) +const char *get_midx_checksum(const struct multi_pack_index *m) +{ + return hash_to_hex_algop(get_midx_hash(m), + m->source->odb->repo->hash_algo); +} + +const unsigned char *get_midx_hash(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } @@ -95,8 +101,9 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - packfile_store_prepare(source->odb->packfiles); - return source->midx; + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_prepare(files->packed); + return files->packed->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, @@ -203,11 +210,6 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou if (!end) die(_("multi-pack-index pack-name chunk is too short")); cur_pack_name = end + 1; - - if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) - die(_("multi-pack-index pack names out of order: '%s' before '%s'"), - m->pack_names[i - 1], - m->pack_names[i]); } trace2_data_intmax("midx", r, "load/num_packs", m->num_packs); @@ -405,6 +407,7 @@ void close_midx(struct multi_pack_index *m) } FREE_AND_NULL(m->packs); FREE_AND_NULL(m->pack_names); + FREE_AND_NULL(m->pack_names_sorted); free(m); } @@ -447,7 +450,7 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m, int 
prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct repository *r = m->source->odb->repo; + struct odb_source_files *files = odb_source_files_downcast(m->source); struct strbuf pack_name = STRBUF_INIT; struct packed_git *p; @@ -458,10 +461,10 @@ int prepare_midx_pack(struct multi_pack_index *m, if (m->packs[pack_int_id]) return 0; - strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, + strbuf_addf(&pack_name, "%s/pack/%s", files->base.path, m->pack_names[pack_int_id]); - p = packfile_store_load_pack(r->objects->packfiles, - pack_name.buf, m->source->local); + p = packfile_store_load_pack(files->packed, + pack_name.buf, files->base.local); strbuf_release(&pack_name); if (!p) { @@ -650,17 +653,37 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name, return strcmp(idx_or_pack_name, idx_name); } + +static int midx_pack_names_cmp(const void *a, const void *b, void *m_) +{ + struct multi_pack_index *m = m_; + return strcmp(m->pack_names[*(const size_t *)a], + m->pack_names[*(const size_t *)b]); +} + static int midx_contains_pack_1(struct multi_pack_index *m, const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; + if (!m->pack_names_sorted) { + uint32_t i; + + ALLOC_ARRAY(m->pack_names_sorted, m->num_packs); + + for (i = 0; i < m->num_packs; i++) + m->pack_names_sorted[i] = i; + + QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp, + m); + } + while (first < last) { uint32_t mid = first + (last - first) / 2; const char *current; int cmp; - current = m->pack_names[mid]; + current = m->pack_names[m->pack_names_sorted[mid]]; cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); if (!cmp) return 1; @@ -704,18 +727,19 @@ int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) int prepare_multi_pack_index_one(struct odb_source *source) { + struct odb_source_files *files = odb_source_files_downcast(source); struct repository *r = source->odb->repo; prepare_repo_settings(r); if 
(!r->settings.core_multi_pack_index) return 0; - if (source->midx) + if (files->packed->midx) return 1; - source->midx = load_multi_pack_index(source); + files->packed->midx = load_multi_pack_index(source); - return !!source->midx; + return !!files->packed->midx; } int midx_checksum_valid(struct multi_pack_index *m) @@ -804,9 +828,10 @@ void clear_midx_file(struct repository *r) struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; + struct odb_source_files *files = odb_source_files_downcast(source); + if (files->packed->midx) + close_midx(files->packed->midx); + files->packed->midx = NULL; } } diff --git a/midx.h b/midx.h index 6e54d73503d56088923ceff1c69e1f8d6013ac12..61f9809b8c96b0f8cfb71735b7d8fa6ef28009cf 100644 --- a/midx.h +++ b/midx.h @@ -71,6 +71,7 @@ struct multi_pack_index { uint32_t num_packs_in_base; const char **pack_names; + size_t *pack_names_sorted; struct packed_git **packs; }; @@ -80,12 +81,14 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) +#define MIDX_WRITE_COMPACT (1 << 6) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" -const unsigned char *get_midx_checksum(struct multi_pack_index *m); +const char *get_midx_checksum(const struct multi_pack_index *m) /* static buffer */; +const unsigned char *get_midx_hash(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext); @@ -128,6 +131,10 @@ int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); +int write_midx_file_compact(struct odb_source *source, + struct 
multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); diff --git a/object-file.c b/object-file.c index 84c9249dab520fd5153b36a11a2a211fb0e43877..ce8450a68e66b9935a2e1dc6b5f25f99aa5674fd 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" #include "setup.h" -#include "streaming.h" #include "tempfile.h" #include "tmp-objdir.h" @@ -132,29 +132,27 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. */ r->hash_algo->init_fn(&c); git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,35 +160,18 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? 
-1 : 0; } /* - * Find "oid" as a loose object in given source. - * Returns 0 on success, negative on failure. + * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. Returns the file descriptor on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, - struct stat *st, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - - *path = odb_loose_path(loose->source, &buf, oid); - if (!lstat(*path, st)) - return 0; - - return -1; -} - -/* - * Like stat_loose_object(), but actually open the object and return the - * descriptor. See the caveats on the "path" parameter above. - */ static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { @@ -234,23 +215,42 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { + struct odb_source_files *files = odb_source_files_downcast(source); const char *p; - int fd = open_loose_object(source->loose, oid, &p); + int fd = open_loose_object(files->loose, oid, &p); if (fd < 0) return NULL; return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. 
+ * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. + */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -329,11 +329,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting " \0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the populate the "typep" in the "struct + * object_info". It will be OBJ_BAD if the object type is unknown. The + * parsed can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -389,19 +396,22 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int odb_source_loose_read_object_info(struct odb_source *source, +static int read_object_info_from_path(struct odb_source *source, + const char *path, const struct object_id *oid, - struct object_info *oi, int flags) + struct object_info *oi, + unsigned flags) { + struct odb_source_files *files = odb_source_files_downcast(source); int status = 0; int fd; unsigned long mapsize; - const char *path; void *map; git_zstream stream; char hdr[MAX_HEADER_LEN]; unsigned long size_scratch; enum object_type type_scratch; + struct stat st; if (oi->delta_base_oid) oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); @@ -415,23 +425,45 @@ int odb_source_loose_read_object_info(struct odb_source *source, * object even exists. 
*/ if (!oi->typep && !oi->sizep && !oi->contentp) { - struct stat st; - if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(source->loose, oid) ? 0 : -1; - if (stat_loose_object(source->loose, oid, &st, &path) < 0) + if (!oi->disk_sizep && !oi->mtimep && (flags & OBJECT_INFO_QUICK)) { + status = quick_has_loose(files->loose, oid) ? 0 : -1; + if (!status) + oi->whence = OI_LOOSE; + return status; + } + + if (lstat(path, &st)) return -1; + if (oi->disk_sizep) *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + + oi->whence = OI_LOOSE; return 0; } - fd = open_loose_object(source->loose, oid, &path); + fd = git_open(path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); return -1; } - map = map_fd(fd, path, &mapsize); + + if (fstat(fd, &st)) { + close(fd); + return -1; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + return error(_("object file %s is empty"), path); + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); if (!map) return -1; @@ -439,9 +471,10 @@ int odb_source_loose_read_object_info(struct odb_source *source, oi->sizep = &size_scratch; if (!oi->typep) oi->typep = &type_scratch; - if (oi->disk_sizep) *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { case ULHR_OK: @@ -483,6 +516,16 @@ int odb_source_loose_read_object_info(struct odb_source *source, return status; } +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + static struct strbuf buf = STRBUF_INIT; + odb_loose_path(source, &buf, oid); + return read_object_info_from_path(source, buf.buf, oid, oi, flags); +} + static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, @@ -974,6 
+1017,45 @@ int odb_source_loose_freshen_object(struct odb_source *source, return !!check_and_freshen_source(source, oid, 1); } +static int count_loose(const struct object_id *oid UNUSED, + const char *path UNUSED, + void *data) +{ + unsigned long *count = data; + (*count)++; + return 0; +} + +unsigned long odb_source_loose_count_objects(struct odb_source *source, + unsigned flags) +{ + const unsigned hexsz_loose = source->odb->repo->hash_algo->hexsz - 2; + struct strbuf path = STRBUF_INIT; + unsigned long count = 0; + struct dirent *ent; + DIR *dir; + + if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) { + for_each_loose_file_in_source(source, count_loose, + NULL, NULL, &count); + return count; + } + + strbuf_addf(&path, "%s/17", source->path); + dir = opendir(path.buf); + strbuf_release(&path); + if (!dir) + return 0; + + while ((ent = readdir(dir)) != NULL) + if (strspn(ent->d_name, "0123456789abcdef") == hexsz_loose && + ent->d_name[hexsz_loose] == '\0') + count++; + closedir(dir); + + return count * 256; +} + int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *in_stream, size_t len, struct object_id *oid) @@ -1737,24 +1819,45 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn cb, void *data, - enum for_each_object_flags flags) -{ +struct for_each_object_wrapper_data { struct odb_source *source; + struct object_info *oi; + unsigned flags; + odb_for_each_object_cb cb; + void *cb_data; +}; - odb_prepare_alternates(odb); - for (source = odb->sources; source; source = source->next) { - int r = for_each_loose_file_in_source(source, cb, NULL, - NULL, data); - if (r) - return r; +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->oi && + read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0) + return 
-1; + return data->cb(oid, data->oi, data->cb_data); +} - if (flags & FOR_EACH_OBJECT_LOCAL_ONLY) - break; - } +int odb_source_loose_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct for_each_object_wrapper_data data = { + .source = source, + .oi = oi, + .flags = flags, + .cb = cb, + .cb_data = cb_data, + }; - return 0; + /* There are no loose promisor objects, so we can return immediately. */ + if (flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) + return 0; + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); } static int append_loose_object(const struct object_id *oid, @@ -1768,33 +1871,34 @@ static int append_loose_object(const struct object_id *oid, struct oidtree *odb_source_loose_cache(struct odb_source *source, const struct object_id *oid) { + struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->loose->subdir_seen[0]); + size_t word_bits = bitsizeof(files->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen)) + (size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->loose->subdir_seen[word_index]; + bitmap = &files->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->loose->cache; - if (!source->loose->cache) { - ALLOC_ARRAY(source->loose->cache, 1); - oidtree_init(source->loose->cache); + return files->loose->cache; + if (!files->loose->cache) { + ALLOC_ARRAY(files->loose->cache, 1); + oidtree_init(files->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - 
source->loose->cache); + files->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return source->loose->cache; + return files->loose->cache; } static void odb_source_loose_clear_cache(struct odb_source_loose *loose) @@ -1807,7 +1911,8 @@ static void odb_source_loose_clear_cache(struct odb_source_loose *loose) void odb_source_loose_reprepare(struct odb_source *source) { - odb_source_loose_clear_cache(source->loose); + struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_loose_clear_cache(files->loose); } static int check_stream_oid(git_zstream *stream, @@ -1980,3 +2085,127 @@ void odb_source_loose_free(struct odb_source_loose *loose) loose_object_map_clear(&loose->map); free(loose); } + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < 
sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index eeffa67bbda63102e345c49e7bbf18871ed82123..f1318de7addbcc0058be16037416fb24a3a422a0 100644 --- a/object-file.h +++ b/object-file.h @@ -16,6 +16,8 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, 
struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; struct odb_source_loose { @@ -45,11 +47,12 @@ void odb_source_loose_reprepare(struct odb_source *source); int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, - struct object_info *oi, int flags); + struct object_info *oi, + unsigned flags); -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size); +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); /* * Return true iff an object database source has a loose object @@ -62,6 +65,14 @@ int odb_source_loose_has_object(struct odb_source *source, int odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid); +/* + * Because object hashes are cryptographic and thus evenly distributed, + * we can check only one and get a reasonable estimate via extrapolation. The + * shard used for this is "objects/17". + */ +unsigned long odb_source_loose_count_objects(struct odb_source *source, + unsigned flags); + int odb_source_loose_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, @@ -124,16 +135,16 @@ int for_each_loose_file_in_source(struct odb_source *source, void *data); /* - * Iterate over all accessible loose objects without respect to - * reachability. By default, this includes both local and alternate objects. - * The order in which objects are visited is unspecified. - * - * Any flags specific to packs are ignored. + * Iterate through all loose objects in the given object database source and + * invoke the callback function for each of them. 
If given, the object info + * will be populated with the object's data as if you had called + * `odb_source_loose_read_object_info()` on the object. */ -int for_each_loose_object(struct object_database *odb, - each_loose_object_fn, void *, - enum for_each_object_flags flags); - +int odb_source_loose_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); /** * format_object_header() is a thin wrapper around s xsnprintf() that @@ -143,40 +154,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting " \0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest(). 
- */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/object-name.c b/object-name.c index fed5de51531fde8657be62c6ac311dd098f0b9ce..81269d241ac5067bb56b4928cfede23dbe065be7 100644 --- a/object-name.c +++ b/object-name.c @@ -837,7 +837,8 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, const unsigned hexsz = algo->hexsz; if (len < 0) { - unsigned long count = repo_approximate_object_count(r); + unsigned long count = odb_count_objects(r->objects, + ODB_COUNT_OBJECTS_APPROXIMATE); /* * Add one because the MSB only tells us the highest bit set, * not including the value of all the _other_ bits (so "15" diff --git a/odb.c b/odb.c index dc8f292f3d9645e2de8d990630a29cb4b4b1e20a..3ba32d3dc39384a05c57980c1eafcc91b95bbbbb 100644 --- a/odb.c +++ b/odb.c @@ -89,17 +89,20 @@ int odb_mkstemp(struct object_database *odb, /* * Return non-zero iff the path is usable as an alternate object database. 
*/ -static int alt_odb_usable(struct object_database *o, const char *path, - const char *normalized_objdir) +static bool odb_is_source_usable(struct object_database *o, const char *path) { int r; + struct strbuf normalized_objdir = STRBUF_INIT; + bool usable = false; + + strbuf_realpath(&normalized_objdir, o->sources->path, 1); /* Detect cases where alternate disappeared */ if (!is_directory(path)) { error(_("object directory %s does not exist; " "check .git/objects/info/alternates"), path); - return 0; + goto out; } /* @@ -116,87 +119,99 @@ static int alt_odb_usable(struct object_database *o, const char *path, kh_value(o->source_by_path, p) = o->sources; } - if (fspatheq(path, normalized_objdir)) - return 0; + if (fspatheq(path, normalized_objdir.buf)) + goto out; if (kh_get_odb_path_map(o->source_by_path, path) < kh_end(o->source_by_path)) - return 0; + goto out; - return 1; -} + usable = true; -/* - * Prepare alternate object database registry. - * - * The variable alt_odb_list points at the list of struct - * odb_source. The elements on this list come from - * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT - * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates, - * whose contents is similar to that environment variable but can be - * LF separated. Its base points at a statically allocated buffer that - * contains "/the/directory/corresponding/to/.git/objects/...", while - * its name points just after the slash at the end of ".git/objects/" - * in the example above, and has enough space to hold all hex characters - * of the object ID, an extra slash for the first level indirection, and - * the terminating NUL. 
- */ -static void read_info_alternates(struct object_database *odb, - const char *relative_base, - int depth); +out: + strbuf_release(&normalized_objdir); + return usable; +} -static struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) +void parse_alternates(const char *string, + int sep, + const char *relative_base, + struct strvec *out) { - struct odb_source *source; + struct strbuf pathbuf = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; - CALLOC_ARRAY(source, 1); - source->odb = odb; - source->local = local; - source->path = xstrdup(path); - source->loose = odb_source_loose_new(source); + if (!string || !*string) + return; - return source; -} + while (*string) { + const char *end; + + strbuf_reset(&buf); + strbuf_reset(&pathbuf); + + if (*string == '#') { + /* comment; consume up to next separator */ + end = strchrnul(string, sep); + } else if (*string == '"' && !unquote_c_style(&buf, string, &end)) { + /* + * quoted path; unquote_c_style has copied the + * data for us and set "end". Broken quoting (e.g., + * an entry that doesn't end with a quote) falls + * back to the unquoted case below. 
+ */ + } else { + /* normal, unquoted path */ + end = strchrnul(string, sep); + strbuf_add(&buf, string, end - string); + } -static struct odb_source *link_alt_odb_entry(struct object_database *odb, - const char *dir, - const char *relative_base, - int depth) -{ - struct odb_source *alternate = NULL; - struct strbuf pathbuf = STRBUF_INIT; - struct strbuf tmp = STRBUF_INIT; - khiter_t pos; - int ret; + if (*end) + end++; + string = end; - if (!is_absolute_path(dir) && relative_base) { - strbuf_realpath(&pathbuf, relative_base, 1); - strbuf_addch(&pathbuf, '/'); - } - strbuf_addstr(&pathbuf, dir); + if (!buf.len) + continue; - if (!strbuf_realpath(&tmp, pathbuf.buf, 0)) { - error(_("unable to normalize alternate object path: %s"), - pathbuf.buf); - goto error; + if (!is_absolute_path(buf.buf) && relative_base) { + strbuf_realpath(&pathbuf, relative_base, 1); + strbuf_addch(&pathbuf, '/'); + } + strbuf_addbuf(&pathbuf, &buf); + + strbuf_reset(&buf); + if (!strbuf_realpath(&buf, pathbuf.buf, 0)) { + error(_("unable to normalize alternate object path: %s"), + pathbuf.buf); + continue; + } + + /* + * The trailing slash after the directory name is given by + * this function at the end. Remove duplicates. + */ + while (buf.len && buf.buf[buf.len - 1] == '/') + strbuf_setlen(&buf, buf.len - 1); + + strvec_push(out, buf.buf); } - strbuf_swap(&pathbuf, &tmp); - /* - * The trailing slash after the directory name is given by - * this function at the end. Remove duplicates. 
- */ - while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/') - strbuf_setlen(&pathbuf, pathbuf.len - 1); + strbuf_release(&pathbuf); + strbuf_release(&buf); +} - strbuf_reset(&tmp); - strbuf_realpath(&tmp, odb->sources->path, 1); +static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, + const char *source, + int depth) +{ + struct odb_source *alternate = NULL; + struct strvec sources = STRVEC_INIT; + khiter_t pos; + int ret; - if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) + if (!odb_is_source_usable(odb, source)) goto error; - alternate = odb_source_new(odb, pathbuf.buf, false); + alternate = odb_source_new(odb, source, false); /* add the alternate entry */ *odb->sources_tail = alternate; @@ -208,126 +223,28 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, kh_value(odb->source_by_path, pos) = alternate; /* recursively add alternates */ - read_info_alternates(odb, alternate->path, depth + 1); - - error: - strbuf_release(&tmp); - strbuf_release(&pathbuf); - return alternate; -} - -static const char *parse_alt_odb_entry(const char *string, - int sep, - struct strbuf *out) -{ - const char *end; - - strbuf_reset(out); - - if (*string == '#') { - /* comment; consume up to next separator */ - end = strchrnul(string, sep); - } else if (*string == '"' && !unquote_c_style(out, string, &end)) { - /* - * quoted path; unquote_c_style has copied the - * data for us and set "end". Broken quoting (e.g., - * an entry that doesn't end with a quote) falls - * back to the unquoted case below. 
- */ - } else { - /* normal, unquoted path */ - end = strchrnul(string, sep); - strbuf_add(out, string, end - string); - } - - if (*end) - end++; - return end; -} - -static void link_alt_odb_entries(struct object_database *odb, const char *alt, - int sep, const char *relative_base, int depth) -{ - struct strbuf dir = STRBUF_INIT; - - if (!alt || !*alt) - return; - - if (depth > 5) { + odb_source_read_alternates(alternate, &sources); + if (sources.nr && depth + 1 > 5) { error(_("%s: ignoring alternate object stores, nesting too deep"), - relative_base); - return; - } - - while (*alt) { - alt = parse_alt_odb_entry(alt, sep, &dir); - if (!dir.len) - continue; - link_alt_odb_entry(odb, dir.buf, relative_base, depth); - } - strbuf_release(&dir); -} - -static void read_info_alternates(struct object_database *odb, - const char *relative_base, - int depth) -{ - char *path; - struct strbuf buf = STRBUF_INIT; - - path = xstrfmt("%s/info/alternates", relative_base); - if (strbuf_read_file(&buf, path, 1024) < 0) { - warn_on_fopen_errors(path); - free(path); - return; + source); + } else { + for (size_t i = 0; i < sources.nr; i++) + odb_add_alternate_recursively(odb, sources.v[i], depth + 1); } - link_alt_odb_entries(odb, buf.buf, '\n', relative_base, depth); - strbuf_release(&buf); - free(path); + error: + strvec_clear(&sources); + return alternate; } void odb_add_to_alternates_file(struct object_database *odb, const char *dir) { - struct lock_file lock = LOCK_INIT; - char *alts = repo_git_path(odb->repo, "objects/info/alternates"); - FILE *in, *out; - int found = 0; - - hold_lock_file_for_update(&lock, alts, LOCK_DIE_ON_ERROR); - out = fdopen_lock_file(&lock, "w"); - if (!out) - die_errno(_("unable to fdopen alternates lockfile")); - - in = fopen(alts, "r"); - if (in) { - struct strbuf line = STRBUF_INIT; - - while (strbuf_getline(&line, in) != EOF) { - if (!strcmp(dir, line.buf)) { - found = 1; - break; - } - fprintf_or_die(out, "%s\n", line.buf); - } - - 
strbuf_release(&line); - fclose(in); - } - else if (errno != ENOENT) - die_errno(_("unable to read alternates file")); - - if (found) { - rollback_lock_file(&lock); - } else { - fprintf_or_die(out, "%s\n", dir); - if (commit_lock_file(&lock)) - die_errno(_("unable to move new alternates file into place")); - if (odb->loaded_alternates) - link_alt_odb_entries(odb, dir, '\n', NULL, 0); - } - free(alts); + int ret = odb_source_write_alternate(odb->sources, dir); + if (ret < 0) + die(NULL); + if (odb->loaded_alternates) + odb_add_alternate_recursively(odb, dir, 0); } struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, @@ -338,7 +255,7 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, * overwritten when they are. */ odb_prepare_alternates(odb); - return link_alt_odb_entry(odb, dir, NULL, 0); + return odb_add_alternate_recursively(odb, dir, 0); } struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, @@ -369,13 +286,6 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, return source->next; } -static void odb_source_free(struct odb_source *source) -{ - free(source->path); - odb_source_loose_free(source->loose); - free(source); -} - void odb_restore_primary_source(struct object_database *odb, struct odb_source *restore_source, const char *old_path) @@ -609,13 +519,19 @@ int odb_for_each_alternate(struct object_database *odb, void odb_prepare_alternates(struct object_database *odb) { + struct strvec sources = STRVEC_INIT; + if (odb->loaded_alternates) return; - link_alt_odb_entries(odb, odb->alternate_db, PATH_SEP, NULL, 0); + parse_alternates(odb->alternate_db, PATH_SEP, NULL, &sources); + odb_source_read_alternates(odb->sources, &sources); + for (size_t i = 0; i < sources.nr; i++) + odb_add_alternate_recursively(odb, sources.v[i], 0); - read_info_alternates(odb, odb->sources->path, 0); odb->loaded_alternates = 1; + + strvec_clear(&sources); } int 
odb_has_alternates(struct object_database *odb) @@ -670,8 +586,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -697,6 +611,8 @@ static int do_oid_object_info_extended(struct object_database *odb, oidclr(oi->delta_base_oid, odb->repo->hash_algo); if (oi->contentp) *oi->contentp = xmemdupz(co->buf, co->size); + if (oi->mtimep) + *oi->mtimep = 0; oi->whence = OI_CACHED; return 0; } @@ -706,19 +622,18 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (find_pack_entry(odb->repo, real, &e)) - break; - /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) - if (!odb_source_loose_read_object_info(source, real, oi, flags)) + if (!odb_source_read_object_info(source, real, oi, flags)) return 0; /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, + flags | OBJECT_INFO_AFTER_REPREPARE)) + return 0; } /* @@ -751,25 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. 
- */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, @@ -995,18 +891,47 @@ int odb_freshen_object(struct object_database *odb, const struct object_id *oid) { struct odb_source *source; - - if (packfile_store_freshen_object(odb->packfiles, oid)) - return 1; - odb_prepare_alternates(odb); for (source = odb->sources; source; source = source->next) - if (odb_source_loose_freshen_object(source, oid)) + if (odb_source_freshen_object(source, oid)) return 1; + return 0; +} + +int odb_for_each_object(struct object_database *odb, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + ret = odb_source_for_each_object(source, oi, cb, cb_data, flags); + if (ret) + return ret; + } return 0; } +unsigned long odb_count_objects(struct object_database *odb, + unsigned flags) +{ + struct odb_source *source; + unsigned long count = 0; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + count += odb_source_count_objects(source, flags); + + return count; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { @@ -1025,15 +950,15 @@ int odb_write_object_ext(struct object_database *odb, struct object_id *compat_oid, unsigned flags) { - return odb_source_loose_write_object(odb->sources, buf, len, type, - oid, compat_oid, flags); + return 
odb_source_write_object(odb->sources, buf, len, type, + oid, compat_oid, flags); } int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, struct object_id *oid) { - return odb_source_loose_write_stream(odb->sources, stream, len, oid); + return odb_source_write_object_stream(odb->sources, stream, len, oid); } static void odb_update_commondir(const char *name UNUSED, @@ -1077,12 +1002,14 @@ struct object_database *odb_new(struct repository *repo, memset(o, 0, sizeof(*o)); o->repo = repo; - o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); + if (!primary_source) + primary_source = repo->object_storage; if (!primary_source) primary_source = to_free = xstrfmt("%s/objects", repo->commondir); + o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); @@ -1097,15 +1024,8 @@ struct object_database *odb_new(struct repository *repo, void odb_close(struct object_database *o) { struct odb_source *source; - - packfile_store_close(o->packfiles); - - for (source = o->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - + for (source = o->sources; source; source = source->next) + odb_source_close(source); close_commit_graph(o); } @@ -1132,14 +1052,13 @@ void odb_free(struct object_database *o) oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); + odb_close(o); odb_free_sources(o); for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); free(o->cached_objects); - odb_close(o); - packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); chdir_notify_unregister(NULL, odb_update_commondir, o); @@ -1163,12 +1082,10 @@ void odb_reprepare(struct object_database *o) odb_prepare_alternates(o); for (source = o->sources; 
source; source = source->next) - odb_source_loose_reprepare(source); + odb_source_reprepare(source); o->approximate_object_count_valid = 0; - packfile_store_reprepare(o->packfiles); - obj_read_unlock(); } diff --git a/odb.h b/odb.h index 014cd9585a2f6efe7367e300afd465906f4a1e3a..ecdbd29e7d53f44ccac94aa5cf02ffa69733fb3a 100644 --- a/odb.h +++ b/odb.h @@ -3,6 +3,7 @@ #include "hashmap.h" #include "object.h" +#include "odb/source.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -30,54 +31,6 @@ extern int fetch_if_missing; */ char *compute_alternate_path(const char *path, struct strbuf *err); -/* - * The source is the part of the object database that stores the actual - * objects. It thus encapsulates the logic to read and write the specific - * on-disk format. An object database can have multiple sources: - * - * - The primary source, which is typically located in "$GIT_DIR/objects". - * This is where new objects are usually written to. - * - * - Alternate sources, which are configured via "objects/info/alternates" or - * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These - * alternate sources are only used to read objects. - */ -struct odb_source { - struct odb_source *next; - - /* Object database that owns this object source. */ - struct object_database *odb; - - /* Private state for loose objects. */ - struct odb_source_loose *loose; - - /* - * private data - * - * should only be accessed directly by packfile.c and midx.c - */ - struct multi_pack_index *midx; - - /* - * Figure out whether this is the local source of the owning - * repository, which would typically be its ".git/objects" directory. - * This local object directory is usually where objects would be - * written to. - */ - bool local; - - /* - * This object store is ephemeral, so there is no need to fsync. - */ - int will_destroy; - - /* - * Path to the source. If this is a relative path, it is relative to - * the current working directory. 
- */ - char *path; -}; - struct packed_git; struct packfile_store; struct cached_object_entry; @@ -128,9 +81,6 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - /* * This is meant to hold a *small* number of objects that you would * want odb_read_object() to be able to return, but yet you do not want @@ -324,13 +274,13 @@ struct object_info { off_t *disk_sizep; struct object_id *delta_base_oid; void **contentp; + time_t *mtimep; /* Response */ enum { OI_CACHED, OI_LOOSE, OI_PACKED, - OI_DBCACHED } whence; union { /* @@ -344,34 +294,58 @@ struct object_info { struct { struct packed_git *pack; off_t offset; - unsigned int is_delta; + enum packed_object_type { + PACKED_OBJECT_TYPE_UNKNOWN, + PACKED_OBJECT_TYPE_FULL, + PACKED_OBJECT_TYPE_OFS_DELTA, + PACKED_OBJECT_TYPE_REF_DELTA, + } type; } packed; } u; }; +/* + * Given an object info structure, figure out whether any of its request + * pointers are populated. + */ +static inline bool object_info_is_blank_request(struct object_info *oi) +{ + return !oi->typep && !oi->sizep && !oi->disk_sizep && + !oi->delta_base_oid && !oi->contentp && !oi->mtimep; +} + /* * Initializer for a "struct object_info" that wants no items. You may * also memset() the memory to all-zeroes. */ #define OBJECT_INFO_INIT { 0 } -/* Invoke lookup_replace_object() on the given hash */ -#define OBJECT_INFO_LOOKUP_REPLACE 1 -/* Do not retry packed storage after checking packed and loose storage */ -#define OBJECT_INFO_QUICK 8 -/* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ -#define OBJECT_INFO_SKIP_FETCH_OBJECT 16 -/* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. 
Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK - */ -#define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 3), + + /* Set for the retry after a reprepare; sources may skip lookups that a reprepare cannot affect. */ + OBJECT_INFO_AFTER_REPREPARE = (1 << 2), + + /* + * Do not attempt to fetch the object if missing (even if fetch_if_missing is + * nonzero). + */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 4), -/* Die if object corruption (not just an object being missing) was detected. */ -#define OBJECT_INFO_DIE_IF_CORRUPT 32 + /* Die if object corruption (not just an object being missing) was detected. */ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 5), + + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; /* * Read object info from the object database and populate the `object_info` @@ -410,6 +384,31 @@ int odb_has_object(struct object_database *odb, int odb_freshen_object(struct object_database *odb, const struct object_id *oid); +/* Flags that can be passed to `odb_count_objects()`. */ +enum odb_count_objects_flags { + /* + * Allow the number of objects to be estimated. This flag essentially + * asks the backend to trade accuracy for speed. The exact details of + * how these estimations happen are backend-specific. Some backends may + * not honor this flag at all. + */ + ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), + + /* + * Also estimate objects that are stored in an unoptimized format. 
This + * flag may be ignored in case a backend does not discern between + * unoptimized/optimized formats. + */ + ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED = (1 << 1), +}; + +/* + * Count the number of objects in the object database. This function does not + * account for reachability and may count objects multiple times. + */ +unsigned long odb_count_objects(struct object_database *odb, + unsigned flags); + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect); @@ -445,26 +444,44 @@ static inline void obj_read_unlock(void) if(obj_read_use_lock) pthread_mutex_unlock(&obj_read_mutex); } + /* Flags for for_each_*_object(). */ -enum for_each_object_flags { +enum odb_for_each_object_flags { /* Iterate only over local objects, not alternates. */ - FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), + ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), /* Only iterate over packs obtained from the promisor remote. */ - FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), /* * Visit objects within a pack in packfile order rather than .idx order */ - FOR_EACH_OBJECT_PACK_ORDER = (1<<2), + ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2), /* Only iterate over packs that are not marked as kept in-core. */ - FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), /* Only iterate over packs that do not have .keep files. */ - FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * Iterate through all objects contained in the object database. Note that + * objects may be iterated over multiple times in case they are either stored + * in different backends or in case they are stored in multiple sources. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. 
+ * + * Returns 0 on success, a negative error code in case a failure occurred, or + * an arbitrary non-zero error code returned by the callback itself. + */ +int odb_for_each_object(struct object_database *odb, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + enum { /* * By default, `odb_write_object()` does not actually write anything @@ -513,4 +530,9 @@ int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, struct object_id *oid); +void parse_alternates(const char *string, + int sep, + const char *relative_base, + struct strvec *out); + #endif /* ODB_H */ diff --git a/odb/source-files.c b/odb/source-files.c new file mode 100644 index 0000000000000000000000000000000000000000..2a74106a1027f2e4bb58740f91c26cb79a2e4300 --- /dev/null +++ b/odb/source-files.c @@ -0,0 +1,226 @@ +#include "git-compat-util.h" +#include "gettext.h" +#include "lockfile.h" +#include "object-file.h" +#include "odb.h" +#include "odb/source.h" +#include "odb/source-files.h" +#include "packfile.h" +#include "strbuf.h" +#include "write-or-die.h" + +static void odb_source_files_free(struct odb_source *source) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_loose_free(files->loose); + packfile_store_free(files->packed); + odb_source_release(&files->base); + free(files); +} + +static void odb_source_files_close(struct odb_source *source) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + packfile_store_close(files->packed); +} + +static void odb_source_files_reprepare(struct odb_source *source) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_loose_reprepare(&files->base); + packfile_store_reprepare(files->packed); +} + +static int odb_source_files_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + struct odb_source_files *files = 
odb_source_files_downcast(source); + + if (!packfile_store_read_object_info(files->packed, oid, oi, flags)) + return 0; + + /* + * A reprepare doesn't cause new loose objects to show up, so we skip + * reading loose objects in that case. + */ + if (!(flags & OBJECT_INFO_AFTER_REPREPARE) && + !odb_source_loose_read_object_info(source, oid, oi, flags)) + return 0; + + return -1; +} + +static int odb_source_files_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + if (!packfile_store_read_object_stream(out, files->packed, oid) || + !odb_source_loose_read_object_stream(out, source, oid)) + return 0; + return -1; +} + +static int odb_source_files_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + int ret; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(files->packed, oi, cb, cb_data, flags); + if (ret) + return ret; + + return 0; +} + +static unsigned long odb_source_files_count_objects(struct odb_source *source, + unsigned flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned long count = 0; + + count += packfile_store_count_objects(files->packed, flags); + if (flags & ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED) + count += odb_source_loose_count_objects(source, flags); + + return count; +} + +static int odb_source_files_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + if (packfile_store_freshen_object(files->packed, oid) || + odb_source_loose_freshen_object(source, oid)) + return 1; + return 0; +} + +static 
int odb_source_files_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags) +{ + return odb_source_loose_write_object(source, buf, len, type, + oid, compat_oid, flags); +} + +static int odb_source_files_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + return odb_source_loose_write_stream(source, stream, len, oid); +} + +static int odb_source_files_read_alternates(struct odb_source *source, + struct strvec *out) +{ + struct strbuf buf = STRBUF_INIT; + char *path; + + path = xstrfmt("%s/info/alternates", source->path); + if (strbuf_read_file(&buf, path, 1024) < 0) { + warn_on_fopen_errors(path); + free(path); + return 0; + } + parse_alternates(buf.buf, '\n', source->path, out); + + strbuf_release(&buf); + free(path); + return 0; +} + +static int odb_source_files_write_alternate(struct odb_source *source, + const char *alternate) +{ + struct lock_file lock = LOCK_INIT; + char *path = xstrfmt("%s/%s", source->path, "info/alternates"); + FILE *in, *out; + int found = 0; + int ret; + + hold_lock_file_for_update(&lock, path, LOCK_DIE_ON_ERROR); + out = fdopen_lock_file(&lock, "w"); + if (!out) { + ret = error_errno(_("unable to fdopen alternates lockfile")); + goto out; + } + + in = fopen(path, "r"); + if (in) { + struct strbuf line = STRBUF_INIT; + + while (strbuf_getline(&line, in) != EOF) { + if (!strcmp(alternate, line.buf)) { + found = 1; + break; + } + fprintf_or_die(out, "%s\n", line.buf); + } + + strbuf_release(&line); + fclose(in); + } else if (errno != ENOENT) { + ret = error_errno(_("unable to read alternates file")); + goto out; + } + + if (found) { + rollback_lock_file(&lock); + } else { + fprintf_or_die(out, "%s\n", alternate); + if (commit_lock_file(&lock)) { + ret = error_errno(_("unable to move new alternates file into place")); + goto out; + } + } + + ret = 
0; + +out: + free(path); + return ret; +} + +struct odb_source_files *odb_source_files_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source_files *files; + + CALLOC_ARRAY(files, 1); + odb_source_init(&files->base, odb, path, local); + files->loose = odb_source_loose_new(&files->base); + files->packed = packfile_store_new(&files->base); + + files->base.free = odb_source_files_free; + files->base.close = odb_source_files_close; + files->base.reprepare = odb_source_files_reprepare; + files->base.read_object_info = odb_source_files_read_object_info; + files->base.read_object_stream = odb_source_files_read_object_stream; + files->base.for_each_object = odb_source_files_for_each_object; + files->base.count_objects = odb_source_files_count_objects; + files->base.freshen_object = odb_source_files_freshen_object; + files->base.write_object = odb_source_files_write_object; + files->base.write_object_stream = odb_source_files_write_object_stream; + files->base.read_alternates = odb_source_files_read_alternates; + files->base.write_alternate = odb_source_files_write_alternate; + + return files; +} diff --git a/odb/source-files.h b/odb/source-files.h new file mode 100644 index 0000000000000000000000000000000000000000..e64187073567cedc3008859296382c1ba0124867 --- /dev/null +++ b/odb/source-files.h @@ -0,0 +1,33 @@ +#ifndef ODB_FILES_H +#define ODB_FILES_H + +#include "odb/source.h" + +struct odb_source_loose; +struct packfile_store; + +/* + * The files object database source uses a combination of loose objects and + * packfiles. It is the default backend used by Git to store objects. + */ +struct odb_source_files { + struct odb_source base; + struct odb_source_loose *loose; + struct packfile_store *packed; +}; + +/* Allocate and initialize a new object source. */ +struct odb_source_files *odb_source_files_new(struct object_database *odb, + const char *path, + bool local); + +/* + * Cast the given object database source to the files backend. 
This will cause + * a BUG in case the source doesn't use this backend. + */ +static inline struct odb_source_files *odb_source_files_downcast(struct odb_source *source) +{ + return (struct odb_source_files *) source; +} + +#endif diff --git a/odb/source.c b/odb/source.c new file mode 100644 index 0000000000000000000000000000000000000000..51bb1cde740f282126f6ed21c3b4821616745776 --- /dev/null +++ b/odb/source.c @@ -0,0 +1,57 @@ +#include "git-compat-util.h" +#include "gettext.h" +#include "object-file.h" +#include "odb/source-files.h" +#include "odb/source.h" +#include "packfile.h" + +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source *source; + const char *schema_end; + char *schema; + + schema_end = strstr(path, "://"); + if (!schema_end) + return &odb_source_files_new(odb, path, local)->base; + + schema = xstrndup(path, schema_end - path); + path = schema_end + 3; + + if (!strcmp(schema, "files")) { + source = &odb_source_files_new(odb, path, local)->base; + goto out; + } + + die(_("unknown object database source schema: '%s'"), schema); + +out: + free(schema); + return source; +} + +void odb_source_init(struct odb_source *source, + struct object_database *odb, + const char *path, + bool local) +{ + source->odb = odb; + source->local = local; + source->path = xstrdup(path); +} + +void odb_source_free(struct odb_source *source) +{ + if (!source) + return; + source->free(source); +} + +void odb_source_release(struct odb_source *source) +{ + if (!source) + return; + free(source->path); +} diff --git a/odb/source.h b/odb/source.h new file mode 100644 index 0000000000000000000000000000000000000000..7581fb19629c3d42954b82b81ec39e2c66b90456 --- /dev/null +++ b/odb/source.h @@ -0,0 +1,403 @@ +#ifndef ODB_SOURCE_H +#define ODB_SOURCE_H + +struct object_info; +struct odb_read_stream; +struct odb_source; +struct odb_write_stream; +struct strvec; + +/* + * A callback function that can be used to 
iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + +/* + * The source is the part of the object database that stores the actual + * objects. It thus encapsulates the logic to read and write the specific + * on-disk format. An object database can have multiple sources: + * + * - The primary source, which is typically located in "$GIT_DIR/objects". + * This is where new objects are usually written to. + * + * - Alternate sources, which are configured via "objects/info/alternates" or + * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These + * alternate sources are only used to read objects. + */ +struct odb_source { + struct odb_source *next; + + /* Object database that owns this object source. */ + struct object_database *odb; + + /* + * Figure out whether this is the local source of the owning + * repository, which would typically be its ".git/objects" directory. + * This local object directory is usually where objects would be + * written to. + */ + bool local; + + /* + * This object store is ephemeral, so there is no need to fsync. + */ + int will_destroy; + + /* + * Path to the source. If this is a relative path, it is relative to + * the current working directory. + */ + char *path; + + /* + * This callback is expected to free the underlying object database source and + * all associated resources. The function will never be called with a NULL pointer. + */ + void (*free)(struct odb_source *source); + + /* + * This callback is expected to close any open resources, like for + * example file descriptors or connections. The source is expected to + * still be usable after it has been closed. 
Closed resources may need + * to be reopened in that case. + */ + void (*close)(struct odb_source *source); + + /* + * This callback is expected to clear underlying caches of the object + * database source. The function is called when the repository has for + * example just been repacked so that new objects will become visible. + */ + void (*reprepare)(struct odb_source *source); + + /* + * This callback is expected to read object information from the object + * database source. The object info will be partially populated with + * pointers for each bit of information that was requested by the + * caller. + * + * The flags field is a combination of `OBJECT_INFO` flags. Only the + * following fields need to be handled by the backend: + * + * - `OBJECT_INFO_QUICK` indicates it is fine to use caches without + * re-verifying the data. + * + * - `OBJECT_INFO_AFTER_REPREPARE` indicates that the initial object + * lookup has failed and that the object sources have just been + * reloaded. The source should only look up objects via sources + * that may have been changed due to the reload. + * + * The callback is expected to return a negative error code in case + * reading the object has failed, 0 otherwise. + */ + int (*read_object_info)(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + + /* + * This callback is expected to create a new read stream that can be + * used to stream the object identified by the given ID. + * + * The callback is expected to return a negative error code in case + * creating the object stream has failed, 0 otherwise. + */ + int (*read_object_stream)(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); + + /* + * This callback is expected to iterate over all objects stored in this + * source and invoke the callback function for each of them. It is + * valid to yield the same object multiple times. 
A non-zero exit code + * from the object callback shall abort iteration. + * + * The optional `oi` structure shall be populated similar to how an individual + * call to `odb_source_read_object_info()` would have behaved. If the caller + * passes a `NULL` pointer then the object itself shall not be read. + * + * The callback is expected to return a negative error code in case the + * iteration has failed to read all objects, 0 otherwise. When the + * callback function returns a non-zero error code then that error code + * should be returned. + */ + int (*for_each_object)(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); + + /* + * This callback is expected to count the number of objects that exist + * in the given source. It is fine to both under- and overcount the + * objects. + * + * The flags field is a combination of `enum odb_count_objects_flags` + * flags. + * + * The callback is expected to return the number of objects. + */ + unsigned long (*count_objects)(struct odb_source *source, + unsigned flags); + + /* + * This callback is expected to freshen the given object so that its + * last access time is set to the current time. This is used to ensure + * that objects that are recent will not get garbage collected even if + * they were unreachable. + * + * Returns 0 in case the object does not exist, 1 in case the object + * has been freshened. + */ + int (*freshen_object)(struct odb_source *source, + const struct object_id *oid); + + /* + * This callback is expected to persist the given object into the + * object source. In case the object already exists it shall be + * freshened. + * + * The flags field is a combination of `WRITE_OBJECT` flags. + * + * The resulting object ID (and optionally the compatibility object ID) + * shall be written into the out pointers. The callback is expected to + * return 0 on success, a negative error code otherwise. 
+ */ + int (*write_object)(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags); + + /* + * This callback is expected to persist the given object stream into + * the object source. + * + * The resulting object ID shall be written into the out pointer. The + * callback is expected to return 0 on success, a negative error code + * otherwise. + */ + int (*write_object_stream)(struct odb_source *source, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + + /* + * This callback is expected to read the list of alternate object + * database sources connected to it and write them into the `strvec`. + * + * The format is expected to follow the "objectStorage" extension + * format with `(backend://)?payload` syntax. If the payload contains + * paths, these paths must be resolved to absolute paths. + * + * The callback is expected to return 0 on success, a negative error + * code otherwise. + */ + int (*read_alternates)(struct odb_source *source, + struct strvec *out); + + /* + * This callback is expected to persist the singular alternate passed + * to it into its list of alternates. Any pre-existing alternates are + * expected to remain active. Subsequent calls to `read_alternates` are + * thus expected to yield the pre-existing list of alternates plus the + * newly added alternate appended to its end. + * + * The callback is expected to return 0 on success, a negative error + * code otherwise. + */ + int (*write_alternate)(struct odb_source *source, + const char *alternate); +}; + +/* + * Allocate and initialize a new source for the given object database located + * at `path`. `local` indicates whether or not the source is the local and thus + * primary object source of the object database. 
+ */ +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local); + +/* + * Initialize the source for the given object database located at `path`. + * `local` indicates whether or not the source is the local and thus primary + * object source of the object database. + * + * This function is only supposed to be called by specific object source + * implementations. + */ +void odb_source_init(struct odb_source *source, + struct object_database *odb, + const char *path, + bool local); + +/* + * Free the object database source, releasing all associated resources and + * freeing the structure itself. + */ +void odb_source_free(struct odb_source *source); + +/* + * Release the object database source, releasing all associated resources. + * + * This function is only supposed to be called by specific object source + * implementations. + */ +void odb_source_release(struct odb_source *source); + +/* + * Close the object database source without releasing the underlying data. The + * source can still be used going forward, but it first needs to be reopened. + * This can be useful to reduce resource usage. + */ +static inline void odb_source_close(struct odb_source *source) +{ + source->close(source); +} + +/* + * Reprepare the object database source and clear any caches. Depending on the + * backend used this may have the effect that concurrently-written objects + * become visible. + */ +static inline void odb_source_reprepare(struct odb_source *source) +{ + source->reprepare(source); +} + +/* + * Read an object from the object database source identified by its object ID. + * Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + unsigned flags) +{ + return source->read_object_info(source, oid, oi, flags); +} + +/* + * Create a new read stream for the given object ID. 
Returns 0 on success, a + * negative error code otherwise. + */ +static inline int odb_source_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + return source->read_object_stream(out, source, oid); +} + +/* + * Iterate through all objects contained in the given source and invoke the + * callback function for each of them. Returning a non-zero code from the + * callback function aborts iteration. There is no guarantee that objects + * are only iterated over once. + * + * The optional `oi` structure shall be populated similar to how an individual + * call to `odb_source_read_object_info()` would have behaved. If the caller + * passes a `NULL` pointer then the object itself shall not be read. + * + * The flags is a bitfield of `ODB_FOR_EACH_OBJECT_*` flags. Not all flags may + * apply to a specific backend, so whether or not they are honored is defined + * by the implementation. + * + * Returns 0 when all objects have been iterated over, a negative error code in + * case iteration has failed, or a non-zero value returned from the callback. + */ +static inline int odb_source_for_each_object(struct odb_source *source, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + return source->for_each_object(source, oi, cb, cb_data, flags); +} + +/* + * Count the number of objects that exist in the given object database source. + */ +static inline unsigned long odb_source_count_objects(struct odb_source *source, + unsigned flags) +{ + return source->count_objects(source, flags); +} + +/* + * Freshen an object in the object database by updating its timestamp. + * Returns 1 in case the object has been freshened, 0 in case the object does + * not exist. + */ +static inline int odb_source_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + return source->freshen_object(source, oid); +} + +/* + * Write an object into the object database source. 
Returns 0 on success, a + * negative error code otherwise. Populates the given out pointers for the + * object ID and the compatibility object ID, if non-NULL. + */ +static inline int odb_source_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid, + unsigned flags) +{ + return source->write_object(source, buf, len, type, oid, + compat_oid, flags); +} + +/* + * Write an object into the object database source via a stream. The overall + * length of the object must be known in advance. + * + * Return 0 on success, a negative error code otherwise. Populates the given + * out pointer for the object ID. + */ +static inline int odb_source_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + return source->write_object_stream(source, stream, len, oid); +} + +/* + * Read the list of alternative object database sources from the given backend + * and populate the `strvec` with them. The listing is not recursive -- that + * is, if any of the yielded alternate sources has alternates itself, those + * will not be yielded as part of this function call. + * + * Return 0 on success, a negative error code otherwise. + */ +static inline int odb_source_read_alternates(struct odb_source *source, + struct strvec *out) +{ + return source->read_alternates(source, out); +} + +/* + * Write and persist a new alternate object database source for the given + * source. Any preexisting alternates are expected to stay valid, and the new + * alternate shall be appended to the end of the list. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +static inline int odb_source_write_alternate(struct odb_source *source, + const char *alternate) +{ + return source->write_alternate(source, alternate); +} + +#endif diff --git a/odb/streaming.c b/odb/streaming.c new file mode 100644 index 0000000000000000000000000000000000000000..14a586292d69394c87d2faa646290e80e0ea838b --- /dev/null +++ b/odb/streaming.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2011, Google Inc. + */ + +#include "git-compat-util.h" +#include "convert.h" +#include "environment.h" +#include "repository.h" +#include "object-file.h" +#include "odb.h" +#include "odb/streaming.h" +#include "replace-object.h" +#include "packfile.h" + +#define FILTER_BUFFER (1024*16) + +/***************************************************************** + * + * Filtered stream + * + *****************************************************************/ + +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) +{ + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return odb_read_stream_close(fs->upstream); +} + +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, + size_t sz) +{ + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + size_t filled = 0; + + while (sz) { + /* do we already have filtered output? */ + if (fs->o_ptr < fs->o_end) { + size_t to_move = fs->o_end - fs->o_ptr; + if (sz < to_move) + to_move = sz; + memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); + fs->o_ptr += to_move; + sz -= to_move; + filled += to_move; + continue; + } + fs->o_end = fs->o_ptr = 0; + + /* do we have anything to feed the filter with? 
*/ + if (fs->i_ptr < fs->i_end) { + size_t to_feed = fs->i_end - fs->i_ptr; + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + fs->ibuf + fs->i_ptr, &to_feed, + fs->obuf, &to_receive)) + return -1; + fs->i_ptr = fs->i_end - to_feed; + fs->o_end = FILTER_BUFFER - to_receive; + continue; + } + + /* tell the filter to drain upon no more input */ + if (fs->input_finished) { + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + NULL, NULL, + fs->obuf, &to_receive)) + return -1; + fs->o_end = FILTER_BUFFER - to_receive; + if (!fs->o_end) + break; + continue; + } + fs->i_end = fs->i_ptr = 0; + + /* refill the input from the upstream */ + if (!fs->input_finished) { + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); + if (fs->i_end < 0) + return -1; + if (fs->i_end) + continue; + } + fs->input_finished = 1; + } + return filled; +} + +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) +{ + struct odb_filtered_read_stream *fs; + + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; + fs->upstream = st; + fs->filter = filter; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; +} + +/***************************************************************** + * + * In-core stream + * + *****************************************************************/ + +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) +{ + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); + return 0; +} + +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + size_t read_size = sz; + size_t remainder 
= st->base.size - st->read_ptr; + + if (remainder <= read_size) + read_size = remainder; + if (read_size) { + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; + } + return read_size; +} + +static int open_istream_incore(struct odb_read_stream **out, + struct object_database *odb, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, + }; + struct odb_incore_read_stream *st; + int ret; + + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; + ret = odb_read_object_info_extended(odb, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; + + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + + return 0; +} + +/***************************************************************************** + * static helpers variables and functions for users of streaming interface + *****************************************************************************/ + +static int istream_source(struct odb_read_stream **out, + struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_stream(out, source, oid)) + return 0; + + return open_istream_incore(out, odb, oid); +} + +/**************************************************************** + * Users of streaming interface + ****************************************************************/ + +int odb_read_stream_close(struct odb_read_stream *st) +{ + int r = st->close(st); + free(st); + return r; +} + +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) +{ + return st->read(st, buf, sz); +} + +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + struct stream_filter *filter) +{ + 
struct odb_read_stream *st; + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); + + if (ret) + return NULL; + + if (filter) { + /* Add "&& !is_null_stream_filter(filter)" for performance */ + struct odb_read_stream *nst = attach_stream_filter(st, filter); + if (!nst) { + odb_read_stream_close(st); + return NULL; + } + st = nst; + } + + return st; +} + +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) +{ + struct odb_read_stream *st; + ssize_t kept = 0; + int result = -1; + + st = odb_read_stream_open(odb, oid, filter); + if (!st) { + if (filter) + free_stream_filter(filter); + return result; + } + if (st->type != OBJ_BLOB) + goto close_and_exit; + for (;;) { + char buf[1024 * 16]; + ssize_t wrote, holeto; + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); + + if (readlen < 0) + goto close_and_exit; + if (!readlen) + break; + if (can_seek && sizeof(buf) == readlen) { + for (holeto = 0; holeto < readlen; holeto++) + if (buf[holeto]) + break; + if (readlen == holeto) { + kept += holeto; + continue; + } + } + + if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) + goto close_and_exit; + else + kept = 0; + wrote = write_in_full(fd, buf, readlen); + + if (wrote < 0) + goto close_and_exit; + } + if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || + xwrite(fd, "", 1) != 1)) + goto close_and_exit; + result = 0; + + close_and_exit: + odb_read_stream_close(st); + return result; +} diff --git a/odb/streaming.h b/odb/streaming.h new file mode 100644 index 0000000000000000000000000000000000000000..c7861f7e13c606af66d5b54b52b7b1cc3eb9adad --- /dev/null +++ b/odb/streaming.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2011, Google Inc. 
+ */ +#ifndef STREAMING_H +#define STREAMING_H 1 + +#include "object.h" + +struct object_database; +struct odb_read_stream; +struct stream_filter; + +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + +/* + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. + */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); + +/* + * Look up the object by its ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. 
+ */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); + +#endif /* STREAMING_H */ diff --git a/pack-bitmap.c b/pack-bitmap.c index 8ca79725b1d4380377fc4c0e68141c7fc1968401..f466ed2ddcb4e98f94f5bfe36883f184c1a3e8a0 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -441,11 +441,11 @@ char *midx_bitmap_filename(struct multi_pack_index *midx) struct strbuf buf = STRBUF_INIT; if (midx->has_chain) get_split_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + get_midx_hash(midx), MIDX_EXT_BITMAP); else get_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + get_midx_hash(midx), MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); @@ -502,7 +502,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, + if (!hasheq(get_midx_hash(bitmap_git->midx), bitmap_git->checksum, bitmap_repo(bitmap_git)->hash_algo)) { error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; @@ -2820,8 +2820,7 @@ void test_bitmap_walk(struct rev_info *revs) if (bitmap_is_midx(found)) fprintf_ln(stderr, "Located via MIDX '%s'.", - hash_to_hex_algop(get_midx_checksum(found->midx), - revs->repo->hash_algo)); + get_midx_checksum(found->midx)); else fprintf_ln(stderr, "Located via pack '%s'.", hash_to_hex_algop(found->pack->hash, diff --git a/pack-revindex.c b/pack-revindex.c index d0791cc4938fa2c784a1a585210552ee2c6d06fa..016195ceb93ec6891c974a40e40e40964bc54bc3 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -390,11 +390,11 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->has_chain) get_split_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + get_midx_hash(m), MIDX_EXT_REV); else get_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + get_midx_hash(m), MIDX_EXT_REV); ret = 
load_revindex_from_disk(m->source->odb->repo->hash_algo, diff --git a/packfile.c b/packfile.c index 3d8b994a617e81e08b06d5b3b3aaa2999c1f73a0..3167ab229842749176964c3e4e0d8e4e1cf0c4dd 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -354,16 +355,19 @@ static void scan_windows(struct packed_git *p, } } -static int unuse_one_window(struct packed_git *current) +static int unuse_one_window(struct object_database *odb) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) - scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + for (source = odb->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + for (e = files->packed->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); + } + if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -528,15 +532,19 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { + struct odb_source *source; struct packfile_list_entry *e; struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (e = r->objects->packfiles->packs.head; e; e = e->next) { - if (e->pack->pack_fd == -1) - continue; - find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); + for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + for (e = files->packed->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) + continue; + find_lru_pack(e->pack, &lru_p, &mru_w, 
&accept_windows_inuse); + } } if (lru_p) @@ -739,8 +747,8 @@ unsigned char *use_pack(struct packed_git *p, win->len = (size_t)len; pack_mapped += win->len; - while (settings->packed_git_limit < pack_mapped - && unuse_one_window(p)) + while (settings->packed_git_limit < pack_mapped && + unuse_one_window(p->repo->objects)) ; /* nothing */ win->base = xmmap_gently(NULL, win->len, PROT_READ, MAP_PRIVATE, @@ -875,7 +883,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, p = strmap_get(&store->packs_by_path, key.buf); if (!p) { - p = add_packed_git(store->odb->repo, idx_path, + p = add_packed_git(store->source->odb->repo, idx_path, strlen(idx_path), local); if (p) packfile_store_add_pack(store, p); @@ -885,22 +893,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } -int packfile_store_freshen_object(struct packfile_store *store, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (utime(e.p->pack_name, NULL)) - return 0; - e.p->freshened = 1; - return 1; -} - void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -990,23 +982,23 @@ void for_each_file_in_pack_dir(const char *objdir, } struct prepare_pack_data { - struct repository *r; + struct odb_source *source; struct string_list *garbage; - int local; - struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, const char *file_name, void *_data) { struct prepare_pack_data *data = (struct prepare_pack_data *)_data; + struct odb_source_files *files = odb_source_files_downcast(data->source); size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && - !(data->m && midx_contains_pack(data->m, file_name))) { + !(files->packed->midx && + midx_contains_pack(files->packed->midx, 
file_name))) { char *trimmed_path = xstrndup(full_name, full_name_len); - packfile_store_load_pack(data->r->objects->packfiles, - trimmed_path, data->local); + packfile_store_load_pack(files->packed, + trimmed_path, data->source->local); free(trimmed_path); } @@ -1035,10 +1027,8 @@ static void prepare_packed_git_one(struct odb_source *source) { struct string_list garbage = STRING_LIST_INIT_DUP; struct prepare_pack_data data = { - .m = source->midx, - .r = source->odb->repo, + .source = source, .garbage = &garbage, - .local = source->local, }; for_each_file_in_pack_dir(source->path, prepare_pack, &data); @@ -1078,16 +1068,11 @@ static int sort_pack(const struct packfile_list_entry *a, void packfile_store_prepare(struct packfile_store *store) { - struct odb_source *source; - if (store->initialized) return; - odb_prepare_alternates(store->odb); - for (source = store->odb->sources; source; source = source->next) { - prepare_multi_pack_index_one(source); - prepare_packed_git_one(source); - } + prepare_multi_pack_index_one(store->source); + prepare_packed_git_one(store->source); sort_packs(&store->packs.head, sort_pack); for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) @@ -1107,10 +1092,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor { packfile_store_prepare(store); - for (struct odb_source *source = store->odb->sources; source; source = source->next) { - struct multi_pack_index *m = source->midx; - if (!m) - continue; + if (store->midx) { + struct multi_pack_index *m = store->midx; for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++) prepare_midx_pack(m, i); } @@ -1118,37 +1101,24 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. 
If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) +unsigned long packfile_store_count_objects(struct packfile_store *store, + unsigned flags UNUSED) { - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; - - odb_prepare_alternates(r->objects); + struct packfile_list_entry *e; + struct multi_pack_index *m; + unsigned long count = 0; - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects + m->num_objects_in_base; - } + m = get_multi_pack_index(store->source); + if (m) + count += m->num_objects + m->num_objects_in_base; - repo_for_each_pack(r, p) { - if (p->multi_pack_index || open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index || open_pack_index(e->pack)) + continue; + count += e->pack->num_objects; } - return r->objects->approximate_object_count; + + return count; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -1265,11 +1235,17 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packfile_list_entry *e; + struct odb_source *source; + + for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + struct packfile_list_entry *e; + + for (e = files->packed->packs.head; e; e = 
e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; + } - for (e = r->objects->packfiles->packs.head; e; e = e->next) - if (oidset_contains(&e->pack->bad_objects, oid)) - return e->pack; return NULL; } @@ -1595,13 +1571,15 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct repository *r, struct packed_git *p, - off_t obj_offset, struct object_info *oi) +static int packed_object_info_with_index_pos(struct repository *r, struct packed_git *p, + off_t obj_offset, uint32_t *maybe_index_pos, + struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; enum object_type type; + uint32_t pack_pos; /* * We always get the representation type, but only convert it to @@ -1635,16 +1613,34 @@ int packed_object_info(struct repository *r, struct packed_git *p, } } - if (oi->disk_sizep) { - uint32_t pos; - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) { + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); type = OBJ_BAD; goto out; } + } + + if (oi->disk_sizep) + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; + if (oi->mtimep) { + if (p->is_cruft) { + uint32_t index_pos; + + if (load_pack_mtimes(p) < 0) + die(_("could not load cruft pack .mtimes")); + + if (maybe_index_pos) + index_pos = *maybe_index_pos; + else + index_pos = pack_pos_to_index(p, pack_pos); + + *oi->mtimep = nth_packed_mtime(p, index_pos); + } else { + *oi->mtimep = p->mtime; + } } if (oi->typep) { @@ -1671,14 +1667,33 @@ int packed_object_info(struct repository *r, struct packed_git *p, oidclr(oi->delta_base_oid, p->repo->hash_algo); } - oi->whence = in_delta_base_cache(p, obj_offset) ? 
OI_DBCACHED : - OI_PACKED; + oi->whence = OI_PACKED; + oi->u.packed.offset = obj_offset; + oi->u.packed.pack = p; + + switch (type) { + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; + } out: unuse_pack(&w_curs); return type; } +int packed_object_info(struct repository *r, struct packed_git *p, + off_t obj_offset, struct object_info *oi) +{ + return packed_object_info_with_index_pos(r, p, obj_offset, NULL, oi); +} + static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, off_t curpos, @@ -2105,50 +2120,97 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct packfile_store *store, + const struct object_id *oid, + struct pack_entry *e) { struct packfile_list_entry *l; - packfile_store_prepare(r->objects->packfiles); - - for (struct odb_source *source = r->objects->sources; source; source = source->next) - if (source->midx && fill_midx_entry(source->midx, oid, e)) - return 1; - - if (!r->objects->packfiles->packs.head) - return 0; + packfile_store_prepare(store); + if (store->midx && fill_midx_entry(store->midx, oid, e)) + return 1; - for (l = r->objects->packfiles->packs.head; l; l = l->next) { + for (l = store->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - if (!r->objects->packfiles->skip_mru_updates) - packfile_list_prepend(&r->objects->packfiles->packs, p); + if (!store->skip_mru_updates) + packfile_list_prepend(&store->packs, p); return 1; } } + + return 0; +} + +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store, oid, &e)) + return 0; 
+ if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags UNUSED) +{ + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (object_info_is_blank_request(oi)) { + oi->whence = OI_PACKED; + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; + return 0; + } + + rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + return 0; } -static void maybe_invalidate_kept_pack_cache(struct repository *r, +static void maybe_invalidate_kept_pack_cache(struct packfile_store *store, unsigned flags) { - if (!r->objects->packfiles->kept_cache.packs) + if (!store->kept_cache.packs) return; - if (r->objects->packfiles->kept_cache.flags == flags) + if (store->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); - r->objects->packfiles->kept_cache.flags = 0; + FREE_AND_NULL(store->kept_cache.packs); + store->kept_cache.flags = 0; } -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags) { - maybe_invalidate_kept_pack_cache(r, flags); + maybe_invalidate_kept_pack_cache(store, flags); - if (!r->objects->packfiles->kept_cache.packs) { + if (!store->kept_cache.packs) { struct packed_git **packs = NULL; + struct packfile_list_entry *e; size_t nr = 0, alloc = 0; - struct packed_git *p; /* * We want "all" packs here, because we need to cover ones that @@ -2158,9 +2220,11 @@ struct packed_git 
**kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - repo_for_each_pack(r, p) { - if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || - (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; + + if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } @@ -2168,50 +2232,59 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->packfiles->kept_cache.packs = packs; - r->objects->packfiles->kept_cache.flags = flags; + store->kept_cache.packs = packs; + store->kept_cache.flags = flags; } - return r->objects->packfiles->kept_cache.packs; + return store->kept_cache.packs; } -int find_kept_pack_entry(struct repository *r, - const struct object_id *oid, - unsigned flags, - struct pack_entry *e) +int has_object_pack(struct repository *r, const struct object_id *oid) { - struct packed_git **cache; + struct odb_source *source; + struct pack_entry e; - for (cache = kept_pack_cache(r, flags); *cache; cache++) { - struct packed_git *p = *cache; - if (fill_pack_entry(oid, e, p)) - return 1; + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + int ret = find_pack_entry(files->packed, oid, &e); + if (ret) + return ret; } return 0; } -int has_object_pack(struct repository *r, const struct object_id *oid) -{ - struct pack_entry e; - return find_pack_entry(r, oid, &e); -} - int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags) { + struct odb_source *source; struct pack_entry e; - return find_kept_pack_entry(r, oid, flags, &e); + + for 
(source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + struct packed_git **cache; + + cache = packfile_store_get_kept_pack_cache(files->packed, flags); + + for (; *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, &e, p)) + return 1; + } + } + + return 0; } int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - enum for_each_object_flags flags) + unsigned flags) { uint32_t i; int r = 0; - if (flags & FOR_EACH_OBJECT_PACK_ORDER) { + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) { if (load_pack_revindex(p->repo, p)) return -1; } @@ -2232,7 +2305,7 @@ int for_each_object_in_pack(struct packed_git *p, * - in pack-order, it is pack position, which we must * convert to an index position in order to get the oid. */ - if (flags & FOR_EACH_OBJECT_PACK_ORDER) + if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) index_pos = pack_pos_to_index(p, i); else index_pos = i; @@ -2248,60 +2321,127 @@ int for_each_object_in_pack(struct packed_git *p, return r; } -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags) +static int packfile_store_for_each_object_internal(struct packfile_store *store, + each_packed_object_fn cb, + void *data, + unsigned flags, + int *pack_errors) { - struct packed_git *p; - int r = 0; - int pack_errors = 0; + struct packfile_list_entry *e; + int ret = 0; + + store->skip_mru_updates = true; + + for (e = packfile_store_get_packs(store); e; e = e->next) { + struct packed_git *p = e->pack; - repo->objects->packfiles->skip_mru_updates = true; - repo_for_each_pack(repo, p) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & 
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { - pack_errors = 1; + *pack_errors = 1; continue; } - r = for_each_object_in_pack(p, cb, data, flags); - if (r) + + ret = for_each_object_in_pack(p, cb, data, flags); + if (ret) break; } - repo->objects->packfiles->skip_mru_updates = false; - return r ? r : pack_errors; + store->skip_mru_updates = false; + + return ret; } +struct packfile_store_for_each_object_wrapper_data { + struct packfile_store *store; + struct object_info *oi; + odb_for_each_object_cb cb; + void *cb_data; + unsigned flags; +}; + +static int packfile_store_for_each_object_wrapper(const struct object_id *oid, + struct packed_git *pack, + uint32_t index_pos, + void *cb_data) +{ + struct packfile_store_for_each_object_wrapper_data *data = cb_data; + + if (data->oi) { + off_t offset = nth_packed_object_offset(pack, index_pos); + + if (object_info_is_blank_request(data->oi)) { + data->oi->whence = OI_PACKED; + data->oi->u.packed.offset = offset; + data->oi->u.packed.pack = pack; + data->oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; + } else if (packed_object_info_with_index_pos(data->store->source->odb->repo, + pack, offset, &index_pos, data->oi) < 0) { + mark_bad_packed_object(pack, oid); + return -1; + } + } + + return data->cb(oid, data->oi, data->cb_data); +} + +int packfile_store_for_each_object(struct packfile_store *store, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct packfile_store_for_each_object_wrapper_data data = { + .store = store, + .oi = oi, + .cb = cb, + .cb_data = cb_data, + .flags = flags, + }; + int pack_errors = 0, ret; + + ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper, + 
&data, flags, &pack_errors); + if (ret) + return ret; + + return pack_errors ? -1 : 0; +} + +struct add_promisor_object_data { + struct repository *repo; + struct oidset *set; +}; + static int add_promisor_object(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos UNUSED, - void *set_) + struct object_info *oi UNUSED, + void *cb_data) { - struct oidset *set = set_; + struct add_promisor_object_data *data = cb_data; struct object *obj; int we_parsed_object; - obj = lookup_object(pack->repo, oid); + obj = lookup_object(data->repo, oid); if (obj && obj->parsed) { we_parsed_object = 0; } else { we_parsed_object = 1; - obj = parse_object(pack->repo, oid); + obj = parse_object(data->repo, oid); } if (!obj) return 1; - oidset_insert(set, oid); + oidset_insert(data->set, oid); /* * If this is a tree, commit, or tag, the objects it refers @@ -2319,19 +2459,19 @@ static int add_promisor_object(const struct object_id *oid, */ return 0; while (tree_entry_gently(&desc, &entry)) - oidset_insert(set, &entry.oid); + oidset_insert(data->set, &entry.oid); if (we_parsed_object) free_tree_buffer(tree); } else if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; struct commit_list *parents = commit->parents; - oidset_insert(set, get_commit_tree_oid(commit)); + oidset_insert(data->set, get_commit_tree_oid(commit)); for (; parents; parents = parents->next) - oidset_insert(set, &parents->item->object.oid); + oidset_insert(data->set, &parents->item->object.oid); } else if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) obj; - oidset_insert(set, get_tagged_oid(tag)); + oidset_insert(data->set, get_tagged_oid(tag)); } return 0; } @@ -2343,10 +2483,14 @@ int is_promisor_object(struct repository *r, const struct object_id *oid) if (!promisor_objects_prepared) { if (repo_has_promisor_remote(r)) { - for_each_packed_object(r, add_promisor_object, - &promisor_objects, - FOR_EACH_OBJECT_PROMISOR_ONLY | - FOR_EACH_OBJECT_PACK_ORDER); + struct 
add_promisor_object_data data = { + .repo = r, + .set = &promisor_objects, + }; + + odb_for_each_object(r->objects, NULL, add_promisor_object, &data, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | + ODB_FOR_EACH_OBJECT_PACK_ORDER); } promisor_objects_prepared = 1; } @@ -2373,11 +2517,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -struct packfile_store *packfile_store_new(struct object_database *odb) +struct packfile_store *packfile_store_new(struct odb_source *source) { struct packfile_store *store; CALLOC_ARRAY(store, 1); - store->odb = odb; + store->source = source; strmap_init(&store->packs_by_path); return store; } @@ -2399,4 +2543,135 @@ void packfile_store_close(struct packfile_store *store) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } + if (store->midx) + close_midx(store->midx); + store->midx = NULL; +} + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, &st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + 
st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA || + oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA || + repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + + 
CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; } diff --git a/packfile.h b/packfile.h index 20e1cf17b26083c3dd278f188c0ae4ddcd22b7ce..78d5bf1794ef425a9228468375224e0e767634ea 100644 --- a/packfile.h +++ b/packfile.h @@ -4,11 +4,14 @@ #include "list.h" #include "object.h" #include "odb.h" +#include "odb/source-files.h" #include "oidset.h" +#include "repository.h" #include "strmap.h" /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct pack_window *windows; @@ -76,7 +79,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, * A store that manages packfiles for a given object database. */ struct packfile_store { - struct object_database *odb; + struct odb_source *source; /* * The list of packfiles in the order in which they have been most @@ -98,6 +101,9 @@ struct packfile_store { unsigned flags; } kept_cache; + /* The multi-pack index that belongs to this specific packfile store. */ + struct multi_pack_index *midx; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -128,9 +134,9 @@ struct packfile_store { /* * Allocate and initialize a new empty packfile store for the given object - * database. + * database source. */ -struct packfile_store *packfile_store_new(struct object_database *odb); +struct packfile_store *packfile_store_new(struct odb_source *source); /* * Free the packfile store and all its associated state. 
All packfiles @@ -168,20 +174,99 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + +/* + * Count the number of objects contained in the packfiles. + */ +unsigned long packfile_store_count_objects(struct packfile_store *store, + unsigned flags); + +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); + +struct repo_for_each_pack_data { + struct odb_source *source; + struct packfile_list_entry *entry; +}; + +static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct repository *repo) +{ + struct repo_for_each_pack_data data = { 0 }; + + odb_prepare_alternates(repo->objects); + + for (struct odb_source *source = repo->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + struct packfile_list_entry *entry = packfile_store_get_packs(files->packed); + if (!entry) + continue; + data.source = source; + data.entry = entry; + break; + } + + return data; +} + +static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data) +{ + struct odb_source *source; + + data->entry = data->entry->next; + if (data->entry) + return; + + for (source = data->source->next; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + struct packfile_list_entry *entry = 
packfile_store_get_packs(files->packed); + if (!entry) + continue; + data->source = source; + data->entry = entry; + return; + } + + data->source = NULL; + data->entry = NULL; +} + /* * Load and iterate through all packs of the given repository. This helper * function will yield packfiles from all object sources connected to the * repository. */ #define repo_for_each_pack(repo, p) \ - for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ - ((p) = (e ? e->pack : NULL)); e = e->next) + for (struct repo_for_each_pack_data eack_pack_data = repo_for_eack_pack_data_init(repo); \ + ((p) = (eack_pack_data.entry ? eack_pack_data.entry->pack : NULL)); \ + repo_for_each_pack_data_next(&eack_pack_data)) + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); /* - * Get all packs managed by the given store, including packfiles that are - * referenced by multi-pack indices. + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. */ -struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); /* * Open the packfile and add it to the store if it isn't yet known. Returns @@ -194,6 +279,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, int packfile_store_freshen_object(struct packfile_store *store, const struct object_id *oid); +enum kept_pack_type { + KEPT_PACK_ON_DISK = (1 << 0), + KEPT_PACK_IN_CORE = (1 << 1), +}; + +/* + * Retrieve the cache of kept packs from the given packfile store. Accepts a + * combination of `kept_pack_type` flags. 
The cache is computed on demand and + * will be recomputed whenever the flags change. + */ +struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store, + unsigned flags); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -260,9 +358,21 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - enum for_each_object_flags flags); -int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, - void *data, enum for_each_object_flags flags); + unsigned flags); + +/* + * Iterate through all packed objects in the given packfile store and invoke + * the callback function for each of them. If given, the object info will be + * populated with the object's data as if you had called + * `packfile_store_read_object_info()` on the object. + * + * The flags parameter is a combination of `odb_for_each_object_flags`. + */ +int packfile_store_for_each_object(struct packfile_store *store, + struct object_info *oi, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 @@ -270,12 +380,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * Give a rough count of objects in the repository. This sacrifices accuracy - * for speed. 
- */ -unsigned long repo_approximate_object_count(struct repository *r); - void pack_report(struct repository *repo); /* @@ -369,22 +473,10 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 - -/* - * Iff a pack file in the given repository contains the object named by sha1, - * return true and store its location to e. - */ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); -int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); - int has_object_pack(struct repository *r, const struct object_id *oid); int has_object_kept_pack(struct repository *r, const struct object_id *oid, unsigned flags); -struct packed_git **kept_pack_cache(struct repository *r, unsigned flags); - /* * Return 1 if an object in a promisor packfile is or refers to the given * object, 0 otherwise. 
diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6e8524fcf829c0f2fb389146b643e22..0bf4bd6d4abd8c98cfdfc0f68a39d44cf4eaa800 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + &pc_item->ce->oid, filter, 1)) { /* On error, reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/reachable.c b/reachable.c index b753c395530b6d3212006742bbd4f1671a2e22a6..4a26ccf399bc2a1b7e1830f6f7827e6fec34b642 100644 --- a/reachable.c +++ b/reachable.c @@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime, return oidset_contains(&data->extra_recent_oids, oid); } -static void add_recent_object(const struct object_id *oid, - struct packed_git *pack, - off_t offset, - timestamp_t mtime, - struct recent_data *data) +static int want_recent_object(struct recent_data *data, + const struct object_id *oid) { - struct object *obj; - enum object_type type; + if (data->ignore_in_core_kept_packs && + has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE)) + return 0; + return 1; +} - if (!obj_is_recent(oid, mtime, data)) - return; +static int add_recent_object(const struct object_id *oid, + struct object_info *oi, + void *cb_data) +{ + struct recent_data *data = cb_data; + struct object *obj; - /* - * We do not want to call parse_object here, because - * inflating blobs and trees could be very expensive. 
- * However, we do need to know the correct type for - * later processing, and the revision machinery expects - * commits and tags to have been parsed. - */ - type = odb_read_object_info(the_repository->objects, oid, NULL); - if (type < 0) - die("unable to get object info for %s", oid_to_hex(oid)); + if (!want_recent_object(data, oid) || + !obj_is_recent(oid, *oi->mtimep, data)) + return 0; - switch (type) { + switch (*oi->typep) { case OBJ_TAG: case OBJ_COMMIT: obj = parse_object_or_die(the_repository, oid, NULL); @@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid, break; default: die("unknown object type for %s: %s", - oid_to_hex(oid), type_name(type)); + oid_to_hex(oid), type_name(*oi->typep)); } if (!obj) die("unable to lookup %s", oid_to_hex(oid)); - - add_pending_object(data->revs, obj, ""); - if (data->cb) - data->cb(obj, pack, offset, mtime); -} - -static int want_recent_object(struct recent_data *data, - const struct object_id *oid) -{ - if (data->ignore_in_core_kept_packs && - has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS)) + if (obj->flags & SEEN) return 0; - return 1; -} -static int add_recent_loose(const struct object_id *oid, - const char *path, void *data) -{ - struct stat st; - struct object *obj; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - - if (stat(path, &st) < 0) { - /* - * It's OK if an object went away during our iteration; this - * could be due to a simultaneous repack. But anything else - * we should abort, since we might then fail to mark objects - * which should not be pruned. 
- */ - if (errno == ENOENT) - return 0; - return error_errno("unable to stat %s", oid_to_hex(oid)); + add_pending_object(data->revs, obj, ""); + if (data->cb) { + if (oi->whence == OI_PACKED) + data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep); + else + data->cb(obj, NULL, 0, *oi->mtimep); } - add_recent_object(oid, NULL, 0, st.st_mtime, data); - return 0; -} - -static int add_recent_packed(const struct object_id *oid, - struct packed_git *p, - uint32_t pos, - void *data) -{ - struct object *obj; - timestamp_t mtime = p->mtime; - - if (!want_recent_object(data, oid)) - return 0; - - obj = lookup_object(the_repository, oid); - - if (obj && obj->flags & SEEN) - return 0; - if (p->is_cruft) { - if (load_pack_mtimes(p) < 0) - die(_("could not load cruft pack .mtimes")); - mtime = nth_packed_mtime(p, pos); - } - add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data); return 0; } @@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, int ignore_in_core_kept_packs) { struct recent_data data; - enum for_each_object_flags flags; + enum odb_for_each_object_flags flags; + enum object_type type; + time_t mtime; + struct object_info oi = { + .mtimep = &mtime, + .typep = &type, + }; int r; data.revs = revs; @@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, oidset_init(&data.extra_recent_oids, 0); data.extra_recent_oids_loaded = 0; - r = for_each_loose_object(the_repository->objects, add_recent_loose, &data, - FOR_EACH_OBJECT_LOCAL_ONLY); - if (r) - goto done; - - flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER; + flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER; if (ignore_in_core_kept_packs) - flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; + flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; - r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags); + r = odb_for_each_object(revs->repo->objects, &oi, 
add_recent_object, &data, flags); + if (r) + goto done; done: oidset_clear(&data.extra_recent_oids); diff --git a/repack-promisor.c b/repack-promisor.c index ee6e0669f656028a4f59389d0ba0b8ff637526c3..35c4073632b1b49cec04f1a71ff77ca7605fcd0a 100644 --- a/repack-promisor.c +++ b/repack-promisor.c @@ -17,8 +17,8 @@ struct write_oid_context { * necessary. */ static int write_oid(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, void *data) + struct object_info *oi UNUSED, + void *data) { struct write_oid_context *ctx = data; struct child_process *cmd = ctx->cmd; @@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo, */ ctx.cmd = &cmd; ctx.algop = repo->hash_algo; - for_each_packed_object(repo, write_oid, &ctx, - FOR_EACH_OBJECT_PROMISOR_ONLY); + odb_for_each_object(repo->objects, NULL, write_oid, &ctx, + ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (cmd.in == -1) { /* No packed objects; cmd was never started */ diff --git a/repository.h b/repository.h index 6063c4b846d031d657827f8b16d65af8c09e5b29..01322ca1979822b19b4ebc7d07365fdaf1acec88 100644 --- a/repository.h +++ b/repository.h @@ -50,6 +50,15 @@ struct repository { */ char *commondir; + /* + * Location of the primary object database source. May be NULL, in + * which case the primary object database source will be assumed to be + * "${commondir}/objects". + * + * This configuration can be set via "extensions.objectStorage". + */ + char *object_storage; + /* * Holds any information related to accessing the raw object content.
*/ diff --git a/revision.c b/revision.c index 5f0850ae5c9c1aec7838b0a9e05e2951a6f50fdd..d2b83d0f8b50f38b65fd80906c994706226dd2f3 100644 --- a/revision.c +++ b/revision.c @@ -2541,14 +2541,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked= no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; @@ -3649,8 +3649,7 @@ void reset_revision_walk(void) } static int mark_uninteresting(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, + struct object_info *oi UNUSED, void *cb) { struct rev_info *revs = cb; @@ -3959,10 +3958,9 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; - if (revs->exclude_promisor_objects) { - for_each_packed_object(revs->repo, mark_uninteresting, revs, - FOR_EACH_OBJECT_PROMISOR_ONLY); - } + if (revs->exclude_promisor_objects) + odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting, + revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY); if (!revs->reflog_info) prepare_to_use_bloom_filter(revs); diff --git a/setup.c b/setup.c index 3a6a048620dd7d5046d6ed5c04f126322e6a7618..8fcdd8d7c04474e880c2be9b82bb943bff92c0b3 100644 --- a/setup.c +++ b/setup.c @@ -686,7 +686,14 @@ static enum 
extension_result handle_extension(const char *var, } else if (!strcmp(ext, "relativeworktrees")) { data->relative_worktrees = git_config_bool(var, value); return EXTENSION_OK; + } else if (!strcmp(ext, "objectstorage")) { + if (!value) + return config_error_nonbool(var); + free(data->object_storage); + data->object_storage = xstrdup(value); + return EXTENSION_OK; } + return EXTENSION_UNKNOWN; } @@ -1931,12 +1938,18 @@ const char *setup_git_directory_gently(int *nongit_ok) startup_info->have_repository || /* GIT_DIR_EXPLICIT */ getenv(GIT_DIR_ENVIRONMENT)) { + if (startup_info->have_repository) { + the_repository->object_storage = + xstrdup_or_null(repo_fmt.object_storage); + } + if (!the_repository->gitdir) { const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; setup_git_env(gitdir); } + if (startup_info->have_repository) { repo_set_hash_algo(the_repository, repo_fmt.hash_algo); repo_set_compat_hash_algo(the_repository, @@ -2039,6 +2052,8 @@ void check_repository_format(struct repository_format *fmt) fmt = &repo_fmt; check_repository_format_gently(repo_get_git_dir(the_repository), fmt, NULL); startup_info->have_repository = 1; + the_repository->object_storage = + xstrdup_or_null(repo_fmt.object_storage); repo_set_hash_algo(the_repository, fmt->hash_algo); repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo); repo_set_ref_storage_format(the_repository, diff --git a/setup.h b/setup.h index d55dcc66086308b31d86f28bcbb84f5d01e4453f..e1c1279d09a593132f23f1786bd8b320da292f79 100644 --- a/setup.h +++ b/setup.h @@ -173,6 +173,7 @@ struct repository_format { enum ref_storage_format ref_storage_format; int sparse_index; char *work_tree; + char *object_storage; struct string_list unknown_extensions; struct string_list v1_only_extensions; }; diff --git a/streaming.c b/streaming.c deleted file mode 100644 index 00ad649ae397f3c5d0f1200ec3860188133c5223..0000000000000000000000000000000000000000 --- a/streaming.c +++ 
/dev/null @@ -1,561 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "convert.h" -#include "environment.h" -#include "streaming.h" -#include "repository.h" -#include "object-file.h" -#include "odb.h" -#include "replace-object.h" -#include "packfile.h" - -typedef int (*open_istream_fn)(struct git_istream *, - struct repository *, - const struct object_id *, - enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); - -#define FILTER_BUFFER (1024*16) - -struct filtered_istream { - struct git_istream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - -struct git_istream { - open_istream_fn open; - close_istream_fn close; - read_istream_fn read; - - unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - - struct filtered_istream filtered; - } u; -}; - -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct git_istream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - -/***************************************************************** - * - * Filtered stream - * - *****************************************************************/ - -static int close_istream_filtered(struct git_istream *st) -{ - free_stream_filter(st->u.filtered.filter); - return 
close_istream(st->u.filtered.upstream); -} - -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, - size_t sz) -{ - struct filtered_istream *fs = &(st->u.filtered); - size_t filled = 0; - - while (sz) { - /* do we already have filtered output? */ - if (fs->o_ptr < fs->o_end) { - size_t to_move = fs->o_end - fs->o_ptr; - if (sz < to_move) - to_move = sz; - memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); - fs->o_ptr += to_move; - sz -= to_move; - filled += to_move; - continue; - } - fs->o_end = fs->o_ptr = 0; - - /* do we have anything to feed the filter with? */ - if (fs->i_ptr < fs->i_end) { - size_t to_feed = fs->i_end - fs->i_ptr; - size_t to_receive = FILTER_BUFFER; - if (stream_filter(fs->filter, - fs->ibuf + fs->i_ptr, &to_feed, - fs->obuf, &to_receive)) - return -1; - fs->i_ptr = fs->i_end - to_feed; - fs->o_end = FILTER_BUFFER - to_receive; - continue; - } - - /* tell the filter to drain upon no more input */ - if (fs->input_finished) { - size_t to_receive = FILTER_BUFFER; - if (stream_filter(fs->filter, - NULL, NULL, - fs->obuf, &to_receive)) - return -1; - fs->o_end = FILTER_BUFFER - to_receive; - if (!fs->o_end) - break; - continue; - } - fs->i_end = fs->i_ptr = 0; - - /* refill the input from the upstream */ - if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); - if (fs->i_end < 0) - return -1; - if (fs->i_end) - continue; - } - fs->input_finished = 1; - } - return filled; -} - -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) -{ - struct git_istream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); - - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; - fs->upstream = st; - fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - return ifs; -} - 
-/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) -{ - size_t total_read = 0; - - switch (st->z_state) { - case z_done: - return 0; - case z_error: - return -1; - default: - break; - } - - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct git_istream *st) -{ - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); - return 0; -} - -static int open_istream_loose(struct git_istream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_source *source; - - oi.sizep = &st->size; - oi.typep = type; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - st->u.loose.mapped = odb_source_loose_map_object(source, oid, - &st->u.loose.mapsize); - if (st->u.loose.mapped) - break; - } - if (!st->u.loose.mapped) - return -1; - - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { - case 
ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) - goto error; - - st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); - return -1; -} - - -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, - size_t sz) -{ - size_t total_read = 0; - - switch (st->z_state) { - case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; - break; - case z_done: - return 0; - case z_error: - return -1; - case z_used: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->u.in_pack.pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). 
- */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct git_istream *st) -{ - close_deflated_stream(st); - return 0; -} - -static int open_istream_pack_non_delta(struct git_istream *st, - struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) -{ - struct pack_window *window; - enum object_type in_pack_type; - - window = NULL; - - in_pack_type = unpack_object_header(st->u.in_pack.pack, - &window, - &st->u.in_pack.pos, - &st->size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - - return 0; -} - - -/***************************************************************** - * - * In-core stream - * - *****************************************************************/ - -static int close_istream_incore(struct git_istream *st) -{ - free(st->u.incore.buf); - return 0; -} - -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) -{ - size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; - - if (remainder <= read_size) - read_size = remainder; - if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; - } - return read_size; -} - -static int open_istream_incore(struct git_istream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) -{ - struct object_info oi = OBJECT_INFO_INIT; - - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; - - oi.typep = type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return 
odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); -} - -/***************************************************************************** - * static helpers variables and functions for users of streaming interface - *****************************************************************************/ - -static int istream_source(struct git_istream *st, - struct repository *r, - const struct object_id *oid, - enum object_type *type) -{ - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = type; - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; - - switch (oi.whence) { - case OI_LOOSE: - st->open = open_istream_loose; - return 0; - case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; - return 0; - } -} - -/**************************************************************** - * Users of streaming interface - ****************************************************************/ - -int close_istream(struct git_istream *st) -{ - int r = st->close(st); - free(st); - return r; -} - -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) -{ - return st->read(st, buf, sz); -} - -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) -{ - struct git_istream *st = xmalloc(sizeof(*st)); - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); - - if (ret) { - free(st); - return NULL; - } - - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } - if 
(filter) { - /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); - if (!nst) { - close_istream(st); - return NULL; - } - st = nst; - } - - *size = st->size; - return st; -} - -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) -{ - struct git_istream *st; - enum object_type type; - unsigned long sz; - ssize_t kept = 0; - int result = -1; - - st = open_istream(the_repository, oid, &type, &sz, filter); - if (!st) { - if (filter) - free_stream_filter(filter); - return result; - } - if (type != OBJ_BLOB) - goto close_and_exit; - for (;;) { - char buf[1024 * 16]; - ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); - - if (readlen < 0) - goto close_and_exit; - if (!readlen) - break; - if (can_seek && sizeof(buf) == readlen) { - for (holeto = 0; holeto < readlen; holeto++) - if (buf[holeto]) - break; - if (readlen == holeto) { - kept += holeto; - continue; - } - } - - if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) - goto close_and_exit; - else - kept = 0; - wrote = write_in_full(fd, buf, readlen); - - if (wrote < 0) - goto close_and_exit; - } - if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || - xwrite(fd, "", 1) != 1)) - goto close_and_exit; - result = 0; - - close_and_exit: - close_istream(st); - return result; -} diff --git a/streaming.h b/streaming.h deleted file mode 100644 index bd27f59e5764aec64cd1cf927baf213fcec4d893..0000000000000000000000000000000000000000 --- a/streaming.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. 
- */ -#ifndef STREAMING_H -#define STREAMING_H 1 - -#include "object.h" - -/* opaque */ -struct git_istream; -struct stream_filter; - -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); - -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); - -#endif /* STREAMING_H */ diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 6de5d1665afbfc8a6bee29983107cd1c8e28a98b..6e03aabca79c6c383053700ddd5445a665e164b3 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -26,18 +26,22 @@ static int read_midx_file(const char *object_dir, const char *checksum, int show_objects) { uint32_t i; - struct multi_pack_index *m; + struct multi_pack_index *m, *tip; + int ret = 0; - m = setup_midx(object_dir); + m = tip = setup_midx(object_dir); if (!m) return 1; if (checksum) { - while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum)) + while (m && strcmp(get_midx_checksum(m), checksum)) m = m->base_midx; - if (!m) - return 1; + if (!m) { + ret = error(_("could not find MIDX with checksum %s"), + checksum); + goto out; + } } printf("header: %08x %d %d %d %d\n", @@ -82,9 +86,10 @@ static int read_midx_file(const char *object_dir, const char *checksum, } } - close_midx(m); +out: + close_midx(tip); - return 0; + return ret; } static int read_midx_checksum(const char *object_dir) @@ -94,7 +99,7 @@ static int read_midx_checksum(const char *object_dir) m = setup_midx(object_dir); if (!m) return 1; - printf("%s\n", hash_to_hex(get_midx_checksum(m))); + printf("%s\n", get_midx_checksum(m)); close_midx(m); return 0; diff --git a/t/meson.build b/t/meson.build index d3d0be28224b9c49d8f715e3a09ec648ae5470cb..45839f9dae452eeb9d12b19d3455e04076054edf 100644 --- a/t/meson.build +++ b/t/meson.build @@ -614,6 +614,7 @@ 
integration_tests = [ 't5332-multi-pack-reuse.sh', 't5333-pseudo-merge-bitmaps.sh', 't5334-incremental-multi-pack-index.sh', + 't5335-compact-multi-pack-index.sh', 't5351-unpack-large-objects.sh', 't5400-send-pack.sh', 't5401-update-hooks.sh', diff --git a/t/t0450/adoc-help-mismatches b/t/t0450/adoc-help-mismatches index 8ee2d3f7c815023d2c9520d270489a0daf36896f..e8d6c13ccd0333614acf0d31c9b2613f80d1fc1b 100644 --- a/t/t0450/adoc-help-mismatches +++ b/t/t0450/adoc-help-mismatches @@ -33,7 +33,6 @@ merge merge-file merge-index merge-one-file -multi-pack-index name-rev notes push diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 93f319a4b29fbb3d3899a1d1f3914dd7766dd672..03676d37b98a90c6209b2d3c3ae641e9bfec9a79 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -450,12 +450,7 @@ test_expect_success 'verify invalid chunk offset' ' "improper chunk offset(s)" ' -test_expect_success 'verify packnames out of order' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ - "pack names out of order" -' - -test_expect_success 'verify packnames out of order' ' +test_expect_success 'verify missing pack' ' corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ "failed to load pack" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh new file mode 100755 index 0000000000000000000000000000000000000000..a306f5043052b6fcabbf7d2cb390795e01930ac3 --- /dev/null +++ b/t/t5335-compact-multi-pack-index.sh @@ -0,0 +1,218 @@ +#!/bin/sh + +test_description='multi-pack-index compaction' + +. 
./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +nth_line() { + local n="$1" + shift + awk "NR==$n" "$@" +} + +write_packs () { + for c in "$@" + do + test_commit "$c" && + + git pack-objects --all --unpacked $packdir/pack-$c && + git prune-packed && + + git multi-pack-index write --incremental --bitmap || return 1 + done +} + +test_midx_layer_packs () { + local checksum="$1" && + shift && + + test-tool read-midx $objdir "$checksum" >out && + + printf "%s\n" "$@" >expect && + # NOTE: do *not* pipe through sort here, we want to ensure the + # order of packs is preserved during compaction. + grep "^pack-" out | cut -d"-" -f2 >actual && + + test_cmp expect actual +} + +test_midx_layer_object_uniqueness () { + : >objs.all + while read layer + do + test-tool read-midx --show-objects $objdir "$layer" >out && + grep "\.pack$" out | cut -d" " -f1 | sort >objs.layer && + test_stdout_line_count = 0 comm -12 objs.all objs.layer && + cat objs.all objs.layer | sort >objs.tmp && + mv objs.tmp objs.all || return 1 + done <$midx_chain +} + +test_expect_success 'MIDX compaction with lex-ordered pack names' ' + git init midx-compact-lex-order && + ( + cd midx-compact-lex-order && + + write_packs A B C D E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' + git init midx-compact-non-lex-order && + ( + cd 
midx-compact-non-lex-order && + + write_packs D C A B E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +midx_objs_by_pack () { + awk '/\.pack$/ { split($3, a, "-"); print a[2], $1 }' | sort +} + +tag_objs_from_pack () { + objs="$(git rev-list --objects --no-object-names "$2")" && + printf "$1 %s\n" $objs | sort +} + +test_expect_success 'MIDX compaction preserves pack object selection' ' + git init midx-compact-preserve-selection && + ( + cd midx-compact-preserve-selection && + + test_commit A && + test_commit B && + + # Create two packs, one containing just the objects from + # A, and another containing all objects from the + # repository. 
+ p1="$(echo A | git pack-objects --revs --delta-base-offset \ + $packdir/pack-1)" && + p0="$(echo B | git pack-objects --revs --delta-base-offset \ + $packdir/pack-0)" && + + echo "pack-1-$p1.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + echo "pack-0-$p0.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + + write_packs C && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + + test-tool read-midx --show-objects $objdir \ + "$(nth_line 1 "$midx_chain")" >AB.info && + test-tool read-midx --show-objects $objdir \ + "$(nth_line 2 "$midx_chain")" >C.info && + + midx_objs_by_pack <AB.info >AB.actual && + midx_objs_by_pack <C.info >C.actual && + + { + tag_objs_from_pack 1 A && + tag_objs_from_pack 0 A..B + } | sort >AB.expect && + tag_objs_from_pack C B..C >C.expect && + + test_cmp AB.expect AB.actual && + test_cmp C.expect C.actual + ) +' + +test_expect_success 'MIDX compaction with bitmaps' ' + git init midx-compact-with-bitmaps && + ( + cd midx-compact-with-bitmaps && + + write_packs foo bar baz quux woot && + + test-tool read-midx --bitmap $objdir >bitmap.expect && + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test-tool read-midx --bitmap $objdir >bitmap.actual && + + test_cmp bitmap.expect bitmap.actual && + + true + ) +' + +test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' + git init midx-compact-with-bitmaps-non-trivial && + ( + cd midx-compact-with-bitmaps-non-trivial && + + git branch -m main && + + # D(4) + # / + # A(1) --- B(2) --- C(3) --- G(7) + # \ + # E(5) --- F(6) + write_packs A B C && + git checkout -b side && + write_packs D && + git checkout -b other B && + write_packs E F && + git checkout main && + write_packs G && + + cat $midx_chain && + + # Compact layers 2-4, leaving us with: + # + # [A, [B, C, D], E, F, G] + git multi-pack-index
compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + + # Then compact the top two layers, condensing the above + # such that the new 4th layer contains F and G. + # + # [A, [B, C, D], E, [F, G]] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 4 "$midx_chain")" \ + "$(nth_line 5 "$midx_chain")" && + + cat $midx_chain + ) +' + +test_done