diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc
index 2f642697e9e106c3c4c96f7c22d66aec6c7a27f4..458bb87363386f934431a1f1bdebc4e4231259b6 100644
--- a/Documentation/git-multi-pack-index.adoc
+++ b/Documentation/git-multi-pack-index.adoc
@@ -9,7 +9,14 @@ git-multi-pack-index - Write and verify multi-pack-indexes
SYNOPSIS
--------
[verse]
-'git multi-pack-index' [--object-dir=<dir>] [--[no-]bitmap]
+'git multi-pack-index' [<options>] write [--preferred-pack=<pack>]
+	[--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]
+	[--refs-snapshot=<path>]
+'git multi-pack-index' [<options>] compact [--[no-]incremental]
+	[--[no-]bitmap] <from> <to>
+'git multi-pack-index' [<options>] verify
+'git multi-pack-index' [<options>] expire
+'git multi-pack-index' [<options>] repack [--batch-size=<size>]
DESCRIPTION
-----------
@@ -18,6 +25,8 @@ Write or verify a multi-pack-index (MIDX) file.
OPTIONS
-------
+The following command-line options are applicable to all sub-commands:
+
--object-dir=<dir>::
Use given directory for the location of Git objects. We check
`<dir>/packs/multi-pack-index` for the current MIDX file, and
@@ -73,7 +82,18 @@ marker).
Write an incremental MIDX file containing only objects
and packs not present in an existing MIDX layer.
Migrates non-incremental MIDXs to incremental ones when
- necessary. Incompatible with `--bitmap`.
+ necessary.
+--
+
+compact::
+ Write a new MIDX layer containing only objects and packs present
+ in the range `<from>` to `<to>`, where both arguments are
+ checksums of existing layers in the MIDX chain.
++
+--
+ --incremental::
+ Write the result to a MIDX chain instead of writing a
+ stand-alone MIDX. Incompatible with `--bitmap`.
--
verify::
diff --git a/Makefile b/Makefile
index 6fc322ff88184d596f70947a63606b95347b3e43..f3fc3d99ea199bab062a9b2b8de9e7f45472ce9d 100644
--- a/Makefile
+++ b/Makefile
@@ -1201,6 +1201,9 @@ LIB_OBJS += object-file.o
LIB_OBJS += object-name.o
LIB_OBJS += object.o
LIB_OBJS += odb.o
+LIB_OBJS += odb/source.o
+LIB_OBJS += odb/source-files.o
+LIB_OBJS += odb/streaming.o
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
@@ -1294,7 +1297,6 @@ LIB_OBJS += split-index.o
LIB_OBJS += stable-qsort.o
LIB_OBJS += statinfo.o
LIB_OBJS += strbuf.o
-LIB_OBJS += streaming.o
LIB_OBJS += string-list.o
LIB_OBJS += strmap.o
LIB_OBJS += strvec.o
diff --git a/archive-tar.c b/archive-tar.c
index 73b63ddc41bad6072aa68dcddb18f271791f1d8c..0fc70d13a8807eb51c9e44b494ec2aadbd2d69db 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -12,8 +12,8 @@
#include "tar.h"
#include "archive.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "strbuf.h"
-#include "streaming.h"
#include "run-command.h"
#include "write-or-die.h"
@@ -129,22 +129,20 @@ static void write_trailer(void)
*/
static int stream_blocked(struct repository *r, const struct object_id *oid)
{
- struct git_istream *st;
- enum object_type type;
- unsigned long sz;
+ struct odb_read_stream *st;
char buf[BLOCKSIZE];
ssize_t readlen;
- st = open_istream(r, oid, &type, &sz, NULL);
+ st = odb_read_stream_open(r->objects, oid, NULL);
if (!st)
return error(_("cannot stream blob %s"), oid_to_hex(oid));
for (;;) {
- readlen = read_istream(st, buf, sizeof(buf));
+ readlen = odb_read_stream_read(st, buf, sizeof(buf));
if (readlen <= 0)
break;
do_write_blocked(buf, readlen);
}
- close_istream(st);
+ odb_read_stream_close(st);
if (!readlen)
finish_record();
return readlen;
diff --git a/archive-zip.c b/archive-zip.c
index bea5bdd43dc43e3c4bbae4efc1a09e110b4898c5..97ea8d60d6187b35de7f5fd6ea8bc5c529679bc3 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -10,9 +10,9 @@
#include "gettext.h"
#include "git-zlib.h"
#include "hex.h"
-#include "streaming.h"
#include "utf8.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "strbuf.h"
#include "userdiff.h"
#include "write-or-die.h"
@@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args,
enum zip_method method;
unsigned char *out;
void *deflated = NULL;
- struct git_istream *stream = NULL;
+ struct odb_read_stream *stream = NULL;
unsigned long flags = 0;
int is_binary = -1;
const char *path_without_prefix = path + args->baselen;
@@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args,
method = ZIP_METHOD_DEFLATE;
if (!buffer) {
- enum object_type type;
- stream = open_istream(args->repo, oid, &type, &size,
- NULL);
+ stream = odb_read_stream_open(args->repo->objects, oid, NULL);
if (!stream)
return error(_("cannot stream blob %s"),
oid_to_hex(oid));
+ size = stream->size;
flags |= ZIP_STREAM;
out = NULL;
} else {
@@ -429,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args,
ssize_t readlen;
for (;;) {
- readlen = read_istream(stream, buf, sizeof(buf));
+ readlen = odb_read_stream_read(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
@@ -439,7 +438,7 @@ static int write_zip_entry(struct archiver_args *args,
buf, readlen);
write_or_die(1, buf, readlen);
}
- close_istream(stream);
+ odb_read_stream_close(stream);
if (readlen)
return readlen;
@@ -462,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args,
zstream.avail_out = sizeof(compressed);
for (;;) {
- readlen = read_istream(stream, buf, sizeof(buf));
+ readlen = odb_read_stream_read(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
@@ -486,7 +485,7 @@ static int write_zip_entry(struct archiver_args *args,
}
}
- close_istream(stream);
+ odb_read_stream_close(stream);
if (readlen)
return readlen;
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 983ecec837b03beddeaf5b0825a183b37eb5a9f0..d8bb8923bce7c2bb02b8a3fed4aa39377664bb3f 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -18,13 +18,13 @@
#include "list-objects-filter-options.h"
#include "parse-options.h"
#include "userdiff.h"
-#include "streaming.h"
#include "oid-array.h"
#include "packfile.h"
#include "pack-bitmap.h"
#include "object-file.h"
#include "object-name.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "replace-object.h"
#include "promisor-remote.h"
#include "mailmap.h"
@@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode,
static int stream_blob(const struct object_id *oid)
{
- if (stream_blob_to_fd(1, oid, NULL, 0))
+ if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0))
die("unable to stream %s to stdout", oid_to_hex(oid));
return 0;
}
@@ -807,11 +807,14 @@ struct for_each_object_payload {
void *payload;
};
-static int batch_one_object_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *_payload)
+static int batch_one_object_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *_payload)
{
struct for_each_object_payload *payload = _payload;
+ if (oi && oi->whence == OI_PACKED)
+ return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
+ payload->payload);
return payload->callback(oid, NULL, 0, payload->payload);
}
@@ -847,8 +850,15 @@ static void batch_each_object(struct batch_options *opt,
.payload = _payload,
};
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
+ &payload, flags);
+ if (ret)
+ break;
+ }
if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
batch_one_object_bitmapped, &payload)) {
@@ -862,8 +872,15 @@ static void batch_each_object(struct batch_options *opt,
&payload, flags);
}
} else {
- for_each_packed_object(the_repository, batch_one_object_packed,
- &payload, flags);
+ struct object_info oi = { 0 };
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ int ret = packfile_store_for_each_object(files->packed, &oi,
+ batch_one_object_oi, &payload, flags);
+ if (ret)
+ break;
+ }
}
free_bitmap_index(bitmap);
@@ -923,7 +940,7 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
batch_each_object(opt, batch_unordered_object,
- FOR_EACH_OBJECT_PACK_ORDER, &cb);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 7849005ccb15ff40fbccc32059d6a04f75b8d44e..a41f95191e79aadc410ce401498b43265b6a6f49 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -875,6 +875,7 @@ static void end_packfile(void)
running = 1;
clear_delta_base_cache();
if (object_count) {
+ struct odb_source_files *files = odb_source_files_downcast(pack_data->repo->objects->sources);
struct packed_git *new_p;
struct object_id cur_pack_oid;
char *idx_name;
@@ -900,8 +901,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles,
- idx_name, 1);
+ new_p = packfile_store_load_pack(files->packed, idx_name, 1);
if (!new_p)
die(_("core Git rejected index %s"), idx_name);
all_packs[pack_id] = new_p;
@@ -955,7 +955,7 @@ static int store_object(
struct object_id *oidout,
uintmax_t mark)
{
- struct packfile_store *packs = the_repository->objects->packfiles;
+ struct odb_source *source;
void *out, *delta;
struct object_entry *e;
unsigned char hdr[96];
@@ -979,7 +979,13 @@ static int store_object(
if (e->idx.offset) {
duplicate_count_by_type[type]++;
return 1;
- } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) {
+ }
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+
+ if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid))
+ continue;
e->type = type;
e->pack_id = MAX_PACK_ID;
e->idx.offset = 1; /* just not zero! */
@@ -1096,10 +1102,10 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint)
static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
{
- struct packfile_store *packs = the_repository->objects->packfiles;
size_t in_sz = 64 * 1024, out_sz = 64 * 1024;
unsigned char *in_buf = xmalloc(in_sz);
unsigned char *out_buf = xmalloc(out_sz);
+ struct odb_source *source;
struct object_entry *e;
struct object_id oid;
unsigned long hdrlen;
@@ -1179,24 +1185,31 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
if (e->idx.offset) {
duplicate_count_by_type[OBJ_BLOB]++;
truncate_pack(&checkpoint);
+ goto out;
+ }
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
- } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) {
+ if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid))
+ continue;
e->type = OBJ_BLOB;
e->pack_id = MAX_PACK_ID;
e->idx.offset = 1; /* just not zero! */
duplicate_count_by_type[OBJ_BLOB]++;
truncate_pack(&checkpoint);
-
- } else {
- e->depth = 0;
- e->type = OBJ_BLOB;
- e->pack_id = pack_id;
- e->idx.offset = offset;
- e->idx.crc32 = crc32_end(pack_file);
- object_count++;
- object_count_by_type[OBJ_BLOB]++;
+ goto out;
}
+ e->depth = 0;
+ e->type = OBJ_BLOB;
+ e->pack_id = pack_id;
+ e->idx.offset = offset;
+ e->idx.crc32 = crc32_end(pack_file);
+ object_count++;
+ object_count_by_type[OBJ_BLOB]++;
+
+out:
free(in_buf);
free(out_buf);
}
diff --git a/builtin/fsck.c b/builtin/fsck.c
index c489582faa6650501906984164799403a61fd896..96107695ae1ed111dcdb39abce7b8c52ed650ed1 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -13,11 +13,11 @@
#include "fsck.h"
#include "parse-options.h"
#include "progress.h"
-#include "streaming.h"
#include "packfile.h"
#include "object-file.h"
#include "object-name.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "path.h"
#include "read-cache-ll.h"
#include "replace-object.h"
@@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
return 0;
}
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
+ struct object_info *io UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
- return; /* not part of our original set */
+ return 0; /* not part of our original set */
if (obj->flags & REACHABLE)
- return; /* reachable objects already traversed */
+ return 0; /* reachable objects already traversed */
/*
* Avoid passing OBJ_NONE to fsck_walk, which will parse the object
@@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
-}
-static int mark_loose_unreachable_referents(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
- return 0;
-}
-
-static int mark_packed_unreachable_referents(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
return 0;
}
@@ -340,7 +327,8 @@ static void check_unreachable_object(struct object *obj)
}
f = xfopen(filename, "w");
if (obj->type == OBJ_BLOB) {
- if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1))
+ if (odb_stream_blob_to_fd(the_repository->objects, fileno(f),
+ &obj->oid, NULL, 1))
die_errno(_("could not write '%s'"), filename);
} else
fprintf(f, "%s\n", describe_object(&obj->oid));
@@ -393,12 +381,8 @@ static void check_connectivity(void)
* and ignore any that weren't present in our earlier
* traversal.
*/
- for_each_loose_object(the_repository->objects,
- mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_unreachable_referents,
- NULL,
- 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_unreachable_referents, NULL, 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -847,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
fsck_resolve_undo(istate, index_path);
}
-static void mark_object_for_connectivity(const struct object_id *oid)
+static int mark_object_for_connectivity(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data UNUSED)
{
struct object *obj = lookup_unknown_object(the_repository, oid);
obj->flags |= HAS_OBJ;
-}
-
-static int mark_loose_for_connectivity(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
- return 0;
-}
-
-static int mark_packed_for_connectivity(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
return 0;
}
@@ -1000,10 +970,8 @@ int cmd_fsck(int argc,
fsck_refs(the_repository);
if (connectivity_only) {
- for_each_loose_object(the_repository->objects,
- mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_for_connectivity, NULL, 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_object_for_connectivity, NULL, 0);
} else {
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next)
diff --git a/builtin/gc.c b/builtin/gc.c
index 92c6e7b954faffa06a6801c58052e602e4956421..883eb191018cdcf1217d4fbc08f42c56b815170b 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -467,37 +467,14 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED)
static int too_many_loose_objects(int limit)
{
/*
- * Quickly check if a "gc" is needed, by estimating how
- * many loose objects there are. Because SHA-1 is evenly
- * distributed, we can check only one and get a reasonable
- * estimate.
+ * This is weird, but stems from legacy behaviour: the GC auto
+ * threshold was always essentially interpreted as if it was rounded up
- * to the next multiple of 256, so we retain this behaviour for now.
*/
- DIR *dir;
- struct dirent *ent;
- int auto_threshold;
- int num_loose = 0;
- int needed = 0;
- const unsigned hexsz_loose = the_hash_algo->hexsz - 2;
- char *path;
-
- path = repo_git_path(the_repository, "objects/17");
- dir = opendir(path);
- free(path);
- if (!dir)
- return 0;
-
- auto_threshold = DIV_ROUND_UP(limit, 256);
- while ((ent = readdir(dir)) != NULL) {
- if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose ||
- ent->d_name[hexsz_loose] != '\0')
- continue;
- if (++num_loose > auto_threshold) {
- needed = 1;
- break;
- }
- }
- closedir(dir);
- return needed;
+ unsigned long auto_threshold = DIV_ROUND_UP(limit, 256) * 256;
+ return odb_source_loose_count_objects(the_repository->objects->sources,
+ ODB_COUNT_OBJECTS_APPROXIMATE)
+ > auto_threshold;
}
static struct packed_git *find_base_packs(struct string_list *packs,
@@ -592,7 +569,8 @@ static uint64_t total_ram(void)
static uint64_t estimate_repack_memory(struct gc_config *cfg,
struct packed_git *pack)
{
- unsigned long nr_objects = repo_approximate_object_count(the_repository);
+ unsigned long nr_objects = odb_count_objects(the_repository->objects,
+ ODB_COUNT_OBJECTS_APPROXIMATE);
size_t os_cache, heap;
if (!pack || !nr_objects)
diff --git a/builtin/grep.c b/builtin/grep.c
index 53cccf2d25068c664c27643873b8c6d7fe99d848..61379909b885fa56f2e79123ea7fee98a83bc417 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1213,8 +1213,16 @@ int cmd_grep(int argc,
*/
if (recurse_submodules)
repo_read_gitmodules(the_repository, 1);
- if (startup_info->have_repository)
- packfile_store_prepare(the_repository->objects->packfiles);
+
+ if (startup_info->have_repository) {
+ struct odb_source *source;
+
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ packfile_store_prepare(files->packed);
+ }
+ }
start_threads(&opt);
} else {
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 699fe678cd60b0af8b7edf77f2204a064140d94a..d1e47279a8c7c94720d99fc85d1a2da1b0380a5b 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -16,12 +16,12 @@
#include "progress.h"
#include "fsck.h"
#include "strbuf.h"
-#include "streaming.h"
#include "thread-utils.h"
#include "packfile.h"
#include "pack-revindex.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "oid-array.h"
#include "oidset.h"
#include "path.h"
@@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid,
struct compare_data {
struct object_entry *entry;
- struct git_istream *st;
+ struct odb_read_stream *st;
unsigned char *buf;
unsigned long buf_size;
};
@@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
}
while (size) {
- ssize_t len = read_istream(data->st, data->buf, size);
+ ssize_t len = odb_read_stream_read(data->st, data->buf, size);
if (len == 0)
die(_("SHA1 COLLISION FOUND WITH %s !"),
oid_to_hex(&data->entry->idx.oid));
@@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
static int check_collison(struct object_entry *entry)
{
struct compare_data data;
- enum object_type type;
- unsigned long size;
if (entry->size <= repo_settings_get_big_file_threshold(the_repository) ||
entry->type != OBJ_BLOB)
@@ -807,15 +805,14 @@ static int check_collison(struct object_entry *entry)
memset(&data, 0, sizeof(data));
data.entry = entry;
- data.st = open_istream(the_repository, &entry->idx.oid, &type, &size,
- NULL);
+ data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL);
if (!data.st)
return -1;
- if (size != entry->size || type != entry->type)
+ if (data.st->size != entry->size || data.st->type != entry->type)
die(_("SHA1 COLLISION FOUND WITH %s !"),
oid_to_hex(&entry->idx.oid));
unpack_data(entry, compare_objects, &data);
- close_istream(data.st);
+ odb_read_stream_close(data.st);
free(data.buf);
return 0;
}
@@ -1640,9 +1637,11 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
rename_tmp_packfile(&final_index_name, curr_index_name, &index_name,
hash, "idx", 1);
- if (do_fsck_object && startup_info->have_repository)
- packfile_store_load_pack(the_repository->objects->packfiles,
- final_index_name, 0);
+ if (do_fsck_object && startup_info->have_repository) {
+ struct odb_source_files *files =
+ odb_source_files_downcast(the_repository->objects->sources);
+ packfile_store_load_pack(files->packed, final_index_name, 0);
+ }
if (!from_stdin) {
printf("%s\n", hash_to_hex(hash));
diff --git a/builtin/log.c b/builtin/log.c
index c8319b8af38c8c732ddf632a0d3ced68487f8cd3..d4cf9c59c81a8397bd1162e09844870a31bedd77 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -16,6 +16,7 @@
#include "refs.h"
#include "object-name.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "pager.h"
#include "color.h"
#include "commit.h"
@@ -35,7 +36,6 @@
#include "parse-options.h"
#include "line-log.h"
#include "branch.h"
-#include "streaming.h"
#include "version.h"
#include "mailmap.h"
#include "progress.h"
@@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
fflush(rev->diffopt.file);
if (!rev->diffopt.flags.textconv_set_via_cmdline ||
!rev->diffopt.flags.allow_textconv)
- return stream_blob_to_fd(1, oid, NULL, 0);
+ return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
if (get_oid_with_context(the_repository, obj_name,
GET_OID_RECORD_PATH,
@@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
!textconv_object(the_repository, obj_context.path,
obj_context.mode, &oidc, 1, &buf, &size)) {
object_context_release(&obj_context);
- return stream_blob_to_fd(1, oid, NULL, 0);
+ return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
}
if (!buf)
diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
index 5f364aa816ba25d48c8f59ffd3a33c3344ce84a8..40afa8f1ed8d9ca3d47fc5db37ff98458661d3bd 100644
--- a/builtin/multi-pack-index.c
+++ b/builtin/multi-pack-index.c
@@ -13,8 +13,13 @@
#include "repository.h"
#define BUILTIN_MIDX_WRITE_USAGE \
- N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]" \
- "[--refs-snapshot=<path>]")
+ N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]\n" \
+ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \
+ " [--refs-snapshot=<path>]")
+
+#define BUILTIN_MIDX_COMPACT_USAGE \
+ N_("git multi-pack-index [<options>] compact [--[no-]incremental]\n" \
+ " [--[no-]bitmap] <from> <to>")
#define BUILTIN_MIDX_VERIFY_USAGE
N_("git multi-pack-index [<options>] verify")
@@ -29,6 +34,10 @@ static char const * const builtin_multi_pack_index_write_usage[] = {
BUILTIN_MIDX_WRITE_USAGE,
NULL
};
+static char const * const builtin_multi_pack_index_compact_usage[] = {
+ BUILTIN_MIDX_COMPACT_USAGE,
+ NULL
+};
static char const * const builtin_multi_pack_index_verify_usage[] = {
BUILTIN_MIDX_VERIFY_USAGE,
NULL
@@ -43,6 +52,7 @@ static char const * const builtin_multi_pack_index_repack_usage[] = {
};
static char const * const builtin_multi_pack_index_usage[] = {
BUILTIN_MIDX_WRITE_USAGE,
+ BUILTIN_MIDX_COMPACT_USAGE,
BUILTIN_MIDX_VERIFY_USAGE,
BUILTIN_MIDX_EXPIRE_USAGE,
BUILTIN_MIDX_REPACK_USAGE,
@@ -84,6 +94,8 @@ static struct option common_opts[] = {
N_("directory"),
N_("object directory containing set of packfile and pack-index pairs"),
parse_object_dir),
+ OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"),
+ MIDX_PROGRESS),
OPT_END(),
};
@@ -138,8 +150,6 @@ static int cmd_multi_pack_index_write(int argc, const char **argv,
N_("pack for reuse when computing a multi-pack bitmap")),
OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"),
MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX),
- OPT_BIT(0, "progress", &opts.flags,
- N_("force progress reporting"), MIDX_PROGRESS),
OPT_BIT(0, "incremental", &opts.flags,
N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL),
OPT_BOOL(0, "stdin-packs", &opts.stdin_packs,
@@ -194,14 +204,71 @@ static int cmd_multi_pack_index_write(int argc, const char **argv,
return ret;
}
+static int cmd_multi_pack_index_compact(int argc, const char **argv,
+ const char *prefix,
+ struct repository *repo)
+{
+ struct multi_pack_index *m, *cur;
+ struct multi_pack_index *from_midx = NULL;
+ struct multi_pack_index *to_midx = NULL;
+ struct odb_source *source;
+ int ret;
+
+ struct option *options;
+ static struct option builtin_multi_pack_index_compact_options[] = {
+ OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"),
+ MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX),
+ OPT_BIT(0, "incremental", &opts.flags,
+ N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL),
+ OPT_END(),
+ };
+
+ repo_config(repo, git_multi_pack_index_write_config, NULL);
+
+ options = add_common_options(builtin_multi_pack_index_compact_options);
+
+ trace2_cmd_mode(argv[0]);
+
+ if (isatty(2))
+ opts.flags |= MIDX_PROGRESS;
+ argc = parse_options(argc, argv, prefix,
+ options, builtin_multi_pack_index_compact_usage,
+ 0);
+
+ if (argc != 2)
+ usage_with_options(builtin_multi_pack_index_compact_usage,
+ options);
+ source = handle_object_dir_option(the_repository);
+
+ FREE_AND_NULL(options);
+
+ m = get_multi_pack_index(source);
+
+ for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) {
+ const char *midx_csum = get_midx_checksum(cur);
+
+ if (!from_midx && !strcmp(midx_csum, argv[0]))
+ from_midx = cur;
+ if (!to_midx && !strcmp(midx_csum, argv[1]))
+ to_midx = cur;
+ }
+
+ if (!from_midx)
+ die(_("could not find MIDX 'from': %s"), argv[0]);
+ if (!to_midx)
+ die(_("could not find MIDX 'to': %s"), argv[1]);
+
+ ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags);
+
+ return ret;
+}
+
static int cmd_multi_pack_index_verify(int argc, const char **argv,
const char *prefix,
struct repository *repo UNUSED)
{
struct option *options;
static struct option builtin_multi_pack_index_verify_options[] = {
- OPT_BIT(0, "progress", &opts.flags,
- N_("force progress reporting"), MIDX_PROGRESS),
OPT_END(),
};
struct odb_source *source;
@@ -231,8 +298,6 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv,
{
struct option *options;
static struct option builtin_multi_pack_index_expire_options[] = {
- OPT_BIT(0, "progress", &opts.flags,
- N_("force progress reporting"), MIDX_PROGRESS),
OPT_END(),
};
struct odb_source *source;
@@ -264,8 +329,6 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv,
static struct option builtin_multi_pack_index_repack_options[] = {
OPT_UNSIGNED(0, "batch-size", &opts.batch_size,
N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")),
- OPT_BIT(0, "progress", &opts.flags,
- N_("force progress reporting"), MIDX_PROGRESS),
OPT_END(),
};
struct odb_source *source;
@@ -300,6 +363,7 @@ int cmd_multi_pack_index(int argc,
struct option builtin_multi_pack_index_options[] = {
OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack),
OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write),
+ OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact),
OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify),
OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire),
OPT_END(),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 7937106ec53555ef156337ebda3b09efa9d374dd..4522a0eb4e421d203c8ab6731362cab80044b21e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -22,7 +22,6 @@
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
-#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
#include "delta-islands.h"
@@ -33,6 +32,7 @@
#include "packfile.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "replace-object.h"
#include "dir.h"
#include "midx.h"
@@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size)
return stream.total_out;
}
-static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
+static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f,
const struct object_id *oid)
{
git_zstream stream;
@@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct hashfi
for (;;) {
ssize_t readlen;
int zret = Z_OK;
- readlen = read_istream(st, ibuf, sizeof(ibuf));
+ readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf));
if (readlen == -1)
die(_("unable to read %s"), oid_to_hex(oid));
@@ -513,17 +513,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
unsigned hdrlen;
enum object_type type;
void *buf;
- struct git_istream *st = NULL;
+ struct odb_read_stream *st = NULL;
const unsigned hashsz = the_hash_algo->rawsz;
if (!usable_delta) {
if (oe_type(entry) == OBJ_BLOB &&
oe_size_greater_than(&to_pack, entry,
repo_settings_get_big_file_threshold(the_repository)) &&
- (st = open_istream(the_repository, &entry->idx.oid, &type,
- &size, NULL)) != NULL)
+ (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
+ NULL)) != NULL) {
buf = NULL;
- else {
+ type = st->type;
+ size = st->size;
+ } else {
buf = odb_read_object(the_repository->objects,
&entry->idx.oid, &type,
&size);
@@ -577,7 +579,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
dheader[--pos] = 128 | (--ofs & 127);
if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
if (st)
- close_istream(st);
+ odb_read_stream_close(st);
free(buf);
return 0;
}
@@ -591,7 +593,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
*/
if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
if (st)
- close_istream(st);
+ odb_read_stream_close(st);
free(buf);
return 0;
}
@@ -601,7 +603,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
} else {
if (limit && hdrlen + datalen + hashsz >= limit) {
if (st)
- close_istream(st);
+ odb_read_stream_close(st);
free(buf);
return 0;
}
@@ -609,7 +611,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
}
if (st) {
datalen = write_large_blob_data(st, f, &entry->idx.oid);
- close_istream(st);
+ odb_read_stream_close(st);
} else {
hashwrite(f, buf, datalen);
free(buf);
@@ -1527,49 +1529,54 @@ static int want_cruft_object_mtime(struct repository *r,
const struct object_id *oid,
unsigned flags, uint32_t mtime)
{
- struct packed_git **cache;
+ struct odb_source *source;
- for (cache = kept_pack_cache(r, flags); *cache; cache++) {
- struct packed_git *p = *cache;
- off_t ofs;
- uint32_t candidate_mtime;
+ for (source = r->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct packed_git **cache = packfile_store_get_kept_pack_cache(files->packed, flags);
- ofs = find_pack_entry_one(oid, p);
- if (!ofs)
- continue;
+ for (; *cache; cache++) {
+ struct packed_git *p = *cache;
+ off_t ofs;
+ uint32_t candidate_mtime;
- /*
- * We have a copy of the object 'oid' in a non-cruft
- * pack. We can avoid packing an additional copy
- * regardless of what the existing copy's mtime is since
- * it is outside of a cruft pack.
- */
- if (!p->is_cruft)
- return 0;
-
- /*
- * If we have a copy of the object 'oid' in a cruft
- * pack, then either read the cruft pack's mtime for
- * that object, or, if that can't be loaded, assume the
- * pack's mtime itself.
- */
- if (!load_pack_mtimes(p)) {
- uint32_t pos;
- if (offset_to_pack_pos(p, ofs, &pos) < 0)
+ ofs = find_pack_entry_one(oid, p);
+ if (!ofs)
continue;
- candidate_mtime = nth_packed_mtime(p, pos);
- } else {
- candidate_mtime = p->mtime;
- }
- /*
- * We have a surviving copy of the object in a cruft
- * pack whose mtime is greater than or equal to the one
- * we are considering. We can thus avoid packing an
- * additional copy of that object.
- */
- if (mtime <= candidate_mtime)
- return 0;
+ /*
+ * We have a copy of the object 'oid' in a non-cruft
+ * pack. We can avoid packing an additional copy
+ * regardless of what the existing copy's mtime is since
+ * it is outside of a cruft pack.
+ */
+ if (!p->is_cruft)
+ return 0;
+
+ /*
+ * If we have a copy of the object 'oid' in a cruft
+ * pack, then either read the cruft pack's mtime for
+ * that object, or, if that can't be loaded, assume the
+ * pack's mtime itself.
+ */
+ if (!load_pack_mtimes(p)) {
+ uint32_t pos;
+ if (offset_to_pack_pos(p, ofs, &pos) < 0)
+ continue;
+ candidate_mtime = nth_packed_mtime(p, pos);
+ } else {
+ candidate_mtime = p->mtime;
+ }
+
+ /*
+ * We have a surviving copy of the object in a cruft
+ * pack whose mtime is greater than or equal to the one
+ * we are considering. We can thus avoid packing an
+ * additional copy of that object.
+ */
+ if (mtime <= candidate_mtime)
+ return 0;
+ }
}
return -1;
@@ -1622,9 +1629,9 @@ static int want_found_object(const struct object_id *oid, int exclude,
*/
unsigned flags = 0;
if (ignore_packed_keep_on_disk)
- flags |= ON_DISK_KEEP_PACKS;
+ flags |= KEPT_PACK_ON_DISK;
if (ignore_packed_keep_in_core)
- flags |= IN_CORE_KEEP_PACKS;
+ flags |= KEPT_PACK_IN_CORE;
/*
* If the object is in a pack that we want to ignore, *and* we
@@ -1747,13 +1754,17 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
}
}
- for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) {
- struct packed_git *p = e->pack;
- want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
- if (!exclude && want > 0)
- packfile_list_prepend(&the_repository->objects->packfiles->packs, p);
- if (want != -1)
- return want;
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+
+ for (e = files->packed->packs.head; e; e = e->next) {
+ struct packed_git *p = e->pack;
+ want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
+ if (!exclude && want > 0)
+ packfile_list_prepend(&files->packed->packs, p);
+ if (want != -1)
+ return want;
+ }
}
if (uri_protocols.nr) {
@@ -3904,7 +3915,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
strbuf_release(&buf);
@@ -3929,7 +3940,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
* an optimization during delta selection.
*/
revs.no_kept_objects = 1;
- revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+ revs.keep_pack_cache_flags |= KEPT_PACK_IN_CORE;
revs.blob_objects = 1;
revs.tree_objects = 1;
revs.tag_objects = 1;
@@ -4028,7 +4039,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE);
}
static int cruft_include_check(struct commit *commit, void *data)
@@ -4306,25 +4317,12 @@ static void show_edge(struct commit *commit)
}
static int add_object_in_unpacked_pack(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
+ struct object_info *oi,
void *data UNUSED)
{
if (cruft) {
- off_t offset;
- time_t mtime;
-
- if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(pack, pos);
- } else {
- mtime = pack->mtime;
- }
- offset = nth_packed_object_offset(pack, pos);
-
- add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
- NULL, mtime);
+ add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
+ oi->u.packed.offset, NULL, *oi->mtimep);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
@@ -4333,14 +4331,26 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(to_pack.repo,
- add_object_in_unpacked_pack,
- NULL,
- FOR_EACH_OBJECT_PACK_ORDER |
- FOR_EACH_OBJECT_LOCAL_ONLY |
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
- die(_("cannot open pack index"));
+ struct odb_source *source;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ };
+
+ odb_prepare_alternates(to_pack.repo->objects);
+ for (source = to_pack.repo->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+
+ if (!source->local)
+ continue;
+
+ if (packfile_store_for_each_object(files->packed, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
+ }
}
static int add_loose_object(const struct object_id *oid, const char *path,
diff --git a/commit-graph.c b/commit-graph.c
index 80be2ff2c39842675c962d262021a993e6b00cc5..c5c9ce282490a7922c576aeff527cc7ae58a10e9 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1485,24 +1485,16 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
-static int add_packed_commits(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
- void *data)
+static int add_packed_commits_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *data)
{
struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
- enum object_type type;
- off_t offset = nth_packed_object_offset(pack, pos);
- struct object_info oi = OBJECT_INFO_INIT;
if (ctx->progress)
display_progress(ctx->progress, ++ctx->progress_done);
- oi.typep = &type;
- if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
- die(_("unable to get type of object %s"), oid_to_hex(oid));
-
- if (type != OBJ_COMMIT)
+ if (*oi->typep != OBJ_COMMIT)
return 0;
oid_array_append(&ctx->oids, oid);
@@ -1511,6 +1503,22 @@ static int add_packed_commits(const struct object_id *oid,
return 0;
}
+static int add_packed_commits(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data)
+{
+ enum object_type type;
+ off_t offset = nth_packed_object_offset(pack, pos);
+ struct object_info oi = OBJECT_INFO_INIT;
+
+ oi.typep = &type;
+ if (packed_object_info(pack->repo, pack, offset, &oi) < 0)
+ die(_("unable to get type of object %s"), oid_to_hex(oid));
+
+ return add_packed_commits_oi(oid, &oi, data);
+}
+
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
{
struct commit_list *parent;
@@ -1933,7 +1941,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
goto cleanup;
}
for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1965,13 +1973,25 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
+ struct odb_source *source;
+ enum object_type type;
+ struct object_info oi = {
+ .typep = &type,
+ };
+
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
ctx->r,
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(ctx->r, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+
+ odb_prepare_alternates(ctx->r->objects);
+ for (source = ctx->r->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi,
+ ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+ }
+
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
@@ -2595,7 +2615,7 @@ int write_commit_graph(struct odb_source *source,
replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
}
- ctx.approx_nr_objects = repo_approximate_object_count(r);
+ ctx.approx_nr_objects = odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE);
if (ctx.append && g) {
for (i = 0; i < g->num_commits; i++) {
diff --git a/entry.c b/entry.c
index cae02eb50398d7cfcdac8b4c4382e067e9de02d9..7817aee362ed9e7e14e3a2b92c9cc0ab5f013673 100644
--- a/entry.c
+++ b/entry.c
@@ -2,13 +2,13 @@
#include "git-compat-util.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "dir.h"
#include "environment.h"
#include "gettext.h"
#include "hex.h"
#include "name-hash.h"
#include "sparse-index.h"
-#include "streaming.h"
#include "submodule.h"
#include "symlinks.h"
#include "progress.h"
@@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path,
if (fd < 0)
return -1;
- result |= stream_blob_to_fd(fd, &ce->oid, filter, 1);
+ result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1);
*fstat_done = fstat_checkout_output(fd, state, statbuf);
result |= close(fd);
diff --git a/git-compat-util.h b/git-compat-util.h
index 398e0fac4fab6007903fdd9fd31dcc28fe531a65..a7aa5f05fc9445551041607feba87a738910ca72 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -670,6 +670,14 @@ static inline int cast_size_t_to_int(size_t a)
return (int)a;
}
+static inline uint32_t u32_add(uint32_t a, uint32_t b)
+{
+ if (unsigned_add_overflows(a, b))
+ die("uint32_t overflow: %"PRIuMAX" + %"PRIuMAX,
+ (uintmax_t)a, (uintmax_t)b);
+ return a + b;
+}
+
static inline uint64_t u64_mult(uint64_t a, uint64_t b)
{
if (unsigned_mult_overflows(a, b))
diff --git a/http.c b/http.c
index 41f850db16d19f653e0ac7af2878ed1e34fee22f..8ea1b9d1f68c16143130aced426df3f224b6c429 100644
--- a/http.c
+++ b/http.c
@@ -2543,8 +2543,9 @@ int finish_http_pack_request(struct http_pack_request *preq)
void http_install_packfile(struct packed_git *p,
struct packfile_list *list_to_remove_from)
{
+ struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
packfile_list_remove(list_to_remove_from, p);
- packfile_store_add_pack(the_repository->objects->packfiles, p);
+ packfile_store_add_pack(files->packed, p);
}
struct http_pack_request *new_http_pack_request(
diff --git a/loose.c b/loose.c
index 56cf64b648bf80c3d7d8e0649f87687138018743..07333be6969fcc370b052fa1ed7c684442ff20af 100644
--- a/loose.c
+++ b/loose.c
@@ -3,6 +3,7 @@
#include "path.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/source-files.h"
#include "hex.h"
#include "repository.h"
#include "wrapper.h"
@@ -49,27 +50,29 @@ static int insert_loose_map(struct odb_source *source,
const struct object_id *oid,
const struct object_id *compat_oid)
{
- struct loose_object_map *map = source->loose->map;
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct loose_object_map *map = files->loose->map;
int inserted = 0;
inserted |= insert_oid_pair(map->to_compat, oid, compat_oid);
inserted |= insert_oid_pair(map->to_storage, compat_oid, oid);
if (inserted)
- oidtree_insert(source->loose->cache, compat_oid);
+ oidtree_insert(files->loose->cache, compat_oid);
return inserted;
}
static int load_one_loose_object_map(struct repository *repo, struct odb_source *source)
{
+ struct odb_source_files *files = odb_source_files_downcast(source);
struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
FILE *fp;
- if (!source->loose->map)
- loose_object_map_init(&source->loose->map);
- if (!source->loose->cache) {
- ALLOC_ARRAY(source->loose->cache, 1);
- oidtree_init(source->loose->cache);
+ if (!files->loose->map)
+ loose_object_map_init(&files->loose->map);
+ if (!files->loose->cache) {
+ ALLOC_ARRAY(files->loose->cache, 1);
+ oidtree_init(files->loose->cache);
}
insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree);
@@ -125,7 +128,8 @@ int repo_read_loose_object_map(struct repository *repo)
int repo_write_loose_object_map(struct repository *repo)
{
- kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat;
+ struct odb_source_files *files = odb_source_files_downcast(repo->objects->sources);
+ kh_oid_map_t *map = files->loose->map->to_compat;
struct lock_file lock;
int fd;
khiter_t iter;
@@ -231,7 +235,8 @@ int repo_loose_object_map_oid(struct repository *repo,
khiter_t pos;
for (source = repo->objects->sources; source; source = source->next) {
- struct loose_object_map *loose_map = source->loose->map;
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct loose_object_map *loose_map = files->loose->map;
if (!loose_map)
continue;
map = (to == repo->compat_hash_algo) ?
diff --git a/meson.build b/meson.build
index f1b3615659e56a78a8b6db37c9ba9b2e8591dcc8..c8a8b3882c0f9631aa780e1b1e43449bd6b4d348 100644
--- a/meson.build
+++ b/meson.build
@@ -397,6 +397,9 @@ libgit_sources = [
'object-name.c',
'object.c',
'odb.c',
+ 'odb/source.c',
+ 'odb/source-files.c',
+ 'odb/streaming.c',
'oid-array.c',
'oidmap.c',
'oidset.c',
@@ -490,7 +493,6 @@ libgit_sources = [
'stable-qsort.c',
'statinfo.c',
'strbuf.c',
- 'streaming.c',
'string-list.c',
'strmap.c',
'strvec.c',
diff --git a/midx-write.c b/midx-write.c
index e3e9be6d03cd6fd20e761eab3c4bd1ff4b4d6583..4e5de5a831c7cfbd20c52e98f6476844b7f5b549 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -108,12 +108,24 @@ struct write_midx_context {
int incremental;
uint32_t num_multi_pack_indexes_before;
+ struct multi_pack_index *compact_from;
+ struct multi_pack_index *compact_to;
+ int compact;
+
struct string_list *to_include;
struct repository *repo;
struct odb_source *source;
};
+static uint32_t midx_pack_perm(struct write_midx_context *ctx,
+ uint32_t orig_pack_int_id)
+{
+ if (ctx->compact)
+ orig_pack_int_id -= ctx->compact_from->num_packs_in_base;
+ return ctx->pack_perm[orig_pack_int_id];
+}
+
static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name)
{
@@ -317,6 +329,45 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
}
}
+static void midx_fanout_add(struct midx_fanout *fanout,
+ struct write_midx_context *ctx,
+ uint32_t start_pack,
+ uint32_t cur_fanout)
+{
+ uint32_t cur_pack;
+
+ if (ctx->m && !ctx->incremental)
+ midx_fanout_add_midx_fanout(fanout, ctx->m, cur_fanout,
+ ctx->preferred_pack_idx);
+
+ for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
+ int preferred = cur_pack == ctx->preferred_pack_idx;
+ midx_fanout_add_pack_fanout(fanout, ctx->info, cur_pack,
+ preferred, cur_fanout);
+ }
+
+ if (ctx->preferred_pack_idx != NO_PREFERRED_PACK &&
+ ctx->preferred_pack_idx < start_pack)
+ midx_fanout_add_pack_fanout(fanout, ctx->info,
+ ctx->preferred_pack_idx, 1,
+ cur_fanout);
+}
+
+static void midx_fanout_add_compact(struct midx_fanout *fanout,
+ struct write_midx_context *ctx,
+ uint32_t cur_fanout)
+{
+ struct multi_pack_index *m = ctx->compact_to;
+
+ ASSERT(ctx->compact);
+
+ while (m && m != ctx->compact_from->base_midx) {
+ midx_fanout_add_midx_fanout(fanout, m, cur_fanout,
+ NO_PREFERRED_PACK);
+ m = m->base_midx;
+ }
+}
+
/*
* It is possible to artificially get into a state where there are many
* duplicate copies of objects. That can create high memory pressure if
@@ -335,6 +386,9 @@ static void compute_sorted_entries(struct write_midx_context *ctx,
size_t alloc_objects, total_objects = 0;
struct midx_fanout fanout = { 0 };
+ if (ctx->compact)
+ ASSERT(!start_pack);
+
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
total_objects = st_add(total_objects,
ctx->info[cur_pack].p->num_objects);
@@ -353,23 +407,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx,
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
fanout.nr = 0;
- if (ctx->m && !ctx->incremental)
- midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
- ctx->preferred_pack_idx);
-
- for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
- int preferred = cur_pack == ctx->preferred_pack_idx;
- midx_fanout_add_pack_fanout(&fanout,
- ctx->info, cur_pack,
- preferred, cur_fanout);
- }
-
- if (ctx->preferred_pack_idx != NO_PREFERRED_PACK &&
- ctx->preferred_pack_idx < start_pack)
- midx_fanout_add_pack_fanout(&fanout, ctx->info,
- ctx->preferred_pack_idx, 1,
- cur_fanout);
-
+ if (ctx->compact)
+ midx_fanout_add_compact(&fanout, ctx, cur_fanout);
+ else
+ midx_fanout_add(&fanout, ctx, start_pack, cur_fanout);
midx_fanout_sort(&fanout);
/*
@@ -410,11 +451,6 @@ static int write_midx_pack_names(struct hashfile *f, void *data)
if (ctx->info[i].expired)
continue;
- if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0)
- BUG("incorrect pack-file order: %s before %s",
- ctx->info[i - 1].pack_name,
- ctx->info[i].pack_name);
-
writelen = strlen(ctx->info[i].pack_name) + 1;
hashwrite(f, ctx->info[i].pack_name, writelen);
written += writelen;
@@ -514,12 +550,12 @@ static int write_midx_object_offsets(struct hashfile *f,
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *obj = list++;
- if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
+ if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED)
BUG("object %s is in an expired pack with int-id %d",
oid_to_hex(&obj->oid),
obj->pack_int_id);
- hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);
+ hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id));
if (ctx->large_offsets_needed && obj->offset >> 31)
hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
@@ -620,8 +656,8 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[i];
data[i].nr = i;
- data[i].pack = ctx->pack_perm[e->pack_int_id];
- if (!e->preferred)
+ data[i].pack = midx_pack_perm(ctx, e->pack_int_id);
+ if (!e->preferred || ctx->compact)
data[i].pack |= (1U << 31);
data[i].offset = e->offset;
}
@@ -630,14 +666,14 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[data[i].nr];
- struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]];
+ struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)];
if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
pack->bitmap_pos = i + base_objects;
pack->bitmap_nr++;
pack_order[i] = data[i].nr;
}
for (i = 0; i < ctx->nr; i++) {
- struct pack_info *pack = &ctx->info[ctx->pack_perm[i]];
+ struct pack_info *pack = &ctx->info[i];
if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
pack->bitmap_pos = 0;
}
@@ -691,7 +727,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata,
struct object_entry *to = packlist_alloc(pdata, &from->oid);
oe_set_in_pack(pdata, to,
- ctx->info[ctx->pack_perm[from->pack_int_id]].p);
+ ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p);
}
trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo);
@@ -909,6 +945,21 @@ static int write_midx_bitmap(struct write_midx_context *ctx,
return ret;
}
+static int fill_pack_from_midx(struct pack_info *info,
+ struct multi_pack_index *m,
+ uint32_t pack_int_id)
+{
+ if (prepare_midx_pack(m, pack_int_id))
+ return error(_("could not load pack %d"), pack_int_id);
+
+ fill_pack_info(info,
+ m->packs[pack_int_id - m->num_packs_in_base],
+ m->pack_names[pack_int_id - m->num_packs_in_base],
+ pack_int_id);
+
+ return 0;
+}
+
static int fill_packs_from_midx(struct write_midx_context *ctx)
{
struct multi_pack_index *m;
@@ -916,19 +967,85 @@ static int fill_packs_from_midx(struct write_midx_context *ctx)
for (m = ctx->m; m; m = m->base_midx) {
uint32_t i;
- for (i = 0; i < m->num_packs; i++) {
- if (prepare_midx_pack(m, m->num_packs_in_base + i))
- return error(_("could not load pack"));
-
+ for (i = m->num_packs_in_base;
+ i < m->num_packs_in_base + m->num_packs; i++) {
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- fill_pack_info(&ctx->info[ctx->nr++], m->packs[i],
- m->pack_names[i],
- m->num_packs_in_base + i);
+
+ if (fill_pack_from_midx(&ctx->info[ctx->nr], m, i) < 0)
+ return -1;
+
+ ctx->nr++;
}
}
return 0;
}
+static uint32_t compactible_packs_between(const struct multi_pack_index *from,
+ const struct multi_pack_index *to)
+{
+ uint32_t nr;
+
+ ASSERT(from && to);
+
+ nr = u32_add(to->num_packs, to->num_packs_in_base);
+ if (nr < from->num_packs_in_base)
+ BUG("unexpected number of packs in base during compaction: "
+ "%"PRIu32" < %"PRIu32, nr, from->num_packs_in_base);
+
+ return nr - from->num_packs_in_base;
+}
+
+static int fill_packs_from_midx_range(struct write_midx_context *ctx,
+ int bitmap_order)
+{
+ struct multi_pack_index *m = ctx->compact_to;
+ uint32_t packs_nr;
+
+ ASSERT(ctx->compact && !ctx->nr);
+ ASSERT(ctx->compact_from);
+ ASSERT(ctx->compact_to);
+
+ packs_nr = compactible_packs_between(ctx->compact_from,
+ ctx->compact_to);
+
+ ALLOC_GROW(ctx->info, packs_nr, ctx->alloc);
+
+ while (m != ctx->compact_from->base_midx) {
+ uint32_t pack_int_id, preferred_pack_id;
+ uint32_t i;
+
+ if (bitmap_order) {
+ if (midx_preferred_pack(m, &preferred_pack_id) < 0)
+ die(_("could not determine preferred pack"));
+ } else {
+ preferred_pack_id = m->num_packs_in_base;
+ }
+
+ pack_int_id = m->num_packs_in_base - ctx->compact_from->num_packs_in_base;
+
+ if (fill_pack_from_midx(&ctx->info[pack_int_id++], m,
+ preferred_pack_id) < 0)
+ return -1;
+
+ for (i = m->num_packs_in_base;
+ i < m->num_packs_in_base + m->num_packs; i++) {
+ if (preferred_pack_id == i)
+ continue;
+
+ if (fill_pack_from_midx(&ctx->info[pack_int_id++], m,
+ i) < 0)
+ return -1;
+ }
+
+ ctx->nr += m->num_packs;
+ m = m->base_midx;
+ }
+
+ ASSERT(ctx->nr == packs_nr);
+
+ return 0;
+}
+
static struct {
const char *non_split;
const char *split;
@@ -955,7 +1072,7 @@ static int link_midx_to_chain(struct multi_pack_index *m)
}
for (i = 0; i < ARRAY_SIZE(midx_exts); i++) {
- const unsigned char *hash = get_midx_checksum(m);
+ const unsigned char *hash = get_midx_hash(m);
get_midx_filename_ext(m->source, &from,
hash, midx_exts[i].non_split);
@@ -1014,14 +1131,30 @@ static void clear_midx_files(struct odb_source *source,
strbuf_release(&buf);
}
-static int write_midx_internal(struct odb_source *source,
- struct string_list *packs_to_include,
- struct string_list *packs_to_drop,
- const char *preferred_pack_name,
- const char *refs_snapshot,
- unsigned flags)
+static int midx_hashcmp(const struct multi_pack_index *a,
+ const struct multi_pack_index *b,
+ const struct git_hash_algo *algop)
{
- struct repository *r = source->odb->repo;
+ return hashcmp(get_midx_hash(a), get_midx_hash(b), algop);
+}
+
+struct write_midx_opts {
+ struct odb_source *source;
+
+ struct string_list *packs_to_include;
+ struct string_list *packs_to_drop;
+
+ struct multi_pack_index *compact_from;
+ struct multi_pack_index *compact_to;
+
+ const char *preferred_pack_name;
+ const char *refs_snapshot;
+ unsigned flags;
+};
+
+static int write_midx_internal(struct write_midx_opts *opts)
+{
+ struct repository *r = opts->source->odb->repo;
struct strbuf midx_name = STRBUF_INIT;
unsigned char midx_hash[GIT_MAX_RAWSZ];
uint32_t start_pack;
@@ -1036,27 +1169,39 @@ static int write_midx_internal(struct odb_source *source,
int dropped_packs = 0;
int result = -1;
const char **keep_hashes = NULL;
+ size_t keep_hashes_nr = 0;
struct chunkfile *cf;
trace2_region_enter("midx", "write_midx_internal", r);
ctx.repo = r;
- ctx.source = source;
+ ctx.source = opts->source;
+
+ ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL);
+ ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT);
- ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL);
+ if (ctx.compact) {
+ if (!opts->compact_from)
+ BUG("expected non-NULL 'from' MIDX during compaction");
+ if (!opts->compact_to)
+ BUG("expected non-NULL 'to' MIDX during compaction");
+
+ ctx.compact_from = opts->compact_from;
+ ctx.compact_to = opts->compact_to;
+ }
if (ctx.incremental)
strbuf_addf(&midx_name,
"%s/pack/multi-pack-index.d/tmp_midx_XXXXXX",
- source->path);
+ opts->source->path);
else
- get_midx_filename(source, &midx_name);
+ get_midx_filename(opts->source, &midx_name);
if (safe_create_leading_directories(r, midx_name.buf))
die_errno(_("unable to create leading directories of %s"),
midx_name.buf);
- if (!packs_to_include || ctx.incremental) {
- struct multi_pack_index *m = get_multi_pack_index(source);
+ if (!opts->packs_to_include || ctx.incremental) {
+ struct multi_pack_index *m = get_multi_pack_index(opts->source);
if (m && !midx_checksum_valid(m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch"));
m = NULL;
@@ -1071,11 +1216,18 @@ static int write_midx_internal(struct odb_source *source,
*/
if (ctx.incremental)
ctx.base_midx = m;
- else if (!packs_to_include)
+ if (!opts->packs_to_include)
ctx.m = m;
}
}
+ /*
+ * If compacting MIDX layer(s) in the range [from, to], then the
+ * compacted MIDX will share the same base MIDX as 'from'.
+ */
+ if (ctx.compact)
+ ctx.base_midx = ctx.compact_from->base_midx;
+
ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16;
ctx.info = NULL;
@@ -1084,39 +1236,48 @@ static int write_midx_internal(struct odb_source *source,
if (ctx.incremental) {
struct multi_pack_index *m = ctx.base_midx;
while (m) {
- if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) {
+ if (opts->flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) {
error(_("could not load reverse index for MIDX %s"),
- hash_to_hex_algop(get_midx_checksum(m),
- m->source->odb->repo->hash_algo));
+ get_midx_checksum(m));
goto cleanup;
}
ctx.num_multi_pack_indexes_before++;
m = m->base_midx;
}
- } else if (ctx.m && fill_packs_from_midx(&ctx)) {
+ } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) {
goto cleanup;
}
start_pack = ctx.nr;
ctx.pack_paths_checked = 0;
- if (flags & MIDX_PROGRESS)
+ if (opts->flags & MIDX_PROGRESS)
ctx.progress = start_delayed_progress(r,
_("Adding packfiles to multi-pack-index"), 0);
else
ctx.progress = NULL;
- ctx.to_include = packs_to_include;
+ if (ctx.compact) {
+ int bitmap_order = 0;
+ if (opts->preferred_pack_name)
+ bitmap_order |= 1;
+ else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
+ bitmap_order |= 1;
- for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx);
+ fill_packs_from_midx_range(&ctx, bitmap_order);
+ } else {
+ ctx.to_include = opts->packs_to_include;
+ for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx);
+ }
stop_progress(&ctx.progress);
if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) &&
!ctx.incremental &&
- !(packs_to_include || packs_to_drop)) {
+ !ctx.compact &&
+ !(opts->packs_to_include || opts->packs_to_drop)) {
struct bitmap_index *bitmap_git;
int bitmap_exists;
- int want_bitmap = flags & MIDX_WRITE_BITMAP;
+ int want_bitmap = opts->flags & MIDX_WRITE_BITMAP;
bitmap_git = prepare_midx_bitmap_git(ctx.m);
bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
@@ -1128,7 +1289,8 @@ static int write_midx_internal(struct odb_source *source,
* corresponding bitmap (or one wasn't requested).
*/
if (!want_bitmap)
- clear_midx_files_ext(source, "bitmap", NULL);
+ clear_midx_files_ext(opts->source, "bitmap",
+ NULL);
result = 0;
goto cleanup;
}
@@ -1139,11 +1301,11 @@ static int write_midx_internal(struct odb_source *source,
goto cleanup; /* nothing to do */
}
- if (preferred_pack_name) {
+ if (opts->preferred_pack_name) {
ctx.preferred_pack_idx = NO_PREFERRED_PACK;
for (size_t i = 0; i < ctx.nr; i++) {
- if (!cmp_idx_or_pack_name(preferred_pack_name,
+ if (!cmp_idx_or_pack_name(opts->preferred_pack_name,
ctx.info[i].pack_name)) {
ctx.preferred_pack_idx = i;
break;
@@ -1152,9 +1314,9 @@ static int write_midx_internal(struct odb_source *source,
if (ctx.preferred_pack_idx == NO_PREFERRED_PACK)
warning(_("unknown preferred pack: '%s'"),
- preferred_pack_name);
+ opts->preferred_pack_name);
} else if (ctx.nr &&
- (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
+ (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
struct packed_git *oldest = ctx.info[0].p;
ctx.preferred_pack_idx = 0;
@@ -1165,7 +1327,7 @@ static int write_midx_internal(struct odb_source *source,
*/
open_pack_index(oldest);
- if (packs_to_drop && packs_to_drop->nr)
+ if (opts->packs_to_drop && opts->packs_to_drop->nr)
BUG("cannot write a MIDX bitmap during expiration");
/*
@@ -1225,22 +1387,26 @@ static int write_midx_internal(struct odb_source *source,
ctx.large_offsets_needed = 1;
}
- QSORT(ctx.info, ctx.nr, pack_info_compare);
+ if (!ctx.compact)
+ QSORT(ctx.info, ctx.nr, pack_info_compare);
- if (packs_to_drop && packs_to_drop->nr) {
+ if (opts->packs_to_drop && opts->packs_to_drop->nr) {
size_t drop_index = 0;
int missing_drops = 0;
- for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) {
+ ASSERT(!ctx.compact);
+
+ for (size_t i = 0;
+ i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) {
int cmp = strcmp(ctx.info[i].pack_name,
- packs_to_drop->items[drop_index].string);
+ opts->packs_to_drop->items[drop_index].string);
if (!cmp) {
drop_index++;
ctx.info[i].expired = 1;
} else if (cmp > 0) {
error(_("did not see pack-file %s to drop"),
- packs_to_drop->items[drop_index].string);
+ opts->packs_to_drop->items[drop_index].string);
drop_index++;
missing_drops++;
i--;
@@ -1261,12 +1427,20 @@ static int write_midx_internal(struct odb_source *source,
*/
ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
for (size_t i = 0; i < ctx.nr; i++) {
+ uint32_t from = ctx.info[i].orig_pack_int_id;
+ uint32_t to;
+
if (ctx.info[i].expired) {
+ to = PACK_EXPIRED;
dropped_packs++;
- ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
} else {
- ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs;
+ to = i - dropped_packs;
}
+
+ if (ctx.compact)
+ from -= ctx.compact_from->num_packs_in_base;
+
+ ctx.pack_perm[from] = to;
}
for (size_t i = 0; i < ctx.nr; i++) {
@@ -1277,16 +1451,16 @@ static int write_midx_internal(struct odb_source *source,
}
/* Check that the preferred pack wasn't expired (if given). */
- if (preferred_pack_name) {
- struct pack_info *preferred = bsearch(preferred_pack_name,
+ if (opts->preferred_pack_name) {
+ struct pack_info *preferred = bsearch(opts->preferred_pack_name,
ctx.info, ctx.nr,
sizeof(*ctx.info),
idx_or_pack_name_cmp);
if (preferred) {
- uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
+ uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id);
if (perm == PACK_EXPIRED)
warning(_("preferred pack '%s' is expired"),
- preferred_pack_name);
+ opts->preferred_pack_name);
}
}
@@ -1300,15 +1474,15 @@ static int write_midx_internal(struct odb_source *source,
}
if (!ctx.entries_nr) {
- if (flags & MIDX_WRITE_BITMAP)
+ if (opts->flags & MIDX_WRITE_BITMAP)
warning(_("refusing to write multi-pack .bitmap without any objects"));
- flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
+ opts->flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
}
if (ctx.incremental) {
struct strbuf lock_name = STRBUF_INIT;
- get_midx_chain_filename(source, &lock_name);
+ get_midx_chain_filename(opts->source, &lock_name);
hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR);
strbuf_release(&lock_name);
@@ -1351,7 +1525,7 @@ static int write_midx_internal(struct odb_source *source,
MIDX_CHUNK_LARGE_OFFSET_WIDTH),
write_midx_large_offsets);
- if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
+ if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
ctx.pack_order = midx_pack_order(&ctx);
add_chunk(cf, MIDX_CHUNKID_REVINDEX,
st_mult(ctx.entries_nr, sizeof(uint32_t)),
@@ -1369,11 +1543,11 @@ static int write_midx_internal(struct odb_source *source,
CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf);
- if (flags & MIDX_WRITE_REV_INDEX &&
+ if (opts->flags & MIDX_WRITE_REV_INDEX &&
git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
write_midx_reverse_index(&ctx, midx_hash);
- if (flags & MIDX_WRITE_BITMAP) {
+ if (opts->flags & MIDX_WRITE_BITMAP) {
struct packing_data pdata;
struct commit **commits;
uint32_t commits_nr;
@@ -1383,7 +1557,7 @@ static int write_midx_internal(struct odb_source *source,
prepare_midx_packing_data(&pdata, &ctx);
- commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx);
+ commits = find_commits_for_midx_bitmap(&commits_nr, opts->refs_snapshot, &ctx);
/*
* The previous steps translated the information from
@@ -1396,7 +1570,7 @@ static int write_midx_internal(struct odb_source *source,
if (write_midx_bitmap(&ctx,
midx_hash, &pdata, commits, commits_nr,
- flags) < 0) {
+ opts->flags) < 0) {
error(_("could not write multi-pack bitmap"));
clear_packing_data(&pdata);
free(commits);
@@ -1414,7 +1588,24 @@ static int write_midx_internal(struct odb_source *source,
if (ctx.num_multi_pack_indexes_before == UINT32_MAX)
die(_("too many multi-pack-indexes"));
- CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1);
+ if (ctx.compact) {
+ struct multi_pack_index *m;
+
+ /*
+ * Keep all MIDX layers excluding those in the range [from, to].
+ */
+ for (m = ctx.base_midx; m; m = m->base_midx)
+ keep_hashes_nr++;
+ for (m = ctx.m;
+ m && midx_hashcmp(m, ctx.compact_to, r->hash_algo);
+ m = m->base_midx)
+ keep_hashes_nr++;
+
+ keep_hashes_nr++; /* include the compacted layer */
+ } else {
+ keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1;
+ }
+ CALLOC_ARRAY(keep_hashes, keep_hashes_nr);
if (ctx.incremental) {
FILE *chainf = fdopen_lock_file(&lk, "w");
@@ -1429,7 +1620,7 @@ static int write_midx_internal(struct odb_source *source,
if (link_midx_to_chain(ctx.base_midx) < 0)
goto cleanup;
- get_split_midx_filename_ext(source, &final_midx_name,
+ get_split_midx_filename_ext(opts->source, &final_midx_name,
midx_hash, MIDX_EXT_MIDX);
if (rename_tempfile(&incr, final_midx_name.buf) < 0) {
@@ -1439,18 +1630,47 @@ static int write_midx_internal(struct odb_source *source,
strbuf_release(&final_midx_name);
- keep_hashes[ctx.num_multi_pack_indexes_before] =
- xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo));
+ if (ctx.compact) {
+ struct multi_pack_index *m;
+ uint32_t num_layers_before_from = 0;
+ uint32_t i;
- for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
- uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;
+ for (m = ctx.base_midx; m; m = m->base_midx)
+ num_layers_before_from++;
- keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m),
+ m = ctx.base_midx;
+ for (i = 0; i < num_layers_before_from; i++) {
+ uint32_t j = num_layers_before_from - i - 1;
+
+ keep_hashes[j] = xstrdup(get_midx_checksum(m));
+ m = m->base_midx;
+ }
+
+ keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash,
r->hash_algo));
- m = m->base_midx;
+
+ i = 0;
+ for (m = ctx.m;
+ m && midx_hashcmp(m, ctx.compact_to, r->hash_algo);
+ m = m->base_midx) {
+ keep_hashes[keep_hashes_nr - i - 1] =
+ xstrdup(get_midx_checksum(m));
+ i++;
+ }
+ } else {
+ keep_hashes[ctx.num_multi_pack_indexes_before] =
+ xstrdup(hash_to_hex_algop(midx_hash,
+ r->hash_algo));
+
+ for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
+ uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;
+
+ keep_hashes[j] = xstrdup(get_midx_checksum(m));
+ m = m->base_midx;
+ }
}
- for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++)
+ for (uint32_t i = 0; i < keep_hashes_nr; i++)
fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]);
} else {
keep_hashes[ctx.num_multi_pack_indexes_before] =
@@ -1463,8 +1683,7 @@ static int write_midx_internal(struct odb_source *source,
if (commit_lock_file(&lk) < 0)
die_errno(_("could not write multi-pack-index"));
- clear_midx_files(source, keep_hashes,
- ctx.num_multi_pack_indexes_before + 1,
+ clear_midx_files(opts->source, keep_hashes, keep_hashes_nr,
ctx.incremental);
result = 0;
@@ -1482,7 +1701,7 @@ static int write_midx_internal(struct odb_source *source,
free(ctx.pack_perm);
free(ctx.pack_order);
if (keep_hashes) {
- for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++)
+ for (uint32_t i = 0; i < keep_hashes_nr; i++)
free((char *)keep_hashes[i]);
free(keep_hashes);
}
@@ -1497,9 +1716,14 @@ int write_midx_file(struct odb_source *source,
const char *preferred_pack_name,
const char *refs_snapshot, unsigned flags)
{
- return write_midx_internal(source, NULL, NULL,
- preferred_pack_name, refs_snapshot,
- flags);
+ struct write_midx_opts opts = {
+ .source = source,
+ .preferred_pack_name = preferred_pack_name,
+ .refs_snapshot = refs_snapshot,
+ .flags = flags,
+ };
+
+ return write_midx_internal(&opts);
}
int write_midx_file_only(struct odb_source *source,
@@ -1507,8 +1731,30 @@ int write_midx_file_only(struct odb_source *source,
const char *preferred_pack_name,
const char *refs_snapshot, unsigned flags)
{
- return write_midx_internal(source, packs_to_include, NULL,
- preferred_pack_name, refs_snapshot, flags);
+ struct write_midx_opts opts = {
+ .source = source,
+ .packs_to_include = packs_to_include,
+ .preferred_pack_name = preferred_pack_name,
+ .refs_snapshot = refs_snapshot,
+ .flags = flags,
+ };
+
+ return write_midx_internal(&opts);
+}
+
+int write_midx_file_compact(struct odb_source *source,
+ struct multi_pack_index *from,
+ struct multi_pack_index *to,
+ unsigned flags)
+{
+ struct write_midx_opts opts = {
+ .source = source,
+ .compact_from = from,
+ .compact_to = to,
+ .flags = flags | MIDX_WRITE_COMPACT,
+ };
+
+ return write_midx_internal(&opts);
}
int expire_midx_packs(struct odb_source *source, unsigned flags)
@@ -1568,8 +1814,11 @@ int expire_midx_packs(struct odb_source *source, unsigned flags)
free(count);
if (packs_to_drop.nr)
- result = write_midx_internal(source, NULL,
- &packs_to_drop, NULL, NULL, flags);
+ result = write_midx_internal(&(struct write_midx_opts) {
+ .source = source,
+ .packs_to_drop = &packs_to_drop,
+ .flags = flags & MIDX_PROGRESS,
+ });
string_list_clear(&packs_to_drop, 0);
@@ -1776,8 +2025,10 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags)
goto cleanup;
}
- result = write_midx_internal(source, NULL, NULL, NULL, NULL,
- flags);
+ result = write_midx_internal(&(struct write_midx_opts) {
+ .source = source,
+ .flags = flags,
+ });
cleanup:
free(include_pack);
diff --git a/midx.c b/midx.c
index 24e1e721754d0cbffed53d7545f12bca77d157bd..aecdaebd363ace01544de3d131664707f8b72f82 100644
--- a/midx.c
+++ b/midx.c
@@ -24,7 +24,13 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext
int cmp_idx_or_pack_name(const char *idx_or_pack_name,
const char *idx_name);
-const unsigned char *get_midx_checksum(struct multi_pack_index *m)
+const char *get_midx_checksum(const struct multi_pack_index *m)
+{
+ return hash_to_hex_algop(get_midx_hash(m),
+ m->source->odb->repo->hash_algo);
+}
+
+const unsigned char *get_midx_hash(const struct multi_pack_index *m)
{
return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz;
}
@@ -95,8 +101,9 @@ static int midx_read_object_offsets(const unsigned char *chunk_start,
struct multi_pack_index *get_multi_pack_index(struct odb_source *source)
{
- packfile_store_prepare(source->odb->packfiles);
- return source->midx;
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ packfile_store_prepare(files->packed);
+ return files->packed->midx;
}
static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source,
@@ -203,11 +210,6 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou
if (!end)
die(_("multi-pack-index pack-name chunk is too short"));
cur_pack_name = end + 1;
-
- if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
- die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
- m->pack_names[i - 1],
- m->pack_names[i]);
}
trace2_data_intmax("midx", r, "load/num_packs", m->num_packs);
@@ -405,6 +407,7 @@ void close_midx(struct multi_pack_index *m)
}
FREE_AND_NULL(m->packs);
FREE_AND_NULL(m->pack_names);
+ FREE_AND_NULL(m->pack_names_sorted);
free(m);
}
@@ -447,7 +450,7 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m,
int prepare_midx_pack(struct multi_pack_index *m,
uint32_t pack_int_id)
{
- struct repository *r = m->source->odb->repo;
+ struct odb_source_files *files = odb_source_files_downcast(m->source);
struct strbuf pack_name = STRBUF_INIT;
struct packed_git *p;
@@ -458,10 +461,10 @@ int prepare_midx_pack(struct multi_pack_index *m,
if (m->packs[pack_int_id])
return 0;
- strbuf_addf(&pack_name, "%s/pack/%s", m->source->path,
+ strbuf_addf(&pack_name, "%s/pack/%s", files->base.path,
m->pack_names[pack_int_id]);
- p = packfile_store_load_pack(r->objects->packfiles,
- pack_name.buf, m->source->local);
+ p = packfile_store_load_pack(files->packed,
+ pack_name.buf, files->base.local);
strbuf_release(&pack_name);
if (!p) {
@@ -650,17 +653,37 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name,
return strcmp(idx_or_pack_name, idx_name);
}
+
+static int midx_pack_names_cmp(const void *a, const void *b, void *m_)
+{
+ struct multi_pack_index *m = m_;
+ return strcmp(m->pack_names[*(const size_t *)a],
+ m->pack_names[*(const size_t *)b]);
+}
+
static int midx_contains_pack_1(struct multi_pack_index *m,
const char *idx_or_pack_name)
{
uint32_t first = 0, last = m->num_packs;
+ if (!m->pack_names_sorted) {
+ uint32_t i;
+
+ ALLOC_ARRAY(m->pack_names_sorted, m->num_packs);
+
+ for (i = 0; i < m->num_packs; i++)
+ m->pack_names_sorted[i] = i;
+
+ QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp,
+ m);
+ }
+
while (first < last) {
uint32_t mid = first + (last - first) / 2;
const char *current;
int cmp;
- current = m->pack_names[mid];
+ current = m->pack_names[m->pack_names_sorted[mid]];
cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
if (!cmp)
return 1;
@@ -704,18 +727,19 @@ int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
int prepare_multi_pack_index_one(struct odb_source *source)
{
+ struct odb_source_files *files = odb_source_files_downcast(source);
struct repository *r = source->odb->repo;
prepare_repo_settings(r);
if (!r->settings.core_multi_pack_index)
return 0;
- if (source->midx)
+ if (files->packed->midx)
return 1;
- source->midx = load_multi_pack_index(source);
+ files->packed->midx = load_multi_pack_index(source);
- return !!source->midx;
+ return !!files->packed->midx;
}
int midx_checksum_valid(struct multi_pack_index *m)
@@ -804,9 +828,10 @@ void clear_midx_file(struct repository *r)
struct odb_source *source;
for (source = r->objects->sources; source; source = source->next) {
- if (source->midx)
- close_midx(source->midx);
- source->midx = NULL;
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ if (files->packed->midx)
+ close_midx(files->packed->midx);
+ files->packed->midx = NULL;
}
}
diff --git a/midx.h b/midx.h
index 6e54d73503d56088923ceff1c69e1f8d6013ac12..61f9809b8c96b0f8cfb71735b7d8fa6ef28009cf 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,7 @@ struct multi_pack_index {
uint32_t num_packs_in_base;
const char **pack_names;
+ size_t *pack_names_sorted;
struct packed_git **packs;
};
@@ -80,12 +81,14 @@ struct multi_pack_index {
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
#define MIDX_WRITE_INCREMENTAL (1 << 5)
+#define MIDX_WRITE_COMPACT (1 << 6)
#define MIDX_EXT_REV "rev"
#define MIDX_EXT_BITMAP "bitmap"
#define MIDX_EXT_MIDX "midx"
-const unsigned char *get_midx_checksum(struct multi_pack_index *m);
+const char *get_midx_checksum(const struct multi_pack_index *m) /* static buffer */;
+const unsigned char *get_midx_hash(const struct multi_pack_index *m);
void get_midx_filename(struct odb_source *source, struct strbuf *out);
void get_midx_filename_ext(struct odb_source *source, struct strbuf *out,
const unsigned char *hash, const char *ext);
@@ -128,6 +131,10 @@ int write_midx_file_only(struct odb_source *source,
struct string_list *packs_to_include,
const char *preferred_pack_name,
const char *refs_snapshot, unsigned flags);
+int write_midx_file_compact(struct odb_source *source,
+ struct multi_pack_index *from,
+ struct multi_pack_index *to,
+ unsigned flags);
void clear_midx_file(struct repository *r);
int verify_midx_file(struct odb_source *source, unsigned flags);
int expire_midx_packs(struct odb_source *source, unsigned flags);
diff --git a/object-file.c b/object-file.c
index 84c9249dab520fd5153b36a11a2a211fb0e43877..ce8450a68e66b9935a2e1dc6b5f25f99aa5674fd 100644
--- a/object-file.c
+++ b/object-file.c
@@ -20,13 +20,13 @@
#include "object-file-convert.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
#include "path.h"
#include "read-cache-ll.h"
#include "setup.h"
-#include "streaming.h"
#include "tempfile.h"
#include "tmp-objdir.h"
@@ -132,29 +132,27 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
int stream_object_signature(struct repository *r, const struct object_id *oid)
{
struct object_id real_oid;
- unsigned long size;
- enum object_type obj_type;
- struct git_istream *st;
+ struct odb_read_stream *st;
struct git_hash_ctx c;
char hdr[MAX_HEADER_LEN];
int hdrlen;
- st = open_istream(r, oid, &obj_type, &size, NULL);
+ st = odb_read_stream_open(r->objects, oid, NULL);
if (!st)
return -1;
/* Generate the header */
- hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
+ hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
/* Sha1.. */
r->hash_algo->init_fn(&c);
git_hash_update(&c, hdr, hdrlen);
for (;;) {
char buf[1024 * 16];
- ssize_t readlen = read_istream(st, buf, sizeof(buf));
+ ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));
if (readlen < 0) {
- close_istream(st);
+ odb_read_stream_close(st);
return -1;
}
if (!readlen)
@@ -162,35 +160,18 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
git_hash_update(&c, buf, readlen);
}
git_hash_final_oid(&real_oid, &c);
- close_istream(st);
+ odb_read_stream_close(st);
return !oideq(oid, &real_oid) ? -1 : 0;
}
/*
- * Find "oid" as a loose object in given source.
- * Returns 0 on success, negative on failure.
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
- * call to stat_loose_object().
+ * call to open_loose_object().
*/
-static int stat_loose_object(struct odb_source_loose *loose,
- const struct object_id *oid,
- struct stat *st, const char **path)
-{
- static struct strbuf buf = STRBUF_INIT;
-
- *path = odb_loose_path(loose->source, &buf, oid);
- if (!lstat(*path, st))
- return 0;
-
- return -1;
-}
-
-/*
- * Like stat_loose_object(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
@@ -234,23 +215,42 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
return map;
}
-void *odb_source_loose_map_object(struct odb_source *source,
- const struct object_id *oid,
- unsigned long *size)
+static void *odb_source_loose_map_object(struct odb_source *source,
+ const struct object_id *oid,
+ unsigned long *size)
{
+ struct odb_source_files *files = odb_source_files_downcast(source);
const char *p;
- int fd = open_loose_object(source->loose, oid, &p);
+ int fd = open_loose_object(files->loose, oid, &p);
if (fd < 0)
return NULL;
return map_fd(fd, p, size);
}
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
- unsigned char *map,
- unsigned long mapsize,
- void *buffer,
- unsigned long bufsiz)
+enum unpack_loose_header_result {
+ ULHR_OK,
+ ULHR_BAD,
+ ULHR_TOO_LONG,
+};
+
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns:
+ *
+ * - ULHR_OK on success
+ * - ULHR_BAD on error
+ * - ULHR_TOO_LONG if the header was too long
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes.
+ */
+static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+ unsigned char *map,
+ unsigned long mapsize,
+ void *buffer,
+ unsigned long bufsiz)
{
int status;
@@ -329,11 +329,18 @@ static void *unpack_loose_rest(git_zstream *stream,
}
/*
+ * parse_loose_header() parses the starting "<type> <size>\0" of an
+ * object. If it doesn't follow that format -1 is returned. To check
+ * the validity of the <type> populate the "typep" in the "struct
+ * object_info". It will be OBJ_BAD if the object type is unknown. The
+ * parsed <size> can be retrieved via "oi->sizep", and from there
+ * passed to unpack_loose_rest().
+ *
* We used to just use "sscanf()", but that's actually way
* too permissive for what we want to check. So do an anal
* object header parse by hand.
*/
-int parse_loose_header(const char *hdr, struct object_info *oi)
+static int parse_loose_header(const char *hdr, struct object_info *oi)
{
const char *type_buf = hdr;
size_t size;
@@ -389,19 +396,22 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
-int odb_source_loose_read_object_info(struct odb_source *source,
+static int read_object_info_from_path(struct odb_source *source,
+ const char *path,
const struct object_id *oid,
- struct object_info *oi, int flags)
+ struct object_info *oi,
+ unsigned flags)
{
+ struct odb_source_files *files = odb_source_files_downcast(source);
int status = 0;
int fd;
unsigned long mapsize;
- const char *path;
void *map;
git_zstream stream;
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
+ struct stat st;
if (oi->delta_base_oid)
oidclr(oi->delta_base_oid, source->odb->repo->hash_algo);
@@ -415,23 +425,45 @@ int odb_source_loose_read_object_info(struct odb_source *source,
* object even exists.
*/
if (!oi->typep && !oi->sizep && !oi->contentp) {
- struct stat st;
- if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK))
- return quick_has_loose(source->loose, oid) ? 0 : -1;
- if (stat_loose_object(source->loose, oid, &st, &path) < 0)
+ if (!oi->disk_sizep && !oi->mtimep && (flags & OBJECT_INFO_QUICK)) {
+ status = quick_has_loose(files->loose, oid) ? 0 : -1;
+ if (!status)
+ oi->whence = OI_LOOSE;
+ return status;
+ }
+
+ if (lstat(path, &st))
return -1;
+
if (oi->disk_sizep)
*oi->disk_sizep = st.st_size;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
+
+ oi->whence = OI_LOOSE;
return 0;
}
- fd = open_loose_object(source->loose, oid, &path);
+ fd = git_open(path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
return -1;
}
- map = map_fd(fd, path, &mapsize);
+
+ if (fstat(fd, &st)) {
+ close(fd);
+ return -1;
+ }
+
+ mapsize = xsize_t(st.st_size);
+ if (!mapsize) {
+ close(fd);
+ return error(_("object file %s is empty"), path);
+ }
+
+ map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
if (!map)
return -1;
@@ -439,9 +471,10 @@ int odb_source_loose_read_object_info(struct odb_source *source,
oi->sizep = &size_scratch;
if (!oi->typep)
oi->typep = &type_scratch;
-
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
case ULHR_OK:
@@ -483,6 +516,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
return status;
}
+int odb_source_loose_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ odb_loose_path(source, &buf, oid);
+ return read_object_info_from_path(source, buf.buf, oid, oi, flags);
+}
+
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
@@ -974,6 +1017,45 @@ int odb_source_loose_freshen_object(struct odb_source *source,
return !!check_and_freshen_source(source, oid, 1);
}
+static int count_loose(const struct object_id *oid UNUSED,
+ const char *path UNUSED,
+ void *data)
+{
+ unsigned long *count = data;
+ (*count)++;
+ return 0;
+}
+
+unsigned long odb_source_loose_count_objects(struct odb_source *source,
+ unsigned flags)
+{
+ const unsigned hexsz_loose = source->odb->repo->hash_algo->hexsz - 2;
+ struct strbuf path = STRBUF_INIT;
+ unsigned long count = 0;
+ struct dirent *ent;
+ DIR *dir;
+
+ if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) {
+ for_each_loose_file_in_source(source, count_loose,
+ NULL, NULL, &count);
+ return count;
+ }
+
+ strbuf_addf(&path, "%s/17", source->path);
+ dir = opendir(path.buf);
+ strbuf_release(&path);
+ if (!dir)
+ return 0;
+
+ while ((ent = readdir(dir)) != NULL)
+ if (strspn(ent->d_name, "0123456789abcdef") == hexsz_loose &&
+ ent->d_name[hexsz_loose] == '\0')
+ count++;
+ closedir(dir);
+
+ return count * 256;
+}
+
int odb_source_loose_write_stream(struct odb_source *source,
struct odb_write_stream *in_stream, size_t len,
struct object_id *oid)
@@ -1737,24 +1819,45 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn cb, void *data,
- enum for_each_object_flags flags)
-{
+struct for_each_object_wrapper_data {
struct odb_source *source;
+ struct object_info *oi;
+ unsigned flags;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
- odb_prepare_alternates(odb);
- for (source = odb->sources; source; source = source->next) {
- int r = for_each_loose_file_in_source(source, cb, NULL,
- NULL, data);
- if (r)
- return r;
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+ const char *path,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
+ if (data->oi &&
+ read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
+ return -1;
+ return data->cb(oid, data->oi, data->cb_data);
+}
- if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
- break;
- }
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
+ .oi = oi,
+ .flags = flags,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
- return 0;
+ /* There are no loose promisor objects, so we can return immediately. */
+ if (flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)
+ return 0;
+
+ return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+ NULL, NULL, &data);
}
static int append_loose_object(const struct object_id *oid,
@@ -1768,33 +1871,34 @@ static int append_loose_object(const struct object_id *oid,
struct oidtree *odb_source_loose_cache(struct odb_source *source,
const struct object_id *oid)
{
+ struct odb_source_files *files = odb_source_files_downcast(source);
int subdir_nr = oid->hash[0];
struct strbuf buf = STRBUF_INIT;
- size_t word_bits = bitsizeof(source->loose->subdir_seen[0]);
+ size_t word_bits = bitsizeof(files->loose->subdir_seen[0]);
size_t word_index = subdir_nr / word_bits;
size_t mask = (size_t)1u << (subdir_nr % word_bits);
uint32_t *bitmap;
if (subdir_nr < 0 ||
- (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen))
+ (size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen))
BUG("subdir_nr out of range");
- bitmap = &source->loose->subdir_seen[word_index];
+ bitmap = &files->loose->subdir_seen[word_index];
if (*bitmap & mask)
- return source->loose->cache;
- if (!source->loose->cache) {
- ALLOC_ARRAY(source->loose->cache, 1);
- oidtree_init(source->loose->cache);
+ return files->loose->cache;
+ if (!files->loose->cache) {
+ ALLOC_ARRAY(files->loose->cache, 1);
+ oidtree_init(files->loose->cache);
}
strbuf_addstr(&buf, source->path);
for_each_file_in_obj_subdir(subdir_nr, &buf,
source->odb->repo->hash_algo,
append_loose_object,
NULL, NULL,
- source->loose->cache);
+ files->loose->cache);
*bitmap |= mask;
strbuf_release(&buf);
- return source->loose->cache;
+ return files->loose->cache;
}
static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
@@ -1807,7 +1911,8 @@ static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
void odb_source_loose_reprepare(struct odb_source *source)
{
- odb_source_loose_clear_cache(source->loose);
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ odb_source_loose_clear_cache(files->loose);
}
static int check_stream_oid(git_zstream *stream,
@@ -1980,3 +2085,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
loose_object_map_clear(&loose->map);
free(loose);
}
+
+struct odb_loose_read_stream {
+ struct odb_read_stream base;
+ git_zstream z;
+ enum {
+ ODB_LOOSE_READ_STREAM_INUSE,
+ ODB_LOOSE_READ_STREAM_DONE,
+ ODB_LOOSE_READ_STREAM_ERROR,
+ } z_state;
+ void *mapped;
+ unsigned long mapsize;
+ char hdr[32];
+ int hdr_avail;
+ int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+ struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+ size_t total_read = 0;
+
+ switch (st->z_state) {
+ case ODB_LOOSE_READ_STREAM_DONE:
+ return 0;
+ case ODB_LOOSE_READ_STREAM_ERROR:
+ return -1;
+ default:
+ break;
+ }
+
+ if (st->hdr_used < st->hdr_avail) {
+ size_t to_copy = st->hdr_avail - st->hdr_used;
+ if (sz < to_copy)
+ to_copy = sz;
+ memcpy(buf, st->hdr + st->hdr_used, to_copy);
+ st->hdr_used += to_copy;
+ total_read += to_copy;
+ }
+
+ while (total_read < sz) {
+ int status;
+
+ st->z.next_out = (unsigned char *)buf + total_read;
+ st->z.avail_out = sz - total_read;
+ status = git_inflate(&st->z, Z_FINISH);
+
+ total_read = st->z.next_out - (unsigned char *)buf;
+
+ if (status == Z_STREAM_END) {
+ git_inflate_end(&st->z);
+ st->z_state = ODB_LOOSE_READ_STREAM_DONE;
+ break;
+ }
+ if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
+ git_inflate_end(&st->z);
+ st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
+ return -1;
+ }
+ }
+ return total_read;
+}
+
+static int close_istream_loose(struct odb_read_stream *_st)
+{
+ struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+ if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+ git_inflate_end(&st->z);
+ munmap(st->mapped, st->mapsize);
+ return 0;
+}
+
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+ struct odb_source *source,
+ const struct object_id *oid)
+{
+ struct object_info oi = OBJECT_INFO_INIT;
+ struct odb_loose_read_stream *st;
+ unsigned long mapsize;
+ void *mapped;
+
+ mapped = odb_source_loose_map_object(source, oid, &mapsize);
+ if (!mapped)
+ return -1;
+
+ /*
+ * Note: we must allocate this structure early even though we may still
+ * fail. This is because we need to initialize the zlib stream, and it
+ * is not possible to copy the stream around after the fact because it
+ * has self-referencing pointers.
+ */
+ CALLOC_ARRAY(st, 1);
+
+ switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
+ sizeof(st->hdr))) {
+ case ULHR_OK:
+ break;
+ case ULHR_BAD:
+ case ULHR_TOO_LONG:
+ goto error;
+ }
+
+ oi.sizep = &st->base.size;
+ oi.typep = &st->base.type;
+
+ if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+ goto error;
+
+ st->mapped = mapped;
+ st->mapsize = mapsize;
+ st->hdr_used = strlen(st->hdr) + 1;
+ st->hdr_avail = st->z.total_out;
+ st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
+ st->base.close = close_istream_loose;
+ st->base.read = read_istream_loose;
+
+ *out = &st->base;
+
+ return 0;
+error:
+ git_inflate_end(&st->z);
+ munmap(st->mapped, st->mapsize);
+ free(st);
+ return -1;
+}
diff --git a/object-file.h b/object-file.h
index eeffa67bbda63102e345c49e7bbf18871ed82123..f1318de7addbcc0058be16037416fb24a3a422a0 100644
--- a/object-file.h
+++ b/object-file.h
@@ -16,6 +16,8 @@ enum {
int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
+struct object_info;
+struct odb_read_stream;
struct odb_source;
struct odb_source_loose {
@@ -45,11 +47,12 @@ void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags);
+ struct object_info *oi,
+ unsigned flags);
-void *odb_source_loose_map_object(struct odb_source *source,
- const struct object_id *oid,
- unsigned long *size);
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+ struct odb_source *source,
+ const struct object_id *oid);
/*
* Return true iff an object database source has a loose object
@@ -62,6 +65,14 @@ int odb_source_loose_has_object(struct odb_source *source,
int odb_source_loose_freshen_object(struct odb_source *source,
const struct object_id *oid);
+/*
+ * Because object hashes are cryptographic and thus evenly distributed,
+ * we can check only one and get a reasonable estimate via extrapolation. The
+ * shard used for this is "objects/17".
+ */
+unsigned long odb_source_loose_count_objects(struct odb_source *source,
+ unsigned flags);
+
int odb_source_loose_write_object(struct odb_source *source,
const void *buf, unsigned long len,
enum object_type type, struct object_id *oid,
@@ -124,16 +135,16 @@ int for_each_loose_file_in_source(struct odb_source *source,
void *data);
/*
- * Iterate over all accessible loose objects without respect to
- * reachability. By default, this includes both local and alternate objects.
- * The order in which objects are visited is unspecified.
- *
- * Any flags specific to packs are ignored.
+ * Iterate through all loose objects in the given object database source and
+ * invoke the callback function for each of them. If given, the object info
+ * will be populated with the object's data as if you had called
+ * `odb_source_loose_read_object_info()` on the object.
*/
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn, void *,
- enum for_each_object_flags flags);
-
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/**
* format_object_header() is a thin wrapper around s xsnprintf() that
@@ -143,40 +154,6 @@ int for_each_loose_object(struct object_database *odb,
int format_object_header(char *str, size_t size, enum object_type type,
size_t objsize);
-/**
- * unpack_loose_header() initializes the data stream needed to unpack
- * a loose object header.
- *
- * Returns:
- *
- * - ULHR_OK on success
- * - ULHR_BAD on error
- * - ULHR_TOO_LONG if the header was too long
- *
- * It will only parse up to MAX_HEADER_LEN bytes.
- */
-enum unpack_loose_header_result {
- ULHR_OK,
- ULHR_BAD,
- ULHR_TOO_LONG,
-};
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
- unsigned char *map,
- unsigned long mapsize,
- void *buffer,
- unsigned long bufsiz);
-
-/**
- * parse_loose_header() parses the starting "<type> <size>\0" of an
- * object. If it doesn't follow that format -1 is returned. To check
- * the validity of the <type> populate the "typep" in the "struct
- * object_info". It will be OBJ_BAD if the object type is unknown. The
- * parsed <size> can be retrieved via "oi->sizep", and from there
- * passed to unpack_loose_rest().
- */
-struct object_info;
-int parse_loose_header(const char *hdr, struct object_info *oi);
-
int force_object_loose(struct odb_source *source,
const struct object_id *oid, time_t mtime);
diff --git a/object-name.c b/object-name.c
index fed5de51531fde8657be62c6ac311dd098f0b9ce..81269d241ac5067bb56b4928cfede23dbe065be7 100644
--- a/object-name.c
+++ b/object-name.c
@@ -837,7 +837,8 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
const unsigned hexsz = algo->hexsz;
if (len < 0) {
- unsigned long count = repo_approximate_object_count(r);
+ unsigned long count = odb_count_objects(r->objects,
+ ODB_COUNT_OBJECTS_APPROXIMATE);
/*
* Add one because the MSB only tells us the highest bit set,
* not including the value of all the _other_ bits (so "15"
diff --git a/odb.c b/odb.c
index dc8f292f3d9645e2de8d990630a29cb4b4b1e20a..3ba32d3dc39384a05c57980c1eafcc91b95bbbbb 100644
--- a/odb.c
+++ b/odb.c
@@ -89,17 +89,20 @@ int odb_mkstemp(struct object_database *odb,
/*
* Return non-zero iff the path is usable as an alternate object database.
*/
-static int alt_odb_usable(struct object_database *o, const char *path,
- const char *normalized_objdir)
+static bool odb_is_source_usable(struct object_database *o, const char *path)
{
int r;
+ struct strbuf normalized_objdir = STRBUF_INIT;
+ bool usable = false;
+
+ strbuf_realpath(&normalized_objdir, o->sources->path, 1);
/* Detect cases where alternate disappeared */
if (!is_directory(path)) {
error(_("object directory %s does not exist; "
"check .git/objects/info/alternates"),
path);
- return 0;
+ goto out;
}
/*
@@ -116,87 +119,99 @@ static int alt_odb_usable(struct object_database *o, const char *path,
kh_value(o->source_by_path, p) = o->sources;
}
- if (fspatheq(path, normalized_objdir))
- return 0;
+ if (fspatheq(path, normalized_objdir.buf))
+ goto out;
if (kh_get_odb_path_map(o->source_by_path, path) < kh_end(o->source_by_path))
- return 0;
+ goto out;
- return 1;
-}
+ usable = true;
-/*
- * Prepare alternate object database registry.
- *
- * The variable alt_odb_list points at the list of struct
- * odb_source. The elements on this list come from
- * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
- * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
- * whose contents is similar to that environment variable but can be
- * LF separated. Its base points at a statically allocated buffer that
- * contains "/the/directory/corresponding/to/.git/objects/...", while
- * its name points just after the slash at the end of ".git/objects/"
- * in the example above, and has enough space to hold all hex characters
- * of the object ID, an extra slash for the first level indirection, and
- * the terminating NUL.
- */
-static void read_info_alternates(struct object_database *odb,
- const char *relative_base,
- int depth);
+out:
+ strbuf_release(&normalized_objdir);
+ return usable;
+}
-static struct odb_source *odb_source_new(struct object_database *odb,
- const char *path,
- bool local)
+void parse_alternates(const char *string,
+ int sep,
+ const char *relative_base,
+ struct strvec *out)
{
- struct odb_source *source;
+ struct strbuf pathbuf = STRBUF_INIT;
+ struct strbuf buf = STRBUF_INIT;
- CALLOC_ARRAY(source, 1);
- source->odb = odb;
- source->local = local;
- source->path = xstrdup(path);
- source->loose = odb_source_loose_new(source);
+ if (!string || !*string)
+ return;
- return source;
-}
+ while (*string) {
+ const char *end;
+
+ strbuf_reset(&buf);
+ strbuf_reset(&pathbuf);
+
+ if (*string == '#') {
+ /* comment; consume up to next separator */
+ end = strchrnul(string, sep);
+ } else if (*string == '"' && !unquote_c_style(&buf, string, &end)) {
+ /*
+ * quoted path; unquote_c_style has copied the
+ * data for us and set "end". Broken quoting (e.g.,
+ * an entry that doesn't end with a quote) falls
+ * back to the unquoted case below.
+ */
+ } else {
+ /* normal, unquoted path */
+ end = strchrnul(string, sep);
+ strbuf_add(&buf, string, end - string);
+ }
-static struct odb_source *link_alt_odb_entry(struct object_database *odb,
- const char *dir,
- const char *relative_base,
- int depth)
-{
- struct odb_source *alternate = NULL;
- struct strbuf pathbuf = STRBUF_INIT;
- struct strbuf tmp = STRBUF_INIT;
- khiter_t pos;
- int ret;
+ if (*end)
+ end++;
+ string = end;
- if (!is_absolute_path(dir) && relative_base) {
- strbuf_realpath(&pathbuf, relative_base, 1);
- strbuf_addch(&pathbuf, '/');
- }
- strbuf_addstr(&pathbuf, dir);
+ if (!buf.len)
+ continue;
- if (!strbuf_realpath(&tmp, pathbuf.buf, 0)) {
- error(_("unable to normalize alternate object path: %s"),
- pathbuf.buf);
- goto error;
+ if (!is_absolute_path(buf.buf) && relative_base) {
+ strbuf_realpath(&pathbuf, relative_base, 1);
+ strbuf_addch(&pathbuf, '/');
+ }
+ strbuf_addbuf(&pathbuf, &buf);
+
+ strbuf_reset(&buf);
+ if (!strbuf_realpath(&buf, pathbuf.buf, 0)) {
+ error(_("unable to normalize alternate object path: %s"),
+ pathbuf.buf);
+ continue;
+ }
+
+ /*
+ * The trailing slash after the directory name is given by
+ * this function at the end. Remove duplicates.
+ */
+ while (buf.len && buf.buf[buf.len - 1] == '/')
+ strbuf_setlen(&buf, buf.len - 1);
+
+ strvec_push(out, buf.buf);
}
- strbuf_swap(&pathbuf, &tmp);
- /*
- * The trailing slash after the directory name is given by
- * this function at the end. Remove duplicates.
- */
- while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
- strbuf_setlen(&pathbuf, pathbuf.len - 1);
+ strbuf_release(&pathbuf);
+ strbuf_release(&buf);
+}
- strbuf_reset(&tmp);
- strbuf_realpath(&tmp, odb->sources->path, 1);
+static struct odb_source *odb_add_alternate_recursively(struct object_database *odb,
+ const char *source,
+ int depth)
+{
+ struct odb_source *alternate = NULL;
+ struct strvec sources = STRVEC_INIT;
+ khiter_t pos;
+ int ret;
- if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf))
+ if (!odb_is_source_usable(odb, source))
goto error;
- alternate = odb_source_new(odb, pathbuf.buf, false);
+ alternate = odb_source_new(odb, source, false);
/* add the alternate entry */
*odb->sources_tail = alternate;
@@ -208,126 +223,28 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb,
kh_value(odb->source_by_path, pos) = alternate;
/* recursively add alternates */
- read_info_alternates(odb, alternate->path, depth + 1);
-
- error:
- strbuf_release(&tmp);
- strbuf_release(&pathbuf);
- return alternate;
-}
-
-static const char *parse_alt_odb_entry(const char *string,
- int sep,
- struct strbuf *out)
-{
- const char *end;
-
- strbuf_reset(out);
-
- if (*string == '#') {
- /* comment; consume up to next separator */
- end = strchrnul(string, sep);
- } else if (*string == '"' && !unquote_c_style(out, string, &end)) {
- /*
- * quoted path; unquote_c_style has copied the
- * data for us and set "end". Broken quoting (e.g.,
- * an entry that doesn't end with a quote) falls
- * back to the unquoted case below.
- */
- } else {
- /* normal, unquoted path */
- end = strchrnul(string, sep);
- strbuf_add(out, string, end - string);
- }
-
- if (*end)
- end++;
- return end;
-}
-
-static void link_alt_odb_entries(struct object_database *odb, const char *alt,
- int sep, const char *relative_base, int depth)
-{
- struct strbuf dir = STRBUF_INIT;
-
- if (!alt || !*alt)
- return;
-
- if (depth > 5) {
+ odb_source_read_alternates(alternate, &sources);
+ if (sources.nr && depth + 1 > 5) {
error(_("%s: ignoring alternate object stores, nesting too deep"),
- relative_base);
- return;
- }
-
- while (*alt) {
- alt = parse_alt_odb_entry(alt, sep, &dir);
- if (!dir.len)
- continue;
- link_alt_odb_entry(odb, dir.buf, relative_base, depth);
- }
- strbuf_release(&dir);
-}
-
-static void read_info_alternates(struct object_database *odb,
- const char *relative_base,
- int depth)
-{
- char *path;
- struct strbuf buf = STRBUF_INIT;
-
- path = xstrfmt("%s/info/alternates", relative_base);
- if (strbuf_read_file(&buf, path, 1024) < 0) {
- warn_on_fopen_errors(path);
- free(path);
- return;
+ source);
+ } else {
+ for (size_t i = 0; i < sources.nr; i++)
+ odb_add_alternate_recursively(odb, sources.v[i], depth + 1);
}
- link_alt_odb_entries(odb, buf.buf, '\n', relative_base, depth);
- strbuf_release(&buf);
- free(path);
+ error:
+ strvec_clear(&sources);
+ return alternate;
}
void odb_add_to_alternates_file(struct object_database *odb,
const char *dir)
{
- struct lock_file lock = LOCK_INIT;
- char *alts = repo_git_path(odb->repo, "objects/info/alternates");
- FILE *in, *out;
- int found = 0;
-
- hold_lock_file_for_update(&lock, alts, LOCK_DIE_ON_ERROR);
- out = fdopen_lock_file(&lock, "w");
- if (!out)
- die_errno(_("unable to fdopen alternates lockfile"));
-
- in = fopen(alts, "r");
- if (in) {
- struct strbuf line = STRBUF_INIT;
-
- while (strbuf_getline(&line, in) != EOF) {
- if (!strcmp(dir, line.buf)) {
- found = 1;
- break;
- }
- fprintf_or_die(out, "%s\n", line.buf);
- }
-
- strbuf_release(&line);
- fclose(in);
- }
- else if (errno != ENOENT)
- die_errno(_("unable to read alternates file"));
-
- if (found) {
- rollback_lock_file(&lock);
- } else {
- fprintf_or_die(out, "%s\n", dir);
- if (commit_lock_file(&lock))
- die_errno(_("unable to move new alternates file into place"));
- if (odb->loaded_alternates)
- link_alt_odb_entries(odb, dir, '\n', NULL, 0);
- }
- free(alts);
+ int ret = odb_source_write_alternate(odb->sources, dir);
+ if (ret < 0)
+ die(NULL);
+ if (odb->loaded_alternates)
+ odb_add_alternate_recursively(odb, dir, 0);
}
struct odb_source *odb_add_to_alternates_memory(struct object_database *odb,
@@ -338,7 +255,7 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb,
* overwritten when they are.
*/
odb_prepare_alternates(odb);
- return link_alt_odb_entry(odb, dir, NULL, 0);
+ return odb_add_alternate_recursively(odb, dir, 0);
}
struct odb_source *odb_set_temporary_primary_source(struct object_database *odb,
@@ -369,13 +286,6 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb,
return source->next;
}
-static void odb_source_free(struct odb_source *source)
-{
- free(source->path);
- odb_source_loose_free(source->loose);
- free(source);
-}
-
void odb_restore_primary_source(struct object_database *odb,
struct odb_source *restore_source,
const char *old_path)
@@ -609,13 +519,19 @@ int odb_for_each_alternate(struct object_database *odb,
void odb_prepare_alternates(struct object_database *odb)
{
+ struct strvec sources = STRVEC_INIT;
+
if (odb->loaded_alternates)
return;
- link_alt_odb_entries(odb, odb->alternate_db, PATH_SEP, NULL, 0);
+ parse_alternates(odb->alternate_db, PATH_SEP, NULL, &sources);
+ odb_source_read_alternates(odb->sources, &sources);
+ for (size_t i = 0; i < sources.nr; i++)
+ odb_add_alternate_recursively(odb, sources.v[i], 0);
- read_info_alternates(odb, odb->sources->path, 0);
odb->loaded_alternates = 1;
+
+ strvec_clear(&sources);
}
int odb_has_alternates(struct object_database *odb)
@@ -670,8 +586,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
{
static struct object_info blank_oi = OBJECT_INFO_INIT;
const struct cached_object *co;
- struct pack_entry e;
- int rtype;
const struct object_id *real = oid;
int already_retried = 0;
@@ -697,6 +611,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
+ if (oi->mtimep)
+ *oi->mtimep = 0;
oi->whence = OI_CACHED;
return 0;
}
@@ -706,19 +622,18 @@ static int do_oid_object_info_extended(struct object_database *odb,
while (1) {
struct odb_source *source;
- if (find_pack_entry(odb->repo, real, &e))
- break;
-
/* Most likely it's a loose object. */
for (source = odb->sources; source; source = source->next)
- if (!odb_source_loose_read_object_info(source, real, oi, flags))
+ if (!odb_source_read_object_info(source, real, oi, flags))
return 0;
/* Not a loose object; someone else may have just packed it. */
if (!(flags & OBJECT_INFO_QUICK)) {
odb_reprepare(odb->repo->objects);
- if (find_pack_entry(odb->repo, real, &e))
- break;
+ for (source = odb->sources; source; source = source->next)
+ if (!odb_source_read_object_info(source, real, oi,
+ flags | OBJECT_INFO_AFTER_REPREPARE))
+ return 0;
}
/*
@@ -751,25 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
}
return -1;
}
-
- if (oi == &blank_oi)
- /*
- * We know that the caller doesn't actually need the
- * information below, so return early.
- */
- return 0;
- rtype = packed_object_info(odb->repo, e.p, e.offset, oi);
- if (rtype < 0) {
- mark_bad_packed_object(e.p, real);
- return do_oid_object_info_extended(odb, real, oi, 0);
- } else if (oi->whence == OI_PACKED) {
- oi->u.packed.offset = e.offset;
- oi->u.packed.pack = e.p;
- oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
- rtype == OBJ_OFS_DELTA);
- }
-
- return 0;
}
static int oid_object_info_convert(struct repository *r,
@@ -995,18 +891,47 @@ int odb_freshen_object(struct object_database *odb,
const struct object_id *oid)
{
struct odb_source *source;
-
- if (packfile_store_freshen_object(odb->packfiles, oid))
- return 1;
-
odb_prepare_alternates(odb);
for (source = odb->sources; source; source = source->next)
- if (odb_source_loose_freshen_object(source, oid))
+ if (odb_source_freshen_object(source, oid))
return 1;
+ return 0;
+}
+
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ int ret;
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ continue;
+
+ ret = odb_source_for_each_object(source, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
return 0;
}
+unsigned long odb_count_objects(struct object_database *odb,
+ unsigned flags)
+{
+ struct odb_source *source;
+ unsigned long count = 0;
+
+ odb_prepare_alternates(odb);
+ for (source = odb->sources; source; source = source->next)
+ count += odb_source_count_objects(source, flags);
+
+ return count;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
@@ -1025,15 +950,15 @@ int odb_write_object_ext(struct object_database *odb,
struct object_id *compat_oid,
unsigned flags)
{
- return odb_source_loose_write_object(odb->sources, buf, len, type,
- oid, compat_oid, flags);
+ return odb_source_write_object(odb->sources, buf, len, type,
+ oid, compat_oid, flags);
}
int odb_write_object_stream(struct object_database *odb,
struct odb_write_stream *stream, size_t len,
struct object_id *oid)
{
- return odb_source_loose_write_stream(odb->sources, stream, len, oid);
+ return odb_source_write_object_stream(odb->sources, stream, len, oid);
}
static void odb_update_commondir(const char *name UNUSED,
@@ -1077,12 +1002,14 @@ struct object_database *odb_new(struct repository *repo,
memset(o, 0, sizeof(*o));
o->repo = repo;
- o->packfiles = packfile_store_new(o);
pthread_mutex_init(&o->replace_mutex, NULL);
string_list_init_dup(&o->submodule_source_paths);
+ if (!primary_source)
+ primary_source = repo->object_storage;
if (!primary_source)
primary_source = to_free = xstrfmt("%s/objects", repo->commondir);
+
o->sources = odb_source_new(o, primary_source, true);
o->sources_tail = &o->sources->next;
o->alternate_db = xstrdup_or_null(secondary_sources);
@@ -1097,15 +1024,8 @@ struct object_database *odb_new(struct repository *repo,
void odb_close(struct object_database *o)
{
struct odb_source *source;
-
- packfile_store_close(o->packfiles);
-
- for (source = o->sources; source; source = source->next) {
- if (source->midx)
- close_midx(source->midx);
- source->midx = NULL;
- }
-
+ for (source = o->sources; source; source = source->next)
+ odb_source_close(source);
close_commit_graph(o);
}
@@ -1132,14 +1052,13 @@ void odb_free(struct object_database *o)
oidmap_clear(&o->replace_map, 1);
pthread_mutex_destroy(&o->replace_mutex);
+ odb_close(o);
odb_free_sources(o);
for (size_t i = 0; i < o->cached_object_nr; i++)
free((char *) o->cached_objects[i].value.buf);
free(o->cached_objects);
- odb_close(o);
- packfile_store_free(o->packfiles);
string_list_clear(&o->submodule_source_paths, 0);
chdir_notify_unregister(NULL, odb_update_commondir, o);
@@ -1163,12 +1082,10 @@ void odb_reprepare(struct object_database *o)
odb_prepare_alternates(o);
for (source = o->sources; source; source = source->next)
- odb_source_loose_reprepare(source);
+ odb_source_reprepare(source);
o->approximate_object_count_valid = 0;
- packfile_store_reprepare(o->packfiles);
-
obj_read_unlock();
}
diff --git a/odb.h b/odb.h
index 014cd9585a2f6efe7367e300afd465906f4a1e3a..ecdbd29e7d53f44ccac94aa5cf02ffa69733fb3a 100644
--- a/odb.h
+++ b/odb.h
@@ -3,6 +3,7 @@
#include "hashmap.h"
#include "object.h"
+#include "odb/source.h"
#include "oidset.h"
#include "oidmap.h"
#include "string-list.h"
@@ -30,54 +31,6 @@ extern int fetch_if_missing;
*/
char *compute_alternate_path(const char *path, struct strbuf *err);
-/*
- * The source is the part of the object database that stores the actual
- * objects. It thus encapsulates the logic to read and write the specific
- * on-disk format. An object database can have multiple sources:
- *
- * - The primary source, which is typically located in "$GIT_DIR/objects".
- * This is where new objects are usually written to.
- *
- * - Alternate sources, which are configured via "objects/info/alternates" or
- * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These
- * alternate sources are only used to read objects.
- */
-struct odb_source {
- struct odb_source *next;
-
- /* Object database that owns this object source. */
- struct object_database *odb;
-
- /* Private state for loose objects. */
- struct odb_source_loose *loose;
-
- /*
- * private data
- *
- * should only be accessed directly by packfile.c and midx.c
- */
- struct multi_pack_index *midx;
-
- /*
- * Figure out whether this is the local source of the owning
- * repository, which would typically be its ".git/objects" directory.
- * This local object directory is usually where objects would be
- * written to.
- */
- bool local;
-
- /*
- * This object store is ephemeral, so there is no need to fsync.
- */
- int will_destroy;
-
- /*
- * Path to the source. If this is a relative path, it is relative to
- * the current working directory.
- */
- char *path;
-};
-
struct packed_git;
struct packfile_store;
struct cached_object_entry;
@@ -128,9 +81,6 @@ struct object_database {
struct commit_graph *commit_graph;
unsigned commit_graph_attempted : 1; /* if loading has been attempted */
- /* Should only be accessed directly by packfile.c and midx.c. */
- struct packfile_store *packfiles;
-
/*
* This is meant to hold a *small* number of objects that you would
* want odb_read_object() to be able to return, but yet you do not want
@@ -324,13 +274,13 @@ struct object_info {
off_t *disk_sizep;
struct object_id *delta_base_oid;
void **contentp;
+ time_t *mtimep;
/* Response */
enum {
OI_CACHED,
OI_LOOSE,
OI_PACKED,
- OI_DBCACHED
} whence;
union {
/*
@@ -344,34 +294,58 @@ struct object_info {
struct {
struct packed_git *pack;
off_t offset;
- unsigned int is_delta;
+ enum packed_object_type {
+ PACKED_OBJECT_TYPE_UNKNOWN,
+ PACKED_OBJECT_TYPE_FULL,
+ PACKED_OBJECT_TYPE_OFS_DELTA,
+ PACKED_OBJECT_TYPE_REF_DELTA,
+ } type;
} packed;
} u;
};
+/*
+ * Given an object info structure, figure out whether any of its request
+ * pointers are populated.
+ */
+static inline bool object_info_is_blank_request(struct object_info *oi)
+{
+ return !oi->typep && !oi->sizep && !oi->disk_sizep &&
+ !oi->delta_base_oid && !oi->contentp && !oi->mtimep;
+}
+
/*
* Initializer for a "struct object_info" that wants no items. You may
* also memset() the memory to all-zeroes.
*/
#define OBJECT_INFO_INIT { 0 }
-/* Invoke lookup_replace_object() on the given hash */
-#define OBJECT_INFO_LOOKUP_REPLACE 1
-/* Do not retry packed storage after checking packed and loose storage */
-#define OBJECT_INFO_QUICK 8
-/*
- * Do not attempt to fetch the object if missing (even if fetch_is_missing is
- * nonzero).
- */
-#define OBJECT_INFO_SKIP_FETCH_OBJECT 16
-/*
- * This is meant for bulk prefetching of missing blobs in a partial
- * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK
- */
-#define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)
+/* Flags that can be passed to `odb_read_object_info_extended()`. */
+enum object_info_flags {
+ /* Invoke lookup_replace_object() on the given hash. */
+ OBJECT_INFO_LOOKUP_REPLACE = (1 << 0),
+
+ /* Do not reprepare object sources when the first lookup has failed. */
+ OBJECT_INFO_QUICK = (1 << 3),
+
+	/* The lookup is being retried after object sources have been reprepared. */
+	OBJECT_INFO_AFTER_REPREPARE = (1 << 2),
+
+ /*
+ * Do not attempt to fetch the object if missing (even if fetch_is_missing is
+ * nonzero).
+ */
+ OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 4),
-/* Die if object corruption (not just an object being missing) was detected. */
-#define OBJECT_INFO_DIE_IF_CORRUPT 32
+ /* Die if object corruption (not just an object being missing) was detected. */
+ OBJECT_INFO_DIE_IF_CORRUPT = (1 << 5),
+
+ /*
+ * This is meant for bulk prefetching of missing blobs in a partial
+ * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK.
+ */
+ OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK),
+};
/*
* Read object info from the object database and populate the `object_info`
@@ -410,6 +384,31 @@ int odb_has_object(struct object_database *odb,
int odb_freshen_object(struct object_database *odb,
const struct object_id *oid);
+/* Flags that can be passed to `odb_count_objects()`. */
+enum odb_count_objects_flags {
+ /*
+	 * Allow the number of objects to be estimated. This flag essentially
+	 * asks the backend to trade accuracy for speed. The exact details of
+	 * how these estimations happen are backend-specific. Some backends may
+ * not honor this flag at all.
+ */
+ ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
+
+ /*
+ * Also estimate objects that are stored in an unoptimized format. This
+ * flag may be ignored in case a backend does not discern between
+ * unoptimized/optimized formats.
+ */
+ ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED = (1 << 1),
+};
+
+/*
+ * Count the number of objects in the object database. This function does not
+ * account for reachability and may count objects multiple times.
+ */
+unsigned long odb_count_objects(struct object_database *odb,
+ unsigned flags);
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect);
@@ -445,26 +444,44 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for for_each_*_object(). */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
@@ -513,4 +530,9 @@ int odb_write_object_stream(struct object_database *odb,
struct odb_write_stream *stream, size_t len,
struct object_id *oid);
+void parse_alternates(const char *string,
+ int sep,
+ const char *relative_base,
+ struct strvec *out);
+
#endif /* ODB_H */
diff --git a/odb/source-files.c b/odb/source-files.c
new file mode 100644
index 0000000000000000000000000000000000000000..2a74106a1027f2e4bb58740f91c26cb79a2e4300
--- /dev/null
+++ b/odb/source-files.c
@@ -0,0 +1,226 @@
+#include "git-compat-util.h"
+#include "gettext.h"
+#include "lockfile.h"
+#include "object-file.h"
+#include "odb.h"
+#include "odb/source.h"
+#include "odb/source-files.h"
+#include "packfile.h"
+#include "strbuf.h"
+#include "write-or-die.h"
+
+static void odb_source_files_free(struct odb_source *source)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ odb_source_loose_free(files->loose);
+ packfile_store_free(files->packed);
+ odb_source_release(&files->base);
+ free(files);
+}
+
+static void odb_source_files_close(struct odb_source *source)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ packfile_store_close(files->packed);
+}
+
+static void odb_source_files_reprepare(struct odb_source *source)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ odb_source_loose_reprepare(&files->base);
+ packfile_store_reprepare(files->packed);
+}
+
+static int odb_source_files_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+
+ if (!packfile_store_read_object_info(files->packed, oid, oi, flags))
+ return 0;
+
+ /*
+ * A reprepare doesn't cause new loose objects to show up, so we skip
+ * reading loose objects in that case.
+ */
+ if (!(flags & OBJECT_INFO_AFTER_REPREPARE) &&
+ !odb_source_loose_read_object_info(source, oid, oi, flags))
+ return 0;
+
+ return -1;
+}
+
+static int odb_source_files_read_object_stream(struct odb_read_stream **out,
+ struct odb_source *source,
+ const struct object_id *oid)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ if (!packfile_store_read_object_stream(out, files->packed, oid) ||
+ !odb_source_loose_read_object_stream(out, source, oid))
+ return 0;
+ return -1;
+}
+
+static int odb_source_files_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ int ret;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ ret = packfile_store_for_each_object(files->packed, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static unsigned long odb_source_files_count_objects(struct odb_source *source,
+ unsigned flags)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ unsigned long count = 0;
+
+ count += packfile_store_count_objects(files->packed, flags);
+ if (flags & ODB_COUNT_OBJECTS_INCLUDE_UNOPTIMIZED)
+ count += odb_source_loose_count_objects(source, flags);
+
+ return count;
+}
+
+static int odb_source_files_freshen_object(struct odb_source *source,
+ const struct object_id *oid)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ if (packfile_store_freshen_object(files->packed, oid) ||
+ odb_source_loose_freshen_object(source, oid))
+ return 1;
+ return 0;
+}
+
+static int odb_source_files_write_object(struct odb_source *source,
+ const void *buf, unsigned long len,
+ enum object_type type,
+ struct object_id *oid,
+ struct object_id *compat_oid,
+ unsigned flags)
+{
+ return odb_source_loose_write_object(source, buf, len, type,
+ oid, compat_oid, flags);
+}
+
+static int odb_source_files_write_object_stream(struct odb_source *source,
+ struct odb_write_stream *stream,
+ size_t len,
+ struct object_id *oid)
+{
+ return odb_source_loose_write_stream(source, stream, len, oid);
+}
+
+static int odb_source_files_read_alternates(struct odb_source *source,
+ struct strvec *out)
+{
+ struct strbuf buf = STRBUF_INIT;
+ char *path;
+
+ path = xstrfmt("%s/info/alternates", source->path);
+ if (strbuf_read_file(&buf, path, 1024) < 0) {
+ warn_on_fopen_errors(path);
+ free(path);
+ return 0;
+ }
+ parse_alternates(buf.buf, '\n', source->path, out);
+
+ strbuf_release(&buf);
+ free(path);
+ return 0;
+}
+
+static int odb_source_files_write_alternate(struct odb_source *source,
+ const char *alternate)
+{
+ struct lock_file lock = LOCK_INIT;
+ char *path = xstrfmt("%s/%s", source->path, "info/alternates");
+ FILE *in, *out;
+ int found = 0;
+ int ret;
+
+ hold_lock_file_for_update(&lock, path, LOCK_DIE_ON_ERROR);
+ out = fdopen_lock_file(&lock, "w");
+ if (!out) {
+ ret = error_errno(_("unable to fdopen alternates lockfile"));
+ goto out;
+ }
+
+ in = fopen(path, "r");
+ if (in) {
+ struct strbuf line = STRBUF_INIT;
+
+ while (strbuf_getline(&line, in) != EOF) {
+ if (!strcmp(alternate, line.buf)) {
+ found = 1;
+ break;
+ }
+ fprintf_or_die(out, "%s\n", line.buf);
+ }
+
+ strbuf_release(&line);
+ fclose(in);
+ } else if (errno != ENOENT) {
+ ret = error_errno(_("unable to read alternates file"));
+ goto out;
+ }
+
+ if (found) {
+ rollback_lock_file(&lock);
+ } else {
+ fprintf_or_die(out, "%s\n", alternate);
+ if (commit_lock_file(&lock)) {
+ ret = error_errno(_("unable to move new alternates file into place"));
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ free(path);
+ return ret;
+}
+
+struct odb_source_files *odb_source_files_new(struct object_database *odb,
+ const char *path,
+ bool local)
+{
+ struct odb_source_files *files;
+
+ CALLOC_ARRAY(files, 1);
+ odb_source_init(&files->base, odb, path, local);
+ files->loose = odb_source_loose_new(&files->base);
+ files->packed = packfile_store_new(&files->base);
+
+ files->base.free = odb_source_files_free;
+ files->base.close = odb_source_files_close;
+ files->base.reprepare = odb_source_files_reprepare;
+ files->base.read_object_info = odb_source_files_read_object_info;
+ files->base.read_object_stream = odb_source_files_read_object_stream;
+ files->base.for_each_object = odb_source_files_for_each_object;
+ files->base.count_objects = odb_source_files_count_objects;
+ files->base.freshen_object = odb_source_files_freshen_object;
+ files->base.write_object = odb_source_files_write_object;
+ files->base.write_object_stream = odb_source_files_write_object_stream;
+ files->base.read_alternates = odb_source_files_read_alternates;
+ files->base.write_alternate = odb_source_files_write_alternate;
+
+ return files;
+}
diff --git a/odb/source-files.h b/odb/source-files.h
new file mode 100644
index 0000000000000000000000000000000000000000..e64187073567cedc3008859296382c1ba0124867
--- /dev/null
+++ b/odb/source-files.h
@@ -0,0 +1,33 @@
+#ifndef ODB_FILES_H
+#define ODB_FILES_H
+
+#include "odb/source.h"
+
+struct odb_source_loose;
+struct packfile_store;
+
+/*
+ * The files object database source uses a combination of loose objects and
+ * packfiles. It is the default backend used by Git to store objects.
+ */
+struct odb_source_files {
+ struct odb_source base;
+ struct odb_source_loose *loose;
+ struct packfile_store *packed;
+};
+
+/* Allocate and initialize a new object source. */
+struct odb_source_files *odb_source_files_new(struct object_database *odb,
+ const char *path,
+ bool local);
+
+/*
+ * Cast the given object database source to the files backend. This will cause
+ * a BUG in case the source doesn't use this backend.
+ */
+static inline struct odb_source_files *odb_source_files_downcast(struct odb_source *source)
+{
+ return (struct odb_source_files *) source;
+}
+
+#endif
diff --git a/odb/source.c b/odb/source.c
new file mode 100644
index 0000000000000000000000000000000000000000..51bb1cde740f282126f6ed21c3b4821616745776
--- /dev/null
+++ b/odb/source.c
@@ -0,0 +1,57 @@
+#include "git-compat-util.h"
+#include "gettext.h"
+#include "object-file.h"
+#include "odb/source-files.h"
+#include "odb/source.h"
+#include "packfile.h"
+
+struct odb_source *odb_source_new(struct object_database *odb,
+ const char *path,
+ bool local)
+{
+ struct odb_source *source;
+ const char *schema_end;
+ char *schema;
+
+ schema_end = strstr(path, "://");
+ if (!schema_end)
+ return &odb_source_files_new(odb, path, local)->base;
+
+ schema = xstrndup(path, schema_end - path);
+ path = schema_end + 3;
+
+ if (!strcmp(schema, "files")) {
+ source = &odb_source_files_new(odb, path, local)->base;
+ goto out;
+ }
+
+ die(_("unknown object database source schema: '%s'"), schema);
+
+out:
+ free(schema);
+ return source;
+}
+
+void odb_source_init(struct odb_source *source,
+ struct object_database *odb,
+ const char *path,
+ bool local)
+{
+ source->odb = odb;
+ source->local = local;
+ source->path = xstrdup(path);
+}
+
+void odb_source_free(struct odb_source *source)
+{
+ if (!source)
+ return;
+ source->free(source);
+}
+
+void odb_source_release(struct odb_source *source)
+{
+ if (!source)
+ return;
+ free(source->path);
+}
diff --git a/odb/source.h b/odb/source.h
new file mode 100644
index 0000000000000000000000000000000000000000..7581fb19629c3d42954b82b81ec39e2c66b90456
--- /dev/null
+++ b/odb/source.h
@@ -0,0 +1,403 @@
+#ifndef ODB_SOURCE_H
+#define ODB_SOURCE_H
+
+struct object_info;
+struct odb_read_stream;
+struct odb_source;
+struct odb_write_stream;
+struct strvec;
+
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated the same as if you would call
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
+/*
+ * The source is the part of the object database that stores the actual
+ * objects. It thus encapsulates the logic to read and write the specific
+ * on-disk format. An object database can have multiple sources:
+ *
+ * - The primary source, which is typically located in "$GIT_DIR/objects".
+ * This is where new objects are usually written to.
+ *
+ * - Alternate sources, which are configured via "objects/info/alternates" or
+ * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These
+ * alternate sources are only used to read objects.
+ */
+struct odb_source {
+ struct odb_source *next;
+
+ /* Object database that owns this object source. */
+ struct object_database *odb;
+
+ /*
+ * Whether this is the local source of the owning
+ * repository, which would typically be its ".git/objects" directory.
+ * This local object directory is usually where objects would be
+ * written to.
+ */
+ bool local;
+
+ /*
+ * This object store is ephemeral, so there is no need to fsync.
+ */
+ int will_destroy;
+
+ /*
+ * Path to the source. If this is a relative path, it is relative to
+ * the current working directory.
+ */
+ char *path;
+
+ /*
+ * This callback is expected to free the underlying object database source and
+ * all associated resources. The function will never be called with a NULL pointer.
+ */
+ void (*free)(struct odb_source *source);
+
+ /*
+ * This callback is expected to close any open resources, like for
+ * example file descriptors or connections. The source is expected to
+ * still be usable after it has been closed. Closed resources may need
+ * to be reopened in that case.
+ */
+ void (*close)(struct odb_source *source);
+
+ /*
+ * This callback is expected to clear underlying caches of the object
+ * database source. The function is called when the repository has for
+ * example just been repacked so that new objects will become visible.
+ */
+ void (*reprepare)(struct odb_source *source);
+
+ /*
+ * This callback is expected to read object information from the object
+ * database source. The object info will be partially populated with
+ * pointers for each bit of information that was requested by the
+ * caller.
+ *
+ * The flags field is a combination of `OBJECT_INFO` flags. Only the
+ * following fields need to be handled by the backend:
+ *
+ * - `OBJECT_INFO_QUICK` indicates it is fine to use caches without
+ * re-verifying the data.
+ *
+ * - `OBJECT_INFO_AFTER_REPREPARE` indicates that the initial object
+ * lookup has failed and that the object sources have just been
+ * reloaded. The source should only look up objects via sources
+ * that may have been changed due to the reload.
+ *
+ * The callback is expected to return a negative error code in case
+ * reading the object has failed, 0 otherwise.
+ */
+ int (*read_object_info)(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags);
+
+ /*
+ * This callback is expected to create a new read stream that can be
+ * used to stream the object identified by the given ID.
+ *
+ * The callback is expected to return a negative error code in case
+ * creating the object stream has failed, 0 otherwise.
+ */
+ int (*read_object_stream)(struct odb_read_stream **out,
+ struct odb_source *source,
+ const struct object_id *oid);
+
+ /*
+ * This callback is expected to iterate over all objects stored in this
+ * source and invoke the callback function for each of them. It is
+ * valid to yield the same object multiple times. A non-zero exit code
+ * from the object callback shall abort iteration.
+ *
+ * The optional `oi` structure shall be populated similar to how an individual
+ * call to `odb_source_read_object_info()` would have behaved. If the caller
+ * passes a `NULL` pointer then the object itself shall not be read.
+ *
+ * The callback is expected to return a negative error code in case the
+ * iteration has failed to read all objects, 0 otherwise. When the
+ * callback function returns a non-zero error code then that error code
+ * should be returned.
+ */
+ int (*for_each_object)(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
+ /*
+ * This callback is expected to count the number of objects that exist
+ * in the given source. It is fine to both under- and overcount the
+ * objects.
+ *
+ * The flags field is a combination of `enum odb_count_objects_flags`
+ * flags.
+ *
+ * The callback is expected to return the number of objects.
+ */
+ unsigned long (*count_objects)(struct odb_source *source,
+ unsigned flags);
+
+ /*
+ * This callback is expected to freshen the given object so that its
+ * last access time is set to the current time. This is used to ensure
+ * that objects that are recent will not get garbage collected even if
+ * they were unreachable.
+ *
+ * Returns 0 in case the object does not exist, 1 in case the object
+ * has been freshened.
+ */
+ int (*freshen_object)(struct odb_source *source,
+ const struct object_id *oid);
+
+ /*
+ * This callback is expected to persist the given object into the
+ * object source. In case the object already exists it shall be
+ * freshened.
+ *
+ * The flags field is a combination of `WRITE_OBJECT` flags.
+ *
+ * The resulting object ID (and optionally the compatibility object ID)
+ * shall be written into the out pointers. The callback is expected to
+ * return 0 on success, a negative error code otherwise.
+ */
+ int (*write_object)(struct odb_source *source,
+ const void *buf, unsigned long len,
+ enum object_type type,
+ struct object_id *oid,
+ struct object_id *compat_oid,
+ unsigned flags);
+
+ /*
+ * This callback is expected to persist the given object stream into
+ * the object source.
+ *
+ * The resulting object ID shall be written into the out pointer. The
+ * callback is expected to return 0 on success, a negative error code
+ * otherwise.
+ */
+ int (*write_object_stream)(struct odb_source *source,
+ struct odb_write_stream *stream, size_t len,
+ struct object_id *oid);
+
+ /*
+ * This callback is expected to read the list of alternate object
+ * database sources connected to it and write them into the `strvec`.
+ *
+ * The format is expected to follow the "objectStorage" extension
+ * format with `(backend://)?payload` syntax. If the payload contains
+ * paths, these paths must be resolved to absolute paths.
+ *
+ * The callback is expected to return 0 on success, a negative error
+ * code otherwise.
+ */
+ int (*read_alternates)(struct odb_source *source,
+ struct strvec *out);
+
+ /*
+ * This callback is expected to persist the singular alternate passed
+ * to it into its list of alternates. Any pre-existing alternates are
+ * expected to remain active. Subsequent calls to `read_alternates` are
+ * thus expected to yield the pre-existing list of alternates plus the
+ * newly added alternate appended to its end.
+ *
+ * The callback is expected to return 0 on success, a negative error
+ * code otherwise.
+ */
+ int (*write_alternate)(struct odb_source *source,
+ const char *alternate);
+};
+
+/*
+ * Allocate and initialize a new source for the given object database located
+ * at `path`. `local` indicates whether or not the source is the local and thus
+ * primary object source of the object database.
+ */
+struct odb_source *odb_source_new(struct object_database *odb,
+ const char *path,
+ bool local);
+
+/*
+ * Initialize the source for the given object database located at `path`.
+ * `local` indicates whether or not the source is the local and thus primary
+ * object source of the object database.
+ *
+ * This function is only supposed to be called by specific object source
+ * implementations.
+ */
+void odb_source_init(struct odb_source *source,
+ struct object_database *odb,
+ const char *path,
+ bool local);
+
+/*
+ * Free the object database source, releasing all associated resources and
+ * freeing the structure itself.
+ */
+void odb_source_free(struct odb_source *source);
+
+/*
+ * Release the object database source, releasing all associated resources.
+ *
+ * This function is only supposed to be called by specific object source
+ * implementations.
+ */
+void odb_source_release(struct odb_source *source);
+
+/*
+ * Close the object database source without releasing the underlying data. The
+ * source can still be used going forward, but it first needs to be reopened.
+ * This can be useful to reduce resource usage.
+ */
+static inline void odb_source_close(struct odb_source *source)
+{
+ source->close(source);
+}
+
+/*
+ * Reprepare the object database source and clear any caches. Depending on the
+ * backend used this may have the effect that concurrently-written objects
+ * become visible.
+ */
+static inline void odb_source_reprepare(struct odb_source *source)
+{
+ source->reprepare(source);
+}
+
+/*
+ * Read an object from the object database source identified by its object ID.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static inline int odb_source_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ return source->read_object_info(source, oid, oi, flags);
+}
+
+/*
+ * Create a new read stream for the given object ID. Returns 0 on success, a
+ * negative error code otherwise.
+ */
+static inline int odb_source_read_object_stream(struct odb_read_stream **out,
+ struct odb_source *source,
+ const struct object_id *oid)
+{
+ return source->read_object_stream(out, source, oid);
+}
+
+/*
+ * Iterate through all objects contained in the given source and invoke the
+ * callback function for each of them. Returning a non-zero code from the
+ * callback function aborts iteration. There is no guarantee that objects
+ * are only iterated over once.
+ *
+ * The optional `oi` structure shall be populated similar to how an individual
+ * call to `odb_source_read_object_info()` would have behaved. If the caller
+ * passes a `NULL` pointer then the object itself shall not be read.
+ *
+ * The flags argument is a bitfield of `ODB_FOR_EACH_OBJECT_*` flags. Not all flags may
+ * apply to a specific backend, so whether or not they are honored is defined
+ * by the implementation.
+ *
+ * Returns 0 when all objects have been iterated over, a negative error code in
+ * case iteration has failed, or a non-zero value returned from the callback.
+ */
+static inline int odb_source_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ return source->for_each_object(source, oi, cb, cb_data, flags);
+}
+
+/*
+ * Count the number of objects that exist in the given object database source.
+ */
+static inline unsigned long odb_source_count_objects(struct odb_source *source,
+ unsigned flags)
+{
+ return source->count_objects(source, flags);
+}
+
+/*
+ * Freshen an object in the object database by updating its timestamp.
+ * Returns 1 in case the object has been freshened, 0 in case the object does
+ * not exist.
+ */
+static inline int odb_source_freshen_object(struct odb_source *source,
+ const struct object_id *oid)
+{
+ return source->freshen_object(source, oid);
+}
+
+/*
+ * Write an object into the object database source. Returns 0 on success, a
+ * negative error code otherwise. Populates the given out pointers for the
+ * object ID and the compatibility object ID, if non-NULL.
+ */
+static inline int odb_source_write_object(struct odb_source *source,
+ const void *buf, unsigned long len,
+ enum object_type type,
+ struct object_id *oid,
+ struct object_id *compat_oid,
+ unsigned flags)
+{
+ return source->write_object(source, buf, len, type, oid,
+ compat_oid, flags);
+}
+
+/*
+ * Write an object into the object database source via a stream. The overall
+ * length of the object must be known in advance.
+ *
+ * Return 0 on success, a negative error code otherwise. Populates the given
+ * out pointer for the object ID.
+ */
+static inline int odb_source_write_object_stream(struct odb_source *source,
+ struct odb_write_stream *stream,
+ size_t len,
+ struct object_id *oid)
+{
+ return source->write_object_stream(source, stream, len, oid);
+}
+
+/*
+ * Read the list of alternative object database sources from the given backend
+ * and populate the `strvec` with them. The listing is not recursive -- that
+ * is, if any of the yielded alternate sources has alternates itself, those
+ * will not be yielded as part of this function call.
+ *
+ * Return 0 on success, a negative error code otherwise.
+ */
+static inline int odb_source_read_alternates(struct odb_source *source,
+ struct strvec *out)
+{
+ return source->read_alternates(source, out);
+}
+
+/*
+ * Write and persist a new alternate object database source for the given
+ * source. Any preexisting alternates are expected to stay valid, and the new
+ * alternate shall be appended to the end of the list.
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static inline int odb_source_write_alternate(struct odb_source *source,
+ const char *alternate)
+{
+ return source->write_alternate(source, alternate);
+}
+
+#endif
diff --git a/odb/streaming.c b/odb/streaming.c
new file mode 100644
index 0000000000000000000000000000000000000000..14a586292d69394c87d2faa646290e80e0ea838b
--- /dev/null
+++ b/odb/streaming.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+
+#include "git-compat-util.h"
+#include "convert.h"
+#include "environment.h"
+#include "repository.h"
+#include "object-file.h"
+#include "odb.h"
+#include "odb/streaming.h"
+#include "replace-object.h"
+#include "packfile.h"
+
+#define FILTER_BUFFER (1024*16)
+
+/*****************************************************************
+ *
+ * Filtered stream
+ *
+ *****************************************************************/
+
+struct odb_filtered_read_stream {
+ struct odb_read_stream base;
+ struct odb_read_stream *upstream;
+ struct stream_filter *filter;
+ char ibuf[FILTER_BUFFER];
+ char obuf[FILTER_BUFFER];
+ int i_end, i_ptr;
+ int o_end, o_ptr;
+ int input_finished;
+};
+
+static int close_istream_filtered(struct odb_read_stream *_fs)
+{
+ struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
+ free_stream_filter(fs->filter);
+ return odb_read_stream_close(fs->upstream);
+}
+
+static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf,
+ size_t sz)
+{
+ struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
+ size_t filled = 0;
+
+ while (sz) {
+ /* do we already have filtered output? */
+ if (fs->o_ptr < fs->o_end) {
+ size_t to_move = fs->o_end - fs->o_ptr;
+ if (sz < to_move)
+ to_move = sz;
+ memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
+ fs->o_ptr += to_move;
+ sz -= to_move;
+ filled += to_move;
+ continue;
+ }
+ fs->o_end = fs->o_ptr = 0;
+
+ /* do we have anything to feed the filter with? */
+ if (fs->i_ptr < fs->i_end) {
+ size_t to_feed = fs->i_end - fs->i_ptr;
+ size_t to_receive = FILTER_BUFFER;
+ if (stream_filter(fs->filter,
+ fs->ibuf + fs->i_ptr, &to_feed,
+ fs->obuf, &to_receive))
+ return -1;
+ fs->i_ptr = fs->i_end - to_feed;
+ fs->o_end = FILTER_BUFFER - to_receive;
+ continue;
+ }
+
+ /* tell the filter to drain upon no more input */
+ if (fs->input_finished) {
+ size_t to_receive = FILTER_BUFFER;
+ if (stream_filter(fs->filter,
+ NULL, NULL,
+ fs->obuf, &to_receive))
+ return -1;
+ fs->o_end = FILTER_BUFFER - to_receive;
+ if (!fs->o_end)
+ break;
+ continue;
+ }
+ fs->i_end = fs->i_ptr = 0;
+
+ /* refill the input from the upstream */
+ if (!fs->input_finished) {
+ fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER);
+ if (fs->i_end < 0)
+ return -1;
+ if (fs->i_end)
+ continue;
+ }
+ fs->input_finished = 1;
+ }
+ return filled;
+}
+
+static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
+ struct stream_filter *filter)
+{
+ struct odb_filtered_read_stream *fs;
+
+ CALLOC_ARRAY(fs, 1);
+ fs->base.close = close_istream_filtered;
+ fs->base.read = read_istream_filtered;
+ fs->upstream = st;
+ fs->filter = filter;
+ fs->base.size = -1; /* unknown */
+ fs->base.type = st->type;
+
+ return &fs->base;
+}
+
+/*****************************************************************
+ *
+ * In-core stream
+ *
+ *****************************************************************/
+
+struct odb_incore_read_stream {
+ struct odb_read_stream base;
+ char *buf; /* from odb_read_object_info_extended() */
+ unsigned long read_ptr;
+};
+
+static int close_istream_incore(struct odb_read_stream *_st)
+{
+ struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
+ free(st->buf);
+ return 0;
+}
+
+static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+ struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
+ size_t read_size = sz;
+ size_t remainder = st->base.size - st->read_ptr;
+
+ if (remainder <= read_size)
+ read_size = remainder;
+ if (read_size) {
+ memcpy(buf, st->buf + st->read_ptr, read_size);
+ st->read_ptr += read_size;
+ }
+ return read_size;
+}
+
+static int open_istream_incore(struct odb_read_stream **out,
+ struct object_database *odb,
+ const struct object_id *oid)
+{
+ struct object_info oi = OBJECT_INFO_INIT;
+ struct odb_incore_read_stream stream = {
+ .base.close = close_istream_incore,
+ .base.read = read_istream_incore,
+ };
+ struct odb_incore_read_stream *st;
+ int ret;
+
+ oi.typep = &stream.base.type;
+ oi.sizep = &stream.base.size;
+ oi.contentp = (void **)&stream.buf;
+ ret = odb_read_object_info_extended(odb, oid, &oi,
+ OBJECT_INFO_DIE_IF_CORRUPT);
+ if (ret)
+ return ret;
+
+ CALLOC_ARRAY(st, 1);
+ *st = stream;
+ *out = &st->base;
+
+ return 0;
+}
+
+/*****************************************************************************
+ * static helper variables and functions for users of the streaming interface
+ *****************************************************************************/
+
+static int istream_source(struct odb_read_stream **out,
+ struct object_database *odb,
+ const struct object_id *oid)
+{
+ struct odb_source *source;
+
+ odb_prepare_alternates(odb);
+ for (source = odb->sources; source; source = source->next)
+ if (!odb_source_read_object_stream(out, source, oid))
+ return 0;
+
+ return open_istream_incore(out, odb, oid);
+}
+
+/****************************************************************
+ * Users of streaming interface
+ ****************************************************************/
+
+int odb_read_stream_close(struct odb_read_stream *st)
+{
+ int r = st->close(st);
+ free(st);
+ return r;
+}
+
+ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz)
+{
+ return st->read(st, buf, sz);
+}
+
+struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+ const struct object_id *oid,
+ struct stream_filter *filter)
+{
+ struct odb_read_stream *st;
+ const struct object_id *real = lookup_replace_object(odb->repo, oid);
+ int ret = istream_source(&st, odb, real);
+
+ if (ret)
+ return NULL;
+
+ if (filter) {
+ /* Add "&& !is_null_stream_filter(filter)" for performance */
+ struct odb_read_stream *nst = attach_stream_filter(st, filter);
+ if (!nst) {
+ odb_read_stream_close(st);
+ return NULL;
+ }
+ st = nst;
+ }
+
+ return st;
+}
+
+int odb_stream_blob_to_fd(struct object_database *odb,
+ int fd,
+ const struct object_id *oid,
+ struct stream_filter *filter,
+ int can_seek)
+{
+ struct odb_read_stream *st;
+ ssize_t kept = 0;
+ int result = -1;
+
+ st = odb_read_stream_open(odb, oid, filter);
+ if (!st) {
+ if (filter)
+ free_stream_filter(filter);
+ return result;
+ }
+ if (st->type != OBJ_BLOB)
+ goto close_and_exit;
+ for (;;) {
+ char buf[1024 * 16];
+ ssize_t wrote, holeto;
+ ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));
+
+ if (readlen < 0)
+ goto close_and_exit;
+ if (!readlen)
+ break;
+ if (can_seek && sizeof(buf) == readlen) {
+ for (holeto = 0; holeto < readlen; holeto++)
+ if (buf[holeto])
+ break;
+ if (readlen == holeto) {
+ kept += holeto;
+ continue;
+ }
+ }
+
+ if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
+ goto close_and_exit;
+ else
+ kept = 0;
+ wrote = write_in_full(fd, buf, readlen);
+
+ if (wrote < 0)
+ goto close_and_exit;
+ }
+ if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
+ xwrite(fd, "", 1) != 1))
+ goto close_and_exit;
+ result = 0;
+
+ close_and_exit:
+ odb_read_stream_close(st);
+ return result;
+}
diff --git a/odb/streaming.h b/odb/streaming.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7861f7e13c606af66d5b54b52b7b1cc3eb9adad
--- /dev/null
+++ b/odb/streaming.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef STREAMING_H
+#define STREAMING_H 1
+
+#include "object.h"
+
+struct object_database;
+struct odb_read_stream;
+struct stream_filter;
+
+typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *);
+typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t);
+
+/*
+ * A stream that can be used to read an object from the object database without
+ * loading all of it into memory.
+ */
+struct odb_read_stream {
+ odb_read_stream_close_fn close;
+ odb_read_stream_read_fn read;
+ enum object_type type;
+ unsigned long size; /* inflated size of full object */
+};
+
+/*
+ * Create a new object stream for the given object database. An optional filter
+ * can be used to transform the object's content.
+ *
+ * Returns the stream on success, a `NULL` pointer otherwise.
+ */
+struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+ const struct object_id *oid,
+ struct stream_filter *filter);
+
+/*
+ * Close the given read stream and release all resources associated with it.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+int odb_read_stream_close(struct odb_read_stream *stream);
+
+/*
+ * Read data from the stream into the buffer. Returns 0 on EOF and the number
+ * of bytes read on success. Returns a negative error code in case reading from
+ * the stream fails.
+ */
+ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);
+
+/*
+ * Look up the object by its ID and write the full contents to the file
+ * descriptor. The object must be a blob, or the function will fail. When
+ * provided, the filter is used to transform the blob contents.
+ *
+ * `can_seek` should be set to 1 in case the given file descriptor can be
+ * seek(3p)'d on. This is used to support files with holes in case a
+ * significant portion of the blob contains NUL bytes.
+ *
+ * Returns a negative error code on failure, 0 on success.
+ */
+int odb_stream_blob_to_fd(struct object_database *odb,
+ int fd,
+ const struct object_id *oid,
+ struct stream_filter *filter,
+ int can_seek);
+
+#endif /* STREAMING_H */
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 8ca79725b1d4380377fc4c0e68141c7fc1968401..f466ed2ddcb4e98f94f5bfe36883f184c1a3e8a0 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -441,11 +441,11 @@ char *midx_bitmap_filename(struct multi_pack_index *midx)
struct strbuf buf = STRBUF_INIT;
if (midx->has_chain)
get_split_midx_filename_ext(midx->source, &buf,
- get_midx_checksum(midx),
+ get_midx_hash(midx),
MIDX_EXT_BITMAP);
else
get_midx_filename_ext(midx->source, &buf,
- get_midx_checksum(midx),
+ get_midx_hash(midx),
MIDX_EXT_BITMAP);
return strbuf_detach(&buf, NULL);
@@ -502,7 +502,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (load_bitmap_header(bitmap_git) < 0)
goto cleanup;
- if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
+ if (!hasheq(get_midx_hash(bitmap_git->midx), bitmap_git->checksum,
bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
@@ -2820,8 +2820,7 @@ void test_bitmap_walk(struct rev_info *revs)
if (bitmap_is_midx(found))
fprintf_ln(stderr, "Located via MIDX '%s'.",
- hash_to_hex_algop(get_midx_checksum(found->midx),
- revs->repo->hash_algo));
+ get_midx_checksum(found->midx));
else
fprintf_ln(stderr, "Located via pack '%s'.",
hash_to_hex_algop(found->pack->hash,
diff --git a/pack-revindex.c b/pack-revindex.c
index d0791cc4938fa2c784a1a585210552ee2c6d06fa..016195ceb93ec6891c974a40e40e40964bc54bc3 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -390,11 +390,11 @@ int load_midx_revindex(struct multi_pack_index *m)
if (m->has_chain)
get_split_midx_filename_ext(m->source, &revindex_name,
- get_midx_checksum(m),
+ get_midx_hash(m),
MIDX_EXT_REV);
else
get_midx_filename_ext(m->source, &revindex_name,
- get_midx_checksum(m),
+ get_midx_hash(m),
MIDX_EXT_REV);
ret = load_revindex_from_disk(m->source->odb->repo->hash_algo,
diff --git a/packfile.c b/packfile.c
index 3d8b994a617e81e08b06d5b3b3aaa2999c1f73a0..3167ab229842749176964c3e4e0d8e4e1cf0c4dd 100644
--- a/packfile.c
+++ b/packfile.c
@@ -20,6 +20,7 @@
#include "tree.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
#include "midx.h"
#include "commit-graph.h"
#include "pack-revindex.h"
@@ -354,16 +355,19 @@ static void scan_windows(struct packed_git *p,
}
}
-static int unuse_one_window(struct packed_git *current)
+static int unuse_one_window(struct object_database *odb)
{
+ struct odb_source *source;
struct packfile_list_entry *e;
struct packed_git *lru_p = NULL;
struct pack_window *lru_w = NULL, *lru_l = NULL;
- if (current)
- scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (e = current->repo->objects->packfiles->packs.head; e; e = e->next)
- scan_windows(e->pack, &lru_p, &lru_w, &lru_l);
+ for (source = odb->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ for (e = files->packed->packs.head; e; e = e->next)
+ scan_windows(e->pack, &lru_p, &lru_w, &lru_l);
+ }
+
if (lru_p) {
munmap(lru_w->base, lru_w->len);
pack_mapped -= lru_w->len;
@@ -528,15 +532,19 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
static int close_one_pack(struct repository *r)
{
+ struct odb_source *source;
struct packfile_list_entry *e;
struct packed_git *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (e = r->objects->packfiles->packs.head; e; e = e->next) {
- if (e->pack->pack_fd == -1)
- continue;
- find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse);
+ for (source = r->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ for (e = files->packed->packs.head; e; e = e->next) {
+ if (e->pack->pack_fd == -1)
+ continue;
+ find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse);
+ }
}
if (lru_p)
@@ -739,8 +747,8 @@ unsigned char *use_pack(struct packed_git *p,
win->len = (size_t)len;
pack_mapped += win->len;
- while (settings->packed_git_limit < pack_mapped
- && unuse_one_window(p))
+ while (settings->packed_git_limit < pack_mapped &&
+ unuse_one_window(p->repo->objects))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
PROT_READ, MAP_PRIVATE,
@@ -875,7 +883,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
p = strmap_get(&store->packs_by_path, key.buf);
if (!p) {
- p = add_packed_git(store->odb->repo, idx_path,
+ p = add_packed_git(store->source->odb->repo, idx_path,
strlen(idx_path), local);
if (p)
packfile_store_add_pack(store, p);
@@ -885,22 +893,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
return p;
}
-int packfile_store_freshen_object(struct packfile_store *store,
- const struct object_id *oid)
-{
- struct pack_entry e;
- if (!find_pack_entry(store->odb->repo, oid, &e))
- return 0;
- if (e.p->is_cruft)
- return 0;
- if (e.p->freshened)
- return 1;
- if (utime(e.p->pack_name, NULL))
- return 0;
- e.p->freshened = 1;
- return 1;
-}
-
void (*report_garbage)(unsigned seen_bits, const char *path);
static void report_helper(const struct string_list *list,
@@ -990,23 +982,23 @@ void for_each_file_in_pack_dir(const char *objdir,
}
struct prepare_pack_data {
- struct repository *r;
+ struct odb_source *source;
struct string_list *garbage;
- int local;
- struct multi_pack_index *m;
};
static void prepare_pack(const char *full_name, size_t full_name_len,
const char *file_name, void *_data)
{
struct prepare_pack_data *data = (struct prepare_pack_data *)_data;
+ struct odb_source_files *files = odb_source_files_downcast(data->source);
size_t base_len = full_name_len;
if (strip_suffix_mem(full_name, &base_len, ".idx") &&
- !(data->m && midx_contains_pack(data->m, file_name))) {
+ !(files->packed->midx &&
+ midx_contains_pack(files->packed->midx, file_name))) {
char *trimmed_path = xstrndup(full_name, full_name_len);
- packfile_store_load_pack(data->r->objects->packfiles,
- trimmed_path, data->local);
+ packfile_store_load_pack(files->packed,
+ trimmed_path, data->source->local);
free(trimmed_path);
}
@@ -1035,10 +1027,8 @@ static void prepare_packed_git_one(struct odb_source *source)
{
struct string_list garbage = STRING_LIST_INIT_DUP;
struct prepare_pack_data data = {
- .m = source->midx,
- .r = source->odb->repo,
+ .source = source,
.garbage = &garbage,
- .local = source->local,
};
for_each_file_in_pack_dir(source->path, prepare_pack, &data);
@@ -1078,16 +1068,11 @@ static int sort_pack(const struct packfile_list_entry *a,
void packfile_store_prepare(struct packfile_store *store)
{
- struct odb_source *source;
-
if (store->initialized)
return;
- odb_prepare_alternates(store->odb);
- for (source = store->odb->sources; source; source = source->next) {
- prepare_multi_pack_index_one(source);
- prepare_packed_git_one(source);
- }
+ prepare_multi_pack_index_one(store->source);
+ prepare_packed_git_one(store->source);
sort_packs(&store->packs.head, sort_pack);
for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
@@ -1107,10 +1092,8 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor
{
packfile_store_prepare(store);
- for (struct odb_source *source = store->odb->sources; source; source = source->next) {
- struct multi_pack_index *m = source->midx;
- if (!m)
- continue;
+ if (store->midx) {
+ struct multi_pack_index *m = store->midx;
for (uint32_t i = 0; i < m->num_packs + m->num_packs_in_base; i++)
prepare_midx_pack(m, i);
}
@@ -1118,37 +1101,24 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor
return store->packs.head;
}
-/*
- * Give a fast, rough count of the number of objects in the repository. This
- * ignores loose objects completely. If you have a lot of them, then either
- * you should repack because your performance will be awful, or they are
- * all unreachable objects about to be pruned, in which case they're not really
- * interesting as a measure of repo size in the first place.
- */
-unsigned long repo_approximate_object_count(struct repository *r)
+unsigned long packfile_store_count_objects(struct packfile_store *store,
+ unsigned flags UNUSED)
{
- if (!r->objects->approximate_object_count_valid) {
- struct odb_source *source;
- unsigned long count = 0;
- struct packed_git *p;
-
- odb_prepare_alternates(r->objects);
+ struct packfile_list_entry *e;
+ struct multi_pack_index *m;
+ unsigned long count = 0;
- for (source = r->objects->sources; source; source = source->next) {
- struct multi_pack_index *m = get_multi_pack_index(source);
- if (m)
- count += m->num_objects + m->num_objects_in_base;
- }
+ m = get_multi_pack_index(store->source);
+ if (m)
+ count += m->num_objects + m->num_objects_in_base;
- repo_for_each_pack(r, p) {
- if (p->multi_pack_index || open_pack_index(p))
- continue;
- count += p->num_objects;
- }
- r->objects->approximate_object_count = count;
- r->objects->approximate_object_count_valid = 1;
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ if (e->pack->multi_pack_index || open_pack_index(e->pack))
+ continue;
+ count += e->pack->num_objects;
}
- return r->objects->approximate_object_count;
+
+ return count;
}
unsigned long unpack_object_header_buffer(const unsigned char *buf,
@@ -1265,11 +1235,17 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid)
const struct packed_git *has_packed_and_bad(struct repository *r,
const struct object_id *oid)
{
- struct packfile_list_entry *e;
+ struct odb_source *source;
+
+ for (source = r->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct packfile_list_entry *e;
+
+ for (e = files->packed->packs.head; e; e = e->next)
+ if (oidset_contains(&e->pack->bad_objects, oid))
+ return e->pack;
+ }
- for (e = r->objects->packfiles->packs.head; e; e = e->next)
- if (oidset_contains(&e->pack->bad_objects, oid))
- return e->pack;
return NULL;
}
@@ -1595,13 +1571,15 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
-int packed_object_info(struct repository *r, struct packed_git *p,
- off_t obj_offset, struct object_info *oi)
+static int packed_object_info_with_index_pos(struct repository *r, struct packed_git *p,
+ off_t obj_offset, uint32_t *maybe_index_pos,
+ struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type;
+ uint32_t pack_pos;
/*
* We always get the representation type, but only convert it to
@@ -1635,16 +1613,34 @@ int packed_object_info(struct repository *r, struct packed_git *p,
}
}
- if (oi->disk_sizep) {
- uint32_t pos;
- if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
type = OBJ_BAD;
goto out;
}
+ }
+
+ if (oi->disk_sizep)
+ *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
- *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
+ if (oi->mtimep) {
+ if (p->is_cruft) {
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
+ die(_("could not load cruft pack .mtimes"));
+
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
+ else
+ index_pos = pack_pos_to_index(p, pack_pos);
+
+ *oi->mtimep = nth_packed_mtime(p, index_pos);
+ } else {
+ *oi->mtimep = p->mtime;
+ }
}
if (oi->typep) {
@@ -1671,14 +1667,33 @@ int packed_object_info(struct repository *r, struct packed_git *p,
oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
- oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
- OI_PACKED;
+ oi->whence = OI_PACKED;
+ oi->u.packed.offset = obj_offset;
+ oi->u.packed.pack = p;
+
+ switch (type) {
+ case OBJ_REF_DELTA:
+ oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA;
+ break;
+ case OBJ_OFS_DELTA:
+ oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA;
+ break;
+ default:
+ oi->u.packed.type = PACKED_OBJECT_TYPE_FULL;
+ break;
+ }
out:
unuse_pack(&w_curs);
return type;
}
+int packed_object_info(struct repository *r, struct packed_git *p,
+ off_t obj_offset, struct object_info *oi)
+{
+ return packed_object_info_with_index_pos(r, p, obj_offset, NULL, oi);
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2105,50 +2120,97 @@ static int fill_pack_entry(const struct object_id *oid,
return 1;
}
-int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
+static int find_pack_entry(struct packfile_store *store,
+ const struct object_id *oid,
+ struct pack_entry *e)
{
struct packfile_list_entry *l;
- packfile_store_prepare(r->objects->packfiles);
-
- for (struct odb_source *source = r->objects->sources; source; source = source->next)
- if (source->midx && fill_midx_entry(source->midx, oid, e))
- return 1;
-
- if (!r->objects->packfiles->packs.head)
- return 0;
+ packfile_store_prepare(store);
+ if (store->midx && fill_midx_entry(store->midx, oid, e))
+ return 1;
- for (l = r->objects->packfiles->packs.head; l; l = l->next) {
+ for (l = store->packs.head; l; l = l->next) {
struct packed_git *p = l->pack;
if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) {
- if (!r->objects->packfiles->skip_mru_updates)
- packfile_list_prepend(&r->objects->packfiles->packs, p);
+ if (!store->skip_mru_updates)
+ packfile_list_prepend(&store->packs, p);
return 1;
}
}
+
+ return 0;
+}
+
+int packfile_store_freshen_object(struct packfile_store *store,
+ const struct object_id *oid)
+{
+ struct pack_entry e;
+ if (!find_pack_entry(store, oid, &e))
+ return 0;
+ if (e.p->is_cruft)
+ return 0;
+ if (e.p->freshened)
+ return 1;
+ if (utime(e.p->pack_name, NULL))
+ return 0;
+ e.p->freshened = 1;
+ return 1;
+}
+
+int packfile_store_read_object_info(struct packfile_store *store,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags UNUSED)
+{
+ struct pack_entry e;
+ int rtype;
+
+ if (!find_pack_entry(store, oid, &e))
+ return 1;
+
+ /*
+ * We know that the caller doesn't actually need the
+ * information below, so return early.
+ */
+ if (object_info_is_blank_request(oi)) {
+ oi->whence = OI_PACKED;
+ oi->u.packed.offset = e.offset;
+ oi->u.packed.pack = e.p;
+ oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN;
+ return 0;
+ }
+
+ rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi);
+ if (rtype < 0) {
+ mark_bad_packed_object(e.p, oid);
+ return -1;
+ }
+
return 0;
}
-static void maybe_invalidate_kept_pack_cache(struct repository *r,
+static void maybe_invalidate_kept_pack_cache(struct packfile_store *store,
unsigned flags)
{
- if (!r->objects->packfiles->kept_cache.packs)
+ if (!store->kept_cache.packs)
return;
- if (r->objects->packfiles->kept_cache.flags == flags)
+ if (store->kept_cache.flags == flags)
return;
- FREE_AND_NULL(r->objects->packfiles->kept_cache.packs);
- r->objects->packfiles->kept_cache.flags = 0;
+ FREE_AND_NULL(store->kept_cache.packs);
+ store->kept_cache.flags = 0;
}
-struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
+struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store,
+ unsigned flags)
{
- maybe_invalidate_kept_pack_cache(r, flags);
+ maybe_invalidate_kept_pack_cache(store, flags);
- if (!r->objects->packfiles->kept_cache.packs) {
+ if (!store->kept_cache.packs) {
struct packed_git **packs = NULL;
+ struct packfile_list_entry *e;
size_t nr = 0, alloc = 0;
- struct packed_git *p;
/*
* We want "all" packs here, because we need to cover ones that
@@ -2158,9 +2220,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
* covers, one kept and one not kept, but the midx returns only
* the non-kept version.
*/
- repo_for_each_pack(r, p) {
- if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) ||
- (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) {
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
+
+ if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) ||
+ (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) {
ALLOC_GROW(packs, nr + 1, alloc);
packs[nr++] = p;
}
@@ -2168,50 +2232,59 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
ALLOC_GROW(packs, nr + 1, alloc);
packs[nr] = NULL;
- r->objects->packfiles->kept_cache.packs = packs;
- r->objects->packfiles->kept_cache.flags = flags;
+ store->kept_cache.packs = packs;
+ store->kept_cache.flags = flags;
}
- return r->objects->packfiles->kept_cache.packs;
+ return store->kept_cache.packs;
}
-int find_kept_pack_entry(struct repository *r,
- const struct object_id *oid,
- unsigned flags,
- struct pack_entry *e)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
- struct packed_git **cache;
+ struct odb_source *source;
+ struct pack_entry e;
- for (cache = kept_pack_cache(r, flags); *cache; cache++) {
- struct packed_git *p = *cache;
- if (fill_pack_entry(oid, e, p))
- return 1;
+ odb_prepare_alternates(r->objects);
+ for (source = r->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ int ret = find_pack_entry(files->packed, oid, &e);
+ if (ret)
+ return ret;
}
return 0;
}
-int has_object_pack(struct repository *r, const struct object_id *oid)
-{
- struct pack_entry e;
- return find_pack_entry(r, oid, &e);
-}
-
int has_object_kept_pack(struct repository *r, const struct object_id *oid,
unsigned flags)
{
+ struct odb_source *source;
struct pack_entry e;
- return find_kept_pack_entry(r, oid, flags, &e);
+
+ for (source = r->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct packed_git **cache;
+
+ cache = packfile_store_get_kept_pack_cache(files->packed, flags);
+
+ for (; *cache; cache++) {
+ struct packed_git *p = *cache;
+ if (fill_pack_entry(oid, &e, p))
+ return 1;
+ }
+ }
+
+ return 0;
}
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ unsigned flags)
{
uint32_t i;
int r = 0;
- if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) {
if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2232,7 +2305,7 @@ int for_each_object_in_pack(struct packed_git *p,
* - in pack-order, it is pack position, which we must
* convert to an index position in order to get the oid.
*/
- if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER)
index_pos = pack_pos_to_index(p, i);
else
index_pos = i;
@@ -2248,60 +2321,127 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags)
+static int packfile_store_for_each_object_internal(struct packfile_store *store,
+ each_packed_object_fn cb,
+ void *data,
+ unsigned flags,
+ int *pack_errors)
{
- struct packed_git *p;
- int r = 0;
- int pack_errors = 0;
+ struct packfile_list_entry *e;
+ int ret = 0;
+
+ store->skip_mru_updates = true;
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
- repo->objects->packfiles->skip_mru_updates = true;
- repo_for_each_pack(repo, p) {
- if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
- pack_errors = 1;
+ *pack_errors = 1;
continue;
}
- r = for_each_object_in_pack(p, cb, data, flags);
- if (r)
+
+ ret = for_each_object_in_pack(p, cb, data, flags);
+ if (ret)
break;
}
- repo->objects->packfiles->skip_mru_updates = false;
- return r ? r : pack_errors;
+ store->skip_mru_updates = false;
+
+ return ret;
}
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+ unsigned flags;
+};
+
+static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t index_pos,
+ void *cb_data)
+{
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+
+ if (data->oi) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
+
+ if (object_info_is_blank_request(data->oi)) {
+ data->oi->whence = OI_PACKED;
+ data->oi->u.packed.offset = offset;
+ data->oi->u.packed.pack = pack;
+ data->oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN;
+ } else if (packed_object_info_with_index_pos(data->store->source->odb->repo,
+ pack, offset, &index_pos, data->oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
+ }
+
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ .flags = flags,
+ };
+ int pack_errors = 0, ret;
+
+ ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
+ &data, flags, &pack_errors);
+ if (ret)
+ return ret;
+
+ return pack_errors ? -1 : 0;
+}
+
+struct add_promisor_object_data {
+ struct repository *repo;
+ struct oidset *set;
+};
+
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos UNUSED,
- void *set_)
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
- struct oidset *set = set_;
+ struct add_promisor_object_data *data = cb_data;
struct object *obj;
int we_parsed_object;
- obj = lookup_object(pack->repo, oid);
+ obj = lookup_object(data->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(pack->repo, oid);
+ obj = parse_object(data->repo, oid);
}
if (!obj)
return 1;
- oidset_insert(set, oid);
+ oidset_insert(data->set, oid);
/*
* If this is a tree, commit, or tag, the objects it refers
@@ -2319,19 +2459,19 @@ static int add_promisor_object(const struct object_id *oid,
*/
return 0;
while (tree_entry_gently(&desc, &entry))
- oidset_insert(set, &entry.oid);
+ oidset_insert(data->set, &entry.oid);
if (we_parsed_object)
free_tree_buffer(tree);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, get_commit_tree_oid(commit));
+ oidset_insert(data->set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
- oidset_insert(set, &parents->item->object.oid);
+ oidset_insert(data->set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
- oidset_insert(set, get_tagged_oid(tag));
+ oidset_insert(data->set, get_tagged_oid(tag));
}
return 0;
}
@@ -2343,10 +2483,14 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(r)) {
- for_each_packed_object(r, add_promisor_object,
- &promisor_objects,
- FOR_EACH_OBJECT_PROMISOR_ONLY |
- FOR_EACH_OBJECT_PACK_ORDER);
+ struct add_promisor_object_data data = {
+ .repo = r,
+ .set = &promisor_objects,
+ };
+
+ odb_for_each_object(r->objects, NULL, add_promisor_object, &data,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
@@ -2373,11 +2517,11 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l
return 0;
}
-struct packfile_store *packfile_store_new(struct object_database *odb)
+struct packfile_store *packfile_store_new(struct odb_source *source)
{
struct packfile_store *store;
CALLOC_ARRAY(store, 1);
- store->odb = odb;
+ store->source = source;
strmap_init(&store->packs_by_path);
return store;
}
@@ -2399,4 +2543,135 @@ void packfile_store_close(struct packfile_store *store)
BUG("want to close pack marked 'do-not-close'");
close_pack(e->pack);
}
+ if (store->midx)
+ close_midx(store->midx);
+ store->midx = NULL;
+}
+
+struct odb_packed_read_stream {
+ struct odb_read_stream base;
+ struct packed_git *pack;
+ git_zstream z;
+ enum {
+ ODB_PACKED_READ_STREAM_UNINITIALIZED,
+ ODB_PACKED_READ_STREAM_INUSE,
+ ODB_PACKED_READ_STREAM_DONE,
+ ODB_PACKED_READ_STREAM_ERROR,
+ } z_state;
+ off_t pos;
+};
+
+static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf,
+ size_t sz)
+{
+ struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+ size_t total_read = 0;
+
+ switch (st->z_state) {
+ case ODB_PACKED_READ_STREAM_UNINITIALIZED:
+ memset(&st->z, 0, sizeof(st->z));
+ git_inflate_init(&st->z);
+ st->z_state = ODB_PACKED_READ_STREAM_INUSE;
+ break;
+ case ODB_PACKED_READ_STREAM_DONE:
+ return 0;
+ case ODB_PACKED_READ_STREAM_ERROR:
+ return -1;
+ case ODB_PACKED_READ_STREAM_INUSE:
+ break;
+ }
+
+ while (total_read < sz) {
+ int status;
+ struct pack_window *window = NULL;
+ unsigned char *mapped;
+
+ mapped = use_pack(st->pack, &window,
+ st->pos, &st->z.avail_in);
+
+ st->z.next_out = (unsigned char *)buf + total_read;
+ st->z.avail_out = sz - total_read;
+ st->z.next_in = mapped;
+ status = git_inflate(&st->z, Z_FINISH);
+
+ st->pos += st->z.next_in - mapped;
+ total_read = st->z.next_out - (unsigned char *)buf;
+ unuse_pack(&window);
+
+ if (status == Z_STREAM_END) {
+ git_inflate_end(&st->z);
+ st->z_state = ODB_PACKED_READ_STREAM_DONE;
+ break;
+ }
+
+ /*
+ * Unlike the loose object case, we do not have to worry here
+ * about running out of input bytes and spinning infinitely. If
+ * we get Z_BUF_ERROR due to too few input bytes, then we'll
+ * replenish them in the next use_pack() call when we loop. If
+ * we truly hit the end of the pack (i.e., because it's corrupt
+ * or truncated), then use_pack() catches that and will die().
+ */
+ if (status != Z_OK && status != Z_BUF_ERROR) {
+ git_inflate_end(&st->z);
+ st->z_state = ODB_PACKED_READ_STREAM_ERROR;
+ return -1;
+ }
+ }
+ return total_read;
+}
+
+static int close_istream_pack_non_delta(struct odb_read_stream *_st)
+{
+ struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+ if (st->z_state == ODB_PACKED_READ_STREAM_INUSE)
+ git_inflate_end(&st->z);
+ return 0;
+}
+
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+ struct packfile_store *store,
+ const struct object_id *oid)
+{
+ struct odb_packed_read_stream *stream;
+ struct pack_window *window = NULL;
+ struct object_info oi = OBJECT_INFO_INIT;
+ enum object_type in_pack_type;
+ unsigned long size;
+
+ oi.sizep = &size;
+
+ if (packfile_store_read_object_info(store, oid, &oi, 0) ||
+ oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
+ oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
+ repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
+ return -1;
+
+ in_pack_type = unpack_object_header(oi.u.packed.pack,
+ &window,
+ &oi.u.packed.offset,
+ &size);
+ unuse_pack(&window);
+ switch (in_pack_type) {
+ default:
+ return -1; /* we do not do deltas for now */
+ case OBJ_COMMIT:
+ case OBJ_TREE:
+ case OBJ_BLOB:
+ case OBJ_TAG:
+ break;
+ }
+
+ CALLOC_ARRAY(stream, 1);
+ stream->base.close = close_istream_pack_non_delta;
+ stream->base.read = read_istream_pack_non_delta;
+ stream->base.type = in_pack_type;
+ stream->base.size = size;
+ stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
+ stream->pack = oi.u.packed.pack;
+ stream->pos = oi.u.packed.offset;
+
+ *out = &stream->base;
+
+ return 0;
}
diff --git a/packfile.h b/packfile.h
index 20e1cf17b26083c3dd278f188c0ae4ddcd22b7ce..78d5bf1794ef425a9228468375224e0e767634ea 100644
--- a/packfile.h
+++ b/packfile.h
@@ -4,11 +4,14 @@
#include "list.h"
#include "object.h"
#include "odb.h"
+#include "odb/source-files.h"
#include "oidset.h"
+#include "repository.h"
#include "strmap.h"
/* in odb.h */
struct object_info;
+struct odb_read_stream;
struct packed_git {
struct pack_window *windows;
@@ -76,7 +79,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs,
* A store that manages packfiles for a given object database.
*/
struct packfile_store {
- struct object_database *odb;
+ struct odb_source *source;
/*
* The list of packfiles in the order in which they have been most
@@ -98,6 +101,9 @@ struct packfile_store {
unsigned flags;
} kept_cache;
+ /* The multi-pack index that belongs to this specific packfile store. */
+ struct multi_pack_index *midx;
+
/*
* A map of packfile names to packed_git structs for tracking which
* packs have been loaded already.
@@ -128,9 +134,9 @@ struct packfile_store {
/*
* Allocate and initialize a new empty packfile store for the given object
- * database.
+ * database source.
*/
-struct packfile_store *packfile_store_new(struct object_database *odb);
+struct packfile_store *packfile_store_new(struct odb_source *source);
/*
* Free the packfile store and all its associated state. All packfiles
@@ -168,20 +174,99 @@ void packfile_store_reprepare(struct packfile_store *store);
void packfile_store_add_pack(struct packfile_store *store,
struct packed_git *pack);
+/*
+ * Try to read the object identified by its ID from the object store and
+ * populate the object info with its data. Returns 1 in case the object was
+ * not found, 0 if it was and read successfully, and a negative error code in
+ * case the object was corrupted.
+ */
+int packfile_store_read_object_info(struct packfile_store *store,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags);
+
+/*
+ * Count the number of objects contained in the packfiles.
+ */
+unsigned long packfile_store_count_objects(struct packfile_store *store,
+ unsigned flags);
+
+/*
+ * Get all packs managed by the given store, including packfiles that are
+ * referenced by multi-pack indices.
+ */
+struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
+
+struct repo_for_each_pack_data {
+ struct odb_source *source;
+ struct packfile_list_entry *entry;
+};
+
+static inline struct repo_for_each_pack_data repo_for_each_pack_data_init(struct repository *repo)
+{
+ struct repo_for_each_pack_data data = { 0 };
+
+ odb_prepare_alternates(repo->objects);
+
+ for (struct odb_source *source = repo->objects->sources; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct packfile_list_entry *entry = packfile_store_get_packs(files->packed);
+ if (!entry)
+ continue;
+ data.source = source;
+ data.entry = entry;
+ break;
+ }
+
+ return data;
+}
+
+static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *data)
+{
+ struct odb_source *source;
+
+ data->entry = data->entry->next;
+ if (data->entry)
+ return;
+
+ for (source = data->source->next; source; source = source->next) {
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ struct packfile_list_entry *entry = packfile_store_get_packs(files->packed);
+ if (!entry)
+ continue;
+ data->source = source;
+ data->entry = entry;
+ return;
+ }
+
+ data->source = NULL;
+ data->entry = NULL;
+}
+
 /*
 * Load and iterate through all packs of the given repository. This helper
 * function will yield packfiles from all object sources connected to the
 * repository.
 */
 #define repo_for_each_pack(repo, p) \
- for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \
- ((p) = (e ? e->pack : NULL)); e = e->next)
+ for (struct repo_for_each_pack_data each_pack_data = repo_for_each_pack_data_init(repo); \
+ ((p) = (each_pack_data.entry ? each_pack_data.entry->pack : NULL)); \
+ repo_for_each_pack_data_next(&each_pack_data))
+
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+ struct packfile_store *store,
+ const struct object_id *oid);
/*
- * Get all packs managed by the given store, including packfiles that are
- * referenced by multi-pack indices.
+ * Try to read the object identified by its ID from the object store and
+ * populate the object info with its data. Returns 1 in case the object was
+ * not found, 0 if it was and read successfully, and a negative error code in
+ * case the object was corrupted.
*/
-struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
+int packfile_store_read_object_info(struct packfile_store *store,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags);
/*
* Open the packfile and add it to the store if it isn't yet known. Returns
@@ -194,6 +279,19 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
int packfile_store_freshen_object(struct packfile_store *store,
const struct object_id *oid);
+enum kept_pack_type {
+ KEPT_PACK_ON_DISK = (1 << 0),
+ KEPT_PACK_IN_CORE = (1 << 1),
+};
+
+/*
+ * Retrieve the cache of kept packs from the given packfile store. Accepts a
+ * combination of `kept_pack_type` flags. The cache is computed on demand and
+ * will be recomputed whenever the flags change.
+ */
+struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store,
+ unsigned flags);
+
struct pack_window {
struct pack_window *next;
unsigned char *base;
@@ -260,9 +358,21 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum for_each_object_flags flags);
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags);
+ unsigned flags);
+
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
+ * the callback function for each of them. If given, the object info will be
+ * populated with the object's data as if you had called
+ * `packfile_store_read_object_info()` on the object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
@@ -270,12 +380,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
#define PACKDIR_FILE_GARBAGE 4
extern void (*report_garbage)(unsigned seen_bits, const char *path);
-/*
- * Give a rough count of objects in the repository. This sacrifices accuracy
- * for speed.
- */
-unsigned long repo_approximate_object_count(struct repository *r);
-
void pack_report(struct repository *repo);
/*
@@ -369,22 +473,10 @@ int packed_object_info(struct repository *r,
void mark_bad_packed_object(struct packed_git *, const struct object_id *);
const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *);
-#define ON_DISK_KEEP_PACKS 1
-#define IN_CORE_KEEP_PACKS 2
-
-/*
- * Iff a pack file in the given repository contains the object named by sha1,
- * return true and store its location to e.
- */
-int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
-int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-
int has_object_pack(struct repository *r, const struct object_id *oid);
int has_object_kept_pack(struct repository *r, const struct object_id *oid,
unsigned flags);
-struct packed_git **kept_pack_cache(struct repository *r, unsigned flags);
-
/*
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
diff --git a/parallel-checkout.c b/parallel-checkout.c
index fba6aa65a6e8524fcf829c0f2fb389146b643e22..0bf4bd6d4abd8c98cfdfc0f68a39d44cf4eaa800 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -13,7 +13,7 @@
#include "read-cache-ll.h"
#include "run-command.h"
#include "sigchain.h"
-#include "streaming.h"
+#include "odb/streaming.h"
#include "symlinks.h"
#include "thread-utils.h"
#include "trace2.h"
@@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid);
if (filter) {
- if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) {
+ if (odb_stream_blob_to_fd(the_repository->objects, fd,
+ &pc_item->ce->oid, filter, 1)) {
/* On error, reset fd to try writing without streaming */
if (reset_fd(fd, path))
return -1;
diff --git a/reachable.c b/reachable.c
index b753c395530b6d3212006742bbd4f1671a2e22a6..4a26ccf399bc2a1b7e1830f6f7827e6fec34b642 100644
--- a/reachable.c
+++ b/reachable.c
@@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
return oidset_contains(&data->extra_recent_oids, oid);
}
-static void add_recent_object(const struct object_id *oid,
- struct packed_git *pack,
- off_t offset,
- timestamp_t mtime,
- struct recent_data *data)
+static int want_recent_object(struct recent_data *data,
+ const struct object_id *oid)
{
- struct object *obj;
- enum object_type type;
+ if (data->ignore_in_core_kept_packs &&
+ has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ return 0;
+ return 1;
+}
- if (!obj_is_recent(oid, mtime, data))
- return;
+static int add_recent_object(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data)
+{
+ struct recent_data *data = cb_data;
+ struct object *obj;
- /*
- * We do not want to call parse_object here, because
- * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for
- * later processing, and the revision machinery expects
- * commits and tags to have been parsed.
- */
- type = odb_read_object_info(the_repository->objects, oid, NULL);
- if (type < 0)
- die("unable to get object info for %s", oid_to_hex(oid));
+ if (!want_recent_object(data, oid) ||
+ !obj_is_recent(oid, *oi->mtimep, data))
+ return 0;
- switch (type) {
+ switch (*oi->typep) {
case OBJ_TAG:
case OBJ_COMMIT:
obj = parse_object_or_die(the_repository, oid, NULL);
@@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid,
break;
default:
die("unknown object type for %s: %s",
- oid_to_hex(oid), type_name(type));
+ oid_to_hex(oid), type_name(*oi->typep));
}
if (!obj)
die("unable to lookup %s", oid_to_hex(oid));
-
- add_pending_object(data->revs, obj, "");
- if (data->cb)
- data->cb(obj, pack, offset, mtime);
-}
-
-static int want_recent_object(struct recent_data *data,
- const struct object_id *oid)
-{
- if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
+ if (obj->flags & SEEN)
return 0;
- return 1;
-}
-static int add_recent_loose(const struct object_id *oid,
- const char *path, void *data)
-{
- struct stat st;
- struct object *obj;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
-
- if (stat(path, &st) < 0) {
- /*
- * It's OK if an object went away during our iteration; this
- * could be due to a simultaneous repack. But anything else
- * we should abort, since we might then fail to mark objects
- * which should not be pruned.
- */
- if (errno == ENOENT)
- return 0;
- return error_errno("unable to stat %s", oid_to_hex(oid));
+ add_pending_object(data->revs, obj, "");
+ if (data->cb) {
+ if (oi->whence == OI_PACKED)
+ data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep);
+ else
+ data->cb(obj, NULL, 0, *oi->mtimep);
}
- add_recent_object(oid, NULL, 0, st.st_mtime, data);
- return 0;
-}
-
-static int add_recent_packed(const struct object_id *oid,
- struct packed_git *p,
- uint32_t pos,
- void *data)
-{
- struct object *obj;
- timestamp_t mtime = p->mtime;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
- if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(p, pos);
- }
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
return 0;
}
@@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum for_each_object_flags flags;
+ enum odb_for_each_object_flags flags;
+ enum object_type type;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ .typep = &type,
+ };
int r;
data.revs = revs;
@@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
- r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- FOR_EACH_OBJECT_LOCAL_ONLY);
- if (r)
- goto done;
-
- flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
+ flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
- flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
+ flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
+ r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags);
+ if (r)
+ goto done;
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/repack-promisor.c b/repack-promisor.c
index ee6e0669f656028a4f59389d0ba0b8ff637526c3..35c4073632b1b49cec04f1a71ff77ca7605fcd0a 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -17,8 +17,8 @@ struct write_oid_context {
* necessary.
*/
static int write_oid(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED, void *data)
+ struct object_info *oi UNUSED,
+ void *data)
{
struct write_oid_context *ctx = data;
struct child_process *cmd = ctx->cmd;
@@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo,
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
- for_each_packed_object(repo, write_oid, &ctx,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ odb_for_each_object(repo->objects, NULL, write_oid, &ctx,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/repository.h b/repository.h
index 6063c4b846d031d657827f8b16d65af8c09e5b29..01322ca1979822b19b4ebc7d07365fdaf1acec88 100644
--- a/repository.h
+++ b/repository.h
@@ -50,6 +50,15 @@ struct repository {
*/
char *commondir;
+ /*
+ * Location of the primary object database source. May be NULL, in
+	 * which case the primary object database source will be assumed to be
+ * "${commondir}/objects".
+ *
+ * This configuration can be set via "extensions.objectStorage".
+ */
+ char *object_storage;
+
/*
* Holds any information related to accessing the raw object content.
*/
diff --git a/revision.c b/revision.c
index 5f0850ae5c9c1aec7838b0a9e05e2951a6f50fdd..d2b83d0f8b50f38b65fd80906c994706226dd2f3 100644
--- a/revision.c
+++ b/revision.c
@@ -2541,14 +2541,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
die(_("--unpacked= no longer supported"));
} else if (!strcmp(arg, "--no-kept-objects")) {
revs->no_kept_objects = 1;
- revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
- revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
+ revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE;
+ revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK;
} else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) {
revs->no_kept_objects = 1;
if (!strcmp(optarg, "in-core"))
- revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+ revs->keep_pack_cache_flags |= KEPT_PACK_IN_CORE;
if (!strcmp(optarg, "on-disk"))
- revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
+ revs->keep_pack_cache_flags |= KEPT_PACK_ON_DISK;
} else if (!strcmp(arg, "-r")) {
revs->diff = 1;
revs->diffopt.flags.recursive = 1;
@@ -3649,8 +3649,7 @@ void reset_revision_walk(void)
}
static int mark_uninteresting(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
+ struct object_info *oi UNUSED,
void *cb)
{
struct rev_info *revs = cb;
@@ -3959,10 +3958,9 @@ int prepare_revision_walk(struct rev_info *revs)
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
- if (revs->exclude_promisor_objects) {
- for_each_packed_object(revs->repo, mark_uninteresting, revs,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
- }
+ if (revs->exclude_promisor_objects)
+ odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting,
+ revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);
diff --git a/setup.c b/setup.c
index 3a6a048620dd7d5046d6ed5c04f126322e6a7618..8fcdd8d7c04474e880c2be9b82bb943bff92c0b3 100644
--- a/setup.c
+++ b/setup.c
@@ -686,7 +686,14 @@ static enum extension_result handle_extension(const char *var,
} else if (!strcmp(ext, "relativeworktrees")) {
data->relative_worktrees = git_config_bool(var, value);
return EXTENSION_OK;
+ } else if (!strcmp(ext, "objectstorage")) {
+ if (!value)
+ return config_error_nonbool(var);
+ free(data->object_storage);
+ data->object_storage = xstrdup(value);
+ return EXTENSION_OK;
}
+
return EXTENSION_UNKNOWN;
}
@@ -1931,12 +1938,18 @@ const char *setup_git_directory_gently(int *nongit_ok)
startup_info->have_repository ||
/* GIT_DIR_EXPLICIT */
getenv(GIT_DIR_ENVIRONMENT)) {
+ if (startup_info->have_repository) {
+ the_repository->object_storage =
+ xstrdup_or_null(repo_fmt.object_storage);
+ }
+
if (!the_repository->gitdir) {
const char *gitdir = getenv(GIT_DIR_ENVIRONMENT);
if (!gitdir)
gitdir = DEFAULT_GIT_DIR_ENVIRONMENT;
setup_git_env(gitdir);
}
+
if (startup_info->have_repository) {
repo_set_hash_algo(the_repository, repo_fmt.hash_algo);
repo_set_compat_hash_algo(the_repository,
@@ -2039,6 +2052,8 @@ void check_repository_format(struct repository_format *fmt)
fmt = &repo_fmt;
check_repository_format_gently(repo_get_git_dir(the_repository), fmt, NULL);
startup_info->have_repository = 1;
+ the_repository->object_storage =
+ xstrdup_or_null(repo_fmt.object_storage);
repo_set_hash_algo(the_repository, fmt->hash_algo);
repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo);
repo_set_ref_storage_format(the_repository,
diff --git a/setup.h b/setup.h
index d55dcc66086308b31d86f28bcbb84f5d01e4453f..e1c1279d09a593132f23f1786bd8b320da292f79 100644
--- a/setup.h
+++ b/setup.h
@@ -173,6 +173,7 @@ struct repository_format {
enum ref_storage_format ref_storage_format;
int sparse_index;
char *work_tree;
+ char *object_storage;
struct string_list unknown_extensions;
struct string_list v1_only_extensions;
};
diff --git a/streaming.c b/streaming.c
deleted file mode 100644
index 00ad649ae397f3c5d0f1200ec3860188133c5223..0000000000000000000000000000000000000000
--- a/streaming.c
+++ /dev/null
@@ -1,561 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-
-#define USE_THE_REPOSITORY_VARIABLE
-
-#include "git-compat-util.h"
-#include "convert.h"
-#include "environment.h"
-#include "streaming.h"
-#include "repository.h"
-#include "object-file.h"
-#include "odb.h"
-#include "replace-object.h"
-#include "packfile.h"
-
-typedef int (*open_istream_fn)(struct git_istream *,
- struct repository *,
- const struct object_id *,
- enum object_type *);
-typedef int (*close_istream_fn)(struct git_istream *);
-typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);
-
-#define FILTER_BUFFER (1024*16)
-
-struct filtered_istream {
- struct git_istream *upstream;
- struct stream_filter *filter;
- char ibuf[FILTER_BUFFER];
- char obuf[FILTER_BUFFER];
- int i_end, i_ptr;
- int o_end, o_ptr;
- int input_finished;
-};
-
-struct git_istream {
- open_istream_fn open;
- close_istream_fn close;
- read_istream_fn read;
-
- unsigned long size; /* inflated size of full object */
- git_zstream z;
- enum { z_unused, z_used, z_done, z_error } z_state;
-
- union {
- struct {
- char *buf; /* from odb_read_object_info_extended() */
- unsigned long read_ptr;
- } incore;
-
- struct {
- void *mapped;
- unsigned long mapsize;
- char hdr[32];
- int hdr_avail;
- int hdr_used;
- } loose;
-
- struct {
- struct packed_git *pack;
- off_t pos;
- } in_pack;
-
- struct filtered_istream filtered;
- } u;
-};
-
-/*****************************************************************
- *
- * Common helpers
- *
- *****************************************************************/
-
-static void close_deflated_stream(struct git_istream *st)
-{
- if (st->z_state == z_used)
- git_inflate_end(&st->z);
-}
-
-
-/*****************************************************************
- *
- * Filtered stream
- *
- *****************************************************************/
-
-static int close_istream_filtered(struct git_istream *st)
-{
- free_stream_filter(st->u.filtered.filter);
- return close_istream(st->u.filtered.upstream);
-}
-
-static ssize_t read_istream_filtered(struct git_istream *st, char *buf,
- size_t sz)
-{
- struct filtered_istream *fs = &(st->u.filtered);
- size_t filled = 0;
-
- while (sz) {
- /* do we already have filtered output? */
- if (fs->o_ptr < fs->o_end) {
- size_t to_move = fs->o_end - fs->o_ptr;
- if (sz < to_move)
- to_move = sz;
- memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
- fs->o_ptr += to_move;
- sz -= to_move;
- filled += to_move;
- continue;
- }
- fs->o_end = fs->o_ptr = 0;
-
- /* do we have anything to feed the filter with? */
- if (fs->i_ptr < fs->i_end) {
- size_t to_feed = fs->i_end - fs->i_ptr;
- size_t to_receive = FILTER_BUFFER;
- if (stream_filter(fs->filter,
- fs->ibuf + fs->i_ptr, &to_feed,
- fs->obuf, &to_receive))
- return -1;
- fs->i_ptr = fs->i_end - to_feed;
- fs->o_end = FILTER_BUFFER - to_receive;
- continue;
- }
-
- /* tell the filter to drain upon no more input */
- if (fs->input_finished) {
- size_t to_receive = FILTER_BUFFER;
- if (stream_filter(fs->filter,
- NULL, NULL,
- fs->obuf, &to_receive))
- return -1;
- fs->o_end = FILTER_BUFFER - to_receive;
- if (!fs->o_end)
- break;
- continue;
- }
- fs->i_end = fs->i_ptr = 0;
-
- /* refill the input from the upstream */
- if (!fs->input_finished) {
- fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
- if (fs->i_end < 0)
- return -1;
- if (fs->i_end)
- continue;
- }
- fs->input_finished = 1;
- }
- return filled;
-}
-
-static struct git_istream *attach_stream_filter(struct git_istream *st,
- struct stream_filter *filter)
-{
- struct git_istream *ifs = xmalloc(sizeof(*ifs));
- struct filtered_istream *fs = &(ifs->u.filtered);
-
- ifs->close = close_istream_filtered;
- ifs->read = read_istream_filtered;
- fs->upstream = st;
- fs->filter = filter;
- fs->i_end = fs->i_ptr = 0;
- fs->o_end = fs->o_ptr = 0;
- fs->input_finished = 0;
- ifs->size = -1; /* unknown */
- return ifs;
-}
-
-/*****************************************************************
- *
- * Loose object stream
- *
- *****************************************************************/
-
-static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz)
-{
- size_t total_read = 0;
-
- switch (st->z_state) {
- case z_done:
- return 0;
- case z_error:
- return -1;
- default:
- break;
- }
-
- if (st->u.loose.hdr_used < st->u.loose.hdr_avail) {
- size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used;
- if (sz < to_copy)
- to_copy = sz;
- memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy);
- st->u.loose.hdr_used += to_copy;
- total_read += to_copy;
- }
-
- while (total_read < sz) {
- int status;
-
- st->z.next_out = (unsigned char *)buf + total_read;
- st->z.avail_out = sz - total_read;
- status = git_inflate(&st->z, Z_FINISH);
-
- total_read = st->z.next_out - (unsigned char *)buf;
-
- if (status == Z_STREAM_END) {
- git_inflate_end(&st->z);
- st->z_state = z_done;
- break;
- }
- if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
- git_inflate_end(&st->z);
- st->z_state = z_error;
- return -1;
- }
- }
- return total_read;
-}
-
-static int close_istream_loose(struct git_istream *st)
-{
- close_deflated_stream(st);
- munmap(st->u.loose.mapped, st->u.loose.mapsize);
- return 0;
-}
-
-static int open_istream_loose(struct git_istream *st, struct repository *r,
- const struct object_id *oid,
- enum object_type *type)
-{
- struct object_info oi = OBJECT_INFO_INIT;
- struct odb_source *source;
-
- oi.sizep = &st->size;
- oi.typep = type;
-
- odb_prepare_alternates(r->objects);
- for (source = r->objects->sources; source; source = source->next) {
- st->u.loose.mapped = odb_source_loose_map_object(source, oid,
- &st->u.loose.mapsize);
- if (st->u.loose.mapped)
- break;
- }
- if (!st->u.loose.mapped)
- return -1;
-
- switch (unpack_loose_header(&st->z, st->u.loose.mapped,
- st->u.loose.mapsize, st->u.loose.hdr,
- sizeof(st->u.loose.hdr))) {
- case ULHR_OK:
- break;
- case ULHR_BAD:
- case ULHR_TOO_LONG:
- goto error;
- }
- if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0)
- goto error;
-
- st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
- st->u.loose.hdr_avail = st->z.total_out;
- st->z_state = z_used;
- st->close = close_istream_loose;
- st->read = read_istream_loose;
-
- return 0;
-error:
- git_inflate_end(&st->z);
- munmap(st->u.loose.mapped, st->u.loose.mapsize);
- return -1;
-}
-
-
-/*****************************************************************
- *
- * Non-delta packed object stream
- *
- *****************************************************************/
-
-static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
- size_t sz)
-{
- size_t total_read = 0;
-
- switch (st->z_state) {
- case z_unused:
- memset(&st->z, 0, sizeof(st->z));
- git_inflate_init(&st->z);
- st->z_state = z_used;
- break;
- case z_done:
- return 0;
- case z_error:
- return -1;
- case z_used:
- break;
- }
-
- while (total_read < sz) {
- int status;
- struct pack_window *window = NULL;
- unsigned char *mapped;
-
- mapped = use_pack(st->u.in_pack.pack, &window,
- st->u.in_pack.pos, &st->z.avail_in);
-
- st->z.next_out = (unsigned char *)buf + total_read;
- st->z.avail_out = sz - total_read;
- st->z.next_in = mapped;
- status = git_inflate(&st->z, Z_FINISH);
-
- st->u.in_pack.pos += st->z.next_in - mapped;
- total_read = st->z.next_out - (unsigned char *)buf;
- unuse_pack(&window);
-
- if (status == Z_STREAM_END) {
- git_inflate_end(&st->z);
- st->z_state = z_done;
- break;
- }
-
- /*
- * Unlike the loose object case, we do not have to worry here
- * about running out of input bytes and spinning infinitely. If
- * we get Z_BUF_ERROR due to too few input bytes, then we'll
- * replenish them in the next use_pack() call when we loop. If
- * we truly hit the end of the pack (i.e., because it's corrupt
- * or truncated), then use_pack() catches that and will die().
- */
- if (status != Z_OK && status != Z_BUF_ERROR) {
- git_inflate_end(&st->z);
- st->z_state = z_error;
- return -1;
- }
- }
- return total_read;
-}
-
-static int close_istream_pack_non_delta(struct git_istream *st)
-{
- close_deflated_stream(st);
- return 0;
-}
-
-static int open_istream_pack_non_delta(struct git_istream *st,
- struct repository *r UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type *type UNUSED)
-{
- struct pack_window *window;
- enum object_type in_pack_type;
-
- window = NULL;
-
- in_pack_type = unpack_object_header(st->u.in_pack.pack,
- &window,
- &st->u.in_pack.pos,
- &st->size);
- unuse_pack(&window);
- switch (in_pack_type) {
- default:
- return -1; /* we do not do deltas for now */
- case OBJ_COMMIT:
- case OBJ_TREE:
- case OBJ_BLOB:
- case OBJ_TAG:
- break;
- }
- st->z_state = z_unused;
- st->close = close_istream_pack_non_delta;
- st->read = read_istream_pack_non_delta;
-
- return 0;
-}
-
-
-/*****************************************************************
- *
- * In-core stream
- *
- *****************************************************************/
-
-static int close_istream_incore(struct git_istream *st)
-{
- free(st->u.incore.buf);
- return 0;
-}
-
-static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz)
-{
- size_t read_size = sz;
- size_t remainder = st->size - st->u.incore.read_ptr;
-
- if (remainder <= read_size)
- read_size = remainder;
- if (read_size) {
- memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size);
- st->u.incore.read_ptr += read_size;
- }
- return read_size;
-}
-
-static int open_istream_incore(struct git_istream *st, struct repository *r,
- const struct object_id *oid, enum object_type *type)
-{
- struct object_info oi = OBJECT_INFO_INIT;
-
- st->u.incore.read_ptr = 0;
- st->close = close_istream_incore;
- st->read = read_istream_incore;
-
- oi.typep = type;
- oi.sizep = &st->size;
- oi.contentp = (void **)&st->u.incore.buf;
- return odb_read_object_info_extended(r->objects, oid, &oi,
- OBJECT_INFO_DIE_IF_CORRUPT);
-}
-
-/*****************************************************************************
- * static helpers variables and functions for users of streaming interface
- *****************************************************************************/
-
-static int istream_source(struct git_istream *st,
- struct repository *r,
- const struct object_id *oid,
- enum object_type *type)
-{
- unsigned long size;
- int status;
- struct object_info oi = OBJECT_INFO_INIT;
-
- oi.typep = type;
- oi.sizep = &size;
- status = odb_read_object_info_extended(r->objects, oid, &oi, 0);
- if (status < 0)
- return status;
-
- switch (oi.whence) {
- case OI_LOOSE:
- st->open = open_istream_loose;
- return 0;
- case OI_PACKED:
- if (!oi.u.packed.is_delta &&
- repo_settings_get_big_file_threshold(the_repository) < size) {
- st->u.in_pack.pack = oi.u.packed.pack;
- st->u.in_pack.pos = oi.u.packed.offset;
- st->open = open_istream_pack_non_delta;
- return 0;
- }
- /* fallthru */
- default:
- st->open = open_istream_incore;
- return 0;
- }
-}
-
-/****************************************************************
- * Users of streaming interface
- ****************************************************************/
-
-int close_istream(struct git_istream *st)
-{
- int r = st->close(st);
- free(st);
- return r;
-}
-
-ssize_t read_istream(struct git_istream *st, void *buf, size_t sz)
-{
- return st->read(st, buf, sz);
-}
-
-struct git_istream *open_istream(struct repository *r,
- const struct object_id *oid,
- enum object_type *type,
- unsigned long *size,
- struct stream_filter *filter)
-{
- struct git_istream *st = xmalloc(sizeof(*st));
- const struct object_id *real = lookup_replace_object(r, oid);
- int ret = istream_source(st, r, real, type);
-
- if (ret) {
- free(st);
- return NULL;
- }
-
- if (st->open(st, r, real, type)) {
- if (open_istream_incore(st, r, real, type)) {
- free(st);
- return NULL;
- }
- }
- if (filter) {
- /* Add "&& !is_null_stream_filter(filter)" for performance */
- struct git_istream *nst = attach_stream_filter(st, filter);
- if (!nst) {
- close_istream(st);
- return NULL;
- }
- st = nst;
- }
-
- *size = st->size;
- return st;
-}
-
-int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter,
- int can_seek)
-{
- struct git_istream *st;
- enum object_type type;
- unsigned long sz;
- ssize_t kept = 0;
- int result = -1;
-
- st = open_istream(the_repository, oid, &type, &sz, filter);
- if (!st) {
- if (filter)
- free_stream_filter(filter);
- return result;
- }
- if (type != OBJ_BLOB)
- goto close_and_exit;
- for (;;) {
- char buf[1024 * 16];
- ssize_t wrote, holeto;
- ssize_t readlen = read_istream(st, buf, sizeof(buf));
-
- if (readlen < 0)
- goto close_and_exit;
- if (!readlen)
- break;
- if (can_seek && sizeof(buf) == readlen) {
- for (holeto = 0; holeto < readlen; holeto++)
- if (buf[holeto])
- break;
- if (readlen == holeto) {
- kept += holeto;
- continue;
- }
- }
-
- if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
- goto close_and_exit;
- else
- kept = 0;
- wrote = write_in_full(fd, buf, readlen);
-
- if (wrote < 0)
- goto close_and_exit;
- }
- if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
- xwrite(fd, "", 1) != 1))
- goto close_and_exit;
- result = 0;
-
- close_and_exit:
- close_istream(st);
- return result;
-}
diff --git a/streaming.h b/streaming.h
deleted file mode 100644
index bd27f59e5764aec64cd1cf927baf213fcec4d893..0000000000000000000000000000000000000000
--- a/streaming.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-#ifndef STREAMING_H
-#define STREAMING_H 1
-
-#include "object.h"
-
-/* opaque */
-struct git_istream;
-struct stream_filter;
-
-struct git_istream *open_istream(struct repository *, const struct object_id *,
- enum object_type *, unsigned long *,
- struct stream_filter *);
-int close_istream(struct git_istream *);
-ssize_t read_istream(struct git_istream *, void *, size_t);
-
-int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek);
-
-#endif /* STREAMING_H */
diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c
index 6de5d1665afbfc8a6bee29983107cd1c8e28a98b..6e03aabca79c6c383053700ddd5445a665e164b3 100644
--- a/t/helper/test-read-midx.c
+++ b/t/helper/test-read-midx.c
@@ -26,18 +26,22 @@ static int read_midx_file(const char *object_dir, const char *checksum,
int show_objects)
{
uint32_t i;
- struct multi_pack_index *m;
+ struct multi_pack_index *m, *tip;
+ int ret = 0;
- m = setup_midx(object_dir);
+ m = tip = setup_midx(object_dir);
if (!m)
return 1;
if (checksum) {
- while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum))
+ while (m && strcmp(get_midx_checksum(m), checksum))
m = m->base_midx;
- if (!m)
- return 1;
+ if (!m) {
+ ret = error(_("could not find MIDX with checksum %s"),
+ checksum);
+ goto out;
+ }
}
printf("header: %08x %d %d %d %d\n",
@@ -82,9 +86,10 @@ static int read_midx_file(const char *object_dir, const char *checksum,
}
}
- close_midx(m);
+out:
+ close_midx(tip);
- return 0;
+ return ret;
}
static int read_midx_checksum(const char *object_dir)
@@ -94,7 +99,7 @@ static int read_midx_checksum(const char *object_dir)
m = setup_midx(object_dir);
if (!m)
return 1;
- printf("%s\n", hash_to_hex(get_midx_checksum(m)));
+ printf("%s\n", get_midx_checksum(m));
close_midx(m);
return 0;
diff --git a/t/meson.build b/t/meson.build
index d3d0be28224b9c49d8f715e3a09ec648ae5470cb..45839f9dae452eeb9d12b19d3455e04076054edf 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -614,6 +614,7 @@ integration_tests = [
't5332-multi-pack-reuse.sh',
't5333-pseudo-merge-bitmaps.sh',
't5334-incremental-multi-pack-index.sh',
+ 't5335-compact-multi-pack-index.sh',
't5351-unpack-large-objects.sh',
't5400-send-pack.sh',
't5401-update-hooks.sh',
diff --git a/t/t0450/adoc-help-mismatches b/t/t0450/adoc-help-mismatches
index 8ee2d3f7c815023d2c9520d270489a0daf36896f..e8d6c13ccd0333614acf0d31c9b2613f80d1fc1b 100644
--- a/t/t0450/adoc-help-mismatches
+++ b/t/t0450/adoc-help-mismatches
@@ -33,7 +33,6 @@ merge
merge-file
merge-index
merge-one-file
-multi-pack-index
name-rev
notes
push
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 93f319a4b29fbb3d3899a1d1f3914dd7766dd672..03676d37b98a90c6209b2d3c3ae641e9bfec9a79 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -450,12 +450,7 @@ test_expect_success 'verify invalid chunk offset' '
"improper chunk offset(s)"
'
-test_expect_success 'verify packnames out of order' '
- corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \
- "pack names out of order"
-'
-
-test_expect_success 'verify packnames out of order' '
+test_expect_success 'verify missing pack' '
corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \
"failed to load pack"
'
diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a306f5043052b6fcabbf7d2cb390795e01930ac3
--- /dev/null
+++ b/t/t5335-compact-multi-pack-index.sh
@@ -0,0 +1,218 @@
+#!/bin/sh
+
+test_description='multi-pack-index compaction'
+
+. ./test-lib.sh
+
+GIT_TEST_MULTI_PACK_INDEX=0
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0
+
+objdir=.git/objects
+packdir=$objdir/pack
+midxdir=$packdir/multi-pack-index.d
+midx_chain=$midxdir/multi-pack-index-chain
+
+nth_line () {
+	local n="$1"
+	shift
+	awk "NR==$n" "$@"
+}
+
+write_packs () {
+ for c in "$@"
+ do
+ test_commit "$c" &&
+
+ git pack-objects --all --unpacked $packdir/pack-$c &&
+ git prune-packed &&
+
+ git multi-pack-index write --incremental --bitmap || return 1
+ done
+}
+
+test_midx_layer_packs () {
+ local checksum="$1" &&
+ shift &&
+
+ test-tool read-midx $objdir "$checksum" >out &&
+
+ printf "%s\n" "$@" >expect &&
+ # NOTE: do *not* pipe through sort here, we want to ensure the
+ # order of packs is preserved during compaction.
+ grep "^pack-" out | cut -d"-" -f2 >actual &&
+
+ test_cmp expect actual
+}
+
+test_midx_layer_object_uniqueness () {
+ : >objs.all
+ while read layer
+ do
+ test-tool read-midx --show-objects $objdir "$layer" >out &&
+ grep "\.pack$" out | cut -d" " -f1 | sort >objs.layer &&
+ test_stdout_line_count = 0 comm -12 objs.all objs.layer &&
+ cat objs.all objs.layer | sort >objs.tmp &&
+ mv objs.tmp objs.all || return 1
+ done <$midx_chain
+}
+
+test_expect_success 'MIDX compaction with lex-ordered pack names' '
+ git init midx-compact-lex-order &&
+ (
+ cd midx-compact-lex-order &&
+
+ write_packs A B C D E &&
+ test_line_count = 5 $midx_chain &&
+
+ git multi-pack-index compact --incremental --bitmap \
+ "$(nth_line 2 "$midx_chain")" \
+ "$(nth_line 4 "$midx_chain")" &&
+ test_line_count = 3 $midx_chain &&
+
+ test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A &&
+ test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D &&
+ test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E &&
+
+ test_midx_layer_object_uniqueness
+ )
+'
+
+test_expect_success 'MIDX compaction with non-lex-ordered pack names' '
+ git init midx-compact-non-lex-order &&
+ (
+ cd midx-compact-non-lex-order &&
+
+ write_packs D C A B E &&
+ test_line_count = 5 $midx_chain &&
+
+ git multi-pack-index compact --incremental --bitmap \
+ "$(nth_line 2 "$midx_chain")" \
+ "$(nth_line 4 "$midx_chain")" &&
+ test_line_count = 3 $midx_chain &&
+
+ test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D &&
+ test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B &&
+ test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E &&
+
+ test_midx_layer_object_uniqueness
+ )
+'
+
+midx_objs_by_pack () {
+ awk '/\.pack$/ { split($3, a, "-"); print a[2], $1 }' | sort
+}
+
+tag_objs_from_pack () {
+ objs="$(git rev-list --objects --no-object-names "$2")" &&
+ printf "$1 %s\n" $objs | sort
+}
+
+test_expect_success 'MIDX compaction preserves pack object selection' '
+	git init midx-compact-preserve-selection &&
+	(
+		cd midx-compact-preserve-selection &&
+
+		test_commit A &&
+		test_commit B &&
+
+		# Create two packs, one containing just the objects from
+		# A, and another containing all objects from the
+		# repository.
+		p1="$(echo A | git pack-objects --revs --delta-base-offset \
+			$packdir/pack-1)" &&
+		p0="$(echo B | git pack-objects --revs --delta-base-offset \
+			$packdir/pack-0)" &&
+
+		echo "pack-1-$p1.idx" | git multi-pack-index write \
+			--incremental --bitmap --stdin-packs &&
+		echo "pack-0-$p0.idx" | git multi-pack-index write \
+			--incremental --bitmap --stdin-packs &&
+
+		write_packs C &&
+
+		git multi-pack-index compact --incremental --bitmap \
+			"$(nth_line 1 "$midx_chain")" \
+			"$(nth_line 2 "$midx_chain")" &&
+
+
+		test-tool read-midx --show-objects $objdir \
+			"$(nth_line 1 "$midx_chain")" >AB.info &&
+		test-tool read-midx --show-objects $objdir \
+			"$(nth_line 2 "$midx_chain")" >C.info &&
+
+		midx_objs_by_pack <AB.info >AB.actual &&
+		midx_objs_by_pack <C.info >C.actual &&
+
+		{
+			tag_objs_from_pack 1 A &&
+			tag_objs_from_pack 0 A..B
+		} | sort >AB.expect &&
+		tag_objs_from_pack C B..C >C.expect &&
+
+		test_cmp AB.expect AB.actual &&
+		test_cmp C.expect C.actual
+	)
+'
+
+test_expect_success 'MIDX compaction with bitmaps' '
+ git init midx-compact-with-bitmaps &&
+ (
+ cd midx-compact-with-bitmaps &&
+
+ write_packs foo bar baz quux woot &&
+
+ test-tool read-midx --bitmap $objdir >bitmap.expect &&
+ git multi-pack-index compact --incremental --bitmap \
+ "$(nth_line 2 "$midx_chain")" \
+ "$(nth_line 4 "$midx_chain")" &&
+ test-tool read-midx --bitmap $objdir >bitmap.actual &&
+
+ test_cmp bitmap.expect bitmap.actual &&
+
+ true
+ )
+'
+
+test_expect_success 'MIDX compaction with bitmaps (non-trivial)' '
+ git init midx-compact-with-bitmaps-non-trivial &&
+ (
+ cd midx-compact-with-bitmaps-non-trivial &&
+
+ git branch -m main &&
+
+ # D(4)
+ # /
+ # A(1) --- B(2) --- C(3) --- G(7)
+ # \
+ # E(5) --- F(6)
+ write_packs A B C &&
+ git checkout -b side &&
+ write_packs D &&
+ git checkout -b other B &&
+ write_packs E F &&
+ git checkout main &&
+ write_packs G &&
+
+ cat $midx_chain &&
+
+ # Compact layers 2-4, leaving us with:
+ #
+ # [A, [B, C, D], E, F, G]
+ git multi-pack-index compact --incremental --bitmap \
+ "$(nth_line 2 "$midx_chain")" \
+ "$(nth_line 4 "$midx_chain")" &&
+
+ # Then compact the top two layers, condensing the above
+ # such that the new 4th layer contains F and G.
+ #
+ # [A, [B, C, D], E, [F, G]]
+ git multi-pack-index compact --incremental --bitmap \
+ "$(nth_line 4 "$midx_chain")" \
+ "$(nth_line 5 "$midx_chain")" &&
+
+ cat $midx_chain
+ )
+'
+
+test_done