diff --git a/Makefile b/Makefile index 7e0f77e2988e3b101dbf008a11b35cd0780d1aef..6d8dcc4622b059d813383444d7d3dd1553377218 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41bad6072aa68dcddb18f271791f1d8c..0fc70d13a8807eb51c9e44b494ec2aadbd2d69db 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" #include "run-command.h" #include "write-or-die.h" @@ -129,22 +129,20 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; - enum object_type type; - unsigned long sz; + struct odb_read_stream *st; char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43dc43e3c4bbae4efc1a09e110b4898c5..97ea8d60d6187b35de7f5fd6ea8bc5c529679bc3 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" -#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { @@ -429,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +438,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +485,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837b03beddeaf5b0825a183b37eb5a9f0..505ddaa12f530934ca1b910cd271dff2ab9227ee 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index b1a650c6731d3268c65c2204af1975fe23e6d226..c7d2eea287fe7ddb61634ac1d40c3100a2df84a7 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" @@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1)) + if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4d14a8bcb12165395ff02deeb8c047f..b01cb77f4a8500d3fdce51f859648c4493ba7e97 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,15 +805,14 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } diff --git a/builtin/log.c b/builtin/log.c index c8319b8af38c8c732ddf632a0d3ced68487f8cd3..d4cf9c59c81a8397bd1162e09844870a31bedd77 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include "version.h" #include "mailmap.h" #include "progress.h" @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df137b44dfd1b75780b65b479f233c5a1..0d1d6995bfc35a40dd35f58cd4ba1498bac1c504 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f, const struct object_id *oid) { git_zstream stream; @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct hashfi for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -513,17 +513,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); @@ -577,7 +579,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +593,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +603,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +611,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); @@ -1716,7 +1718,7 @@ static int want_object_in_pack_mtime(const struct object_id *oid, */ struct odb_source *source = the_repository->objects->sources->next; for (; source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) return 0; } @@ -3978,7 +3980,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type int found = 0; for (; !found && source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) found = 1; /* diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index ef79e43715d362d2907c23b5613d6fdaf9a96c80..6fc64e9e4b8d5af9586a445198eb244afb870911 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -363,7 +363,7 @@ struct input_zstream_data { int status; }; -static const void *feed_input_zstream(struct input_stream *in_stream, +static const void *feed_input_zstream(struct odb_write_stream *in_stream, unsigned long *readlen) { struct input_zstream_data *data = in_stream->data; @@ -393,7 +393,7 @@ static void stream_blob(unsigned long size, unsigned nr) { git_zstream zstream = { 0 }; struct input_zstream_data data = { 0 }; - struct input_stream in_stream = { + struct odb_write_stream in_stream = { .read = feed_input_zstream, .data = &data, }; @@ -402,8 +402,7 @@ static void stream_blob(unsigned long size, unsigned nr) data.zstream = &zstream; git_inflate_init(&zstream); - if (stream_loose_object(the_repository->objects->sources, - &in_stream, size, &info->oid)) + if (odb_write_object_stream(the_repository->objects, &in_stream, size, &info->oid)) die(_("failed to write object in stream")); if (data.status != Z_STREAM_END) diff --git a/entry.c b/entry.c index cae02eb50398d7cfcdac8b4c4382e067e9de02d9..7817aee362ed9e7e14e3a2b92c9cc0ab5f013673 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/loose.c b/loose.c index e8ea6e7e24ba31218579c92fb506b012f2b72f12..56cf64b648bf80c3d7d8e0649f87687138018743 100644 --- a/loose.c +++ b/loose.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "hash.h" #include "path.h" +#include "object-file.h" #include "odb.h" #include "hex.h" #include "repository.h" @@ -48,13 +49,13 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->loose_map; + struct loose_object_map *map = source->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(source->loose_objects_cache, compat_oid); + oidtree_insert(source->loose->cache, compat_oid); return inserted; } @@ -64,11 +65,11 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->loose_map) - loose_object_map_init(&source->loose_map); - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + if (!source->loose->map) + loose_object_map_init(&source->loose->map); + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); @@ -124,7 +125,7 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->loose_map->to_compat; + kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat; struct lock_file lock; int fd; khiter_t iter; @@ -230,7 +231,7 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->loose_map; + struct loose_object_map *loose_map = source->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? diff --git a/meson.build b/meson.build index 1f95a06edb78297bee870b181e2878320c153dd4..fc82929b379dc5ccc99e28b22f46677c547977c6 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +491,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/object-file.c b/object-file.c index 4675c8ed6b67eb8b1f054aa7326f380d9a0a29b5..12177a7dd707a8f4efa46834f7e4e0a3ee8ddc4f 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" #include "setup.h" -#include "streaming.h" #include "tempfile.h" #include "tmp-objdir.h" @@ -99,8 +99,8 @@ static int check_and_freshen_source(struct odb_source *source, return check_and_freshen_file(path.buf, freshen); } -int has_loose_object(struct odb_source *source, - const struct object_id *oid) +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid) { return check_and_freshen_source(source, oid, 0); } @@ -132,29 +132,27 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. */ r->hash_algo->init_fn(&c); git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,30 +160,27 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? -1 : 0; } /* - * Find "oid" as a loose object in the local repository or in an alternate. + * Find "oid" as a loose object in given source. * Returns 0 on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct repository *r, const struct object_id *oid, +static int stat_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, struct stat *st, const char **path) { - struct odb_source *source; static struct strbuf buf = STRBUF_INIT; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - if (!lstat(*path, st)) - return 0; - } + *path = odb_loose_path(loose->source, &buf, oid); + if (!lstat(*path, st)) + return 0; return -1; } @@ -194,39 +189,24 @@ static int stat_loose_object(struct repository *r, const struct object_id *oid, * Like stat_loose_object(), but actually open the object and return the * descriptor. See the caveats on the "path" parameter above. */ -static int open_loose_object(struct repository *r, +static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { - int fd; - struct odb_source *source; - int most_interesting_errno = ENOENT; static struct strbuf buf = STRBUF_INIT; + int fd; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; + *path = odb_loose_path(loose->source, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; - if (most_interesting_errno == ENOENT) - most_interesting_errno = errno; - } - errno = most_interesting_errno; return -1; } -static int quick_has_loose(struct repository *r, +static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { - struct odb_source *source; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_loose_cache(source, oid), oid)) - return 1; - } - return 0; + return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); } /* @@ -252,23 +232,41 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *map_loose_object(struct repository *r, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; - int fd = open_loose_object(r, oid, &p); + int fd = open_loose_object(source->loose, oid, &p); if (fd < 0) return NULL; return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. + */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -347,11 +345,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting " \0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the populate the "typep" in the "struct + * object_info". It will be OBJ_BAD if the object type is unknown. The + * parsed can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -407,9 +412,9 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags) +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags) { int status = 0; int fd; @@ -422,7 +427,7 @@ int loose_object_info(struct repository *r, enum object_type type_scratch; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, r->hash_algo); + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); /* * If we don't care about type or size, then we don't @@ -435,15 +440,15 @@ int loose_object_info(struct repository *r, if (!oi->typep && !oi->sizep && !oi->contentp) { struct stat st; if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(r, oid) ? 0 : -1; - if (stat_loose_object(r, oid, &st, &path) < 0) + return quick_has_loose(source->loose, oid) ? 0 : -1; + if (stat_loose_object(source->loose, oid, &st, &path) < 0) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; return 0; } - fd = open_loose_object(r, oid, &path); + fd = open_loose_object(source->loose, oid, &path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); @@ -986,35 +991,15 @@ static int write_loose_object(struct odb_source *source, FOF_SKIP_COLLISION_CHECK); } -static int freshen_loose_object(struct object_database *odb, - const struct object_id *oid) +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid) { - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) - if (check_and_freshen_source(source, oid, 1)) - return 1; - return 0; -} - -static int freshen_packed_object(struct object_database *odb, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (!freshen_file(e.p->pack_name)) - return 0; - e.p->freshened = 1; - return 1; + return !!check_and_freshen_source(source, oid, 1); } -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid) +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *in_stream, size_t len, + struct object_id *oid) { const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; @@ -1091,12 +1076,10 @@ int stream_loose_object(struct odb_source *source, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(source, fd, tmp_file.buf); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) { + if (odb_freshen_object(source->odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(source, &filename, oid); /* We finally know the object path, and create the missing dir. */ @@ -1124,10 +1107,10 @@ int stream_loose_object(struct odb_source *source, return err; } -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags) { const struct git_hash_algo *algo = source->odb->repo->hash_algo; const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; @@ -1155,8 +1138,7 @@ int write_object_file(struct odb_source *source, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) + if (odb_freshen_object(source->odb, oid)) return 0; if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; @@ -1179,7 +1161,7 @@ int force_object_loose(struct odb_source *source, int ret; for (struct odb_source *s = source->odb->sources; s; s = s->next) - if (has_loose_object(s, oid)) + if (odb_source_loose_has_object(s, oid)) return 0; oi.typep = &type; @@ -1802,44 +1784,49 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid) +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->loose_objects_subdir_seen[0]); + size_t word_bits = bitsizeof(source->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->loose_objects_subdir_seen)) + (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->loose_objects_subdir_seen[word_index]; + bitmap = &source->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->loose_objects_cache; - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + return source->loose->cache; + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - source->loose_objects_cache); + source->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return source->loose_objects_cache; + return source->loose->cache; } -void odb_clear_loose_cache(struct odb_source *source) +static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { - oidtree_clear(source->loose_objects_cache); - FREE_AND_NULL(source->loose_objects_cache); - memset(&source->loose_objects_subdir_seen, 0, - sizeof(source->loose_objects_subdir_seen)); + oidtree_clear(loose->cache); + FREE_AND_NULL(loose->cache); + memset(&loose->subdir_seen, 0, + sizeof(loose->subdir_seen)); +} + +void odb_source_loose_reprepare(struct odb_source *source) +{ + odb_source_loose_clear_cache(source->loose); } static int check_stream_oid(git_zstream *stream, @@ -1995,3 +1982,144 @@ void object_file_transaction_commit(struct odb_transaction *transaction) transaction->odb->transaction = NULL; free(transaction); } + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source) +{ + struct odb_source_loose *loose; + CALLOC_ARRAY(loose, 1); + loose->source = source; + return loose; +} + +void odb_source_loose_free(struct odb_source_loose *loose) +{ + if (!loose) + return; + odb_source_loose_clear_cache(loose); + loose_object_map_clear(&loose->map); + free(loose); +} + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index 3fd48dcafbf1dc797efabd489e68720e521a38d2..1229d5f675b44aa002cb49d9cdafe6842405cf2c 100644 --- a/object-file.h +++ b/object-file.h @@ -7,14 +7,6 @@ struct index_state; -/* - * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing - * blobs. This has a difference only if extensions.partialClone is set. - * - * Its default value is 1. - */ -extern int fetch_if_missing; - enum { INDEX_WRITE_OBJECT = (1 << 0), INDEX_FORMAT_CHECK = (1 << 1), @@ -24,17 +16,69 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; +struct odb_source_loose { + struct odb_source *source; + + /* + * Used to store the results of readdir(3) calls when we are OK + * sacrificing accuracy due to races for speed. That includes + * object existence with OBJECT_INFO_QUICK, as well as + * our search for unique abbreviated hashes. Don't use it for tasks + * requiring greater accuracy! + * + * Be sure to call odb_load_loose_cache() before using. + */ + uint32_t subdir_seen[8]; /* 256 bits */ + struct oidtree *cache; + + /* Map between object IDs for loose objects. */ + struct loose_object_map *map; +}; + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source); +void odb_source_loose_free(struct odb_source_loose *loose); + +/* Reprepare the loose source by emptying the loose object cache. */ +void odb_source_loose_reprepare(struct odb_source *source); + +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags); + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); + /* - * Populate and return the loose object cache array corresponding to the - * given object ID. + * Return true iff an object database source has a loose object + * with the specified name. This function does not respect replace + * references. */ -struct oidtree *odb_loose_cache(struct odb_source *source, +int odb_source_loose_has_object(struct odb_source *source, const struct object_id *oid); -/* Empty the loose object cache for the specified object directory. */ -void odb_clear_loose_cache(struct odb_source *source); +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid); + +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags); + +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + +/* + * Populate and return the loose object cache array corresponding to the + * given object ID. + */ +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); /* * Put in `buf` the name of the file in the local object database that @@ -44,17 +88,6 @@ const char *odb_loose_path(struct odb_source *source, struct strbuf *buf, const struct object_id *oid); -/* - * Return true iff an object database source has a loose object - * with the specified name. This function does not respect replace - * references. - */ -int has_loose_object(struct odb_source *source, - const struct object_id *oid); - -void *map_loose_object(struct repository *r, const struct object_id *oid, - unsigned long *size); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: @@ -112,55 +145,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting " \0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest(). - */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags); - -struct input_stream { - const void *(*read)(struct input_stream *, unsigned long *len); - void *data; - int is_finished; -}; - -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); @@ -182,10 +166,6 @@ int check_object_signature(struct repository *r, const struct object_id *oid, */ int stream_object_signature(struct repository *r, const struct object_id *oid); -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags); - enum finalize_object_file_flags { FOF_SKIP_COLLISION_CHECK = 1, }; diff --git a/object-name.c b/object-name.c index 766c757042a38950f7b498dc7fa92b72befea264..8ce0ef7c23a6bd78c5770bef51d31bba88a11323 100644 --- a/object-name.c +++ b/object-name.c @@ -116,7 +116,7 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_loose_cache(source, &ds->bin_pfx), + oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), &ds->bin_pfx, ds->len, match_prefix, ds); } diff --git a/odb.c b/odb.c index 00a6e71568b5985c0b344bfebbe92d1e0bff1294..f4cbee4b042d83e4442ba3e264b3752e68148bee 100644 --- a/odb.c +++ b/odb.c @@ -86,17 +86,16 @@ int odb_mkstemp(struct object_database *odb, /* * Return non-zero iff the path is usable as an alternate object database. */ -static int alt_odb_usable(struct object_database *o, - struct strbuf *path, - const char *normalized_objdir, khiter_t *pos) +static int alt_odb_usable(struct object_database *o, const char *path, + const char *normalized_objdir) { int r; /* Detect cases where alternate disappeared */ - if (!is_directory(path->buf)) { + if (!is_directory(path)) { error(_("object directory %s does not exist; " "check .git/objects/info/alternates"), - path->buf); + path); return 0; } @@ -113,11 +112,14 @@ static int alt_odb_usable(struct object_database *o, assert(r == 1); /* never used */ kh_value(o->source_by_path, p) = o->sources; } - if (fspatheq(path->buf, normalized_objdir)) + + if (fspatheq(path, normalized_objdir)) + return 0; + + if (kh_get_odb_path_map(o->source_by_path, path) < kh_end(o->source_by_path)) return 0; - *pos = kh_put_odb_path_map(o->source_by_path, path->buf, &r); - /* r: 0 = exists, 1 = never used, 2 = deleted */ - return r == 0 ? 0 : 1; + + return 1; } /* @@ -139,6 +141,21 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source *source; + + CALLOC_ARRAY(source, 1); + source->odb = odb; + source->local = local; + source->path = xstrdup(path); + source->loose = odb_source_loose_new(source); + + return source; +} + static struct odb_source *link_alt_odb_entry(struct object_database *odb, const char *dir, const char *relative_base, @@ -148,6 +165,7 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, struct strbuf pathbuf = STRBUF_INIT; struct strbuf tmp = STRBUF_INIT; khiter_t pos; + int ret; if (!is_absolute_path(dir) && relative_base) { strbuf_realpath(&pathbuf, relative_base, 1); @@ -172,20 +190,18 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, strbuf_reset(&tmp); strbuf_realpath(&tmp, odb->sources->path, 1); - if (!alt_odb_usable(odb, &pathbuf, tmp.buf, &pos)) + if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) goto error; - CALLOC_ARRAY(alternate, 1); - alternate->odb = odb; - alternate->local = false; - /* pathbuf.buf is already in r->objects->source_by_path */ - alternate->path = strbuf_detach(&pathbuf, NULL); + alternate = odb_source_new(odb, pathbuf.buf, false); /* add the alternate entry */ *odb->sources_tail = alternate; odb->sources_tail = &(alternate->next); - alternate->next = NULL; - assert(odb->source_by_path); + + pos = kh_put_odb_path_map(odb->source_by_path, alternate->path, &ret); + if (!ret) + BUG("source must not yet exist"); kh_value(odb->source_by_path, pos) = alternate; /* recursively add alternates */ @@ -337,9 +353,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Make a new primary odb and link the old primary ODB in as an * alternate */ - source = xcalloc(1, sizeof(*source)); - source->odb = odb; - source->path = xstrdup(dir); + source = odb_source_new(odb, dir, false); /* * Disable ref updates while a temporary odb is active, since @@ -352,11 +366,10 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, return source->next; } -static void free_object_directory(struct odb_source *source) +static void odb_source_free(struct odb_source *source) { free(source->path); - odb_clear_loose_cache(source); - loose_object_map_clear(&source->loose_map); + odb_source_loose_free(source->loose); free(source); } @@ -374,7 +387,7 @@ void odb_restore_primary_source(struct object_database *odb, BUG("we expect the old primary object store to be the first alternate"); odb->sources = restore_source; - free_object_directory(cur_source); + odb_source_free(cur_source); } char *compute_alternate_path(const char *path, struct strbuf *err) @@ -653,8 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -684,19 +695,24 @@ static int do_oid_object_info_extended(struct object_database *odb, return 0; } + odb_prepare_alternates(odb); + while (1) { - if (find_pack_entry(odb->repo, real, &e)) - break; + struct odb_source *source; - /* Most likely it's a loose object. */ - if (!loose_object_info(odb->repo, real, oi, flags)) + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) return 0; + /* Most likely it's a loose object. */ + for (source = odb->sources; source; source = source->next) + if (!odb_source_loose_read_object_info(source, real, oi, flags)) + return 0; + /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; } /* @@ -729,25 +745,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. - */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, @@ -969,6 +966,22 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, return odb_read_object_info_extended(odb, oid, NULL, object_info_flags) >= 0; } +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + if (packfile_store_freshen_object(odb->packfiles, oid)) + return 1; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (odb_source_loose_freshen_object(source, oid)) + return 1; + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { @@ -987,7 +1000,15 @@ int odb_write_object_ext(struct object_database *odb, struct object_id *compat_oid, unsigned flags) { - return write_object_file(odb->sources, buf, len, type, oid, compat_oid, flags); + return odb_source_loose_write_object(odb->sources, buf, len, type, + oid, compat_oid, flags); +} + +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid) +{ + return odb_source_loose_write_stream(odb->sources, stream, len, oid); } struct object_database *odb_new(struct repository *repo) @@ -1002,13 +1023,13 @@ struct object_database *odb_new(struct repository *repo) return o; } -static void free_object_directories(struct object_database *o) +static void odb_free_sources(struct object_database *o) { while (o->sources) { struct odb_source *next; next = o->sources->next; - free_object_directory(o->sources); + odb_source_free(o->sources); o->sources = next; } kh_destroy_odb_path_map(o->source_by_path); @@ -1026,7 +1047,7 @@ void odb_clear(struct object_database *o) o->commit_graph = NULL; o->commit_graph_attempted = 0; - free_object_directories(o); + odb_free_sources(o); o->sources_tail = NULL; o->loaded_alternates = 0; @@ -1057,7 +1078,7 @@ void odb_reprepare(struct object_database *o) odb_prepare_alternates(o); for (source = o->sources; source; source = source->next) - odb_clear_loose_cache(source); + odb_source_loose_reprepare(source); o->approximate_object_count_valid = 0; diff --git a/odb.h b/odb.h index e6602dd90c833c9d7c86c5e8b374c98b8db49284..9bb28008b1d953774ee198828298cec364478201 100644 --- a/odb.h +++ b/odb.h @@ -14,6 +14,14 @@ struct strbuf; struct repository; struct multi_pack_index; +/* + * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing + * blobs. This has a difference only if extensions.partialClone is set. + * + * Its default value is 1. + */ +extern int fetch_if_missing; + /* * Compute the exact path an alternate is at and returns it. In case of * error NULL is returned and the human readable error is added to `err` @@ -40,20 +48,8 @@ struct odb_source { /* Object database that owns this object source. */ struct object_database *odb; - /* - * Used to store the results of readdir(3) calls when we are OK - * sacrificing accuracy due to races for speed. That includes - * object existence with OBJECT_INFO_QUICK, as well as - * our search for unique abbreviated hashes. Don't use it for tasks - * requiring greater accuracy! - * - * Be sure to call odb_load_loose_cache() before using. - */ - uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ - struct oidtree *loose_objects_cache; - - /* Map between object IDs for loose objects. */ - struct loose_object_map *loose_map; + /* Private state for loose objects. */ + struct odb_source_loose *loose; /* * private data @@ -89,6 +85,10 @@ struct odb_source { char *path; }; +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local); + struct packed_git; struct packfile_store; struct cached_object_entry; @@ -396,6 +396,9 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, unsigned flags); +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid); + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect); @@ -489,4 +492,14 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } +struct odb_write_stream { + const void *(*read)(struct odb_write_stream *, unsigned long *len); + void *data; + int is_finished; +}; + +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + #endif /* ODB_H */ diff --git a/odb/streaming.c b/odb/streaming.c new file mode 100644 index 0000000000000000000000000000000000000000..745cd486fbb33d8405eb28d057e8244d6bfc1bf1 --- /dev/null +++ b/odb/streaming.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2011, Google Inc. + */ + +#include "git-compat-util.h" +#include "convert.h" +#include "environment.h" +#include "repository.h" +#include "object-file.h" +#include "odb.h" +#include "odb/streaming.h" +#include "replace-object.h" +#include "packfile.h" + +#define FILTER_BUFFER (1024*16) + +/***************************************************************** + * + * Filtered stream + * + *****************************************************************/ + +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) +{ + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return odb_read_stream_close(fs->upstream); +} + +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, + size_t sz) +{ + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + size_t filled = 0; + + while (sz) { + /* do we already have filtered output? */ + if (fs->o_ptr < fs->o_end) { + size_t to_move = fs->o_end - fs->o_ptr; + if (sz < to_move) + to_move = sz; + memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); + fs->o_ptr += to_move; + sz -= to_move; + filled += to_move; + continue; + } + fs->o_end = fs->o_ptr = 0; + + /* do we have anything to feed the filter with? */ + if (fs->i_ptr < fs->i_end) { + size_t to_feed = fs->i_end - fs->i_ptr; + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + fs->ibuf + fs->i_ptr, &to_feed, + fs->obuf, &to_receive)) + return -1; + fs->i_ptr = fs->i_end - to_feed; + fs->o_end = FILTER_BUFFER - to_receive; + continue; + } + + /* tell the filter to drain upon no more input */ + if (fs->input_finished) { + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + NULL, NULL, + fs->obuf, &to_receive)) + return -1; + fs->o_end = FILTER_BUFFER - to_receive; + if (!fs->o_end) + break; + continue; + } + fs->i_end = fs->i_ptr = 0; + + /* refill the input from the upstream */ + if (!fs->input_finished) { + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); + if (fs->i_end < 0) + return -1; + if (fs->i_end) + continue; + } + fs->input_finished = 1; + } + return filled; +} + +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) +{ + struct odb_filtered_read_stream *fs; + + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; + fs->upstream = st; + fs->filter = filter; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; +} + +/***************************************************************** + * + * In-core stream + * + *****************************************************************/ + +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) +{ + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); + return 0; +} + +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + size_t read_size = sz; + size_t remainder = st->base.size - st->read_ptr; + + if (remainder <= read_size) + read_size = remainder; + if (read_size) { + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; + } + return read_size; +} + +static int open_istream_incore(struct odb_read_stream **out, + struct object_database *odb, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, + }; + struct odb_incore_read_stream *st; + int ret; + + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; + ret = odb_read_object_info_extended(odb, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; + + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + + return 0; +} + +/***************************************************************************** + * static helpers variables and functions for users of streaming interface + *****************************************************************************/ + +static int istream_source(struct odb_read_stream **out, + struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) + return 0; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (!odb_source_loose_read_object_stream(out, source, oid)) + return 0; + + return open_istream_incore(out, odb, oid); +} + +/**************************************************************** + * Users of streaming interface + ****************************************************************/ + +int odb_read_stream_close(struct odb_read_stream *st) +{ + int r = st->close(st); + free(st); + return r; +} + +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) +{ + return st->read(st, buf, sz); +} + +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + struct stream_filter *filter) +{ + struct odb_read_stream *st; + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); + + if (ret) + return NULL; + + if (filter) { + /* Add "&& !is_null_stream_filter(filter)" for performance */ + struct odb_read_stream *nst = attach_stream_filter(st, filter); + if (!nst) { + odb_read_stream_close(st); + return NULL; + } + st = nst; + } + + return st; +} + +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) +{ + struct odb_read_stream *st; + ssize_t kept = 0; + int result = -1; + + st = odb_read_stream_open(odb, oid, filter); + if (!st) { + if (filter) + free_stream_filter(filter); + return result; + } + if (st->type != OBJ_BLOB) + goto close_and_exit; + for (;;) { + char buf[1024 * 16]; + ssize_t wrote, holeto; + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); + + if (readlen < 0) + goto close_and_exit; + if (!readlen) + break; + if (can_seek && sizeof(buf) == readlen) { + for (holeto = 0; holeto < readlen; holeto++) + if (buf[holeto]) + break; + if (readlen == holeto) { + kept += holeto; + continue; + } + } + + if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) + goto close_and_exit; + else + kept = 0; + wrote = write_in_full(fd, buf, readlen); + + if (wrote < 0) + goto close_and_exit; + } + if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || + xwrite(fd, "", 1) != 1)) + goto close_and_exit; + result = 0; + + close_and_exit: + odb_read_stream_close(st); + return result; +} diff --git a/odb/streaming.h b/odb/streaming.h new file mode 100644 index 0000000000000000000000000000000000000000..c7861f7e13c606af66d5b54b52b7b1cc3eb9adad --- /dev/null +++ b/odb/streaming.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2011, Google Inc. + */ +#ifndef STREAMING_H +#define STREAMING_H 1 + +#include "object.h" + +struct object_database; +struct odb_read_stream; +struct stream_filter; + +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + +/* + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. + */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); + +/* + * Look up the object by its ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. + */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); + +#endif /* STREAMING_H */ diff --git a/packfile.c b/packfile.c index 1ae2b2fe1eda77a57f007f78726724bd26f6a743..7a16aaa90d0a2f817d2de8a5b2aac5b594f7ecfc 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -2048,7 +2049,9 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct repository *r, + const struct object_id *oid, + struct pack_entry *e) { struct list_head *pos; @@ -2071,6 +2074,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags UNUSED) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (oi == &blank_oi) + return 0; + + rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + + if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { @@ -2353,3 +2407,130 @@ void packfile_store_close(struct packfile_store *store) close_pack(p); } } + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, &st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(store->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; +} diff --git a/packfile.h b/packfile.h index c9d0b93446b5f569aadb12f48fce8ffc6aace59d..3fcc5ae6e08c4b670f6d4bae9b62f882e84d3818 100644 --- a/packfile.h +++ b/packfile.h @@ -8,6 +8,7 @@ /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct hashmap_entry packmap_ent; @@ -144,6 +145,21 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); + +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -163,6 +179,9 @@ struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); struct packed_git *packfile_store_load_pack(struct packfile_store *store, const char *idx_path, int local); +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -354,7 +373,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. */ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); int has_object_pack(struct repository *r, const struct object_id *oid); diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6e8524fcf829c0f2fb389146b643e22..0bf4bd6d4abd8c98cfdfc0f68a39d44cf4eaa800 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + &pc_item->ce->oid, filter, 1)) { /* On error, reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/repository.c b/repository.c index 6faf5c73981ebf6c520919a5475f0243f08cea87..6aaa7ba00869bf6aa0450f3fbdf27590f7439228 100644 --- a/repository.c +++ b/repository.c @@ -160,20 +160,24 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; + char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); + expand_base_dir(&objects_path, o->object_dir, + repo->commondir, "objects"); if (!repo->objects->sources) { - CALLOC_ARRAY(repo->objects->sources, 1); - repo->objects->sources->odb = repo->objects; - repo->objects->sources->local = true; + repo->objects->sources = odb_source_new(repo->objects, + objects_path, true); repo->objects->sources_tail = &repo->objects->sources->next; + free(objects_path); + } else { + free(repo->objects->sources->path); + repo->objects->sources->path = objects_path; } - expand_base_dir(&repo->objects->sources->path, o->object_dir, - repo->commondir, "objects"); repo->objects->sources->disable_ref_updates = o->disable_ref_updates; diff --git a/streaming.c b/streaming.c deleted file mode 100644 index 4b13827668e67a449860e087c45a01906126aa82..0000000000000000000000000000000000000000 --- a/streaming.c +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "convert.h" -#include "environment.h" -#include "streaming.h" -#include "repository.h" -#include "object-file.h" -#include "odb.h" -#include "replace-object.h" -#include "packfile.h" - -typedef int (*open_istream_fn)(struct git_istream *, - struct repository *, - const struct object_id *, - enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); - -#define FILTER_BUFFER (1024*16) - -struct filtered_istream { - struct git_istream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - -struct git_istream { - open_istream_fn open; - close_istream_fn close; - read_istream_fn read; - - unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - - struct filtered_istream filtered; - } u; -}; - -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct git_istream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - -/***************************************************************** - * - * Filtered stream - * - *****************************************************************/ - -static int close_istream_filtered(struct git_istream *st) -{ - free_stream_filter(st->u.filtered.filter); - return close_istream(st->u.filtered.upstream); -} - -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, - size_t sz) -{ - struct filtered_istream *fs = &(st->u.filtered); - size_t filled = 0; - - while (sz) { - /* do we already have filtered output? */ - if (fs->o_ptr < fs->o_end) { - size_t to_move = fs->o_end - fs->o_ptr; - if (sz < to_move) - to_move = sz; - memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); - fs->o_ptr += to_move; - sz -= to_move; - filled += to_move; - continue; - } - fs->o_end = fs->o_ptr = 0; - - /* do we have anything to feed the filter with? */ - if (fs->i_ptr < fs->i_end) { - size_t to_feed = fs->i_end - fs->i_ptr; - size_t to_receive = FILTER_BUFFER; - if (stream_filter(fs->filter, - fs->ibuf + fs->i_ptr, &to_feed, - fs->obuf, &to_receive)) - return -1; - fs->i_ptr = fs->i_end - to_feed; - fs->o_end = FILTER_BUFFER - to_receive; - continue; - } - - /* tell the filter to drain upon no more input */ - if (fs->input_finished) { - size_t to_receive = FILTER_BUFFER; - if (stream_filter(fs->filter, - NULL, NULL, - fs->obuf, &to_receive)) - return -1; - fs->o_end = FILTER_BUFFER - to_receive; - if (!fs->o_end) - break; - continue; - } - fs->i_end = fs->i_ptr = 0; - - /* refill the input from the upstream */ - if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); - if (fs->i_end < 0) - return -1; - if (fs->i_end) - continue; - } - fs->input_finished = 1; - } - return filled; -} - -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) -{ - struct git_istream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); - - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; - fs->upstream = st; - fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - return ifs; -} - -/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) -{ - size_t total_read = 0; - - switch (st->z_state) { - case z_done: - return 0; - case z_error: - return -1; - default: - break; - } - - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct git_istream *st) -{ - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); - return 0; -} - -static int open_istream_loose(struct git_istream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) -{ - struct object_info oi = OBJECT_INFO_INIT; - oi.sizep = &st->size; - oi.typep = type; - - st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); - if (!st->u.loose.mapped) - return -1; - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) - goto error; - - st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); - return -1; -} - - -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, - size_t sz) -{ - size_t total_read = 0; - - switch (st->z_state) { - case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; - break; - case z_done: - return 0; - case z_error: - return -1; - case z_used: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->u.in_pack.pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). - */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct git_istream *st) -{ - close_deflated_stream(st); - return 0; -} - -static int open_istream_pack_non_delta(struct git_istream *st, - struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) -{ - struct pack_window *window; - enum object_type in_pack_type; - - window = NULL; - - in_pack_type = unpack_object_header(st->u.in_pack.pack, - &window, - &st->u.in_pack.pos, - &st->size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - - return 0; -} - - -/***************************************************************** - * - * In-core stream - * - *****************************************************************/ - -static int close_istream_incore(struct git_istream *st) -{ - free(st->u.incore.buf); - return 0; -} - -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) -{ - size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; - - if (remainder <= read_size) - read_size = remainder; - if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; - } - return read_size; -} - -static int open_istream_incore(struct git_istream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) -{ - struct object_info oi = OBJECT_INFO_INIT; - - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; - - oi.typep = type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); -} - -/***************************************************************************** - * static helpers variables and functions for users of streaming interface - *****************************************************************************/ - -static int istream_source(struct git_istream *st, - struct repository *r, - const struct object_id *oid, - enum object_type *type) -{ - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = type; - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; - - switch (oi.whence) { - case OI_LOOSE: - st->open = open_istream_loose; - return 0; - case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; - return 0; - } -} - -/**************************************************************** - * Users of streaming interface - ****************************************************************/ - -int close_istream(struct git_istream *st) -{ - int r = st->close(st); - free(st); - return r; -} - -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) -{ - return st->read(st, buf, sz); -} - -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) -{ - struct git_istream *st = xmalloc(sizeof(*st)); - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); - - if (ret) { - free(st); - return NULL; - } - - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } - if (filter) { - /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); - if (!nst) { - close_istream(st); - return NULL; - } - st = nst; - } - - *size = st->size; - return st; -} - -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) -{ - struct git_istream *st; - enum object_type type; - unsigned long sz; - ssize_t kept = 0; - int result = -1; - - st = open_istream(the_repository, oid, &type, &sz, filter); - if (!st) { - if (filter) - free_stream_filter(filter); - return result; - } - if (type != OBJ_BLOB) - goto close_and_exit; - for (;;) { - char buf[1024 * 16]; - ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); - - if (readlen < 0) - goto close_and_exit; - if (!readlen) - break; - if (can_seek && sizeof(buf) == readlen) { - for (holeto = 0; holeto < readlen; holeto++) - if (buf[holeto]) - break; - if (readlen == holeto) { - kept += holeto; - continue; - } - } - - if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) - goto close_and_exit; - else - kept = 0; - wrote = write_in_full(fd, buf, readlen); - - if (wrote < 0) - goto close_and_exit; - } - if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || - xwrite(fd, "", 1) != 1)) - goto close_and_exit; - result = 0; - - close_and_exit: - close_istream(st); - return result; -} diff --git a/streaming.h b/streaming.h deleted file mode 100644 index bd27f59e5764aec64cd1cf927baf213fcec4d893..0000000000000000000000000000000000000000 --- a/streaming.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ -#ifndef STREAMING_H -#define STREAMING_H 1 - -#include "object.h" - -/* opaque */ -struct git_istream; -struct stream_filter; - -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); - -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); - -#endif /* STREAMING_H */