From be14de68f63ddd9654306ea6916e6541bdf7fe8a Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 2 Dec 2025 13:22:41 -0600 Subject: [PATCH 1/7] builtin/repo: group per-type object values into struct The `object_stats` structure stores object counts by type. In a subsequent commit, additional per-type object measurements will also be stored. Group per-type object values into a new struct to allow better reuse. Signed-off-by: Justin Tobler --- builtin/repo.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 2a653bd3ea..a69699857a 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -202,13 +202,17 @@ struct ref_stats { size_t others; }; -struct object_stats { +struct object_values { size_t tags; size_t commits; size_t trees; size_t blobs; }; +struct object_stats { + struct object_values type_counts; +}; + struct repo_structure { struct ref_stats refs; struct object_stats objects; @@ -281,9 +285,9 @@ static inline size_t get_total_reference_count(struct ref_stats *stats) return stats->branches + stats->remotes + stats->tags + stats->others; } -static inline size_t get_total_object_count(struct object_stats *stats) +static inline size_t get_total_object_values(struct object_values *values) { - return stats->tags + stats->commits + stats->trees + stats->blobs; + return values->tags + values->commits + values->trees + values->blobs; } static void stats_table_setup_structure(struct stats_table *table, @@ -302,14 +306,18 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); stats_table_count_addf(table, refs->others, " * %s", _("Others")); - object_total = get_total_object_count(objects); + object_total = get_total_object_values(&objects->type_counts); stats_table_addf(table, ""); stats_table_addf(table, "* %s", _("Reachable objects")); stats_table_count_addf(table, object_total, " * %s", 
_("Count")); - stats_table_count_addf(table, objects->commits, " * %s", _("Commits")); - stats_table_count_addf(table, objects->trees, " * %s", _("Trees")); - stats_table_count_addf(table, objects->blobs, " * %s", _("Blobs")); - stats_table_count_addf(table, objects->tags, " * %s", _("Tags")); + stats_table_count_addf(table, objects->type_counts.commits, + " * %s", _("Commits")); + stats_table_count_addf(table, objects->type_counts.trees, + " * %s", _("Trees")); + stats_table_count_addf(table, objects->type_counts.blobs, + " * %s", _("Blobs")); + stats_table_count_addf(table, objects->type_counts.tags, + " * %s", _("Tags")); } static void stats_table_print_structure(const struct stats_table *table) @@ -389,13 +397,13 @@ static void structure_keyvalue_print(struct repo_structure *stats, (uintmax_t)stats->refs.others, value_delim); printf("objects.commits.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.commits, value_delim); + (uintmax_t)stats->objects.type_counts.commits, value_delim); printf("objects.trees.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.trees, value_delim); + (uintmax_t)stats->objects.type_counts.trees, value_delim); printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.blobs, value_delim); + (uintmax_t)stats->objects.type_counts.blobs, value_delim); printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.tags, value_delim); + (uintmax_t)stats->objects.type_counts.tags, value_delim); fflush(stdout); } @@ -473,22 +481,22 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, switch (type) { case OBJ_TAG: - stats->tags += oids->nr; + stats->type_counts.tags += oids->nr; break; case OBJ_COMMIT: - stats->commits += oids->nr; + stats->type_counts.commits += oids->nr; break; case OBJ_TREE: - stats->trees += oids->nr; + stats->type_counts.trees += oids->nr; break; case OBJ_BLOB: - stats->blobs += oids->nr; + stats->type_counts.blobs += oids->nr; 
break; default: BUG("invalid object type"); } - object_count = get_total_object_count(stats); + object_count = get_total_object_values(&stats->type_counts); display_progress(data->progress, object_count); return 0; -- GitLab From 0a145cfeec3470a9ee2e2b00ee153cd549795664 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 11 Dec 2025 13:22:59 -0600 Subject: [PATCH 2/7] strbuf: split out logic to humanise byte values In a subsequent commit, byte size values displayed in table output for the git-repo(1) "structure" subcommand will be shown in a more human-readable format with the appropriate unit prefixes. For this use case, the downscaled values and unit prefixes must be handled separately to ensure proper column alignment. Split out logic from strbuf_humanise() to downscale byte values and determine the corresponding unit prefix into a separate humanise_bytes() function that provides separate value and unit strings. Note that the "byte" string in "t/helper/test-simple-ipc.c" is unmarked for translation here so that it doesn't conflict with the newly defined plural "byte/bytes" translation and instead uses it. Signed-off-by: Justin Tobler --- strbuf.c | 74 ++++++++++++++++++++------------------ strbuf.h | 14 ++++++++ t/helper/test-simple-ipc.c | 7 +++- 3 files changed, 60 insertions(+), 35 deletions(-) diff --git a/strbuf.c b/strbuf.c index 6c3851a7f8..3fbd375ad6 100644 --- a/strbuf.c +++ b/strbuf.c @@ -836,47 +836,53 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn); } -static void strbuf_humanise(struct strbuf *buf, off_t bytes, - int humanise_rate) +void humanise_bytes(off_t bytes, char **value, const char **unit, + unsigned flags) { + int humanise_rate = flags & HUMANISE_RATE; + if (bytes > 1 << 30) { - strbuf_addf(buf, - humanise_rate == 0 ? 
- /* TRANSLATORS: IEC 80000-13:2008 gibibyte */ - _("%u.%2.2u GiB") : - /* TRANSLATORS: IEC 80000-13:2008 gibibyte/second */ - _("%u.%2.2u GiB/s"), - (unsigned)(bytes >> 30), - (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); + *value = xstrfmt(_("%u.%2.2u"), (unsigned)(bytes >> 30), + (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); + /* TRANSLATORS: IEC 80000-13:2008 gibibyte/second and gibibyte */ + *unit = humanise_rate ? _("GiB/s") : _("GiB"); } else if (bytes > 1 << 20) { - unsigned x = bytes + 5243; /* for rounding */ - strbuf_addf(buf, - humanise_rate == 0 ? - /* TRANSLATORS: IEC 80000-13:2008 mebibyte */ - _("%u.%2.2u MiB") : - /* TRANSLATORS: IEC 80000-13:2008 mebibyte/second */ - _("%u.%2.2u MiB/s"), - x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); + unsigned x = bytes + 5243; /* for rounding */ + *value = xstrfmt(_("%u.%2.2u"), x >> 20, + ((x & ((1 << 20) - 1)) * 100) >> 20); + /* TRANSLATORS: IEC 80000-13:2008 mebibyte/second and mebibyte */ + *unit = humanise_rate ? _("MiB/s") : _("MiB"); } else if (bytes > 1 << 10) { - unsigned x = bytes + 5; /* for rounding */ - strbuf_addf(buf, - humanise_rate == 0 ? - /* TRANSLATORS: IEC 80000-13:2008 kibibyte */ - _("%u.%2.2u KiB") : - /* TRANSLATORS: IEC 80000-13:2008 kibibyte/second */ - _("%u.%2.2u KiB/s"), - x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); + unsigned x = bytes + 5; /* for rounding */ + *value = xstrfmt(_("%u.%2.2u"), x >> 10, + ((x & ((1 << 10) - 1)) * 100) >> 10); + /* TRANSLATORS: IEC 80000-13:2008 kibibyte/second and kibibyte */ + *unit = humanise_rate ? _("KiB/s") : _("KiB"); } else { - strbuf_addf(buf, - humanise_rate == 0 ? - /* TRANSLATORS: IEC 80000-13:2008 byte */ - Q_("%u byte", "%u bytes", bytes) : - /* TRANSLATORS: IEC 80000-13:2008 byte/second */ - Q_("%u byte/s", "%u bytes/s", bytes), - (unsigned)bytes); + *value = xstrfmt("%u", (unsigned)bytes); + *unit = humanise_rate ? 
+ /* TRANSLATORS: IEC 80000-13:2008 byte/second */ + Q_("byte/s", "bytes/s", bytes) : + /* TRANSLATORS: IEC 80000-13:2008 byte */ + Q_("byte", "bytes", bytes); } } +static void strbuf_humanise(struct strbuf *buf, off_t bytes, unsigned flags) +{ + char *value; + const char *unit; + + humanise_bytes(bytes, &value, &unit, flags); + + /* + * TRANSLATORS: The first argument is the number string. The second + * argument is the unit prefix string (i.e. "12.34 MiB/s"). + */ + strbuf_addf(buf, _("%s %s"), value, unit); + free(value); +} + void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) { strbuf_humanise(buf, bytes, 0); @@ -884,7 +890,7 @@ void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) void strbuf_humanise_rate(struct strbuf *buf, off_t bytes) { - strbuf_humanise(buf, bytes, 1); + strbuf_humanise(buf, bytes, HUMANISE_RATE); } int printf_ln(const char *fmt, ...) diff --git a/strbuf.h b/strbuf.h index a580ac6084..4426163e7e 100644 --- a/strbuf.h +++ b/strbuf.h @@ -367,6 +367,20 @@ void strbuf_addbuf_percentquote(struct strbuf *dst, const struct strbuf *src); */ void strbuf_add_percentencode(struct strbuf *dst, const char *src, int flags); +enum humanise_flags { + /* + * Use rate based unit prefixes for humanised values. + */ + HUMANISE_RATE = (1 << 0), +}; + +/** + * Converts the given byte size into a downscaled human-readable value and + * corresponding unit prefix as two separate strings. + */ +void humanise_bytes(off_t bytes, char **value, const char **unit, + unsigned flags); + /** * Append the given byte size as a human-readable string (i.e. 12.23 KiB, * 3.50 MiB). 
diff --git a/t/helper/test-simple-ipc.c b/t/helper/test-simple-ipc.c index 03cc5eea2c..442ad6b16f 100644 --- a/t/helper/test-simple-ipc.c +++ b/t/helper/test-simple-ipc.c @@ -603,7 +603,12 @@ int cmd__simple_ipc(int argc, const char **argv) OPT_INTEGER(0, "bytecount", &cl_args.bytecount, N_("number of bytes")), OPT_INTEGER(0, "batchsize", &cl_args.batchsize, N_("number of requests per thread")), - OPT_STRING(0, "byte", &bytevalue, N_("byte"), N_("ballast character")), + /* + * The "byte" string here is not marked for translation and + * instead relies on translation in strbuf.c:humanise_bytes() to + * avoid conflict with the plural form. + */ + OPT_STRING(0, "byte", &bytevalue, "byte", N_("ballast character")), OPT_STRING(0, "token", &cl_args.token, N_("token"), N_("command token to send to the server")), OPT_END() -- GitLab From eebf0d917b257f9364a5cc8b577c03b707b225c1 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 2 Dec 2025 16:20:36 -0600 Subject: [PATCH 3/7] builtin/repo: humanise count values in structure output The table output format for the git-repo(1) structure subcommand is used by default and intended to provide output to users in a human-friendly manner. When the reference/object count values in a repository are large, it becomes more cumbersome for users to read the values. For larger values, update the table output format to instead produce more human-friendly count values that are scaled down with the appropriate unit prefix. Output for the keyvalue and nul formats remains unchanged. 
Signed-off-by: Justin Tobler --- builtin/repo.c | 38 +++++++++++++++++------- strbuf.c | 26 ++++++++++++++++ strbuf.h | 6 ++++ t/t1901-repo-structure.sh | 62 +++++++++++++++++++-------------------- 4 files changed, 91 insertions(+), 41 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index a69699857a..9c61bc3e17 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -223,6 +223,7 @@ struct stats_table { int name_col_width; int value_col_width; + int unit_col_width; }; /* @@ -230,6 +231,7 @@ struct stats_table { */ struct stats_table_entry { char *value; + const char *unit; }; static void stats_table_vaddf(struct stats_table *table, @@ -250,11 +252,18 @@ static void stats_table_vaddf(struct stats_table *table, if (name_width > table->name_col_width) table->name_col_width = name_width; - if (entry) { + if (!entry) + return; + if (entry->value) { int value_width = utf8_strwidth(entry->value); if (value_width > table->value_col_width) table->value_col_width = value_width; } + if (entry->unit) { + int unit_width = utf8_strwidth(entry->unit); + if (unit_width > table->unit_col_width) + table->unit_col_width = unit_width; + } } static void stats_table_addf(struct stats_table *table, const char *format, ...) 
@@ -273,7 +282,7 @@ static void stats_table_count_addf(struct stats_table *table, size_t value, va_list ap; CALLOC_ARRAY(entry, 1); - entry->value = xstrfmt("%" PRIuMAX, (uintmax_t)value); + humanise_count(value, &entry->value, &entry->unit); va_start(ap, format); stats_table_vaddf(table, entry, format, ap); @@ -324,20 +333,24 @@ static void stats_table_print_structure(const struct stats_table *table) { const char *name_col_title = _("Repository structure"); const char *value_col_title = _("Value"); - int name_col_width = utf8_strwidth(name_col_title); - int value_col_width = utf8_strwidth(value_col_title); + int title_name_width = utf8_strwidth(name_col_title); + int title_value_width = utf8_strwidth(value_col_title); + int name_col_width = table->name_col_width; + int value_col_width = table->value_col_width; + int unit_col_width = table->unit_col_width; struct string_list_item *item; struct strbuf buf = STRBUF_INIT; - if (table->name_col_width > name_col_width) - name_col_width = table->name_col_width; - if (table->value_col_width > value_col_width) - value_col_width = table->value_col_width; + if (title_name_width > name_col_width) + name_col_width = title_name_width; + if (title_value_width > value_col_width + unit_col_width + 1) + value_col_width = title_value_width - unit_col_width; strbuf_addstr(&buf, "| "); strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, name_col_title); strbuf_addstr(&buf, " | "); - strbuf_utf8_align(&buf, ALIGN_LEFT, value_col_width, value_col_title); + strbuf_utf8_align(&buf, ALIGN_LEFT, + value_col_width + unit_col_width + 1, value_col_title); strbuf_addstr(&buf, " |"); printf("%s\n", buf.buf); @@ -345,17 +358,20 @@ static void stats_table_print_structure(const struct stats_table *table) for (int i = 0; i < name_col_width; i++) putchar('-'); printf(" | "); - for (int i = 0; i < value_col_width; i++) + for (int i = 0; i < value_col_width + unit_col_width + 1; i++) putchar('-'); printf(" |\n"); for_each_string_list_item(item, 
&table->rows) { struct stats_table_entry *entry = item->util; const char *value = ""; + const char *unit = ""; if (entry) { struct stats_table_entry *entry = item->util; value = entry->value; + if (entry->unit) + unit = entry->unit; } strbuf_reset(&buf); @@ -363,6 +379,8 @@ static void stats_table_print_structure(const struct stats_table *table) strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, item->string); strbuf_addstr(&buf, " | "); strbuf_utf8_align(&buf, ALIGN_RIGHT, value_col_width, value); + strbuf_addch(&buf, ' '); + strbuf_utf8_align(&buf, ALIGN_LEFT, unit_col_width, unit); strbuf_addstr(&buf, " |"); printf("%s\n", buf.buf); } diff --git a/strbuf.c b/strbuf.c index 3fbd375ad6..9beebad5b9 100644 --- a/strbuf.c +++ b/strbuf.c @@ -836,6 +836,32 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn); } +void humanise_count(size_t count, char **value, const char **unit) +{ + if (count >= 1000000000) { + size_t x = count + 5000000; /* for rounding */ + *value = xstrfmt(_("%u.%2.2u"), (unsigned)(x / 1000000000), + (unsigned)(x % 1000000000 / 10000000)); + /* TRANSLATORS: SI decimal prefix symbol for 10^9 */ + *unit = _("G"); + } else if (count >= 1000000) { + size_t x = count + 5000; /* for rounding */ + *value = xstrfmt(_("%u.%2.2u"), (unsigned)(x / 1000000), + (unsigned)(x % 1000000 / 10000)); + /* TRANSLATORS: SI decimal prefix symbol for 10^6 */ + *unit = _("M"); + } else if (count >= 1000) { + size_t x = count + 5; /* for rounding */ + *value = xstrfmt(_("%u.%2.2u"), (unsigned)(x / 1000), + (unsigned)(x % 1000 / 10)); + /* TRANSLATORS: SI decimal prefix symbol for 10^3 */ + *unit = _("k"); + } else { + *value = xstrfmt("%u", (unsigned)count); + *unit = NULL; + } +} + void humanise_bytes(off_t bytes, char **value, const char **unit, unsigned flags) { diff --git a/strbuf.h b/strbuf.h index 4426163e7e..571bd889df 100644 --- a/strbuf.h +++ b/strbuf.h @@ -381,6 +381,12 @@ enum humanise_flags 
{ void humanise_bytes(off_t bytes, char **value, const char **unit, unsigned flags); +/** + * Converts the given count into a downscaled human-readable value and + * corresponding unit prefix as two separate strings. + */ +void humanise_count(size_t count, char **value, const char **unit); + /** * Append the given byte size as a human-readable string (i.e. 12.23 KiB, * 3.50 MiB). diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 36a71a144e..55fd13ad1b 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -10,21 +10,21 @@ test_expect_success 'empty repository' ' ( cd repo && cat >expect <<-\EOF && - | Repository structure | Value | - | -------------------- | ----- | - | * References | | - | * Count | 0 | - | * Branches | 0 | - | * Tags | 0 | - | * Remotes | 0 | - | * Others | 0 | - | | | - | * Reachable objects | | - | * Count | 0 | - | * Commits | 0 | - | * Trees | 0 | - | * Blobs | 0 | - | * Tags | 0 | + | Repository structure | Value | + | -------------------- | ------ | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | EOF git repo structure >out 2>err && @@ -39,7 +39,7 @@ test_expect_success 'repository with references and objects' ' git init repo && ( cd repo && - test_commit_bulk 42 && + test_commit_bulk 1005 && git tag -a foo -m bar && oid="$(git rev-parse HEAD)" && @@ -49,21 +49,21 @@ test_expect_success 'repository with references and objects' ' git notes add -m foo && cat >expect <<-\EOF && - | Repository structure | Value | - | -------------------- | ----- | - | * References | | - | * Count | 4 | - | * Branches | 1 | - | * Tags | 1 | - | * Remotes | 1 | - | * Others | 1 | - | | | - | * Reachable objects | | - | * Count | 130 | - | * Commits | 43 | - | * Trees | 43 | - | * Blobs | 43 | - | * Tags | 1 | + | Repository 
structure | Value | + | -------------------- | ------ | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 3.02 k | + | * Commits | 1.01 k | + | * Trees | 1.01 k | + | * Blobs | 1.01 k | + | * Tags | 1 | EOF git repo structure >out 2>err && -- GitLab From 37f71cc1bca0765058dd5fcd8e05ba178133c6f3 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 2 Dec 2025 13:59:38 -0600 Subject: [PATCH 4/7] builtin/repo: add inflated object info to keyvalue structure output The structure subcommand for git-repo(1) outputs basic count information for objects and references. Extend this output to also provide information regarding total size of inflated objects by object type. For now, object size by object type info is only added to the keyvalue and nul output formats. In a subsequent commit, this info is also added to the table format. Signed-off-by: Justin Tobler --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 33 +++++++++++++++++++++++++++++++++ t/t1901-repo-structure.sh | 6 +++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 70f0a6d2e4..287eee4b93 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -50,6 +50,7 @@ supported: + * Reference counts categorized by type * Reachable object counts categorized by type +* Total inflated size of reachable objects by type + The output format can be chosen through the flag `--format`. 
Three formats are diff --git a/builtin/repo.c b/builtin/repo.c index 9c61bc3e17..e207108346 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -2,6 +2,8 @@ #include "builtin.h" #include "environment.h" +#include "hex.h" +#include "odb.h" #include "parse-options.h" #include "path-walk.h" #include "progress.h" @@ -211,6 +213,7 @@ struct object_values { struct object_stats { struct object_values type_counts; + struct object_values inflated_sizes; }; struct repo_structure { @@ -423,6 +426,15 @@ static void structure_keyvalue_print(struct repo_structure *stats, printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, (uintmax_t)stats->objects.type_counts.tags, value_delim); + printf("objects.commits.inflated_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.inflated_sizes.commits, value_delim); + printf("objects.trees.inflated_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.inflated_sizes.trees, value_delim); + printf("objects.blobs.inflated_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.inflated_sizes.blobs, value_delim); + printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.inflated_sizes.tags, value_delim); + fflush(stdout); } @@ -486,6 +498,7 @@ static void structure_count_references(struct ref_stats *stats, } struct count_objects_data { + struct object_database *odb; struct object_stats *stats; struct progress *progress; }; @@ -495,20 +508,39 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, { struct count_objects_data *data = cb_data; struct object_stats *stats = data->stats; + size_t inflated_total = 0; size_t object_count; + for (size_t i = 0; i < oids->nr; i++) { + struct object_info oi = OBJECT_INFO_INIT; + unsigned long inflated; + + oi.sizep = &inflated; + + if (odb_read_object_info_extended(data->odb, &oids->oid[i], &oi, + OBJECT_INFO_SKIP_FETCH_OBJECT | + OBJECT_INFO_QUICK) < 0) + continue; + + inflated_total += inflated; + } + switch (type) { case 
OBJ_TAG: stats->type_counts.tags += oids->nr; + stats->inflated_sizes.tags += inflated_total; break; case OBJ_COMMIT: stats->type_counts.commits += oids->nr; + stats->inflated_sizes.commits += inflated_total; break; case OBJ_TREE: stats->type_counts.trees += oids->nr; + stats->inflated_sizes.trees += inflated_total; break; case OBJ_BLOB: stats->type_counts.blobs += oids->nr; + stats->inflated_sizes.blobs += inflated_total; break; default: BUG("invalid object type"); @@ -526,6 +558,7 @@ static void structure_count_objects(struct object_stats *stats, { struct path_walk_info info = PATH_WALK_INFO_INIT; struct count_objects_data data = { + .odb = repo->objects, .stats = stats, }; diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 55fd13ad1b..33237822fd 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -73,7 +73,7 @@ test_expect_success 'repository with references and objects' ' ) ' -test_expect_success 'keyvalue and nul format' ' +test_expect_success SHA1 'keyvalue and nul format' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -90,6 +90,10 @@ test_expect_success 'keyvalue and nul format' ' objects.trees.count=42 objects.blobs.count=42 objects.tags.count=1 + objects.commits.inflated_size=9225 + objects.trees.inflated_size=28554 + objects.blobs.inflated_size=453 + objects.tags.inflated_size=132 EOF git repo structure --format=keyvalue >out 2>err && -- GitLab From 40edf4c20b6a8b751be4cc2ebef2d3e182ec629b Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 2 Dec 2025 17:25:37 -0600 Subject: [PATCH 5/7] builtin/repo: add inflated object info to structure table Update the table output format for the git-repo(1) structure command to begin printing the total inflated object size info by object type. To be more human-friendly, larger values are scaled down and displayed with the appropriate unit prefix. Output for the keyvalue and nul formats remains unchanged. 
Signed-off-by: Justin Tobler --- builtin/repo.c | 33 +++++++++++++++++++-- strbuf.c | 14 +++++---- strbuf.h | 5 ++++ t/t1901-repo-structure.sh | 62 +++++++++++++++++++++++---------------- 4 files changed, 80 insertions(+), 34 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index e207108346..b73cfd975b 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -292,6 +292,20 @@ static void stats_table_count_addf(struct stats_table *table, size_t value, va_end(ap); } +static void stats_table_size_addf(struct stats_table *table, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + humanise_bytes(value, &entry->value, &entry->unit, HUMANISE_COMPACT); + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + static inline size_t get_total_reference_count(struct ref_stats *stats) { return stats->branches + stats->remotes + stats->tags + stats->others; @@ -307,7 +321,8 @@ static void stats_table_setup_structure(struct stats_table *table, { struct object_stats *objects = &stats->objects; struct ref_stats *refs = &stats->refs; - size_t object_total; + size_t inflated_object_total; + size_t object_count_total; size_t ref_total; ref_total = get_total_reference_count(refs); @@ -318,10 +333,10 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_count_addf(table, refs->remotes, " * %s", _("Remotes")); stats_table_count_addf(table, refs->others, " * %s", _("Others")); - object_total = get_total_object_values(&objects->type_counts); + object_count_total = get_total_object_values(&objects->type_counts); stats_table_addf(table, ""); stats_table_addf(table, "* %s", _("Reachable objects")); - stats_table_count_addf(table, object_total, " * %s", _("Count")); + stats_table_count_addf(table, object_count_total, " * %s", _("Count")); stats_table_count_addf(table, objects->type_counts.commits, " * %s", _("Commits")); stats_table_count_addf(table, 
objects->type_counts.trees, @@ -330,6 +345,18 @@ static void stats_table_setup_structure(struct stats_table *table, " * %s", _("Blobs")); stats_table_count_addf(table, objects->type_counts.tags, " * %s", _("Tags")); + + inflated_object_total = get_total_object_values(&objects->inflated_sizes); + stats_table_size_addf(table, inflated_object_total, + " * %s", _("Inflated size")); + stats_table_size_addf(table, objects->inflated_sizes.commits, + " * %s", _("Commits")); + stats_table_size_addf(table, objects->inflated_sizes.trees, + " * %s", _("Trees")); + stats_table_size_addf(table, objects->inflated_sizes.blobs, + " * %s", _("Blobs")); + stats_table_size_addf(table, objects->inflated_sizes.tags, + " * %s", _("Tags")); } static void stats_table_print_structure(const struct stats_table *table) diff --git a/strbuf.c b/strbuf.c index 9beebad5b9..512c7ba680 100644 --- a/strbuf.c +++ b/strbuf.c @@ -886,11 +886,15 @@ void humanise_bytes(off_t bytes, char **value, const char **unit, *unit = humanise_rate ? _("KiB/s") : _("KiB"); } else { *value = xstrfmt("%u", (unsigned)bytes); - *unit = humanise_rate ? - /* TRANSLATORS: IEC 80000-13:2008 byte/second */ - Q_("byte/s", "bytes/s", bytes) : - /* TRANSLATORS: IEC 80000-13:2008 byte */ - Q_("byte", "bytes", bytes); + if (flags & HUMANISE_COMPACT) + /* TRANSLATORS: IEC 80000-13:2008 byte/second and byte */ + *unit = humanise_rate ? _("B/s") : _("B"); + else + *unit = humanise_rate ? + /* TRANSLATORS: IEC 80000-13:2008 byte/second */ + Q_("byte/s", "bytes/s", bytes) : + /* TRANSLATORS: IEC 80000-13:2008 byte */ + Q_("byte", "bytes", bytes); } } diff --git a/strbuf.h b/strbuf.h index 571bd889df..005c155808 100644 --- a/strbuf.h +++ b/strbuf.h @@ -372,6 +372,11 @@ enum humanise_flags { * Use rate based unit prefixes for humanised values. */ HUMANISE_RATE = (1 << 0), + /* + * Use compact "B" unit prefixes instead of "byte/bytes" for humanised + * values. 
+ */ + HUMANISE_COMPACT = (1 << 1), }; /** diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 33237822fd..b18213c660 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -13,18 +13,23 @@ test_expect_success 'empty repository' ' | Repository structure | Value | | -------------------- | ------ | | * References | | - | * Count | 0 | - | * Branches | 0 | - | * Tags | 0 | - | * Remotes | 0 | - | * Others | 0 | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | | | | | * Reachable objects | | - | * Count | 0 | - | * Commits | 0 | - | * Trees | 0 | - | * Blobs | 0 | - | * Tags | 0 | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | + | * Inflated size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | EOF git repo structure >out 2>err && @@ -34,7 +39,7 @@ test_expect_success 'empty repository' ' ) ' -test_expect_success 'repository with references and objects' ' +test_expect_success SHA1 'repository with references and objects' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -49,21 +54,26 @@ test_expect_success 'repository with references and objects' ' git notes add -m foo && cat >expect <<-\EOF && - | Repository structure | Value | - | -------------------- | ------ | - | * References | | - | * Count | 4 | - | * Branches | 1 | - | * Tags | 1 | - | * Remotes | 1 | - | * Others | 1 | - | | | - | * Reachable objects | | - | * Count | 3.02 k | - | * Commits | 1.01 k | - | * Trees | 1.01 k | - | * Blobs | 1.01 k | - | * Tags | 1 | + | Repository structure | Value | + | -------------------- | ---------- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 3.02 k | + | * Commits | 1.01 k | + | * Trees | 1.01 k | + | * Blobs | 1.01 k | + | * Tags | 1 | + | * Inflated size | 16.03 MiB | + | * 
Commits | 217.92 KiB | + | * Trees | 15.81 MiB | + | * Blobs | 11.68 KiB | + | * Tags | 132 B | EOF git repo structure >out 2>err && -- GitLab From ba861f37c91393abee355a283757b3a6b5f84a10 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 5 Dec 2025 12:25:34 -0600 Subject: [PATCH 6/7] builtin/repo: add disk size info to keyvalue structure output Similar to a prior commit, extend the keyvalue and nul output formats of the git-repo(1) structure command to additionally provide info regarding total object disk sizes by object type. Signed-off-by: Justin Tobler --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 18 ++++++++++++++++++ t/t1901-repo-structure.sh | 11 ++++++++++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 287eee4b93..861073f641 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -51,6 +51,7 @@ supported: * Reference counts categorized by type * Reachable object counts categorized by type * Total inflated size of reachable objects by type +* Total disk size of reachable objects by type + The output format can be chosen through the flag `--format`. 
Three formats are diff --git a/builtin/repo.c b/builtin/repo.c index b73cfd975b..0ed41bf9d4 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -214,6 +214,7 @@ struct object_values { struct object_stats { struct object_values type_counts; struct object_values inflated_sizes; + struct object_values disk_sizes; }; struct repo_structure { @@ -462,6 +463,15 @@ static void structure_keyvalue_print(struct repo_structure *stats, printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim, (uintmax_t)stats->objects.inflated_sizes.tags, value_delim); + printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.disk_sizes.commits, value_delim); + printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.disk_sizes.trees, value_delim); + printf("objects.blobs.disk_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.disk_sizes.blobs, value_delim); + printf("objects.tags.disk_size%c%" PRIuMAX "%c", key_delim, + (uintmax_t)stats->objects.disk_sizes.tags, value_delim); + fflush(stdout); } @@ -536,13 +546,16 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, struct count_objects_data *data = cb_data; struct object_stats *stats = data->stats; size_t inflated_total = 0; + size_t disk_total = 0; size_t object_count; for (size_t i = 0; i < oids->nr; i++) { struct object_info oi = OBJECT_INFO_INIT; unsigned long inflated; + off_t disk; oi.sizep = &inflated; + oi.disk_sizep = &disk; if (odb_read_object_info_extended(data->odb, &oids->oid[i], &oi, OBJECT_INFO_SKIP_FETCH_OBJECT | @@ -550,24 +563,29 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, continue; inflated_total += inflated; + disk_total += disk; } switch (type) { case OBJ_TAG: stats->type_counts.tags += oids->nr; stats->inflated_sizes.tags += inflated_total; + stats->disk_sizes.tags += disk_total; break; case OBJ_COMMIT: stats->type_counts.commits += oids->nr; stats->inflated_sizes.commits += 
inflated_total; + stats->disk_sizes.commits += disk_total; break; case OBJ_TREE: stats->type_counts.trees += oids->nr; stats->inflated_sizes.trees += inflated_total; + stats->disk_sizes.trees += disk_total; break; case OBJ_BLOB: stats->type_counts.blobs += oids->nr; stats->inflated_sizes.blobs += inflated_total; + stats->disk_sizes.blobs += disk_total; break; default: BUG("invalid object type"); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index b18213c660..dd17caad05 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -4,6 +4,11 @@ test_description='test git repo structure' . ./test-lib.sh +object_type_disk_usage() { + git rev-list --all --objects --disk-usage --filter=object:type=$1 \ + --filter-provided-objects +} + test_expect_success 'empty repository' ' test_when_finished "rm -rf repo" && git init repo && @@ -91,7 +96,7 @@ test_expect_success SHA1 'keyvalue and nul format' ' test_commit_bulk 42 && git tag -a foo -m bar && - cat >expect <<-\EOF && + cat >expect <<-EOF && references.branches.count=1 references.tags.count=1 references.remotes.count=0 @@ -104,6 +109,10 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.trees.inflated_size=28554 objects.blobs.inflated_size=453 objects.tags.inflated_size=132 + objects.commits.disk_size=$(object_type_disk_usage commit) + objects.trees.disk_size=$(object_type_disk_usage tree) + objects.blobs.disk_size=$(object_type_disk_usage blob) + objects.tags.disk_size=$(object_type_disk_usage tag) EOF git repo structure --format=keyvalue >out 2>err && -- GitLab From 3118c17ae39be1a67994647fa8e05e7aaf9d6dd5 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 5 Dec 2025 12:41:44 -0600 Subject: [PATCH 7/7] builtin/repo: add object disk size info to structure table Similar to a prior commit, update the table output format for the git-repo(1) structure command to display the total object disk usage by object type. 
Signed-off-by: Justin Tobler --- builtin/repo.c | 13 +++++++++++++ t/t1901-repo-structure.sh | 31 ++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 0ed41bf9d4..a071d2fdfe 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -324,6 +324,7 @@ static void stats_table_setup_structure(struct stats_table *table, struct ref_stats *refs = &stats->refs; size_t inflated_object_total; size_t object_count_total; + size_t disk_object_total; size_t ref_total; ref_total = get_total_reference_count(refs); @@ -358,6 +359,18 @@ static void stats_table_setup_structure(struct stats_table *table, " * %s", _("Blobs")); stats_table_size_addf(table, objects->inflated_sizes.tags, " * %s", _("Tags")); + + disk_object_total = get_total_object_values(&objects->disk_sizes); + stats_table_size_addf(table, disk_object_total, + " * %s", _("Disk size")); + stats_table_size_addf(table, objects->disk_sizes.commits, + " * %s", _("Commits")); + stats_table_size_addf(table, objects->disk_sizes.trees, + " * %s", _("Trees")); + stats_table_size_addf(table, objects->disk_sizes.blobs, + " * %s", _("Blobs")); + stats_table_size_addf(table, objects->disk_sizes.tags, + " * %s", _("Tags")); } static void stats_table_print_structure(const struct stats_table *table) diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index dd17caad05..1b68525079 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -5,8 +5,20 @@ test_description='test git repo structure' . 
./test-lib.sh object_type_disk_usage() { - git rev-list --all --objects --disk-usage --filter=object:type=$1 \ - --filter-provided-objects + disk_usage_opt="--disk-usage" + + if test "$2" = "true" + then + disk_usage_opt="--disk-usage=human" + fi + + if test "$1" = "all" + then + git rev-list --all --objects $disk_usage_opt + else + git rev-list --all --objects $disk_usage_opt \ + --filter=object:type=$1 --filter-provided-objects + fi } test_expect_success 'empty repository' ' @@ -35,6 +47,11 @@ test_expect_success 'empty repository' ' | * Trees | 0 B | | * Blobs | 0 B | | * Tags | 0 B | + | * Disk size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | EOF git repo structure >out 2>err && @@ -58,7 +75,10 @@ test_expect_success SHA1 'repository with references and objects' ' # Also creates a commit, tree, and blob. git notes add -m foo && - cat >expect <<-\EOF && + # The tags disk size is handled specially due to the + # git-rev-list(1) --disk-usage=human option printing the full + # "byte/bytes" unit name instead of just "B". + cat >expect <<-EOF && | Repository structure | Value | | -------------------- | ---------- | | * References | | @@ -79,6 +99,11 @@ test_expect_success SHA1 'repository with references and objects' ' | * Trees | 15.81 MiB | | * Blobs | 11.68 KiB | | * Tags | 132 B | + | * Disk size | $(object_type_disk_usage all true) | + | * Commits | $(object_type_disk_usage commit true) | + | * Trees | $(object_type_disk_usage tree true) | + | * Blobs | $(object_type_disk_usage blob true) | + | * Tags | $(object_type_disk_usage tag) B | EOF git repo structure >out 2>err && -- GitLab