From 23bab2d8f472712181423917a1b158b678f63169 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 3 Nov 2025 16:50:53 -0600 Subject: [PATCH 1/2] diff: add option to report binary files in raw diffs When generating patch diff output, if either side of a filepair is detected as binary, Git omits the diff content and instead prints a "Binary files differ" message. From this message it is known that at least one of the files in the pair is considered binary, but not exactly which ones. Add a --report-binary-files diff option that, when enabled, extends the raw diff output format to explicitly indicate for each file whether it was considered binary or not. Signed-off-by: Justin Tobler --- Documentation/diff-format.adoc | 12 ++++++++++++ Documentation/diff-options.adoc | 4 ++++ diff.c | 9 +++++++++ diff.h | 6 ++++++ t/t4012-diff-binary.sh | 29 +++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+) diff --git a/Documentation/diff-format.adoc b/Documentation/diff-format.adoc index 9f7e988241..74c0a064ad 100644 --- a/Documentation/diff-format.adoc +++ b/Documentation/diff-format.adoc @@ -83,6 +83,18 @@ quoted as explained for the configuration variable `core.quotePath` (see linkgit:git-config[1]). Using `-z` the filename is output verbatim and the line is terminated by a NUL byte. +With the `--report-binary-files` option, a new column is added prior to the +status indicating for each file if Git considered it binary or not. If +considered binary, a file is denoted with `b`. Otherwise, `t` is used. This +column is followed by a space character. Combined diffs do not report binary +file info. 
+ +Example: + +------------------------------------------------ +:100644 100644 5be4a4a cc95eb0 bt M file.c +------------------------------------------------ + diff format for merges ---------------------- diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc index ae31520f7f..54eb48c067 100644 --- a/Documentation/diff-options.adoc +++ b/Documentation/diff-options.adoc @@ -544,6 +544,10 @@ ifndef::git-format-patch[] Implies `--patch`. endif::git-format-patch[] +`--report-binary-files`:: + Adds a column to raw diff output to report for each file in the pair + whether it was considered binary by Git. + `--abbrev[=<n>]`:: Instead of showing the full 40-byte hexadecimal object name in diff-raw format output and diff-tree header diff --git a/diff.c b/diff.c index a1961526c0..e231acb1a9 100644 --- a/diff.c +++ b/diff.c @@ -5747,6 +5747,8 @@ struct option *add_diff_options(const struct option *opts, OPT_CALLBACK_F(0, "binary", options, NULL, N_("output a binary diff that can be applied"), PARSE_OPT_NONEG | PARSE_OPT_NOARG, diff_opt_binary), + OPT_BOOL(0, "report-binary-files", &options->report_binary_files, + N_("report if pre- and post-image blobs are binary")), OPT_BOOL(0, "full-index", &options->flags.full_index, N_("show full pre- and post-image object names on the \"index\" lines")), OPT_COLOR_FLAG(0, "color", &options->use_color, @@ -6111,6 +6113,13 @@ static void diff_flush_raw(struct diff_filepair *p, struct diff_options *opt) fprintf(opt->file, "%s ", diff_aligned_abbrev(&p->two->oid, opt->abbrev)); } + + if (opt->report_binary_files) { + char one = diff_filespec_is_binary(opt->repo, p->one) ? 'b' : 't'; + char two = diff_filespec_is_binary(opt->repo, p->two) ? 
'b' : 't'; + fprintf(opt->file, "%c%c ", one, two); + } + if (p->score) { fprintf(opt->file, "%c%03d%c", p->status, similarity_index(p), inter_name_termination); diff --git a/diff.h b/diff.h index 31eedd5c0c..402a70d7ad 100644 --- a/diff.h +++ b/diff.h @@ -369,6 +369,12 @@ struct diff_options { */ int skip_resolving_statuses; + /* + * When generating raw diff output, report for each file whether it was + * considered binary. + */ + int report_binary_files; + /* Callback which allows tweaking the options in diff_setup_done(). */ void (*set_default)(struct diff_options *); diff --git a/t/t4012-diff-binary.sh b/t/t4012-diff-binary.sh index d1d30ac2a9..e026e1d3a4 100755 --- a/t/t4012-diff-binary.sh +++ b/t/t4012-diff-binary.sh @@ -130,4 +130,33 @@ test_expect_success 'diff --stat with binary files and big change count' ' test_cmp expect actual ' +test_expect_success SHA1 'diff --report-binary-files' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + echo foo >foo && + printf "\0bar\0" >bar && + echo baz >baz && + git add foo bar baz && + git commit -m foo && + + printf "\0foo\0" >foo && + printf "\0bar2\0" >bar && + echo baz2 >baz && + git commit -am "binary foo" && + + cat >expect <<-\EOF && + :100644 100644 e02d9a3a8aeb904ccc3bb9ed0600f2e963ba1a10 884a24af772a87733e911a3491c0ab576d34c06c bb M bar + :100644 100644 76018072e09c5d31c8c6e3113b8aa0fe625195ca 3414c84ca6b7ca9cbbe40dd44f4d0715c1464f6e tt M baz + :100644 100644 257cc5642cb1a054f08cc83f2d943e56fd3ebe99 a60073ceafeca287824d7b9ac3eebef233b72fce tb M foo + EOF + + git diff-tree --report-binary-files HEAD~ HEAD >out && + + test_cmp expect out + ) +' + test_done -- GitLab From 96c709edb66716b87ab67043d4cefcb76c5342bf Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 6 Nov 2025 16:43:17 -0600 Subject: [PATCH 2/2] diff: add --raw-extended option --- combine-diff.c | 31 +++++++++++++++++++++++++++++++ diff.c | 26 ++++++++++++++++++++++++++ diff.h | 4 ++++ 3 files changed, 61 
insertions(+)

diff --git a/combine-diff.c b/combine-diff.c
index b799862068..56e34f7721 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -1223,6 +1223,30 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 	free(sline);
 }
 
+/* Report whether a blob is binary: userdiff driver first, then content. */
+static int entry_is_binary(struct repository *repo,
+			   const struct object_id *oid,
+			   unsigned int mode, const char *path)
+{
+	struct userdiff_driver *driver;
+	unsigned long size;
+	int is_binary;
+	char *buf;
+
+	driver = userdiff_find_by_path(repo->index, path);
+	if (!driver)
+		driver = userdiff_find_by_name("default");
+	/* Any value other than -1 is taken as the driver's answer. */
+	if (driver->binary != -1)
+		return driver->binary;
+
+	buf = grab_blob(repo, oid, mode, &size, NULL, NULL);
+	is_binary = !!buffer_is_binary(buf, size);
+	free(buf);
+
+	return is_binary;
+}
+
 static void show_raw_diff(struct combine_diff_path *p, int num_parent, struct rev_info *rev)
 {
 	struct diff_options *opt = &rev->diffopt;
@@ -1255,6 +1279,13 @@ static void show_raw_diff(struct combine_diff_path *p, int num_parent, struct re
 			printf(" %s", diff_aligned_abbrev(&p->parent[i].oid,
 							  opt->abbrev));
 		printf(" %s ", diff_aligned_abbrev(&p->oid, opt->abbrev));
+
+		if (opt->raw_extended & DIFF_RAW_EXTENDED_BINARY) {
+			printf("binary=");
+			for (i = 0; i < num_parent; i++)
+				putchar(entry_is_binary(rev->repo, &p->parent[i].oid, p->parent[i].mode, p->parent[i].path) ? 'y' : 'n');
+			printf("%c ", entry_is_binary(rev->repo, &p->oid, p->mode, p->path) ?
'y' : 'n');
+		}
 	}
 
 	if (opt->output_format & (DIFF_FORMAT_RAW | DIFF_FORMAT_NAME_STATUS)) {
diff --git a/diff.c b/diff.c
index e231acb1a9..04057942b3 100644
--- a/diff.c
+++ b/diff.c
@@ -5663,6 +5663,29 @@ static int diff_opt_max_depth(const struct option *opt,
 	return 0;
 }
 
+/* Parse the comma-separated --raw-extended list into options->raw_extended. */
+static int diff_opt_raw_extended(const struct option *opt,
+				 const char *arg, int unset)
+{
+	struct string_list list = STRING_LIST_INIT_DUP;
+	struct diff_options *options = opt->value;
+	struct string_list_item *item;
+	int ret = 0;
+
+	BUG_ON_OPT_NEG(unset);
+	string_list_split(&list, arg, ",", -1);
+	for_each_string_list_item(item, &list) {
+		if (strcmp(item->string, "binary")) {
+			ret = error(_("invalid value for '%s': '%s'"),
+				    "--raw-extended", item->string);
+			break; /* still clear the list below */
+		}
+		options->raw_extended |= DIFF_RAW_EXTENDED_BINARY;
+	}
+	string_list_clear(&list, 0);
+	return ret;
+}
+
 /*
  * Consider adding new flags to __git_diff_common_options
  * in contrib/completion/git-completion.bash
@@ -5747,6 +5770,9 @@ struct option *add_diff_options(const struct option *opts,
 		OPT_CALLBACK_F(0, "binary", options, NULL,
 			       N_("output a binary diff that can be applied"),
 			       PARSE_OPT_NONEG | PARSE_OPT_NOARG, diff_opt_binary),
+		OPT_CALLBACK_F(0, "raw-extended", options, N_(""),
+			       N_("extend raw diff output with specified additional info"),
+			       PARSE_OPT_NONEG, diff_opt_raw_extended),
 		OPT_BOOL(0, "report-binary-files", &options->report_binary_files,
 			 N_("report if pre- and post-image blobs are binary")),
 		OPT_BOOL(0, "full-index", &options->flags.full_index,
diff --git a/diff.h b/diff.h
index 402a70d7ad..407678bb09 100644
--- a/diff.h
+++ b/diff.h
@@ -237,6 +237,8 @@ enum diff_submodule_format {
 	DIFF_SUBMODULE_INLINE_DIFF
 };
 
+#define DIFF_RAW_EXTENDED_BINARY (1 << 0)
+
 /**
  * the set of options the calling program wants to affect the operation of
  * diffcore library with.
@@ -375,6 +377,8 @@ struct diff_options {
 	 */
 	int report_binary_files;
 
+	unsigned int raw_extended;
+
 	/* Callback which allows tweaking the options in diff_setup_done().
*/ void (*set_default)(struct diff_options *); -- GitLab