From 011ce54c26318d725db1d8971d157656eb965d88 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Fri, 28 Nov 2025 17:37:13 +0100 Subject: [PATCH 1/4] last-modified: fix bug caused by inproper initialized memory git-last-modified(1) uses a scratch bitmap to keep track of paths that have been changed between commits. To avoid reallocating a bitmap on each call of process_parent(), the scratch bitmap is kept and reused. Although, it seems an incorrect length is passed to memset(3). `struct bitmap` uses `eword_t` to for internal storage. This type is typedef'd to uint64_t. To fully zero the memory used by the bitmap, multiply the length (saved in `struct bitmap::word_alloc`) by the size of `eword_t`. Reported-by: Anders Kaseorg Helped-by: Jeff King Signed-off-by: Toon Claes --- builtin/last-modified.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/last-modified.c b/builtin/last-modified.c index b0ecbdc540..cc5fd2e795 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -327,7 +327,7 @@ static void process_parent(struct last_modified *lm, if (!(parent->object.flags & PARENT1)) active_paths_free(lm, parent); - memset(lm->scratch->words, 0x0, lm->scratch->word_alloc); + memset(lm->scratch->words, 0x0, lm->scratch->word_alloc * sizeof(eword_t)); diff_queue_clear(&diff_queued_diff); } -- GitLab From d9038399d373abe71535e2c60e8ba4e4cc6d43ab Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 10 Dec 2025 07:31:27 +0100 Subject: [PATCH 2/4] Add MEMZERO_ARRAY() macro and use it in coccinelle A bug was found[1] in git-last-modified(1), caused by uninitialized memory. While the bug is fixed, in the discussion after that[2] the suggestion was made to introduce a macro that simplifies zeroing a dynamically allocated array. In the first patch I'm addressing the outcome of the discussion on the patch, and in the second patch I'm fixing an edge-case I've encountered while using coccinelle. There's one /oddball/ in add-patch.c that doesn't get caught by the coccinelle rules, around line 960: memset(hunk + 1, 0, (splittable_into - 1) * sizeof(*hunk)); Because there's some quirky pointer math going on, it think it's better to keep it like it is. There were some mixed opinions about naming it either CLEAR_ARRAY() or MEMZERO_ARRAY(). I choose the latter because I wanted to avoid confusion that "clear" would shrink the array to zero elements. [1]: <4dc4c8cd-c0cc-4784-8fcf-defa3a051087@mit.edu> [2]: <20251208201501.GA216526@coredump.intra.peff.net> Cc: Jeff King Signed-off-by: Toon Claes --- Changes in v2: - EDITME: describe what is new in this series revision. - EDITME: use bulletpoints and terse descriptions. - Link to v1: https://patch.msgid.link/20251210-toon-cocci-memzero-v1-0-ae916a79065b@iotcl.com --- b4-submit-tracking --- # This section is used internally by b4 prep for tracking purposes. { "series": { "revision": 2, "change-id": "20251210-toon-cocci-memzero-2b6185e08ac4", "prefixes": [], "history": { "v1": [ "20251210-toon-cocci-memzero-v1-0-ae916a79065b@iotcl.com" ] } } } -- GitLab From 5d81846c460f1520dc88e81f09e6d7eda9296312 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 10 Dec 2025 12:04:15 +0100 Subject: [PATCH 3/4] git-compat-util: introduce MEMZERO_ARRAY() macro Introduce a new macro MEMZERO_ARRAY() that zeroes the memory allocated by ALLOC_ARRAY() and friends. And add coccinelle rule to enforce the use of this macro. Signed-off-by: Toon Claes --- builtin/last-modified.c | 2 +- compat/simple-ipc/ipc-win32.c | 2 +- contrib/coccinelle/array.cocci | 20 ++++++++++++++++++++ diff-delta.c | 2 +- ewah/bitmap.c | 7 +++---- git-compat-util.h | 1 + hashmap.c | 2 +- pack-revindex.c | 2 +- 8 files changed, 29 insertions(+), 9 deletions(-) diff --git a/builtin/last-modified.c b/builtin/last-modified.c index cc5fd2e795..ac5387e861 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -327,7 +327,7 @@ static void process_parent(struct last_modified *lm, if (!(parent->object.flags & PARENT1)) active_paths_free(lm, parent); - memset(lm->scratch->words, 0x0, lm->scratch->word_alloc * sizeof(eword_t)); + MEMZERO_ARRAY(lm->scratch->words, lm->scratch->word_alloc); diff_queue_clear(&diff_queued_diff); } diff --git a/compat/simple-ipc/ipc-win32.c b/compat/simple-ipc/ipc-win32.c index a8fc812adf..4a3e7df9c7 100644 --- a/compat/simple-ipc/ipc-win32.c +++ b/compat/simple-ipc/ipc-win32.c @@ -686,7 +686,7 @@ static LPSECURITY_ATTRIBUTES get_sa(struct my_sa_data *d) goto fail; } - memset(ea, 0, NR_EA * sizeof(EXPLICIT_ACCESS)); + MEMZERO_ARRAY(ea, NR_EA); ea[0].grfAccessPermissions = GENERIC_READ | GENERIC_WRITE; ea[0].grfAccessMode = SET_ACCESS; diff --git a/contrib/coccinelle/array.cocci b/contrib/coccinelle/array.cocci index 27a3b479c9..d306f6a21e 100644 --- a/contrib/coccinelle/array.cocci +++ b/contrib/coccinelle/array.cocci @@ -101,3 +101,23 @@ expression dst, src, n; -ALLOC_ARRAY(dst, n); -COPY_ARRAY(dst, src, n); +DUP_ARRAY(dst, src, n); + +@@ +type T; +T *ptr; +expression n; +@@ +- memset(ptr, \( 0x0 \| 0 \), n * \( sizeof(T) +- \| sizeof(*ptr) +- \) ) ++ MEMZERO_ARRAY(ptr, n) + +@@ +type T; +T[] ptr; +expression n; +@@ +- memset(ptr, \( 0x0 \| 0 \), n * \( sizeof(T) +- \| sizeof(*ptr) +- \) ) ++ MEMZERO_ARRAY(ptr, n) diff --git a/diff-delta.c b/diff-delta.c index 71d37368d6..43c339f010 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -171,7 +171,7 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) mem = hash + hsize; entry = mem; - memset(hash, 0, hsize * sizeof(*hash)); + MEMZERO_ARRAY(hash, hsize); /* allocate an array to count hash entries */ hash_count = calloc(hsize, sizeof(*hash_count)); diff --git a/ewah/bitmap.c b/ewah/bitmap.c index 55928dada8..bf878bf876 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -46,8 +46,7 @@ static void bitmap_grow(struct bitmap *self, size_t word_alloc) { size_t old_size = self->word_alloc; ALLOC_GROW(self->words, word_alloc, self->word_alloc); - memset(self->words + old_size, 0x0, - (self->word_alloc - old_size) * sizeof(eword_t)); + MEMZERO_ARRAY(self->words + old_size, (self->word_alloc - old_size)); } void bitmap_set(struct bitmap *self, size_t pos) @@ -192,8 +191,8 @@ void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other) if (self->word_alloc < other_final) { self->word_alloc = other_final; REALLOC_ARRAY(self->words, self->word_alloc); - memset(self->words + original_size, 0x0, - (self->word_alloc - original_size) * sizeof(eword_t)); + MEMZERO_ARRAY(self->words + original_size, + (self->word_alloc - original_size)); } ewah_iterator_init(&it, other); diff --git a/git-compat-util.h b/git-compat-util.h index 398e0fac4f..2b8192fd2e 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -726,6 +726,7 @@ static inline uint64_t u64_add(uint64_t a, uint64_t b) #define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc))) #define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x))) #define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc))) +#define MEMZERO_ARRAY(x, alloc) memset((x), 0x0, st_mult(sizeof(*(x)), (alloc))) #define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \ BARF_UNLESS_COPYABLE((dst), (src))) diff --git a/hashmap.c b/hashmap.c index a711377853..3b5d6f14bc 100644 --- a/hashmap.c +++ b/hashmap.c @@ -194,7 +194,7 @@ void hashmap_partial_clear_(struct hashmap *map, ssize_t entry_offset) return; if (entry_offset >= 0) /* called by hashmap_clear_entries */ free_individual_entries(map, entry_offset); - memset(map->table, 0, map->tablesize * sizeof(struct hashmap_entry *)); + MEMZERO_ARRAY(map->table, map->tablesize); map->shrink_at = 0; map->private_size = 0; } diff --git a/pack-revindex.c b/pack-revindex.c index d0791cc493..8598b941c8 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -75,7 +75,7 @@ static void sort_revindex(struct revindex_entry *entries, unsigned n, off_t max) for (bits = 0; max >> bits; bits += DIGIT_SIZE) { unsigned i; - memset(pos, 0, BUCKETS * sizeof(*pos)); + MEMZERO_ARRAY(pos, BUCKETS); /* * We want pos[i] to store the index of the last element that -- GitLab From 4d773d4075c0fc55e91a45d5561cd7fd173856d9 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 10 Dec 2025 12:09:25 +0100 Subject: [PATCH 4/4] contrib/coccinelle: pass include paths to spatch(1) In the previous commit a new coccinelle rule is added. But neiter `make coccicheck` nor `meson compile coccicheck` did detect a case in builtin/last-modified.c. This case involves the field `scratch` in `struct last_modified`. This field is of type `struct bitmap` and that struct has a member `eword_t *words`. Both are defined in `ewah/ewok.h`. Now, while builtin/last-modified.c does include that header (with the subdir in the #include directive), it seems coccinelle does not process it. So it's unaware of the type of `words` in the bitmap, and it doesn't recognize the rule from previous commit that uses: type T; T *ptr; Fix coccicheck by passing all possible include paths inside the Git project so spatch(1) can find the headers and can determine the types. Signed-off-by: Toon Claes --- Makefile | 2 +- contrib/coccinelle/meson.build | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6fc322ff88..46d07b2d52 100644 --- a/Makefile +++ b/Makefile @@ -981,7 +981,7 @@ SANITIZE_LEAK = SANITIZE_ADDRESS = # For the 'coccicheck' target -SPATCH_INCLUDE_FLAGS = --all-includes +SPATCH_INCLUDE_FLAGS = --all-includes $(addprefix -I ,compat ewah refs sha256 trace2 win32 xdiff) SPATCH_FLAGS = SPATCH_TEST_FLAGS = diff --git a/contrib/coccinelle/meson.build b/contrib/coccinelle/meson.build index dc3f73c2e7..ae7f5b5460 100644 --- a/contrib/coccinelle/meson.build +++ b/contrib/coccinelle/meson.build @@ -50,6 +50,11 @@ foreach header : headers_to_check coccinelle_headers += meson.project_source_root() / header endforeach +coccinelle_includes = [] +foreach path : ['compat', 'ewah', 'refs', 'sha256', 'trace2', 'win32', 'xdiff'] + coccinelle_includes += ['-I', meson.project_source_root() / path] +endforeach + patches = [ ] foreach source : coccinelle_sources patches += custom_target( @@ -58,6 +63,7 @@ foreach source : coccinelle_sources '--all-includes', '--sp-file', concatenated_rules, '--patch', meson.project_source_root(), + coccinelle_includes, '@INPUT@', ], input: meson.project_source_root() / source, -- GitLab