* [PATCH 1/7] xdiff: simplify freeing patterns around xdl_free_env()
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-08 14:20 ` [PATCH 2/7] git-shared-util.h: move "shared" allocation utilities here Ævar Arnfjörð Bjarmason
` (5 subsequent siblings)
6 siblings, 0 replies; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
Change the invocations of xdl_free_env() so that only the function
that allocated the "xe" variable frees it, rather than passing it to a
function that might use "&xe", and on failure having that function
free it.
This simplifies the allocation management, since due to the new "{ 0
}" initialization we can pass "&xe" to xdl_free_env() without
accounting for the "&xe" not being initialized yet.
This change was originally suggested as an amendment of the series
that since got merged in 47be28e51e6 (Merge branch
'pw/xdiff-alloc-fail', 2022-03-09) in [1]. The "avoid double free of
xe2" in that series is one of the pattern we can simplify here.
1. https://lore.kernel.org/git/220216.86tuczt7js.gmgdl@evledraar.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
xdiff/xdiffi.c | 40 ++++++++++++++++------------------------
xdiff/xmerge.c | 47 ++++++++++++++++++++++++++++-------------------
xdiff/xutils.c | 12 ++++++++----
3 files changed, 52 insertions(+), 47 deletions(-)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 53e803e6bcb..6fded43e87d 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -320,15 +320,11 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0)
return -1;
- if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF) {
- res = xdl_do_patience_diff(mf1, mf2, xpp, xe);
- goto out;
- }
+ if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF)
+ return xdl_do_patience_diff(mf1, mf2, xpp, xe);
- if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) {
- res = xdl_do_histogram_diff(mf1, mf2, xpp, xe);
- goto out;
- }
+ if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
+ return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
/*
* Allocate and setup K vectors to be used by the differential
@@ -337,11 +333,8 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
* One is to store the forward path and one to store the backward path.
*/
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
- if (!XDL_ALLOC_ARRAY(kvd, 2 * ndiags + 2)) {
-
- xdl_free_env(xe);
+ if (!XDL_ALLOC_ARRAY(kvd, 2 * ndiags + 2))
return -1;
- }
kvdf = kvd;
kvdb = kvdf + ndiags;
kvdf += xe->xdf2.nreff + 1;
@@ -366,9 +359,6 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0,
&xenv);
xdl_free(kvd);
- out:
- if (res < 0)
- xdl_free_env(xe);
return res;
}
@@ -1054,19 +1044,19 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
xdchange_t *xscr;
- xdfenv_t xe;
+ xdfenv_t xe = { 0 };
emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
+ int status = 0;
if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
-
- return -1;
+ status = -1;
+ goto cleanup;
}
if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
xdl_build_script(&xe, &xscr) < 0) {
-
- xdl_free_env(&xe);
- return -1;
+ status = -1;
+ goto cleanup;
}
if (xscr) {
if (xpp->flags & XDF_IGNORE_BLANK_LINES)
@@ -1078,12 +1068,14 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
if (ef(&xe, xscr, ecb, xecfg) < 0) {
xdl_free_script(xscr);
- xdl_free_env(&xe);
- return -1;
+ status = -1;
+ goto cleanup;
}
xdl_free_script(xscr);
}
+
+cleanup:
xdl_free_env(&xe);
- return 0;
+ return status;
}
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index af40c88a5b3..ac0cf52f3be 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -365,7 +365,7 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
{
for (; m; m = m->next) {
mmfile_t t1, t2;
- xdfenv_t xe;
+ xdfenv_t xe = { 0 };
xdchange_t *xscr, *x;
int i1 = m->i1, i2 = m->i2;
@@ -387,8 +387,10 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr;
t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr
+ xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr;
- if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0)
+ if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) {
+ xdl_free_env(&xe);
return -1;
+ }
if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
xdl_build_script(&xe, &xscr) < 0) {
@@ -684,30 +686,37 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
xmparam_t const *xmp, mmbuffer_t *result)
{
- xdchange_t *xscr1 = NULL, *xscr2 = NULL;
- xdfenv_t xe1, xe2;
- int status = -1;
+ xdchange_t *xscr1, *xscr2;
+ xdfenv_t xe1 = { 0 };
+ xdfenv_t xe2 = { 0 };
+ int status;
xpparam_t const *xpp = &xmp->xpp;
result->ptr = NULL;
result->size = 0;
- if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0)
- return -1;
-
- if (xdl_do_diff(orig, mf2, xpp, &xe2) < 0)
- goto free_xe1; /* avoid double free of xe2 */
-
+ if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0) {
+ status = -1;
+ goto cleanup;
+ }
+ if (xdl_do_diff(orig, mf2, xpp, &xe2) < 0) {
+ status = -1;
+ goto cleanup;
+ }
if (xdl_change_compact(&xe1.xdf1, &xe1.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe1.xdf2, &xe1.xdf1, xpp->flags) < 0 ||
- xdl_build_script(&xe1, &xscr1) < 0)
- goto out;
-
+ xdl_build_script(&xe1, &xscr1) < 0) {
+ status = -1;
+ goto cleanup;
+ }
if (xdl_change_compact(&xe2.xdf1, &xe2.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe2.xdf2, &xe2.xdf1, xpp->flags) < 0 ||
- xdl_build_script(&xe2, &xscr2) < 0)
- goto out;
-
+ xdl_build_script(&xe2, &xscr2) < 0) {
+ xdl_free_script(xscr1);
+ status = -1;
+ goto cleanup;
+ }
+ status = 0;
if (!xscr1) {
result->ptr = xdl_malloc(mf2->size);
if (!result->ptr)
@@ -731,9 +740,9 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
xdl_free_script(xscr1);
xdl_free_script(xscr2);
- xdl_free_env(&xe2);
- free_xe1:
+cleanup:
xdl_free_env(&xe1);
+ xdl_free_env(&xe2);
return status;
}
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 9e36f24875d..a6f10353cff 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -414,7 +414,8 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
* ranges of lines instead of the whole files.
*/
mmfile_t subfile1, subfile2;
- xdfenv_t env;
+ xdfenv_t env = { 0 };
+ int status = 0;
subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr;
subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr +
@@ -422,15 +423,18 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr;
subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr +
diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr;
- if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0)
- return -1;
+ if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) {
+ status = -1;
+ goto cleanup;
+ }
memcpy(diff_env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1);
memcpy(diff_env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2);
+cleanup:
xdl_free_env(&env);
- return 0;
+ return status;
}
void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 2/7] git-shared-util.h: move "shared" allocation utilities here
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
2022-07-08 14:20 ` [PATCH 1/7] xdiff: simplify freeing patterns around xdl_free_env() Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-08 14:20 ` [PATCH 3/7] git-shared-util.h: add G*() versions of *ALLOC_*() Ævar Arnfjörð Bjarmason
` (4 subsequent siblings)
6 siblings, 0 replies; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
Move the most common allocation utilities from cache.h and
git-compat-util.h to a new git-shared-util.h. The idea is that xdiff/
and other in-tree code can share this, and that external projects
could then copy this header and include it.
They will need to include some things that git-compat-util.h does
before that, e.g. we need a "size_t" here, and if they'll end up using
any of the x*() functions they'll need to declare those. But doing so
should be fairly obvious, and we can always extend this to define some
fallback wrappers here if e.g. GIT_COMPAT_UTIL_H isn't defined.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
cache.h | 75 ---------------------------------
git-compat-util.h | 28 ++-----------
git-shared-util.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 108 insertions(+), 99 deletions(-)
create mode 100644 git-shared-util.h
diff --git a/cache.h b/cache.h
index ac5ab4ef9d3..f29dbbadf77 100644
--- a/cache.h
+++ b/cache.h
@@ -677,81 +677,6 @@ void initialize_repository_version(int hash_algo, int reinit);
void sanitize_stdfds(void);
int daemonize(void);
-#define alloc_nr(x) (((x)+16)*3/2)
-
-/**
- * Dynamically growing an array using realloc() is error prone and boring.
- *
- * Define your array with:
- *
- * - a pointer (`item`) that points at the array, initialized to `NULL`
- * (although please name the variable based on its contents, not on its
- * type);
- *
- * - an integer variable (`alloc`) that keeps track of how big the current
- * allocation is, initialized to `0`;
- *
- * - another integer variable (`nr`) to keep track of how many elements the
- * array currently has, initialized to `0`.
- *
- * Then before adding `n`th element to the item, call `ALLOC_GROW(item, n,
- * alloc)`. This ensures that the array can hold at least `n` elements by
- * calling `realloc(3)` and adjusting `alloc` variable.
- *
- * ------------
- * sometype *item;
- * size_t nr;
- * size_t alloc
- *
- * for (i = 0; i < nr; i++)
- * if (we like item[i] already)
- * return;
- *
- * // we did not like any existing one, so add one
- * ALLOC_GROW(item, nr + 1, alloc);
- * item[nr++] = value you like;
- * ------------
- *
- * You are responsible for updating the `nr` variable.
- *
- * If you need to specify the number of elements to allocate explicitly
- * then use the macro `REALLOC_ARRAY(item, alloc)` instead of `ALLOC_GROW`.
- *
- * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
- * added niceties.
- *
- * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
- do { \
- if ((nr) > alloc) { \
- if (alloc_nr(alloc) < (nr)) \
- alloc = (nr); \
- else \
- alloc = alloc_nr(alloc); \
- REALLOC_ARRAY(x, alloc); \
- } \
- } while (0)
-
-/*
- * Similar to ALLOC_GROW but handles updating of the nr value and
- * zeroing the bytes of the newly-grown array elements.
- *
- * DO NOT USE any expression with side-effect for any of the
- * arguments.
- */
-#define ALLOC_GROW_BY(x, nr, increase, alloc) \
- do { \
- if (increase) { \
- size_t new_nr = nr + (increase); \
- if (new_nr < nr) \
- BUG("negative growth in ALLOC_GROW_BY"); \
- ALLOC_GROW(x, new_nr, alloc); \
- memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
- nr = new_nr; \
- } \
- } while (0)
-
/* Initialize and use the cache information */
struct lock_file;
void preload_index(struct index_state *index,
diff --git a/git-compat-util.h b/git-compat-util.h
index 58d7708296b..eb4b27f4846 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1034,31 +1034,11 @@ FILE *fopen_or_warn(const char *path, const char *mode);
*/
int xstrncmpz(const char *s, const char *t, size_t len);
-/*
- * FREE_AND_NULL(ptr) is like free(ptr) followed by ptr = NULL. Note
- * that ptr is used twice, so don't pass e.g. ptr++.
+/**
+ * Common allocation utils, including ones xdiff uses, and thus are
+ * split into this file for sharing with external projects.
*/
-#define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0)
-
-#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
-#define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x)))
-#define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc)))
-
-#define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \
- BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
-static inline void copy_array(void *dst, const void *src, size_t n, size_t size)
-{
- if (n)
- memcpy(dst, src, st_mult(size, n));
-}
-
-#define MOVE_ARRAY(dst, src, n) move_array((dst), (src), (n), sizeof(*(dst)) + \
- BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
-static inline void move_array(void *dst, const void *src, size_t n, size_t size)
-{
- if (n)
- memmove(dst, src, st_mult(size, n));
-}
+#include "git-shared-util.h"
/*
* These functions help you allocate structs with flex arrays, and copy
diff --git a/git-shared-util.h b/git-shared-util.h
new file mode 100644
index 00000000000..7b4479a0f72
--- /dev/null
+++ b/git-shared-util.h
@@ -0,0 +1,104 @@
+#ifndef GIT_SHARED_UTIL_H
+#define GIT_SHARED_UTIL_H
+
+/*
+ * FREE_AND_NULL(ptr) is like free(ptr) followed by ptr = NULL. Note
+ * that ptr is used twice, so don't pass e.g. ptr++.
+ */
+#define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0)
+
+#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
+#define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x)))
+#define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc)))
+
+#define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \
+ BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
+static inline void copy_array(void *dst, const void *src, size_t n, size_t size)
+{
+ if (n)
+ memcpy(dst, src, st_mult(size, n));
+}
+
+#define MOVE_ARRAY(dst, src, n) move_array((dst), (src), (n), sizeof(*(dst)) + \
+ BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
+static inline void move_array(void *dst, const void *src, size_t n, size_t size)
+{
+ if (n)
+ memmove(dst, src, st_mult(size, n));
+}
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/**
+ * Dynamically growing an array using realloc() is error prone and boring.
+ *
+ * Define your array with:
+ *
+ * - a pointer (`item`) that points at the array, initialized to `NULL`
+ * (although please name the variable based on its contents, not on its
+ * type);
+ *
+ * - an integer variable (`alloc`) that keeps track of how big the current
+ * allocation is, initialized to `0`;
+ *
+ * - another integer variable (`nr`) to keep track of how many elements the
+ * array currently has, initialized to `0`.
+ *
+ * Then before adding `n`th element to the item, call `ALLOC_GROW(item, n,
+ * alloc)`. This ensures that the array can hold at least `n` elements by
+ * calling `realloc(3)` and adjusting `alloc` variable.
+ *
+ * ------------
+ * sometype *item;
+ * size_t nr;
+ * size_t alloc
+ *
+ * for (i = 0; i < nr; i++)
+ * if (we like item[i] already)
+ * return;
+ *
+ * // we did not like any existing one, so add one
+ * ALLOC_GROW(item, nr + 1, alloc);
+ * item[nr++] = value you like;
+ * ------------
+ *
+ * You are responsible for updating the `nr` variable.
+ *
+ * If you need to specify the number of elements to allocate explicitly
+ * then use the macro `REALLOC_ARRAY(item, alloc)` instead of `ALLOC_GROW`.
+ *
+ * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some
+ * added niceties.
+ *
+ * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+ do { \
+ if ((nr) > alloc) { \
+ if (alloc_nr(alloc) < (nr)) \
+ alloc = (nr); \
+ else \
+ alloc = alloc_nr(alloc); \
+ REALLOC_ARRAY(x, alloc); \
+ } \
+ } while (0)
+
+/*
+ * Similar to ALLOC_GROW but handles updating of the nr value and
+ * zeroing the bytes of the newly-grown array elements.
+ *
+ * DO NOT USE any expression with side-effect for any of the
+ * arguments.
+ */
+#define ALLOC_GROW_BY(x, nr, increase, alloc) \
+ do { \
+ if (increase) { \
+ size_t new_nr = nr + (increase); \
+ if (new_nr < nr) \
+ BUG("negative growth in ALLOC_GROW_BY"); \
+ ALLOC_GROW(x, new_nr, alloc); \
+ memset((x) + nr, 0, sizeof(*(x)) * (increase)); \
+ nr = new_nr; \
+ } \
+ } while (0)
+#endif
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH 3/7] git-shared-util.h: add G*() versions of *ALLOC_*()
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
2022-07-08 14:20 ` [PATCH 1/7] xdiff: simplify freeing patterns around xdl_free_env() Ævar Arnfjörð Bjarmason
2022-07-08 14:20 ` [PATCH 2/7] git-shared-util.h: move "shared" allocation utilities here Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-11 10:06 ` Phillip Wood
2022-07-08 14:20 ` [PATCH 4/7] xdiff: use G[C]ALLOC_ARRAY(), not XDL_CALLOC_ARRAY() Ævar Arnfjörð Bjarmason
` (3 subsequent siblings)
6 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
Add "gently" versions of ALLOC_ARRAY(), CALLOC_ARRAY() etc. using the
naming convention G*() as a shorthand for "GENTLY_*()".
Nothing uses these functions yet, but as we'll see in subsequent
commit(s) we're able to convert things that need e.g. non-fatal
"ALLOC_GROW" behavior over to this.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
git-shared-util.h | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/git-shared-util.h b/git-shared-util.h
index 7b4479a0f72..718a8e00732 100644
--- a/git-shared-util.h
+++ b/git-shared-util.h
@@ -8,8 +8,11 @@
#define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0)
#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
+#define GALLOC_ARRAY(x, alloc) (x) = malloc(st_mult(sizeof(*(x)), (alloc)))
#define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x)))
+#define GCALLOC_ARRAY(x, alloc) (x) = calloc((alloc), sizeof(*(x)))
#define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc)))
+#define GREALLOC_ARRAY(x, alloc) (x) = realloc((x), st_mult(sizeof(*(x)), (alloc)))
#define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \
BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
@@ -71,17 +74,25 @@ static inline void move_array(void *dst, const void *src, size_t n, size_t size)
* added niceties.
*
* DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
+ *
+ * GALLOC_GROW() behaves like ALLOC_GROW(), except that in malloc()
+ * failure we'll return NULL rather than dying.
*/
-#define ALLOC_GROW(x, nr, alloc) \
+#define ALLOC_GROW_1(x, nr, alloc, gently) \
do { \
if ((nr) > alloc) { \
if (alloc_nr(alloc) < (nr)) \
alloc = (nr); \
else \
alloc = alloc_nr(alloc); \
- REALLOC_ARRAY(x, alloc); \
+ if (gently) \
+ GREALLOC_ARRAY(x, alloc); \
+ else \
+ REALLOC_ARRAY(x, alloc); \
} \
} while (0)
+#define ALLOC_GROW(x, nr, alloc) ALLOC_GROW_1(x, nr, alloc, 0)
+#define GALLOC_GROW(x, nr, alloc) ALLOC_GROW_1(x, nr, alloc, 1)
/*
* Similar to ALLOC_GROW but handles updating of the nr value and
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 3/7] git-shared-util.h: add G*() versions of *ALLOC_*()
2022-07-08 14:20 ` [PATCH 3/7] git-shared-util.h: add G*() versions of *ALLOC_*() Ævar Arnfjörð Bjarmason
@ 2022-07-11 10:06 ` Phillip Wood
0 siblings, 0 replies; 52+ messages in thread
From: Phillip Wood @ 2022-07-11 10:06 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason, git; +Cc: Junio C Hamano, Jeff King
Hi Ævar
On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
> Add "gently" versions of ALLOC_ARRAY(), CALLOC_ARRAY() etc. using the
> naming convention G*() as a shorthand for "GENTLY_*()".
It might be nicer just to call them ALLOC_ARRAY_GENTLY() etc. As the
return value needs to be checked it would make sense to implement them
as expressions as I have done for XDL_ALLOC_ARRAY() etc.
> Nothing uses these functions yet, but as we'll see in subsequent
> commit(s) we're able to convert things that need e.g. non-fatal
> "ALLOC_GROW" behavior over to this.
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> git-shared-util.h | 15 +++++++++++++--
> 1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/git-shared-util.h b/git-shared-util.h
> index 7b4479a0f72..718a8e00732 100644
> --- a/git-shared-util.h
> +++ b/git-shared-util.h
> @@ -8,8 +8,11 @@
> #define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0)
>
> #define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
> +#define GALLOC_ARRAY(x, alloc) (x) = malloc(st_mult(sizeof(*(x)), (alloc)))
> #define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x)))
> +#define GCALLOC_ARRAY(x, alloc) (x) = calloc((alloc), sizeof(*(x)))
> #define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc)))
> +#define GREALLOC_ARRAY(x, alloc) (x) = realloc((x), st_mult(sizeof(*(x)), (alloc)))
>
> #define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \
> BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
> @@ -71,17 +74,25 @@ static inline void move_array(void *dst, const void *src, size_t n, size_t size)
> * added niceties.
> *
> * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
> + *
> + * GALLOC_GROW() behaves like ALLOC_GROW(), except that in malloc()
> + * failure we'll return NULL rather than dying.
> */
> -#define ALLOC_GROW(x, nr, alloc) \
> +#define ALLOC_GROW_1(x, nr, alloc, gently) \
> do { \
> if ((nr) > alloc) { \
> if (alloc_nr(alloc) < (nr)) \
> alloc = (nr); \
> else \
> alloc = alloc_nr(alloc); \
> - REALLOC_ARRAY(x, alloc); \
This leaks the old allocation if realloc() fails because it overwrites
the original pointer with NULL, that is particularly bad as if realloc()
fails we're already short of memory. XDL_ALLOC_GROW() shows how a
possible way to do this.
Best Wishes
Phillip
> + if (gently) \
> + GREALLOC_ARRAY(x, alloc); \
> + else \
> + REALLOC_ARRAY(x, alloc); \
> } \
> } while (0)
> +#define ALLOC_GROW(x, nr, alloc) ALLOC_GROW_1(x, nr, alloc, 0)
> +#define GALLOC_GROW(x, nr, alloc) ALLOC_GROW_1(x, nr, alloc, 1)
>
> /*
> * Similar to ALLOC_GROW but handles updating of the nr value and
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH 4/7] xdiff: use G[C]ALLOC_ARRAY(), not XDL_CALLOC_ARRAY()
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
` (2 preceding siblings ...)
2022-07-08 14:20 ` [PATCH 3/7] git-shared-util.h: add G*() versions of *ALLOC_*() Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-11 10:10 ` Phillip Wood
2022-07-08 14:20 ` [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW() Ævar Arnfjörð Bjarmason
` (2 subsequent siblings)
6 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
Use the newly created GCALLOC_ARRAY() helpers rather than the recently
introduced XDL_[C]ALLOC_ARRAY().
As shown in this diff the calling convention differs, we cannot use
GCALLOC_ARRAY() as an expression, but that's an advantage in not
having to relay the "sizeof()" down via a wrapper function.
This also:
* Fixes long-standing potential overflow issues, as we're using
st_mult() in the underlying G_[C]ALLOC(). Note that the
* Slightly optimizes the "XDL_CALLOC_ARRAY", as we'll now use
calloc() rather than malloc() + memset() (although smart compilers
will probably do the same for both...).
* Changes the "XDL_CALLOC_ARRAY" behavior where we'd shortcut if the
size was already large enough, but this behavior was changed when
XDL_ALLOC_ARRAY() was introduced, so this is safe.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
xdiff/xdiffi.c | 3 ++-
xdiff/xhistogram.c | 9 ++++++---
xdiff/xmacros.h | 12 ------------
xdiff/xpatience.c | 6 ++++--
xdiff/xprepare.c | 24 ++++++++++++++++--------
5 files changed, 28 insertions(+), 26 deletions(-)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 6fded43e87d..077cc456087 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -333,7 +333,8 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
* One is to store the forward path and one to store the backward path.
*/
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
- if (!XDL_ALLOC_ARRAY(kvd, 2 * ndiags + 2))
+ GALLOC_ARRAY(kvd, 2 * ndiags + 2);
+ if (!kvd)
return -1;
kvdf = kvd;
kvdb = kvdf + ndiags;
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index df909004c10..f20592bfbdd 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -266,14 +266,17 @@ static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
index.table_bits = xdl_hashbits(count1);
index.records_size = 1 << index.table_bits;
- if (!XDL_CALLOC_ARRAY(index.records, index.records_size))
+ GCALLOC_ARRAY(index.records, index.records_size);
+ if (!index.records)
goto cleanup;
index.line_map_size = count1;
- if (!XDL_CALLOC_ARRAY(index.line_map, index.line_map_size))
+ GCALLOC_ARRAY(index.line_map, index.line_map_size);
+ if (!index.line_map)
goto cleanup;
- if (!XDL_CALLOC_ARRAY(index.next_ptrs, index.line_map_size))
+ GCALLOC_ARRAY(index.next_ptrs, index.line_map_size);
+ if (!index.next_ptrs)
goto cleanup;
/* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
index d13a6724629..75506bdf17e 100644
--- a/xdiff/xmacros.h
+++ b/xdiff/xmacros.h
@@ -49,18 +49,6 @@ do { \
((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \
} while (0)
-/* Allocate an array of nr elements, returns NULL on failure */
-#define XDL_ALLOC_ARRAY(p, nr) \
- ((p) = SIZE_MAX / sizeof(*(p)) >= (size_t)(nr) \
- ? xdl_malloc((nr) * sizeof(*(p))) \
- : NULL)
-
-/* Allocate an array of nr zeroed out elements, returns NULL on failure */
-#define XDL_CALLOC_ARRAY(p, nr) \
- (XDL_ALLOC_ARRAY((p), (nr)) \
- ? memset((p), 0, (nr) * sizeof(*(p))) \
- : NULL)
-
/*
* Ensure array p can accommodate at least nr elements, growing the
* array and updating alloc (which is the number of allocated
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index fe39c2978cb..bb328d9f852 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -151,7 +151,8 @@ static int fill_hashmap(mmfile_t *file1, mmfile_t *file2,
/* We know exactly how large we want the hash map */
result->alloc = count1 * 2;
- if (!XDL_CALLOC_ARRAY(result->entries, result->alloc))
+ GCALLOC_ARRAY(result->entries, result->alloc);
+ if (!result->entries)
return -1;
/* First, fill with entries from the first file */
@@ -208,7 +209,8 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
*/
int anchor_i = -1;
- if (!XDL_ALLOC_ARRAY(sequence, map->nr))
+ GALLOC_ARRAY(sequence, map->nr);
+ if (!sequence)
return -1;
for (entry = map->first; entry; entry = entry->next) {
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index c84549f6c50..d6cbee32a2a 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -78,15 +78,17 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
return -1;
}
- if (!XDL_CALLOC_ARRAY(cf->rchash, cf->hsize)) {
+ GCALLOC_ARRAY(cf->rchash, cf->hsize);
+ if (!cf->rchash) {
xdl_cha_free(&cf->ncha);
return -1;
}
cf->alloc = size;
- if (!XDL_ALLOC_ARRAY(cf->rcrecs, cf->alloc)) {
+ GALLOC_ARRAY(cf->rcrecs, cf->alloc);
+ if (!cf->rcrecs) {
xdl_free(cf->rchash);
xdl_cha_free(&cf->ncha);
return -1;
@@ -170,12 +172,14 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
goto abort;
- if (!XDL_ALLOC_ARRAY(recs, narec))
+ GALLOC_ARRAY(recs, narec);
+ if (!recs)
goto abort;
hbits = xdl_hashbits((unsigned int) narec);
hsize = 1 << hbits;
- if (!XDL_CALLOC_ARRAY(rhash, hsize))
+ GCALLOC_ARRAY(rhash, hsize);
+ if (!rhash)
goto abort;
nrec = 0;
@@ -196,14 +200,17 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
}
}
- if (!XDL_CALLOC_ARRAY(rchg, nrec + 2))
+ GCALLOC_ARRAY(rchg, nrec + 2);
+ if (!rchg)
goto abort;
if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
(XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
- if (!XDL_ALLOC_ARRAY(rindex, nrec + 1))
+ GALLOC_ARRAY(rindex, nrec + 1);
+ if (!rindex)
goto abort;
- if (!XDL_ALLOC_ARRAY(ha, nrec + 1))
+ GALLOC_ARRAY(ha, nrec + 1);
+ if (!ha)
goto abort;
}
@@ -369,7 +376,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
xdlclass_t *rcrec;
char *dis, *dis1, *dis2;
- if (!XDL_CALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2))
+ GCALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2);
+ if (!dis)
return -1;
dis1 = dis;
dis2 = dis1 + xdf1->nrec + 1;
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 4/7] xdiff: use G[C]ALLOC_ARRAY(), not XDL_CALLOC_ARRAY()
2022-07-08 14:20 ` [PATCH 4/7] xdiff: use G[C]ALLOC_ARRAY(), not XDL_CALLOC_ARRAY() Ævar Arnfjörð Bjarmason
@ 2022-07-11 10:10 ` Phillip Wood
0 siblings, 0 replies; 52+ messages in thread
From: Phillip Wood @ 2022-07-11 10:10 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason, git; +Cc: Junio C Hamano, Jeff King
Hi Ævar
On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
> Use the newly created GCALLOC_ARRAY() helpers rather than the recently
> introduced XDL_[C]ALLOC_ARRAY().
>
> As shown in this diff the calling convention differs, we cannot use
> GCALLOC_ARRAY() as an expression, but that's an advantage in not
> having to relay the "sizeof()" down via a wrapper function.
>
> This also:
>
> * Fixes long-standing potential overflow issues, as we're using
> st_mult() in the underlying G_[C]ALLOC(). Note that the
What issues is this fixing? XDL_ALLOC_ARRAY() already checks for overflow.
> * Slightly optimizes the "XDL_CALLOC_ARRAY", as we'll now use
> calloc() rather than malloc() + memset() (although smart compilers
> will probably do the same for both...).
That's addressed in V2 of my series, unfortunately I sent it just after
you'd sent this series.
> * Changes the "XDL_CALLOC_ARRAY" behavior where we'd shortcut if the
> size was already large enough, but this behavior was changed when
> XDL_ALLOC_ARRAY() was introduced, so this is safe.
I'm not sure what you mean here - how did we shortcut before?
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> xdiff/xdiffi.c | 3 ++-
> xdiff/xhistogram.c | 9 ++++++---
> xdiff/xmacros.h | 12 ------------
> xdiff/xpatience.c | 6 ++++--
> xdiff/xprepare.c | 24 ++++++++++++++++--------
> 5 files changed, 28 insertions(+), 26 deletions(-)
>
> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
> index 6fded43e87d..077cc456087 100644
> --- a/xdiff/xdiffi.c
> +++ b/xdiff/xdiffi.c
> @@ -333,7 +333,8 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
> * One is to store the forward path and one to store the backward path.
> */
> ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
> - if (!XDL_ALLOC_ARRAY(kvd, 2 * ndiags + 2))
> + GALLOC_ARRAY(kvd, 2 * ndiags + 2);
> + if (!kvd)
> return -1;
> kvdf = kvd;
> kvdb = kvdf + ndiags;
> diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
> index df909004c10..f20592bfbdd 100644
> --- a/xdiff/xhistogram.c
> +++ b/xdiff/xhistogram.c
> @@ -266,14 +266,17 @@ static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
>
> index.table_bits = xdl_hashbits(count1);
> index.records_size = 1 << index.table_bits;
> - if (!XDL_CALLOC_ARRAY(index.records, index.records_size))
> + GCALLOC_ARRAY(index.records, index.records_size);
> + if (!index.records)
I don't think that having GALLOC_ARRAY() as a statement is an
improvement here.
Best Wishes
Phillip
> goto cleanup;
>
> index.line_map_size = count1;
> - if (!XDL_CALLOC_ARRAY(index.line_map, index.line_map_size))
> + GCALLOC_ARRAY(index.line_map, index.line_map_size);
> + if (!index.line_map)
> goto cleanup;
>
> - if (!XDL_CALLOC_ARRAY(index.next_ptrs, index.line_map_size))
> + GCALLOC_ARRAY(index.next_ptrs, index.line_map_size);
> + if (!index.next_ptrs)
> goto cleanup;
>
> /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */
> diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
> index d13a6724629..75506bdf17e 100644
> --- a/xdiff/xmacros.h
> +++ b/xdiff/xmacros.h
> @@ -49,18 +49,6 @@ do { \
> ((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \
> } while (0)
>
> -/* Allocate an array of nr elements, returns NULL on failure */
> -#define XDL_ALLOC_ARRAY(p, nr) \
> - ((p) = SIZE_MAX / sizeof(*(p)) >= (size_t)(nr) \
> - ? xdl_malloc((nr) * sizeof(*(p))) \
> - : NULL)
> -
> -/* Allocate an array of nr zeroed out elements, returns NULL on failure */
> -#define XDL_CALLOC_ARRAY(p, nr) \
> - (XDL_ALLOC_ARRAY((p), (nr)) \
> - ? memset((p), 0, (nr) * sizeof(*(p))) \
> - : NULL)
> -
> /*
> * Ensure array p can accommodate at least nr elements, growing the
> * array and updating alloc (which is the number of allocated
> diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
> index fe39c2978cb..bb328d9f852 100644
> --- a/xdiff/xpatience.c
> +++ b/xdiff/xpatience.c
> @@ -151,7 +151,8 @@ static int fill_hashmap(mmfile_t *file1, mmfile_t *file2,
>
> /* We know exactly how large we want the hash map */
> result->alloc = count1 * 2;
> - if (!XDL_CALLOC_ARRAY(result->entries, result->alloc))
> + GCALLOC_ARRAY(result->entries, result->alloc);
> + if (!result->entries)
> return -1;
>
> /* First, fill with entries from the first file */
> @@ -208,7 +209,8 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
> */
> int anchor_i = -1;
>
> - if (!XDL_ALLOC_ARRAY(sequence, map->nr))
> + GALLOC_ARRAY(sequence, map->nr);
> + if (!sequence)
> return -1;
>
> for (entry = map->first; entry; entry = entry->next) {
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index c84549f6c50..d6cbee32a2a 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -78,15 +78,17 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
>
> return -1;
> }
> - if (!XDL_CALLOC_ARRAY(cf->rchash, cf->hsize)) {
> + GCALLOC_ARRAY(cf->rchash, cf->hsize);
> + if (!cf->rchash) {
>
> xdl_cha_free(&cf->ncha);
> return -1;
> }
>
> cf->alloc = size;
> - if (!XDL_ALLOC_ARRAY(cf->rcrecs, cf->alloc)) {
>
> + GALLOC_ARRAY(cf->rcrecs, cf->alloc);
> + if (!cf->rcrecs) {
> xdl_free(cf->rchash);
> xdl_cha_free(&cf->ncha);
> return -1;
> @@ -170,12 +172,14 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
>
> if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
> goto abort;
> - if (!XDL_ALLOC_ARRAY(recs, narec))
> + GALLOC_ARRAY(recs, narec);
> + if (!recs)
> goto abort;
>
> hbits = xdl_hashbits((unsigned int) narec);
> hsize = 1 << hbits;
> - if (!XDL_CALLOC_ARRAY(rhash, hsize))
> + GCALLOC_ARRAY(rhash, hsize);
> + if (!rhash)
> goto abort;
>
> nrec = 0;
> @@ -196,14 +200,17 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
> }
> }
>
> - if (!XDL_CALLOC_ARRAY(rchg, nrec + 2))
> + GCALLOC_ARRAY(rchg, nrec + 2);
> + if (!rchg)
> goto abort;
>
> if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
> (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
> - if (!XDL_ALLOC_ARRAY(rindex, nrec + 1))
> + GALLOC_ARRAY(rindex, nrec + 1);
> + if (!rindex)
> goto abort;
> - if (!XDL_ALLOC_ARRAY(ha, nrec + 1))
> + GALLOC_ARRAY(ha, nrec + 1);
> + if (!ha)
> goto abort;
> }
>
> @@ -369,7 +376,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
> xdlclass_t *rcrec;
> char *dis, *dis1, *dis2;
>
> - if (!XDL_CALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2))
> + GCALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2);
> + if (!dis)
> return -1;
> dis1 = dis;
> dis2 = dis1 + xdf1->nrec + 1;
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW()
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
` (3 preceding siblings ...)
2022-07-08 14:20 ` [PATCH 4/7] xdiff: use G[C]ALLOC_ARRAY(), not XDL_CALLOC_ARRAY() Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-11 10:13 ` Phillip Wood
2022-07-08 14:20 ` [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc() Ævar Arnfjörð Bjarmason
2022-07-08 14:20 ` [PATCH 7/7] xdiff: remove xdl_free(), use free() instead Ævar Arnfjörð Bjarmason
6 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
Replace the recently introduced XDL_ALLOC_GROW() with invocations of
the GALLOC_GROW() from git-shared-util.h.
As this change shows the macro + function indirection of
XDL_ALLOC_GROW() is something we needed only because the two callsites
we used it in wanted to use it as an expression, and we thus had to
pass the "sizeof" down.
Let's just check the value afterwards instead, which allows us to use
the shared macro, we can also remove xdl_reallo(), this was its last
user.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
xdiff/xdiff.h | 1 -
xdiff/xmacros.h | 11 -----------
xdiff/xprepare.c | 8 +++++---
xdiff/xutils.c | 17 -----------------
xdiff/xutils.h | 2 --
5 files changed, 5 insertions(+), 34 deletions(-)
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 72e25a9ffa5..832cf9d977e 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -121,7 +121,6 @@ typedef struct s_bdiffparam {
#define xdl_malloc(x) xmalloc(x)
#define xdl_free(ptr) free(ptr)
-#define xdl_realloc(ptr,x) xrealloc(ptr,x)
void *xdl_mmfile_first(mmfile_t *mmf, long *size);
long xdl_mmfile_size(mmfile_t *mmf);
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
index 75506bdf17e..6a6b3057375 100644
--- a/xdiff/xmacros.h
+++ b/xdiff/xmacros.h
@@ -48,15 +48,4 @@ do { \
(v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \
((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \
} while (0)
-
-/*
- * Ensure array p can accommodate at least nr elements, growing the
- * array and updating alloc (which is the number of allocated
- * elements) as necessary. Frees p and returns -1 on failure, returns
- * 0 on success
- */
-#define XDL_ALLOC_GROW(p, nr, alloc) \
- (-!((nr) <= (alloc) || \
- ((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
-
#endif /* #if !defined(XMACROS_H) */
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index d6cbee32a2a..4182d9e1c0a 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -128,8 +128,9 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
return -1;
}
rcrec->idx = cf->count++;
- if (XDL_ALLOC_GROW(cf->rcrecs, cf->count, cf->alloc))
- return -1;
+ GALLOC_GROW(cf->rcrecs, cf->count, cf->alloc);
+ if (!cf->rcrecs)
+ return -1;
cf->rcrecs[rcrec->idx] = rcrec;
rcrec->line = line;
rcrec->size = rec->size;
@@ -187,7 +188,8 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
for (top = blk + bsize; cur < top; ) {
prev = cur;
hav = xdl_hash_record(&cur, top, xpp->flags);
- if (XDL_ALLOC_GROW(recs, nrec + 1, narec))
+ GALLOC_GROW(recs, nrec + 1, narec);
+ if (!recs)
goto abort;
if (!(crec = xdl_cha_alloc(&xdf->rcha)))
goto abort;
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index a6f10353cff..c0cd5338c4e 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -436,20 +436,3 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
return status;
}
-
-void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
-{
- void *tmp = NULL;
- size_t n = ((LONG_MAX - 16) / 2 >= *alloc) ? 2 * *alloc + 16 : LONG_MAX;
- if (nr > n)
- n = nr;
- if (SIZE_MAX / size >= n)
- tmp = xdl_realloc(p, n * size);
- if (tmp) {
- *alloc = n;
- } else {
- xdl_free(p);
- *alloc = 0;
- }
- return tmp;
-}
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index fd0bba94e8b..7ae3f897bef 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -42,7 +42,5 @@ int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
int line1, int count1, int line2, int count2);
-/* Do not call this function, use XDL_ALLOC_GROW instead */
-void* xdl_alloc_grow_helper(void* p, long nr, long* alloc, size_t size);
#endif /* #if !defined(XUTILS_H) */
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW()
2022-07-08 14:20 ` [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW() Ævar Arnfjörð Bjarmason
@ 2022-07-11 10:13 ` Phillip Wood
2022-07-11 10:48 ` Ævar Arnfjörð Bjarmason
0 siblings, 1 reply; 52+ messages in thread
From: Phillip Wood @ 2022-07-11 10:13 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason, git; +Cc: Junio C Hamano, Jeff King
Hi Ævar
On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
> Replace the recently introduced XDL_ALLOC_GROW() with invocations of
> the GALLOC_GROW() from git-shared-util.h.
>
> As this change shows the macro + function indirection of
> XDL_ALLOC_GROW() is something we needed only because the two callsites
> we used it in wanted to use it as an expression, and we thus had to
> pass the "sizeof" down.
>
> Let's just check the value afterwards instead, which allows us to use
> the shared macro, we can also remove xdl_reallo(), this was its last
> user.
I don't think this expression->statement change is an improvement. This
change also removes the overflow checks that are present in
XDL_ALLOC_GROW() and fails to free the old allocation when realloc()
fails. It is not a like for like replacement.
Best Wishes
Phillip
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> xdiff/xdiff.h | 1 -
> xdiff/xmacros.h | 11 -----------
> xdiff/xprepare.c | 8 +++++---
> xdiff/xutils.c | 17 -----------------
> xdiff/xutils.h | 2 --
> 5 files changed, 5 insertions(+), 34 deletions(-)
>
> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
> index 72e25a9ffa5..832cf9d977e 100644
> --- a/xdiff/xdiff.h
> +++ b/xdiff/xdiff.h
> @@ -121,7 +121,6 @@ typedef struct s_bdiffparam {
>
> #define xdl_malloc(x) xmalloc(x)
> #define xdl_free(ptr) free(ptr)
> -#define xdl_realloc(ptr,x) xrealloc(ptr,x)
>
> void *xdl_mmfile_first(mmfile_t *mmf, long *size);
> long xdl_mmfile_size(mmfile_t *mmf);
> diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
> index 75506bdf17e..6a6b3057375 100644
> --- a/xdiff/xmacros.h
> +++ b/xdiff/xmacros.h
> @@ -48,15 +48,4 @@ do { \
> (v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \
> ((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \
> } while (0)
> -
> -/*
> - * Ensure array p can accommodate at least nr elements, growing the
> - * array and updating alloc (which is the number of allocated
> - * elements) as necessary. Frees p and returns -1 on failure, returns
> - * 0 on success
> - */
> -#define XDL_ALLOC_GROW(p, nr, alloc) \
> - (-!((nr) <= (alloc) || \
> - ((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
> -
> #endif /* #if !defined(XMACROS_H) */
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index d6cbee32a2a..4182d9e1c0a 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -128,8 +128,9 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
> return -1;
> }
> rcrec->idx = cf->count++;
> - if (XDL_ALLOC_GROW(cf->rcrecs, cf->count, cf->alloc))
> - return -1;
> + GALLOC_GROW(cf->rcrecs, cf->count, cf->alloc);
> + if (!cf->rcrecs)
> + return -1;
> cf->rcrecs[rcrec->idx] = rcrec;
> rcrec->line = line;
> rcrec->size = rec->size;
> @@ -187,7 +188,8 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
> for (top = blk + bsize; cur < top; ) {
> prev = cur;
> hav = xdl_hash_record(&cur, top, xpp->flags);
> - if (XDL_ALLOC_GROW(recs, nrec + 1, narec))
> + GALLOC_GROW(recs, nrec + 1, narec);
> + if (!recs)
> goto abort;
> if (!(crec = xdl_cha_alloc(&xdf->rcha)))
> goto abort;
> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
> index a6f10353cff..c0cd5338c4e 100644
> --- a/xdiff/xutils.c
> +++ b/xdiff/xutils.c
> @@ -436,20 +436,3 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
>
> return status;
> }
> -
> -void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
> -{
> - void *tmp = NULL;
> - size_t n = ((LONG_MAX - 16) / 2 >= *alloc) ? 2 * *alloc + 16 : LONG_MAX;
> - if (nr > n)
> - n = nr;
> - if (SIZE_MAX / size >= n)
> - tmp = xdl_realloc(p, n * size);
> - if (tmp) {
> - *alloc = n;
> - } else {
> - xdl_free(p);
> - *alloc = 0;
> - }
> - return tmp;
> -}
> diff --git a/xdiff/xutils.h b/xdiff/xutils.h
> index fd0bba94e8b..7ae3f897bef 100644
> --- a/xdiff/xutils.h
> +++ b/xdiff/xutils.h
> @@ -42,7 +42,5 @@ int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
> int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
> int line1, int count1, int line2, int count2);
>
> -/* Do not call this function, use XDL_ALLOC_GROW instead */
> -void* xdl_alloc_grow_helper(void* p, long nr, long* alloc, size_t size);
>
> #endif /* #if !defined(XUTILS_H) */
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW()
2022-07-11 10:13 ` Phillip Wood
@ 2022-07-11 10:48 ` Ævar Arnfjörð Bjarmason
2022-07-13 9:09 ` Phillip Wood
0 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-11 10:48 UTC (permalink / raw)
To: Phillip Wood; +Cc: git, Junio C Hamano, Jeff King
On Mon, Jul 11 2022, Phillip Wood wrote:
> Hi Ævar
>
> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>> Replace the recently introduced XDL_ALLOC_GROW() with invocations of
>> the GALLOC_GROW() from git-shared-util.h.
>> As this change shows the macro + function indirection of
>> XDL_ALLOC_GROW() is something we needed only because the two callsites
>> we used it in wanted to use it as an expression, and we thus had to
>> pass the "sizeof" down.
>> Let's just check the value afterwards instead, which allows us to
>> use
>> the shared macro, we can also remove xdl_reallo(), this was its last
>> user.
>
> I don't think this expression->statement change is an
> improvement.
I think the use-as-statement is prettier too, but I think the uglyness
of having to pass down the sizeof() & re-implementing the macro version
of the alloc-or-die variant outweights that.
> This change also removes the overflow checks that are
> present in XDL_ALLOC_GROW()[...]
We end up calling st_mult(), which does that overflow check. Do you mean
that the POC shimmy layer I showed in another reply for libgit2 doesn't
have an st_mult() that detects overflows?
That's true, but as noted downthread of that we can & could ship that as
part of the shimmy layer, but that's unrelated to this change.
In your pre-image you use LONG_MAX instead of UINTMAX_MAX & I don't see
(but maybe I haven't looked at it carefully enough) how it does the same
dying on overflows. Doesn't it just fall back to LONG_MAX?
Part of this is that it's not clear to me from your commit(s) why you
need to rewrite alloc_nr() and rewrite (or drop?) st_mult().
> and fails to free the old allocation when
> realloc() fails. It is not a like for like replacement.
Yes, we should have a free() there. Wel spotted. But again, doing that
as part of the "gently" branch seems preferrable to have duplicate
versions for expression (non-fatal) v.s. statement (fatal) variants.
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW()
2022-07-11 10:48 ` Ævar Arnfjörð Bjarmason
@ 2022-07-13 9:09 ` Phillip Wood
2022-07-13 10:48 ` Ævar Arnfjörð Bjarmason
0 siblings, 1 reply; 52+ messages in thread
From: Phillip Wood @ 2022-07-13 9:09 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Jeff King
Hi Ævar
On 11/07/2022 11:48, Ævar Arnfjörð Bjarmason wrote:
>
> On Mon, Jul 11 2022, Phillip Wood wrote:
>
>> Hi Ævar
>>
>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>>> Replace the recently introduced XDL_ALLOC_GROW() with invocations of
>>> the GALLOC_GROW() from git-shared-util.h.
>>> As this change shows the macro + function indirection of
>>> XDL_ALLOC_GROW() is something we needed only because the two callsites
>>> we used it in wanted to use it as an expression, and we thus had to
>>> pass the "sizeof" down.
>>> Let's just check the value afterwards instead, which allows us to
>>> use
>>> the shared macro, we can also remove xdl_reallo(), this was its last
>>> user.
>>
>> I don't think this expression->statement change is an
>> improvement.
>
> I think the use-as-statement is prettier too, but I think the uglyness
> of having to pass down the sizeof() & re-implementing the macro version
> of the alloc-or-die variant outweights that.
I think this is partly a choice between prioritizing ease of
implementation or ease of use for callers.
>> This change also removes the overflow checks that are
>> present in XDL_ALLOC_GROW()[...]
>
> We end up calling st_mult(), which does that overflow check. Do you mean
> that the POC shimmy layer I showed in another reply for libgit2 doesn't
> have an st_mult() that detects overflows?
I was referring to
#define alloc_nr(x) (((x)+16)*3/2)
in cache.h. XDL_ALLOC_GROW() detects overflows when growing the number
of items as well as when calculating the number of bytes to allocate.
> That's true, but as noted downthread of that we can & could ship that as
> part of the shimmy layer, but that's unrelated to this change.
>
> In your pre-image you use LONG_MAX instead of UINTMAX_MAX & I don't see
> (but maybe I haven't looked at it carefully enough) how it does the same
> dying on overflows. Doesn't it just fall back to LONG_MAX?
It does not die on overflow as we want to return errors rather than die
in the xdiff code. It uses long to match the existing code.
> Part of this is that it's not clear to me from your commit(s) why you
> need to rewrite alloc_nr() and rewrite (or drop?) st_mult().
So that we don't die on overflow and so that the xdiff code is self
contained.
I'm a bit disappointed that this patch seems to have been written
without really taking the time to understand exactly what the code it is
replacing is doing.
Best Wishes
Phillip
>> and fails to free the old allocation when
>> realloc() fails. It is not a like for like replacement.
>
> Yes, we should have a free() there. Wel spotted. But again, doing that
> as part of the "gently" branch seems preferrable to have duplicate
> versions for expression (non-fatal) v.s. statement (fatal) variants.
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW()
2022-07-13 9:09 ` Phillip Wood
@ 2022-07-13 10:48 ` Ævar Arnfjörð Bjarmason
2022-07-13 13:21 ` Phillip Wood
0 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-13 10:48 UTC (permalink / raw)
To: Phillip Wood; +Cc: git, Junio C Hamano, Jeff King
On Wed, Jul 13 2022, Phillip Wood wrote:
> Hi Ævar
>
> On 11/07/2022 11:48, Ævar Arnfjörð Bjarmason wrote:
>> On Mon, Jul 11 2022, Phillip Wood wrote:
>>
>>> Hi Ævar
>>>
>>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>>>> Replace the recently introduced XDL_ALLOC_GROW() with invocations of
>>>> the GALLOC_GROW() from git-shared-util.h.
>>>> As this change shows the macro + function indirection of
>>>> XDL_ALLOC_GROW() is something we needed only because the two callsites
>>>> we used it in wanted to use it as an expression, and we thus had to
>>>> pass the "sizeof" down.
>>>> Let's just check the value afterwards instead, which allows us to
>>>> use
>>>> the shared macro, we can also remove xdl_reallo(), this was its last
>>>> user.
>>>
>>> I don't think this expression->statement change is an
>>> improvement.
>> I think the use-as-statement is prettier too, but I think the
>> uglyness
>> of having to pass down the sizeof() & re-implementing the macro version
>> of the alloc-or-die variant outweights that.
>
> I think this is partly a choice between prioritizing ease of
> implementation or ease of use for callers.
>
>>> This change also removes the overflow checks that are
>>> present in XDL_ALLOC_GROW()[...]
>> We end up calling st_mult(), which does that overflow check. Do you
>> mean
>> that the POC shimmy layer I showed in another reply for libgit2 doesn't
>> have an st_mult() that detects overflows?
>
> I was referring to
>
> #define alloc_nr(x) (((x)+16)*3/2)
>
> in cache.h. XDL_ALLOC_GROW() detects overflows when growing the number
> of items as well as when calculating the number of bytes to allocate.
>
>> That's true, but as noted downthread of that we can & could ship that as
>> part of the shimmy layer, but that's unrelated to this change.
>> In your pre-image you use LONG_MAX instead of UINTMAX_MAX & I don't
>> see
>> (but maybe I haven't looked at it carefully enough) how it does the same
>> dying on overflows. Doesn't it just fall back to LONG_MAX?
>
> It does not die on overflow as we want to return errors rather than
> die in the xdiff code. It uses long to match the existing code.
>
>> Part of this is that it's not clear to me from your commit(s) why you
>> need to rewrite alloc_nr() and rewrite (or drop?) st_mult().
>
> So that we don't die on overflow and so that the xdiff code is self
> contained.
>
> I'm a bit disappointed that this patch seems to have been written
> without really taking the time to understand exactly what the code it
> is replacing is doing.
Well, there's a lot to understand :) So it's also an implicit comment on
the complexity of your series.
In case it wasn't clear the main thrust of what I've been commenting on
here is asking why what you have here needs to *structurally* look the
way it does, i.e. why you think the malloc() & free() naming can't
resolve to libgit2's wrappers (per the thread ending at [1]).
And, if we can't end up with an xdiff/* in our tree that doesn't have
return value checking flying in the face of xmalloc's NORETURN()
behavior on allocation failures.
But yes, the suggested replacement isn't behaving exactly as yours does,
but I think that's just an implementation detail as far as the stuctural
questions above go. I.e.:
* You're trying to fix a long-standing overflow issue in alloc_nr(),
but in such a way that only leaves xdiff/* with the fix.
Can't we address that seperately (e.g. turning alloc_nr into a static
inline function using the st_* helper), and then make xdiff and
cache.h use that new shared helper?
* You seem to be set on the idea that you absolutely must be rewriting
large parts of the existing allocation macro, because you'd really
like to use it as an expression v.s. a statement.
I really disagree with that trade-off, i.e. the whole endeavor of
duplicating the implementation in ways that are mostly the same but
not quite (e.g. that alloc_grow case) doesn't seem worth it
v.s. sharing the behavior.
But likewise it seems to be an implementation detail of your series
that we can't have both, i.e. if you're set on using these as an
expression factoring the shared behavior in cache.h out into some
static inline functions, then calling those from both variants.
1. https://lore.kernel.org/git/220711.865yk47x54.gmgdl@evledraar.gmail.com/
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW()
2022-07-13 10:48 ` Ævar Arnfjörð Bjarmason
@ 2022-07-13 13:21 ` Phillip Wood
0 siblings, 0 replies; 52+ messages in thread
From: Phillip Wood @ 2022-07-13 13:21 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Jeff King
On 13/07/2022 11:48, Ævar Arnfjörð Bjarmason wrote:
>
> On Wed, Jul 13 2022, Phillip Wood wrote:
>>
>> I'm a bit disappointed that this patch seems to have been written
>> without really taking the time to understand exactly what the code it
>> is replacing is doing.
>
> Well, there's a lot to understand :) So it's also an implicit comment on
> the complexity of your series.
I'm surprised you think it is complex, the implementation of
XDL_ALLOC_GROW() and its helper is pretty short.
> In case it wasn't clear the main thrust of what I've been commenting on
> here is asking why what you have here needs to *structurally* look the
> way it does, i.e. why you think the malloc() & free() naming can't
> resolve to libgit2's wrappers (per the thread ending at [1]).
I think the different structural approach stems in part from the subtle
differences between ALLOC_GROW() and XDL_ALLOC_GROW(). Once one
appreciates that the latter needs to free the original allocation on
overflow and allocation failure and work with code that uses long rather
than size_t then there is not much code left to be shared between them.
> And, if we can't end up with an xdiff/* in our tree that doesn't have
> return value checking flying in the face of xmalloc's NORETURN()
> behavior on allocation failures.
I don't think xmalloc() is marked as NORETURN in wrapper.h so the
compiler would need somehow determine that from looking at the
implementation in wrapper.c but even if it is using LTO I'm not sure
it'll have that information when creating xdiff/lib.a
> But yes, the suggested replacement isn't behaving exactly as yours does,
> but I think that's just an implementation detail as far as the stuctural
> questions above go. I.e.:
>
> * You're trying to fix a long-standing overflow issue in alloc_nr(),
> but in such a way that only leaves xdiff/* with the fix.
I didn't set out to fix that issue per-se, I only realized it existed
when I reviewed this series.
> Can't we address that seperately (e.g. turning alloc_nr into a static
> inline function using the st_* helper), and then make xdiff and
> cache.h use that new shared helper?
As I've said before xdiff does not want to die on overflow so it cannot
use st_mult(). Also you cannot assume that you're dealing with size_t
when you do the overflow check in alloc_nr() so I think it is a tricky
problem to solve.
> * You seem to be set on the idea that you absolutely must be rewriting
> large parts of the existing allocation macro, because you'd really
> like to use it as an expression v.s. a statement.
It is not just that - there are plenty of differences that stem from
returning an error rather that dying that reduce the amount of common
code. Having a separate definition was also driven by a desire to keep
the xdiff code self contained.
This will likely be the last message from me in this thread for a while
- I'm going offline later next week and want to make sure I have time to
review Stolee's rebase patches before then.
Best Wishes
Phillip
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
` (4 preceding siblings ...)
2022-07-08 14:20 ` [PATCH 5/7] xdiff: use GALLOC_GROW(), not XDL_ALLOC_GROW() Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-08 17:42 ` Phillip Wood
2022-07-08 19:35 ` Jeff King
2022-07-08 14:20 ` [PATCH 7/7] xdiff: remove xdl_free(), use free() instead Ævar Arnfjörð Bjarmason
6 siblings, 2 replies; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
reason we have xdl_malloc() in the first place was to make the xdiff
code safer, it was not handling some allocation failures correctly at
the time.
But as noted in that commit doing this was intended as a stopgap, as
various code in xdiff/* did not correctly handle allocation failures,
and would have segfaulted if malloc() returned NULL.
But since then and after preceding commits we can be confident that
malloc() failures are handled correctly. All of these users of
xdl_malloc() are checking their return values, and we're returning
-1 (or similar ) to the top-level in case of failures.
This also has the big advantage of making the compiler and analysis
tools less confused, and potentially avoiding undefined (compiler)
behavior. I.e. we define our own xmalloc() to call die() on failure,
and that function uses the non-standard "noreturn" attribute on our
most common compiler targets.
But xdl_malloc()'s use conflicted with that, confusing both human
readers of this code, and potentially compilers as well.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
xdiff/xdiff.h | 1 -
xdiff/xdiffi.c | 2 +-
xdiff/xmerge.c | 10 +++++-----
xdiff/xutils.c | 2 +-
4 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 832cf9d977e..df048e0099b 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -119,7 +119,6 @@ typedef struct s_bdiffparam {
} bdiffparam_t;
-#define xdl_malloc(x) xmalloc(x)
#define xdl_free(ptr) free(ptr)
void *xdl_mmfile_first(mmfile_t *mmf, long *size);
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 077cc456087..6590811634f 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -368,7 +368,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
xdchange_t *xch;
- if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
+ if (!(xch = (xdchange_t *) malloc(sizeof(xdchange_t))))
return NULL;
xch->next = xscr;
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index ac0cf52f3be..676ad39020d 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -60,7 +60,7 @@ static int xdl_append_merge(xdmerge_t **merge, int mode,
m->chg1 = i1 + chg1 - m->i1;
m->chg2 = i2 + chg2 - m->i2;
} else {
- m = xdl_malloc(sizeof(xdmerge_t));
+ m = malloc(sizeof(xdmerge_t));
if (!m)
return -1;
m->next = NULL;
@@ -409,7 +409,7 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
m->i2 = xscr->i2 + i2;
m->chg2 = xscr->chg2;
while (xscr->next) {
- xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
+ xdmerge_t *m2 = malloc(sizeof(xdmerge_t));
if (!m2) {
xdl_free_env(&xe);
xdl_free_script(x);
@@ -670,7 +670,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
ancestor_name,
favor, changes, NULL, style,
marker_size);
- result->ptr = xdl_malloc(size);
+ result->ptr = malloc(size);
if (!result->ptr) {
xdl_cleanup_merge(changes);
return -1;
@@ -718,14 +718,14 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
}
status = 0;
if (!xscr1) {
- result->ptr = xdl_malloc(mf2->size);
+ result->ptr = malloc(mf2->size);
if (!result->ptr)
goto out;
status = 0;
memcpy(result->ptr, mf2->ptr, mf2->size);
result->size = mf2->size;
} else if (!xscr2) {
- result->ptr = xdl_malloc(mf1->size);
+ result->ptr = malloc(mf1->size);
if (!result->ptr)
goto out;
status = 0;
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index c0cd5338c4e..865e08f0e93 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -98,7 +98,7 @@ void *xdl_cha_alloc(chastore_t *cha) {
void *data;
if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
- if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
+ if (!(ancur = (chanode_t *) malloc(sizeof(chanode_t) + cha->nsize))) {
return NULL;
}
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()
2022-07-08 14:20 ` [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc() Ævar Arnfjörð Bjarmason
@ 2022-07-08 17:42 ` Phillip Wood
2022-07-08 21:44 ` Ævar Arnfjörð Bjarmason
2022-07-08 19:35 ` Jeff King
1 sibling, 1 reply; 52+ messages in thread
From: Phillip Wood @ 2022-07-08 17:42 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason, git; +Cc: Junio C Hamano, Jeff King
On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
> reason we have xdl_malloc() in the first place was to make the xdiff
> code safer, it was not handling some allocation failures correctly at
> the time.
This is untrue, we do not have xdl_malloc to make the xdiff code safer,
that macro was introduced with the initial import of xdiff. 36c83197249
just changed its definition, the entire commit consists of
-#define xdl_malloc(x) malloc(x)
+#define xdl_malloc(x) xmalloc(x)
#define xdl_free(ptr) free(ptr)
-#define xdl_realloc(ptr,x) realloc(ptr,x)
+#define xdl_realloc(ptr,x) xrealloc(ptr,x)
I can see the argument for reverting that change now that we have fixed
the error checking but that is not a good reason to remove xdl_malloc().
Best Wishes
Phillip
> But as noted in that commit doing this was intended as a stopgap, as
> various code in xdiff/* did not correctly handle allocation failures,
> and would have segfaulted if malloc() returned NULL.
>
> But since then and after preceding commits we can be confident that
> malloc() failures are handled correctly. All of these users of
> xdl_malloc() are checking their return values, and we're returning
> -1 (or similar ) to the top-level in case of failures.
>
> This also has the big advantage of making the compiler and analysis
> tools less confused, and potentially avoiding undefined (compiler)
> behavior. I.e. we define our own xmalloc() to call die() on failure,
> and that function uses the non-standard "noreturn" attribute on our
> most common compiler targets.
>
> But xdl_malloc()'s use conflicted with that, confusing both human
> readers of this code, and potentially compilers as well.
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> xdiff/xdiff.h | 1 -
> xdiff/xdiffi.c | 2 +-
> xdiff/xmerge.c | 10 +++++-----
> xdiff/xutils.c | 2 +-
> 4 files changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
> index 832cf9d977e..df048e0099b 100644
> --- a/xdiff/xdiff.h
> +++ b/xdiff/xdiff.h
> @@ -119,7 +119,6 @@ typedef struct s_bdiffparam {
> } bdiffparam_t;
>
>
> -#define xdl_malloc(x) xmalloc(x)
> #define xdl_free(ptr) free(ptr)
>
> void *xdl_mmfile_first(mmfile_t *mmf, long *size);
> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
> index 077cc456087..6590811634f 100644
> --- a/xdiff/xdiffi.c
> +++ b/xdiff/xdiffi.c
> @@ -368,7 +368,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
> static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
> xdchange_t *xch;
>
> - if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
> + if (!(xch = (xdchange_t *) malloc(sizeof(xdchange_t))))
> return NULL;
>
> xch->next = xscr;
> diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
> index ac0cf52f3be..676ad39020d 100644
> --- a/xdiff/xmerge.c
> +++ b/xdiff/xmerge.c
> @@ -60,7 +60,7 @@ static int xdl_append_merge(xdmerge_t **merge, int mode,
> m->chg1 = i1 + chg1 - m->i1;
> m->chg2 = i2 + chg2 - m->i2;
> } else {
> - m = xdl_malloc(sizeof(xdmerge_t));
> + m = malloc(sizeof(xdmerge_t));
> if (!m)
> return -1;
> m->next = NULL;
> @@ -409,7 +409,7 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
> m->i2 = xscr->i2 + i2;
> m->chg2 = xscr->chg2;
> while (xscr->next) {
> - xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
> + xdmerge_t *m2 = malloc(sizeof(xdmerge_t));
> if (!m2) {
> xdl_free_env(&xe);
> xdl_free_script(x);
> @@ -670,7 +670,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
> ancestor_name,
> favor, changes, NULL, style,
> marker_size);
> - result->ptr = xdl_malloc(size);
> + result->ptr = malloc(size);
> if (!result->ptr) {
> xdl_cleanup_merge(changes);
> return -1;
> @@ -718,14 +718,14 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
> }
> status = 0;
> if (!xscr1) {
> - result->ptr = xdl_malloc(mf2->size);
> + result->ptr = malloc(mf2->size);
> if (!result->ptr)
> goto out;
> status = 0;
> memcpy(result->ptr, mf2->ptr, mf2->size);
> result->size = mf2->size;
> } else if (!xscr2) {
> - result->ptr = xdl_malloc(mf1->size);
> + result->ptr = malloc(mf1->size);
> if (!result->ptr)
> goto out;
> status = 0;
> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
> index c0cd5338c4e..865e08f0e93 100644
> --- a/xdiff/xutils.c
> +++ b/xdiff/xutils.c
> @@ -98,7 +98,7 @@ void *xdl_cha_alloc(chastore_t *cha) {
> void *data;
>
> if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
> - if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
> + if (!(ancur = (chanode_t *) malloc(sizeof(chanode_t) + cha->nsize))) {
>
> return NULL;
> }
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()
2022-07-08 17:42 ` Phillip Wood
@ 2022-07-08 21:44 ` Ævar Arnfjörð Bjarmason
0 siblings, 0 replies; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 21:44 UTC (permalink / raw)
To: Phillip Wood; +Cc: git, Junio C Hamano, Jeff King
On Fri, Jul 08 2022, Phillip Wood wrote:
> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
>> reason we have xdl_malloc() in the first place was to make the xdiff
>> code safer, it was not handling some allocation failures correctly at
>> the time.
>
> This is untrue, we do not have xdl_malloc to make the xdiff code
> safer, that macro was introduced with the initial import of
> xdiff. 36c83197249 just changed its definition, the entire commit
> consists of
>
> -#define xdl_malloc(x) malloc(x)
> +#define xdl_malloc(x) xmalloc(x)
> #define xdl_free(ptr) free(ptr)
> -#define xdl_realloc(ptr,x) realloc(ptr,x)
> +#define xdl_realloc(ptr,x) xrealloc(ptr,x)
Yes sorry about that, I'm missing a few words there, and meant "the
reason we have this incarnation of xdl_malloc()[...]", or something to
that effect.
> I can see the argument for reverting that change now that we have
> fixed the error checking but that is not a good reason to remove
> xdl_malloc().
Indeed, but hopefully
https://lore.kernel.org/git/220708.86czef9t6y.gmgdl@evledraar.gmail.com/
is that argument.
>> But as noted in that commit doing this was intended as a stopgap, as
>> various code in xdiff/* did not correctly handle allocation failures,
>> and would have segfaulted if malloc() returned NULL.
>> But since then and after preceding commits we can be confident that
>> malloc() failures are handled correctly. All of these users of
>> xdl_malloc() are checking their return values, and we're returning
>> -1 (or similar ) to the top-level in case of failures.
>> This also has the big advantage of making the compiler and analysis
>> tools less confused, and potentially avoiding undefined (compiler)
>> behavior. I.e. we define our own xmalloc() to call die() on failure,
>> and that function uses the non-standard "noreturn" attribute on our
>> most common compiler targets.
>> But xdl_malloc()'s use conflicted with that, confusing both human
>> readers of this code, and potentially compilers as well.
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>> xdiff/xdiff.h | 1 -
>> xdiff/xdiffi.c | 2 +-
>> xdiff/xmerge.c | 10 +++++-----
>> xdiff/xutils.c | 2 +-
>> 4 files changed, 7 insertions(+), 8 deletions(-)
>> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
>> index 832cf9d977e..df048e0099b 100644
>> --- a/xdiff/xdiff.h
>> +++ b/xdiff/xdiff.h
>> @@ -119,7 +119,6 @@ typedef struct s_bdiffparam {
>> } bdiffparam_t;
>>
>> -#define xdl_malloc(x) xmalloc(x)
>> #define xdl_free(ptr) free(ptr)
>> void *xdl_mmfile_first(mmfile_t *mmf, long *size);
>> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
>> index 077cc456087..6590811634f 100644
>> --- a/xdiff/xdiffi.c
>> +++ b/xdiff/xdiffi.c
>> @@ -368,7 +368,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>> static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
>> xdchange_t *xch;
>> - if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
>> + if (!(xch = (xdchange_t *) malloc(sizeof(xdchange_t))))
>> return NULL;
>> xch->next = xscr;
>> diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
>> index ac0cf52f3be..676ad39020d 100644
>> --- a/xdiff/xmerge.c
>> +++ b/xdiff/xmerge.c
>> @@ -60,7 +60,7 @@ static int xdl_append_merge(xdmerge_t **merge, int mode,
>> m->chg1 = i1 + chg1 - m->i1;
>> m->chg2 = i2 + chg2 - m->i2;
>> } else {
>> - m = xdl_malloc(sizeof(xdmerge_t));
>> + m = malloc(sizeof(xdmerge_t));
>> if (!m)
>> return -1;
>> m->next = NULL;
>> @@ -409,7 +409,7 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
>> m->i2 = xscr->i2 + i2;
>> m->chg2 = xscr->chg2;
>> while (xscr->next) {
>> - xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
>> + xdmerge_t *m2 = malloc(sizeof(xdmerge_t));
>> if (!m2) {
>> xdl_free_env(&xe);
>> xdl_free_script(x);
>> @@ -670,7 +670,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
>> ancestor_name,
>> favor, changes, NULL, style,
>> marker_size);
>> - result->ptr = xdl_malloc(size);
>> + result->ptr = malloc(size);
>> if (!result->ptr) {
>> xdl_cleanup_merge(changes);
>> return -1;
>> @@ -718,14 +718,14 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
>> }
>> status = 0;
>> if (!xscr1) {
>> - result->ptr = xdl_malloc(mf2->size);
>> + result->ptr = malloc(mf2->size);
>> if (!result->ptr)
>> goto out;
>> status = 0;
>> memcpy(result->ptr, mf2->ptr, mf2->size);
>> result->size = mf2->size;
>> } else if (!xscr2) {
>> - result->ptr = xdl_malloc(mf1->size);
>> + result->ptr = malloc(mf1->size);
>> if (!result->ptr)
>> goto out;
>> status = 0;
>> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
>> index c0cd5338c4e..865e08f0e93 100644
>> --- a/xdiff/xutils.c
>> +++ b/xdiff/xutils.c
>> @@ -98,7 +98,7 @@ void *xdl_cha_alloc(chastore_t *cha) {
>> void *data;
>> if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
>> - if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
>> + if (!(ancur = (chanode_t *) malloc(sizeof(chanode_t) + cha->nsize))) {
>> return NULL;
>> }
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()
2022-07-08 14:20 ` [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc() Ævar Arnfjörð Bjarmason
2022-07-08 17:42 ` Phillip Wood
@ 2022-07-08 19:35 ` Jeff King
2022-07-08 21:47 ` Ævar Arnfjörð Bjarmason
1 sibling, 1 reply; 52+ messages in thread
From: Jeff King @ 2022-07-08 19:35 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Phillip Wood
On Fri, Jul 08, 2022 at 04:20:18PM +0200, Ævar Arnfjörð Bjarmason wrote:
> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
> reason we have xdl_malloc() in the first place was to make the xdiff
> code safer, it was not handling some allocation failures correctly at
> the time.
>
> But as noted in that commit doing this was intended as a stopgap, as
> various code in xdiff/* did not correctly handle allocation failures,
> and would have segfaulted if malloc() returned NULL.
>
> But since then and after preceding commits we can be confident that
> malloc() failures are handled correctly. All of these users of
> xdl_malloc() are checking their return values, and we're returning
> -1 (or similar ) to the top-level in case of failures.
This is also losing the other parts mentioned in 36c83197249:
respecting GIT_ALLOC_LIMIT and any memory reclamation strategies.
I think you'd want an xmalloc_gently() instead of a raw malloc().
For the same reason, I suspect it's better to leave this with a layer of
preprocessor indirection. Even if we chose to use malloc() here, libgit2
might not, and having the macro makes it easier to share the code.
-Peff
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()
2022-07-08 19:35 ` Jeff King
@ 2022-07-08 21:47 ` Ævar Arnfjörð Bjarmason
2022-07-11 9:33 ` Jeff King
0 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 21:47 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Phillip Wood
On Fri, Jul 08 2022, Jeff King wrote:
> On Fri, Jul 08, 2022 at 04:20:18PM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
>> reason we have xdl_malloc() in the first place was to make the xdiff
>> code safer, it was not handling some allocation failures correctly at
>> the time.
>>
>> But as noted in that commit doing this was intended as a stopgap, as
>> various code in xdiff/* did not correctly handle allocation failures,
>> and would have segfaulted if malloc() returned NULL.
>>
>> But since then and after preceding commits we can be confident that
>> malloc() failures are handled correctly. All of these users of
>> xdl_malloc() are checking their return values, and we're returning
>> -1 (or similar ) to the top-level in case of failures.
>
> This is also losing the other parts mentioned in 36c83197249:
> respecting GIT_ALLOC_LIMIT and any memory reclamation strategies.
>
> I think you'd want an xmalloc_gently() instead of a raw malloc().
...
> For the same reason, I suspect it's better to leave this with a layer of
> preprocessor indirection. Even if we chose to use malloc() here, libgit2
> might not, and having the macro makes it easier to share the code.
I think
https://lore.kernel.org/git/220708.86czef9t6y.gmgdl@evledraar.gmail.com/
in the related side-thread shows a workable way to do that.
But I didn't think about the GIT_ALLOC_LIMIT, so if we want that still
we'd need to have this code routed to not just malloc() but an
"xmalloc_gently()" as you suggest.
But we also have a few other uses of malloc() in the codebase. I wonder
if the right thing here isn't to just use malloc(), but to have
git-compat-util.h override malloc() (similar to how we we override
e.g. exit() now...), which would also catch those.
Or we could just say that it's not worth the complexity, and xdiff won't
respect GIT_ALLOC_LIMIT .. :(
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()
2022-07-08 21:47 ` Ævar Arnfjörð Bjarmason
@ 2022-07-11 9:33 ` Jeff King
0 siblings, 0 replies; 52+ messages in thread
From: Jeff King @ 2022-07-11 9:33 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Phillip Wood
On Fri, Jul 08, 2022 at 11:47:58PM +0200, Ævar Arnfjörð Bjarmason wrote:
> But we also have a few other uses of malloc() in the codebase. I wonder
> if the right thing here isn't to just use malloc(), but to have
> git-compat-util.h override malloc() (similar to how we we override
> e.g. exit() now...), which would also catch those.
I suspect that introduces new complexities, as some calls really may
want an actual no-frills malloc. I'm thinking stuff in compat/, for
example, where extra actions taken by xmalloc() could cause weird
looping or races. This was probably a lot more likely when we closed
packs via xmalloc (e.g., via git_mmap()'s malloc() call) but I think the
general principle still holds. E.g., gitsetenv() calling getenv() is a
potential questionable area.
It does look like the call in submodule--helper.c is just wrong, though,
and should be changed. You could probably detect these with the
preprocessor, but again, you run into complexities with the cases that
_should_ be vanilla malloc. Given how little of a problem this has been
historically, I'm mostly content to notice these in review and
occasionally grep for fixes.
I suppose a coccinelle rule could help, because it makes it easy to
suppress false positives (like all of compat/), which the preprocessor
doesn't.
-Peff
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-08 14:20 ` [PATCH 0/7] xdiff: use standard alloc macros, share them via git-shared-util.h Ævar Arnfjörð Bjarmason
` (5 preceding siblings ...)
2022-07-08 14:20 ` [PATCH 6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc() Ævar Arnfjörð Bjarmason
@ 2022-07-08 14:20 ` Ævar Arnfjörð Bjarmason
2022-07-08 17:51 ` Phillip Wood
6 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 14:20 UTC (permalink / raw)
To: git
Cc: Junio C Hamano, Phillip Wood, Jeff King,
Ævar Arnfjörð Bjarmason
Remove the xdl_free() wrapper macro in favor of using free()
directly. The wrapper macro was brought in with the initial import of
xdiff/ in 3443546f6ef (Use a *real* built-in diff generator,
2006-03-24).
As subsequent discussions on the topic[1] have made clear there's no
reason to use this wrapper. Both git itself as well as any external
users such as libgit2 compile the xdiff/* code as part of their own
compilation, and can thus find the right malloc() and free() at
link-time.
When compiling git we already find a custom malloc() and free()
e.g. if compiled with USE_NED_ALLOCATOR=YesPlease.
1. https://lore.kernel.org/git/220415.867d7qbaad.gmgdl@evledraar.gmail.com/
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
xdiff/xdiff.h | 3 ---
xdiff/xdiffi.c | 4 ++--
xdiff/xhistogram.c | 6 +++---
xdiff/xpatience.c | 8 ++++----
xdiff/xprepare.c | 28 ++++++++++++++--------------
xdiff/xutils.c | 2 +-
6 files changed, 24 insertions(+), 27 deletions(-)
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index df048e0099b..a37d89fcdaf 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -118,9 +118,6 @@ typedef struct s_bdiffparam {
long bsize;
} bdiffparam_t;
-
-#define xdl_free(ptr) free(ptr)
-
void *xdl_mmfile_first(mmfile_t *mmf, long *size);
long xdl_mmfile_size(mmfile_t *mmf);
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 6590811634f..375bb81a8aa 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -359,7 +359,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0,
&xenv);
- xdl_free(kvd);
+ free(kvd);
return res;
}
@@ -960,7 +960,7 @@ void xdl_free_script(xdchange_t *xscr) {
while ((xch = xscr) != NULL) {
xscr = xscr->next;
- xdl_free(xch);
+ free(xch);
}
}
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index f20592bfbdd..be35d9c9697 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -240,9 +240,9 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
static inline void free_index(struct histindex *index)
{
- xdl_free(index->records);
- xdl_free(index->line_map);
- xdl_free(index->next_ptrs);
+ free(index->records);
+ free(index->line_map);
+ free(index->next_ptrs);
xdl_cha_free(&index->rcha);
}
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index bb328d9f852..8fffd2b8297 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -233,7 +233,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
/* No common unique lines were found */
if (!longest) {
*res = NULL;
- xdl_free(sequence);
+ free(sequence);
return 0;
}
@@ -245,7 +245,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
entry = entry->previous;
}
*res = entry;
- xdl_free(sequence);
+ free(sequence);
return 0;
}
@@ -358,7 +358,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2,
env->xdf1.rchg[line1++ - 1] = 1;
while(count2--)
env->xdf2.rchg[line2++ - 1] = 1;
- xdl_free(map.entries);
+ free(map.entries);
return 0;
}
@@ -372,7 +372,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2,
result = fall_back_to_classic_diff(&map,
line1, count1, line2, count2);
out:
- xdl_free(map.entries);
+ free(map.entries);
return result;
}
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 4182d9e1c0a..169629761c0 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -89,7 +89,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
GALLOC_ARRAY(cf->rcrecs, cf->alloc);
if (!cf->rcrecs) {
- xdl_free(cf->rchash);
+ free(cf->rchash);
xdl_cha_free(&cf->ncha);
return -1;
}
@@ -102,8 +102,8 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
static void xdl_free_classifier(xdlclassifier_t *cf) {
- xdl_free(cf->rcrecs);
- xdl_free(cf->rchash);
+ free(cf->rcrecs);
+ free(cf->rchash);
xdl_cha_free(&cf->ncha);
}
@@ -230,11 +230,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
return 0;
abort:
- xdl_free(ha);
- xdl_free(rindex);
- xdl_free(rchg);
- xdl_free(rhash);
- xdl_free(recs);
+ free(ha);
+ free(rindex);
+ free(rchg);
+ free(rhash);
+ free(recs);
xdl_cha_free(&xdf->rcha);
return -1;
}
@@ -242,11 +242,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
static void xdl_free_ctx(xdfile_t *xdf) {
- xdl_free(xdf->rhash);
- xdl_free(xdf->rindex);
- xdl_free(xdf->rchg - 1);
- xdl_free(xdf->ha);
- xdl_free(xdf->recs);
+ free(xdf->rhash);
+ free(xdf->rindex);
+ free(xdf->rchg - 1);
+ free(xdf->ha);
+ free(xdf->recs);
xdl_cha_free(&xdf->rcha);
}
@@ -424,7 +424,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
}
xdf2->nreff = nreff;
- xdl_free(dis);
+ free(dis);
return 0;
}
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 865e08f0e93..00eeba452a5 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -88,7 +88,7 @@ void xdl_cha_free(chastore_t *cha) {
for (cur = cha->head; (tmp = cur) != NULL;) {
cur = cur->next;
- xdl_free(tmp);
+ free(tmp);
}
}
--
2.37.0.913.g189dca38629
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-08 14:20 ` [PATCH 7/7] xdiff: remove xdl_free(), use free() instead Ævar Arnfjörð Bjarmason
@ 2022-07-08 17:51 ` Phillip Wood
2022-07-08 21:26 ` Ævar Arnfjörð Bjarmason
0 siblings, 1 reply; 52+ messages in thread
From: Phillip Wood @ 2022-07-08 17:51 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason, git; +Cc: Junio C Hamano, Jeff King
Hi Ævar
On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
> Remove the xdl_free() wrapper macro in favor of using free()
> directly. The wrapper macro was brought in with the initial import of
> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator,
> 2006-03-24).
>
> As subsequent discussions on the topic[1] have made clear there's no
> reason to use this wrapper.
That link is to a message where you assert that there is no reason for
the wrapper, you seem to be the only person in that thread making that
argument. The libgit2 maintainer and others are arguing that it is worth
keeping to make it easy to swap the allocator.
> Both git itself as well as any external
> users such as libgit2 compile the xdiff/* code as part of their own
> compilation, and can thus find the right malloc() and free() at
> link-time.
I'm struggling to see how that is simpler than the current situation
with xdl_malloc(). Perhaps you could show how you think libgit2 could
easily make xdiff use git__malloc() instead of malloc() without changing
the malloc() calls (the message you linked to essentially suggests they
do a search and replace). If people cannot swap in another allocator
without changing the code then you are imposing a barrier to them
contributing patches back to git's xdiff.
Best Wishes
Phillip
> When compiling git we already find a custom malloc() and free()
> e.g. if compiled with USE_NED_ALLOCATOR=YesPlease.
>
> 1. https://lore.kernel.org/git/220415.867d7qbaad.gmgdl@evledraar.gmail.com/
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> xdiff/xdiff.h | 3 ---
> xdiff/xdiffi.c | 4 ++--
> xdiff/xhistogram.c | 6 +++---
> xdiff/xpatience.c | 8 ++++----
> xdiff/xprepare.c | 28 ++++++++++++++--------------
> xdiff/xutils.c | 2 +-
> 6 files changed, 24 insertions(+), 27 deletions(-)
>
> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
> index df048e0099b..a37d89fcdaf 100644
> --- a/xdiff/xdiff.h
> +++ b/xdiff/xdiff.h
> @@ -118,9 +118,6 @@ typedef struct s_bdiffparam {
> long bsize;
> } bdiffparam_t;
>
> -
> -#define xdl_free(ptr) free(ptr)
> -
> void *xdl_mmfile_first(mmfile_t *mmf, long *size);
> long xdl_mmfile_size(mmfile_t *mmf);
>
> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
> index 6590811634f..375bb81a8aa 100644
> --- a/xdiff/xdiffi.c
> +++ b/xdiff/xdiffi.c
> @@ -359,7 +359,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
> res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
> kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0,
> &xenv);
> - xdl_free(kvd);
> + free(kvd);
>
> return res;
> }
> @@ -960,7 +960,7 @@ void xdl_free_script(xdchange_t *xscr) {
>
> while ((xch = xscr) != NULL) {
> xscr = xscr->next;
> - xdl_free(xch);
> + free(xch);
> }
> }
>
> diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
> index f20592bfbdd..be35d9c9697 100644
> --- a/xdiff/xhistogram.c
> +++ b/xdiff/xhistogram.c
> @@ -240,9 +240,9 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
>
> static inline void free_index(struct histindex *index)
> {
> - xdl_free(index->records);
> - xdl_free(index->line_map);
> - xdl_free(index->next_ptrs);
> + free(index->records);
> + free(index->line_map);
> + free(index->next_ptrs);
> xdl_cha_free(&index->rcha);
> }
>
> diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
> index bb328d9f852..8fffd2b8297 100644
> --- a/xdiff/xpatience.c
> +++ b/xdiff/xpatience.c
> @@ -233,7 +233,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
> /* No common unique lines were found */
> if (!longest) {
> *res = NULL;
> - xdl_free(sequence);
> + free(sequence);
> return 0;
> }
>
> @@ -245,7 +245,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)
> entry = entry->previous;
> }
> *res = entry;
> - xdl_free(sequence);
> + free(sequence);
> return 0;
> }
>
> @@ -358,7 +358,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2,
> env->xdf1.rchg[line1++ - 1] = 1;
> while(count2--)
> env->xdf2.rchg[line2++ - 1] = 1;
> - xdl_free(map.entries);
> + free(map.entries);
> return 0;
> }
>
> @@ -372,7 +372,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2,
> result = fall_back_to_classic_diff(&map,
> line1, count1, line2, count2);
> out:
> - xdl_free(map.entries);
> + free(map.entries);
> return result;
> }
>
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index 4182d9e1c0a..169629761c0 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -89,7 +89,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
>
> GALLOC_ARRAY(cf->rcrecs, cf->alloc);
> if (!cf->rcrecs) {
> - xdl_free(cf->rchash);
> + free(cf->rchash);
> xdl_cha_free(&cf->ncha);
> return -1;
> }
> @@ -102,8 +102,8 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
>
> static void xdl_free_classifier(xdlclassifier_t *cf) {
>
> - xdl_free(cf->rcrecs);
> - xdl_free(cf->rchash);
> + free(cf->rcrecs);
> + free(cf->rchash);
> xdl_cha_free(&cf->ncha);
> }
>
> @@ -230,11 +230,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
> return 0;
>
> abort:
> - xdl_free(ha);
> - xdl_free(rindex);
> - xdl_free(rchg);
> - xdl_free(rhash);
> - xdl_free(recs);
> + free(ha);
> + free(rindex);
> + free(rchg);
> + free(rhash);
> + free(recs);
> xdl_cha_free(&xdf->rcha);
> return -1;
> }
> @@ -242,11 +242,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
>
> static void xdl_free_ctx(xdfile_t *xdf) {
>
> - xdl_free(xdf->rhash);
> - xdl_free(xdf->rindex);
> - xdl_free(xdf->rchg - 1);
> - xdl_free(xdf->ha);
> - xdl_free(xdf->recs);
> + free(xdf->rhash);
> + free(xdf->rindex);
> + free(xdf->rchg - 1);
> + free(xdf->ha);
> + free(xdf->recs);
> xdl_cha_free(&xdf->rcha);
> }
>
> @@ -424,7 +424,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
> }
> xdf2->nreff = nreff;
>
> - xdl_free(dis);
> + free(dis);
>
> return 0;
> }
> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
> index 865e08f0e93..00eeba452a5 100644
> --- a/xdiff/xutils.c
> +++ b/xdiff/xutils.c
> @@ -88,7 +88,7 @@ void xdl_cha_free(chastore_t *cha) {
>
> for (cur = cha->head; (tmp = cur) != NULL;) {
> cur = cur->next;
> - xdl_free(tmp);
> + free(tmp);
> }
> }
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-08 17:51 ` Phillip Wood
@ 2022-07-08 21:26 ` Ævar Arnfjörð Bjarmason
2022-07-11 9:26 ` Phillip Wood
0 siblings, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-08 21:26 UTC (permalink / raw)
To: Phillip Wood; +Cc: git, Junio C Hamano, Jeff King
On Fri, Jul 08 2022, Phillip Wood wrote:
> Hi Ævar
>
> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>> Remove the xdl_free() wrapper macro in favor of using free()
>> directly. The wrapper macro was brought in with the initial import of
>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator,
>> 2006-03-24).
>> As subsequent discussions on the topic[1] have made clear there's no
>> reason to use this wrapper.
>
> That link is to a message where you assert that there is no reason for
> the wrapper, you seem to be the only person in that thread making that
> argument. The libgit2 maintainer and others are arguing that it is
> worth keeping to make it easy to swap the allocator.
Arguing that it's not needed for a technical reason, but an "aesthetic
and ergonomic one", per:
https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/;
Perhaps I misread it, but I took this as Junio concurring with what I
was pointing out there:
https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/
>> Both git itself as well as any external
>> users such as libgit2 compile the xdiff/* code as part of their own
>> compilation, and can thus find the right malloc() and free() at
>> link-time.
>
> I'm struggling to see how that is simpler than the current situation
> with xdl_malloc().
It's simpler because when maintaining that code in git.git it's less of
a special case, e.g. we have coccinelle rules that match free(), now
every such rule needs to account for the xdiff special-case.
And it's less buggy because if you're relying on us always using a
wrapper bugs will creep in, current master has this:
$ git -P grep '\bfree\(' -- xdiff
xdiff/xdiff.h:#define xdl_free(ptr) free(ptr)
xdiff/xmerge.c: free(c);
xdiff/xmerge.c: free(next_m);
> Perhaps you could show how you think libgit2 could
> easily make xdiff use git__malloc() instead of malloc() without
> changing the malloc() calls (the message you linked to essentially
> suggests they do a search and replace). If people cannot swap in
> another allocator without changing the code then you are imposing a
> barrier to them contributing patches back to git's xdiff.
I was suggesting that if libgit2 wanted to maintain a copy of xdiff that
catered to the asthetic desires of the maintainer adjusting whatever
import script you use to apply a trivial coccinelle transformation would
give you what you wanted. Except without a maintenance burden on
git.git, or the bugs you'd get now since you're not catching those two
free() cases (or any future such cases).
But just having the code use malloc() and free() directly and getting
you what you get now is easy, and doesn't require any such
search-replacement.
Here's how:
I imported the xdiff/*.[ch] files at the tip of my branch into current
libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is
partially re-applying libgit2's own monkeypatches, and partially
adjusting for upstream changes (including this one):
diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h
index b75dba819..2e00764d4 100644
--- a/src/libgit2/xdiff/git-xdiff.h
+++ b/src/libgit2/xdiff/git-xdiff.h
@@ -14,6 +14,7 @@
#ifndef INCLUDE_git_xdiff_h__
#define INCLUDE_git_xdiff_h__
+#include <regex.h>
#include "regexp.h"
/* Work around C90-conformance issues */
@@ -27,11 +28,10 @@
# endif
#endif
-#define xdl_malloc(x) git__malloc(x)
-#define xdl_free(ptr) git__free(ptr)
-#define xdl_realloc(ptr, x) git__realloc(ptr, x)
+#define malloc(x) git__malloc(x)
+#define free(ptr) git__free(ptr)
-#define XDL_BUG(msg) GIT_ASSERT(msg)
+#define BUG(msg) GIT_ASSERT(msg)
#define xdl_regex_t git_regexp
#define xdl_regmatch_t git_regmatch
@@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf(
return -1;
}
+static inline size_t st_mult(size_t a, size_t b)
+{
+ return a * b;
+}
+
+static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+ assert(nmatch > 0 && pmatch);
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = size;
+ return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND);
+}
#endif
diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h
index a37d89fcd..5e5b525c2 100644
--- a/src/libgit2/xdiff/xdiff.h
+++ b/src/libgit2/xdiff/xdiff.h
@@ -23,6 +23,8 @@
#if !defined(XDIFF_H)
#define XDIFF_H
+#include "git-xdiff.h"
+
#ifdef __cplusplus
extern "C" {
#endif /* #ifdef __cplusplus */
diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h
index a4285ac0e..79812ad8a 100644
--- a/src/libgit2/xdiff/xinclude.h
+++ b/src/libgit2/xdiff/xinclude.h
@@ -23,7 +23,8 @@
#if !defined(XINCLUDE_H)
#define XINCLUDE_H
-#include "git-compat-util.h"
+#include "git-xdiff.h"
+#include "git-shared-util.h"
#include "xmacros.h"
#include "xdiff.h"
#include "xtypes.h"
If you then build it and run e.g.:
gdb -args ./libgit2_tests -smerge::files
You'll get stack traces like this if you break on stdalloc__malloc
(which it uses for me in its default configuration):
(gdb) bt
#0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14
#1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101
#2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8)
at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194
IOW this was all seamlessly routed to your git__malloc() without us
having to maintain an xdl_{malloc,free}().
Now, I think that's obviously worth adjusting, e.g. the series I've got
here could make this easier for libgit2 by including st_mult() at least,
I'm not sure what you want to do about regexec_buf().
For the monkeypatching you do now of creating a "git-xdiff.h" I think it
would be very good to get a patch like what I managed to get
sha1collisiondetection.git to accept for our own use-case, which allows
us to use their upstream code as-is from a submodule:
https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-08 21:26 ` Ævar Arnfjörð Bjarmason
@ 2022-07-11 9:26 ` Phillip Wood
2022-07-11 9:54 ` Phillip Wood
2022-07-11 10:02 ` Ævar Arnfjörð Bjarmason
0 siblings, 2 replies; 52+ messages in thread
From: Phillip Wood @ 2022-07-11 9:26 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Jeff King
Hi Ævar
On 08/07/2022 22:26, Ævar Arnfjörð Bjarmason wrote:
>
> On Fri, Jul 08 2022, Phillip Wood wrote:
>
>> Hi Ævar
>>
>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>>> Remove the xdl_free() wrapper macro in favor of using free()
>>> directly. The wrapper macro was brought in with the initial import of
>>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator,
>>> 2006-03-24).
>>> As subsequent discussions on the topic[1] have made clear there's no
>>> reason to use this wrapper.
>>
>> That link is to a message where you assert that there is no reason for
>> the wrapper, you seem to be the only person in that thread making that
>> argument. The libgit2 maintainer and others are arguing that it is
>> worth keeping to make it easy to swap the allocator.
>
> Arguing that it's not needed for a technical reason, but an "aesthetic
> and ergonomic one", per:
> https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/;
>
> Perhaps I misread it, but I took this as Junio concurring with what I
> was pointing out there:
> https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/
>
>>> Both git itself as well as any external
>>> users such as libgit2 compile the xdiff/* code as part of their own
>>> compilation, and can thus find the right malloc() and free() at
>>> link-time.
>>
>> I'm struggling to see how that is simpler than the current situation
>> with xdl_malloc().
>
> It's simpler because when maintaining that code in git.git it's less of
> a special case, e.g. we have coccinelle rules that match free(), now
> every such rule needs to account for the xdiff special-case.
>
> And it's less buggy because if you're relying on us always using a
> wrapper bugs will creep in, current master has this:
>
> $ git -P grep '\bfree\(' -- xdiff
> xdiff/xdiff.h:#define xdl_free(ptr) free(ptr)
> xdiff/xmerge.c: free(c);
> xdiff/xmerge.c: free(next_m);
>
>> Perhaps you could show how you think libgit2 could
>> easily make xdiff use git__malloc() instead of malloc() without
>> changing the malloc() calls (the message you linked to essentially
>> suggests they do a search and replace). If people cannot swap in
>> another allocator without changing the code then you are imposing a
>> barrier to them contributing patches back to git's xdiff.
>
> I was suggesting that if libgit2 wanted to maintain a copy of xdiff that
> catered to the asthetic desires of the maintainer adjusting whatever
> import script you use to apply a trivial coccinelle transformation would
> give you what you wanted. Except without a maintenance burden on
> git.git, or the bugs you'd get now since you're not catching those two
> free() cases (or any future such cases).
>
> But just having the code use malloc() and free() directly and getting
> you what you get now is easy, and doesn't require any such
> search-replacement.
>
> Here's how:
>
> I imported the xdiff/*.[ch] files at the tip of my branch into current
> libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is
> partially re-applying libgit2's own monkeypatches, and partially
> adjusting for upstream changes (including this one):
>
> diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h
> index b75dba819..2e00764d4 100644
> --- a/src/libgit2/xdiff/git-xdiff.h
> +++ b/src/libgit2/xdiff/git-xdiff.h
> @@ -14,6 +14,7 @@
> #ifndef INCLUDE_git_xdiff_h__
> #define INCLUDE_git_xdiff_h__
>
> +#include <regex.h>
> #include "regexp.h"
>
> /* Work around C90-conformance issues */
> @@ -27,11 +28,10 @@
> # endif
> #endif
>
> -#define xdl_malloc(x) git__malloc(x)
> -#define xdl_free(ptr) git__free(ptr)
> -#define xdl_realloc(ptr, x) git__realloc(ptr, x)
> +#define malloc(x) git__malloc(x)
> +#define free(ptr) git__free(ptr)
>
> -#define XDL_BUG(msg) GIT_ASSERT(msg)
> +#define BUG(msg) GIT_ASSERT(msg)
>
> #define xdl_regex_t git_regexp
> #define xdl_regmatch_t git_regmatch
> @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf(
> return -1;
> }
>
> +static inline size_t st_mult(size_t a, size_t b)
> +{
> + return a * b;
> +}
> +
> +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size,
> + size_t nmatch, regmatch_t pmatch[], int eflags)
> +{
> + assert(nmatch > 0 && pmatch);
> + pmatch[0].rm_so = 0;
> + pmatch[0].rm_eo = size;
> + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND);
> +}
> #endif
> diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h
> index a37d89fcd..5e5b525c2 100644
> --- a/src/libgit2/xdiff/xdiff.h
> +++ b/src/libgit2/xdiff/xdiff.h
> @@ -23,6 +23,8 @@
> #if !defined(XDIFF_H)
> #define XDIFF_H
>
> +#include "git-xdiff.h"
> +
> #ifdef __cplusplus
> extern "C" {
> #endif /* #ifdef __cplusplus */
> diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h
> index a4285ac0e..79812ad8a 100644
> --- a/src/libgit2/xdiff/xinclude.h
> +++ b/src/libgit2/xdiff/xinclude.h
> @@ -23,7 +23,8 @@
> #if !defined(XINCLUDE_H)
> #define XINCLUDE_H
>
> -#include "git-compat-util.h"
> +#include "git-xdiff.h"
> +#include "git-shared-util.h"
> #include "xmacros.h"
> #include "xdiff.h"
> #include "xtypes.h"
>
> If you then build it and run e.g.:
>
> gdb -args ./libgit2_tests -smerge::files
>
> You'll get stack traces like this if you break on stdalloc__malloc
> (which it uses for me in its default configuration):
>
> (gdb) bt
> #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14
> #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101
> #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8)
> at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194
>
> IOW this was all seamlessly routed to your git__malloc() without us
> having to maintain an xdl_{malloc,free}().
Thanks for showing this, it is really helpful to see a concrete example.
I was especially interested to see how you went about the conversion
without redefining standard library functions or introducing
non-namespaced identifiers in files that included xdiff.h. Unfortunately
you have not done that and so I don't think the approach above is viable
for a well behaved library.
> Now, I think that's obviously worth adjusting, e.g. the series I've got
> here could make this easier for libgit2 by including st_mult() at least,
> I'm not sure what you want to do about regexec_buf().
>
> For the monkeypatching you do now of creating a "git-xdiff.h" I think it
> would be very good to get a patch like what I managed to get
> sha1collisiondetection.git to accept for our own use-case, which allows
> us to use their upstream code as-is from a submodule:
>
> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
Thanks for the link, That's a really good example where all the
identifiers are namespaced and the public include file does not pollute
the code that is including it. If you come up with something like that
where the client code can set up same #defines for malloc, calloc,
realloc and free that would be a good way forward. I do not think we
should require projects to be defining there own versions of
[C]ALLOC_*() and BUG() just to use xdiff.
Best Wishes
Phillip
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-11 9:26 ` Phillip Wood
@ 2022-07-11 9:54 ` Phillip Wood
2022-07-11 10:02 ` Ævar Arnfjörð Bjarmason
1 sibling, 0 replies; 52+ messages in thread
From: Phillip Wood @ 2022-07-11 9:54 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Jeff King
On 11/07/2022 10:26, Phillip Wood wrote:
>> For the monkeypatching you do now of creating a "git-xdiff.h" I think it
>> would be very good to get a patch like what I managed to get
>> sha1collisiondetection.git to accept for our own use-case, which allows
>> us to use their upstream code as-is from a submodule:
>>
>> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
>>
>
> Thanks for the link, That's a really good example where all the
> identifiers are namespaced and the public include file does not pollute
> the code that is including it. If you come up with something like that
> where the client code can set up same #defines for malloc, calloc,
> realloc and free that would be a good way forward.
To be clear those should be namespaced, so we'd have
-Dxdl_malloc=xmalloc (or xmalloc_gently) and libgit2 would have
-D xdl_malloc=git__malloc
Best Wishes
Phillip
> I do not think we
> should require projects to be defining there own versions of
> [C]ALLOC_*() and BUG() just to use xdiff.
>
> Best Wishes
>
> Phillip
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-11 9:26 ` Phillip Wood
2022-07-11 9:54 ` Phillip Wood
@ 2022-07-11 10:02 ` Ævar Arnfjörð Bjarmason
2022-07-13 13:00 ` Phillip Wood
1 sibling, 1 reply; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-11 10:02 UTC (permalink / raw)
To: Phillip Wood; +Cc: git, Junio C Hamano, Jeff King
On Mon, Jul 11 2022, Phillip Wood wrote:
> Hi Ævar
>
> On 08/07/2022 22:26, Ævar Arnfjörð Bjarmason wrote:
>> On Fri, Jul 08 2022, Phillip Wood wrote:
>>
>>> Hi Ævar
>>>
>>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>>>> Remove the xdl_free() wrapper macro in favor of using free()
>>>> directly. The wrapper macro was brought in with the initial import of
>>>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator,
>>>> 2006-03-24).
>>>> As subsequent discussions on the topic[1] have made clear there's no
>>>> reason to use this wrapper.
>>>
>>> That link is to a message where you assert that there is no reason for
>>> the wrapper, you seem to be the only person in that thread making that
>>> argument. The libgit2 maintainer and others are arguing that it is
>>> worth keeping to make it easy to swap the allocator.
>> Arguing that it's not needed for a technical reason, but an
>> "aesthetic
>> and ergonomic one", per:
>> https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/;
>> Perhaps I misread it, but I took this as Junio concurring with what
>> I
>> was pointing out there:
>> https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/
>>
>>>> Both git itself as well as any external
>>>> users such as libgit2 compile the xdiff/* code as part of their own
>>>> compilation, and can thus find the right malloc() and free() at
>>>> link-time.
>>>
>>> I'm struggling to see how that is simpler than the current situation
>>> with xdl_malloc().
>> It's simpler because when maintaining that code in git.git it's less
>> of
>> a special case, e.g. we have coccinelle rules that match free(), now
>> every such rule needs to account for the xdiff special-case.
>> And it's less buggy because if you're relying on us always using a
>> wrapper bugs will creep in, current master has this:
>>
>> $ git -P grep '\bfree\(' -- xdiff
>> xdiff/xdiff.h:#define xdl_free(ptr) free(ptr)
>> xdiff/xmerge.c: free(c);
>> xdiff/xmerge.c: free(next_m);
>>
>>> Perhaps you could show how you think libgit2 could
>>> easily make xdiff use git__malloc() instead of malloc() without
>>> changing the malloc() calls (the message you linked to essentially
>>> suggests they do a search and replace). If people cannot swap in
>>> another allocator without changing the code then you are imposing a
>>> barrier to them contributing patches back to git's xdiff.
>> I was suggesting that if libgit2 wanted to maintain a copy of xdiff
>> that
>> catered to the asthetic desires of the maintainer adjusting whatever
>> import script you use to apply a trivial coccinelle transformation would
>> give you what you wanted. Except without a maintenance burden on
>> git.git, or the bugs you'd get now since you're not catching those two
>> free() cases (or any future such cases).
>> But just having the code use malloc() and free() directly and
>> getting
>> you what you get now is easy, and doesn't require any such
>> search-replacement.
>> Here's how:
>> I imported the xdiff/*.[ch] files at the tip of my branch into
>> current
>> libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is
>> partially re-applying libgit2's own monkeypatches, and partially
>> adjusting for upstream changes (including this one):
>>
>> diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h
>> index b75dba819..2e00764d4 100644
>> --- a/src/libgit2/xdiff/git-xdiff.h
>> +++ b/src/libgit2/xdiff/git-xdiff.h
>> @@ -14,6 +14,7 @@
>> #ifndef INCLUDE_git_xdiff_h__
>> #define INCLUDE_git_xdiff_h__
>>
>> +#include <regex.h>
>> #include "regexp.h"
>>
>> /* Work around C90-conformance issues */
>> @@ -27,11 +28,10 @@
>> # endif
>> #endif
>>
>> -#define xdl_malloc(x) git__malloc(x)
>> -#define xdl_free(ptr) git__free(ptr)
>> -#define xdl_realloc(ptr, x) git__realloc(ptr, x)
>> +#define malloc(x) git__malloc(x)
>> +#define free(ptr) git__free(ptr)
>>
>> -#define XDL_BUG(msg) GIT_ASSERT(msg)
>> +#define BUG(msg) GIT_ASSERT(msg)
>>
>> #define xdl_regex_t git_regexp
>> #define xdl_regmatch_t git_regmatch
>> @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf(
>> return -1;
>> }
>>
>> +static inline size_t st_mult(size_t a, size_t b)
>> +{
>> + return a * b;
>> +}
>> +
>> +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size,
>> + size_t nmatch, regmatch_t pmatch[], int eflags)
>> +{
>> + assert(nmatch > 0 && pmatch);
>> + pmatch[0].rm_so = 0;
>> + pmatch[0].rm_eo = size;
>> + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND);
>> +}
>> #endif
>> diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h
>> index a37d89fcd..5e5b525c2 100644
>> --- a/src/libgit2/xdiff/xdiff.h
>> +++ b/src/libgit2/xdiff/xdiff.h
>> @@ -23,6 +23,8 @@
>> #if !defined(XDIFF_H)
>> #define XDIFF_H
>>
>> +#include "git-xdiff.h"
>> +
>> #ifdef __cplusplus
>> extern "C" {
>> #endif /* #ifdef __cplusplus */
>> diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h
>> index a4285ac0e..79812ad8a 100644
>> --- a/src/libgit2/xdiff/xinclude.h
>> +++ b/src/libgit2/xdiff/xinclude.h
>> @@ -23,7 +23,8 @@
>> #if !defined(XINCLUDE_H)
>> #define XINCLUDE_H
>>
>> -#include "git-compat-util.h"
>> +#include "git-xdiff.h"
>> +#include "git-shared-util.h"
>> #include "xmacros.h"
>> #include "xdiff.h"
>> #include "xtypes.h"
>> If you then build it and run e.g.:
>> gdb -args ./libgit2_tests -smerge::files
>> You'll get stack traces like this if you break on stdalloc__malloc
>> (which it uses for me in its default configuration):
>>
>> (gdb) bt
>> #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14
>> #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101
>> #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8)
>> at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194
>> IOW this was all seamlessly routed to your git__malloc() without us
>> having to maintain an xdl_{malloc,free}().
>
> Thanks for showing this, it is really helpful to see a concrete
> example. I was especially interested to see how you went about the
> conversion without redefining standard library functions or
> introducing non-namespaced identifiers in files that included
> xdiff.h. Unfortunately you have not done that and so I don't think the
> approach above is viable for a well behaved library.
Why? Because there's some header where doing e.g.:
#include "git2/something.h"
Will directly include git-xdiff.h by proxy into the user's program?
I admit I didn't check that, and assumed that these were only included
by other *.c files in libgit2 itself. I.e. it would compile xdiff for
its own use, but then expose its own API.
Anyway, if it is directly included in some user-exposed *.h file then
yes, you don't want to overwrite their "malloc", but that's a small
matter of doing an "#undef malloc" at the end (which as the below
linked-to patch shows we'd support by having macros like
SHA1DC_CUSTOM_TRAILING_INCLUDE_UBC_CHECK_C).
Aside from what I'm proposing here doing such "undef at the end" seems
like a good idea in any case, because if there is any macro leakage here
you're e.g. re-defining "XDL_BUG" and other things that aren't clearly
in the libgit2 namespace now, no?
>> Now, I think that's obviously worth adjusting, e.g. the series I've got
>> here could make this easier for libgit2 by including st_mult() at least,
>> I'm not sure what you want to do about regexec_buf().
>> For the monkeypatching you do now of creating a "git-xdiff.h" I
>> think it
>> would be very good to get a patch like what I managed to get
>> sha1collisiondetection.git to accept for our own use-case, which allows
>> us to use their upstream code as-is from a submodule:
>> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
>
> Thanks for the link, That's a really good example where all the
> identifiers are namespaced and the public include file does not
> pollute the code that is including it. If you come up with something
> like that where the client code can set up same #defines for malloc,
> calloc, realloc and free that would be a good way forward.
I don't need to come up with that, you've got the linker to do that.
I.e. for almost any "normal" use of xdiff you'd simply compile it with
its reference to malloc(), and then you either link that library that
already links to libc into your program.
So if you use a custom malloc the xdiff code might still use libc
malloc, or you'd link the two together and link the resulting program
with your own custom malloc.
As explained in the previous & linked-to threads this is how almost
everyone would include & use such a library, and indeed that's how git
itself can use malloc() and free() in its sources, but have options to
link to libc malloc, nedmalloc etc.
But instead of using the linker to resolve "malloc" to git__malloc you'd
like to effectively include the upstream *.[ch] files directly, and
pretend as though the upstream source used git__malloc() or whatever to
begin with.
I don't really understand why you can't just do that at the linker
level, especially as doing so guards you better against namespace
pollution. But whatever, the suggestion(s) above assume you've got a
good reason, but show that we don't need to prefix every standard
function just to accommodate that.
Instead we can just provide a knob to include a header of yours after
all others have been included (which the libgit2 monkeypatches to xdiff
already include), and have that header define "malloc" as "git__malloc",
"BUG" as "GIT_ASSERT" etc.
And yes, if you're in turn providing an interface where others will then
include your header that's doing that you'll need to apply some
namespacing paranoia, namely to "#undef malloc" etc.
But none of that requires git to carry prefixed versions of standard
functions you'd wish to replace, which the patches here show.
> I do not think we should require projects to be defining there own
> versions of [C]ALLOC_*() and BUG() just to use xdiff.
No, I don't think so either. I.e. the idea with these patches is that we
could bundle up xdiff/* and git-shared-util.h into one distributed
libgit, which down the road we could even roll independent releases for
(as upstream seems to have forever gone away).
Whereas the proposals coming out of libgit2[1] for generalizing xdiff/
for general use seem to stop just short of the specific hacks we need to
get it running for libgit2, but e.g. leave "xdl_malloc" defined as
"xmalloc".
That isn't a standard library function, and therefore the first thing
you'd need to do when using it as a library is to find out how git.git
uses that, and copy/paste it or define your own replacement.
Whereas I think it should be the other way around, we should e.g. ship a
shimmy BUG() that calls fprintf(stderr, ...) and abort(), but for
advanced users such as libgit2 guard those with "ifndef" or whatever, so
you can have it call GIT_ASSERT() and the like instead.
1. https://lore.kernel.org/git/20220209013354.GB7@abe733c6e288/
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-11 10:02 ` Ævar Arnfjörð Bjarmason
@ 2022-07-13 13:00 ` Phillip Wood
2022-07-13 13:18 ` Ævar Arnfjörð Bjarmason
0 siblings, 1 reply; 52+ messages in thread
From: Phillip Wood @ 2022-07-13 13:00 UTC (permalink / raw)
To: Ævar Arnfjörð Bjarmason; +Cc: git, Junio C Hamano, Jeff King
On 11/07/2022 11:02, Ævar Arnfjörð Bjarmason wrote:
>
> On Mon, Jul 11 2022, Phillip Wood wrote:
>
>> Hi Ævar
>>
>> On 08/07/2022 22:26, Ævar Arnfjörð Bjarmason wrote:
>>> On Fri, Jul 08 2022, Phillip Wood wrote:
>>>
>>>> Hi Ævar
>>>>
>>>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>>>>> Remove the xdl_free() wrapper macro in favor of using free()
>>>>> directly. The wrapper macro was brought in with the initial import of
>>>>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator,
>>>>> 2006-03-24).
>>>>> As subsequent discussions on the topic[1] have made clear there's no
>>>>> reason to use this wrapper.
>>>>
>>>> That link is to a message where you assert that there is no reason for
>>>> the wrapper, you seem to be the only person in that thread making that
>>>> argument. The libgit2 maintainer and others are arguing that it is
>>>> worth keeping to make it easy to swap the allocator.
>>> Arguing that it's not needed for a technical reason, but an
>>> "aesthetic
>>> and ergonomic one", per:
>>> https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/;
>>> Perhaps I misread it, but I took this as Junio concurring with what
>>> I
>>> was pointing out there:
>>> https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/
>>>
>>>>> Both git itself as well as any external
>>>>> users such as libgit2 compile the xdiff/* code as part of their own
>>>>> compilation, and can thus find the right malloc() and free() at
>>>>> link-time.
>>>>
>>>> I'm struggling to see how that is simpler than the current situation
>>>> with xdl_malloc().
>>> It's simpler because when maintaining that code in git.git it's less
>>> of
>>> a special case, e.g. we have coccinelle rules that match free(), now
>>> every such rule needs to account for the xdiff special-case.
>>> And it's less buggy because if you're relying on us always using a
>>> wrapper bugs will creep in, current master has this:
>>>
>>> $ git -P grep '\bfree\(' -- xdiff
>>> xdiff/xdiff.h:#define xdl_free(ptr) free(ptr)
>>> xdiff/xmerge.c: free(c);
>>> xdiff/xmerge.c: free(next_m);
>>>
>>>> Perhaps you could show how you think libgit2 could
>>>> easily make xdiff use git__malloc() instead of malloc() without
>>>> changing the malloc() calls (the message you linked to essentially
>>>> suggests they do a search and replace). If people cannot swap in
>>>> another allocator without changing the code then you are imposing a
>>>> barrier to them contributing patches back to git's xdiff.
>>> I was suggesting that if libgit2 wanted to maintain a copy of xdiff
>>> that
>>> catered to the asthetic desires of the maintainer adjusting whatever
>>> import script you use to apply a trivial coccinelle transformation would
>>> give you what you wanted. Except without a maintenance burden on
>>> git.git, or the bugs you'd get now since you're not catching those two
>>> free() cases (or any future such cases).
>>> But just having the code use malloc() and free() directly and
>>> getting
>>> you what you get now is easy, and doesn't require any such
>>> search-replacement.
>>> Here's how:
>>> I imported the xdiff/*.[ch] files at the tip of my branch into
>>> current
>>> libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is
>>> partially re-applying libgit2's own monkeypatches, and partially
>>> adjusting for upstream changes (including this one):
>>>
>>> diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h
>>> index b75dba819..2e00764d4 100644
>>> --- a/src/libgit2/xdiff/git-xdiff.h
>>> +++ b/src/libgit2/xdiff/git-xdiff.h
>>> @@ -14,6 +14,7 @@
>>> #ifndef INCLUDE_git_xdiff_h__
>>> #define INCLUDE_git_xdiff_h__
>>>
>>> +#include <regex.h>
>>> #include "regexp.h"
>>>
>>> /* Work around C90-conformance issues */
>>> @@ -27,11 +28,10 @@
>>> # endif
>>> #endif
>>>
>>> -#define xdl_malloc(x) git__malloc(x)
>>> -#define xdl_free(ptr) git__free(ptr)
>>> -#define xdl_realloc(ptr, x) git__realloc(ptr, x)
>>> +#define malloc(x) git__malloc(x)
>>> +#define free(ptr) git__free(ptr)
>>>
>>> -#define XDL_BUG(msg) GIT_ASSERT(msg)
>>> +#define BUG(msg) GIT_ASSERT(msg)
>>>
>>> #define xdl_regex_t git_regexp
>>> #define xdl_regmatch_t git_regmatch
>>> @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf(
>>> return -1;
>>> }
>>>
>>> +static inline size_t st_mult(size_t a, size_t b)
>>> +{
>>> + return a * b;
>>> +}
>>> +
>>> +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size,
>>> + size_t nmatch, regmatch_t pmatch[], int eflags)
>>> +{
>>> + assert(nmatch > 0 && pmatch);
>>> + pmatch[0].rm_so = 0;
>>> + pmatch[0].rm_eo = size;
>>> + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND);
>>> +}
>>> #endif
>>> diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h
>>> index a37d89fcd..5e5b525c2 100644
>>> --- a/src/libgit2/xdiff/xdiff.h
>>> +++ b/src/libgit2/xdiff/xdiff.h
>>> @@ -23,6 +23,8 @@
>>> #if !defined(XDIFF_H)
>>> #define XDIFF_H
>>>
>>> +#include "git-xdiff.h"
>>> +
>>> #ifdef __cplusplus
>>> extern "C" {
>>> #endif /* #ifdef __cplusplus */
>>> diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h
>>> index a4285ac0e..79812ad8a 100644
>>> --- a/src/libgit2/xdiff/xinclude.h
>>> +++ b/src/libgit2/xdiff/xinclude.h
>>> @@ -23,7 +23,8 @@
>>> #if !defined(XINCLUDE_H)
>>> #define XINCLUDE_H
>>>
>>> -#include "git-compat-util.h"
>>> +#include "git-xdiff.h"
>>> +#include "git-shared-util.h"
>>> #include "xmacros.h"
>>> #include "xdiff.h"
>>> #include "xtypes.h"
>>> If you then build it and run e.g.:
>>> gdb -args ./libgit2_tests -smerge::files
>>> You'll get stack traces like this if you break on stdalloc__malloc
>>> (which it uses for me in its default configuration):
>>>
>>> (gdb) bt
>>> #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14
>>> #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101
>>> #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8)
>>> at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194
>>> IOW this was all seamlessly routed to your git__malloc() without us
>>> having to maintain an xdl_{malloc,free}().
>>
>> Thanks for showing this, it is really helpful to see a concrete
>> example. I was especially interested to see how you went about the
>> conversion without redefining standard library functions or
>> introducing non-namespaced identifiers in files that included
>> xdiff.h. Unfortunately you have not done that and so I don't think the
>> approach above is viable for a well behaved library.
>
> Why? Because there's some header where doing e.g.:
>
> #include "git2/something.h"
>
> Will directly include git-xdiff.h by proxy into the user's program?
No because you're redefining malloc() and introducing ALLOC_GROW() etc
in
src/libgit2/{Blame_git.c,Diff_xdiff.c,Merge_file.c,Patch_generate.c,Checkout.c}
and
Test/libgit2/merge/files.c. It is not expected that including xdiff.h
will change a bunch of symbols in the file it is included in.
Best Wishes
Phillip
>
> I admit I didn't check that, and assumed that these were only included
> by other *.c files in libgit2 itself. I.e. it would compile xdiff for
> its own use, but then expose its own API.
>
> Anyway, if it is directly included in some user-exposed *.h file then
> yes, you don't want to overwrite their "malloc", but that's a small
> matter of doing an "#undef malloc" at the end (which as the below
> linked-to patch shows we'd support by having macros like
> SHA1DC_CUSTOM_TRAILING_INCLUDE_UBC_CHECK_C).
>
> Aside from what I'm proposing here doing such "undef at the end" seems
> like a good idea in any case, because if there is any macro leakage here
> you're e.g. re-defining "XDL_BUG" and other things that aren't clearly
> in the libgit2 namespace now, no?
>
>>> Now, I think that's obviously worth adjusting, e.g. the series I've got
>>> here could make this easier for libgit2 by including st_mult() at least,
>>> I'm not sure what you want to do about regexec_buf().
>>> For the monkeypatching you do now of creating a "git-xdiff.h" I
>>> think it
>>> would be very good to get a patch like what I managed to get
>>> sha1collisiondetection.git to accept for our own use-case, which allows
>>> us to use their upstream code as-is from a submodule:
>>> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
>>
>> Thanks for the link, That's a really good example where all the
>> identifiers are namespaced and the public include file does not
>> pollute the code that is including it. If you come up with something
>> like that where the client code can set up same #defines for malloc,
>> calloc, realloc and free that would be a good way forward.
>
> I don't need to come up with that, you've got the linker to do that.
>
> I.e. for almost any "normal" use of xdiff you'd simply compile it with
> its reference to malloc(), and then you either link that library that
> already links to libc into your program.
>
> So if you use a custom malloc the xdiff code might still use libc
> malloc, or you'd link the two together and link the resulting program
> with your own custom malloc.
>
> As explained in the previous & linked-to threads this is how almost
> everyone would include & use such a library, and indeed that's how git
> itself can use malloc() and free() in its sources, but have options to
> link to libc malloc, nedmalloc etc.
>
> But instead of using the linker to resolve "malloc" to git__malloc you'd
> like to effectively include the upstream *.[ch] files directly, and
> pretend as though the upstream source used git__malloc() or whatever to
> begin with.
>
> I don't really understand why you can't just do that at the linker
> level, especially as doing so guards you better against namespace
> pollution. But whatever, the suggestion(s) above assume you've got a
> good reason, but show that we don't need to prefix every standard
> function just to accommodate that.
>
> Instead we can just provide a knob to include a header of yours after
> all others have been included (which the libgit2 monkeypatches to xdiff
> already include), and have that header define "malloc" as "git__malloc",
> "BUG" as "GIT_ASSERT" etc.
>
> And yes, if you're in turn providing an interface where others will then
> include your header that's doing that you'll need to apply some
> namespacing paranoia, namely to "#undef malloc" etc.
>
> But none of that requires git to carry prefixed versions of standard
> functions you'd wish to replace, which the patches here show.
>
>> I do not think we should require projects to be defining there own
>> versions of [C]ALLOC_*() and BUG() just to use xdiff.
>
> No, I don't think so either. I.e. the idea with these patches is that we
> could bundle up xdiff/* and git-shared-util.h into one distributed
> libgit, which down the road we could even roll independent releases for
> (as upstream seems to have forever gone away).
>
> Whereas the proposals coming out of libgit2[1] for generalizing xdiff/
> for general use seem to stop just short of the specific hacks we need to
> get it running for libgit2, but e.g. leave "xdl_malloc" defined as
> "xmalloc".
>
> That isn't a standard library function, and therefore the first thing
> you'd need to do when using it as a library is to find out how git.git
> uses that, and copy/paste it or define your own replacement.
>
> Whereas I think it should be the other way around, we should e.g. ship a
> shimmy BUG() that calls fprintf(stderr, ...) and abort(), but for
> advanced users such as libgit2 guard those with "ifndef" or whatever, so
> you can have it call GIT_ASSERT() and the like instead.
>
> 1. https://lore.kernel.org/git/20220209013354.GB7@abe733c6e288/
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH 7/7] xdiff: remove xdl_free(), use free() instead
2022-07-13 13:00 ` Phillip Wood
@ 2022-07-13 13:18 ` Ævar Arnfjörð Bjarmason
0 siblings, 0 replies; 52+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-07-13 13:18 UTC (permalink / raw)
To: Phillip Wood; +Cc: git, Junio C Hamano, Jeff King
On Wed, Jul 13 2022, Phillip Wood wrote:
> On 11/07/2022 11:02, Ævar Arnfjörð Bjarmason wrote:
>> On Mon, Jul 11 2022, Phillip Wood wrote:
> [...]
>>> Thanks for showing this, it is really helpful to see a concrete
>>> example. I was especially interested to see how you went about the
>>> conversion without redefining standard library functions or
>>> introducing non-namespaced identifiers in files that included
>>> xdiff.h. Unfortunately you have not done that and so I don't think the
>>> approach above is viable for a well behaved library.
>> Why? Because there's some header where doing e.g.:
>> #include "git2/something.h"
>> Will directly include git-xdiff.h by proxy into the user's program?
>
> No because you're redefining malloc() and introducing ALLOC_GROW() etc
> in
> src/libgit2/{Blame_git.c,Diff_xdiff.c,Merge_file.c,Patch_generate.c,Checkout.c}
> and
> Test/libgit2/merge/files.c. It is not expected that including xdiff.h
> will change a bunch of symbols in the file it is included in.
...which is why if you read to the sha1collisiondetection commit below
you'd follow that up with including a header at the end of xdiff.h which
would do:
#undef malloc
etc., so you wouldn't leak that macro beyond the code that needs it, and
you wouldn't leak the xdl_* macros either, which are purely needed
internally in that code. So even aside from my suggestion of doing it
this way it seems to me the structure has macro hygene issues worth
fixing, see e.g. how we have refs-internal.h v.s. refs.h in git.git for
similar reasons.
>> I admit I didn't check that, and assumed that these were only
>> included
>> by other *.c files in libgit2 itself. I.e. it would compile xdiff for
>> its own use, but then expose its own API.
>> Anyway, if it is directly included in some user-exposed *.h file
>> then
>> yes, you don't want to overwrite their "malloc", but that's a small
>> matter of doing an "#undef malloc" at the end (which as the below
>> linked-to patch shows we'd support by having macros like
>> SHA1DC_CUSTOM_TRAILING_INCLUDE_UBC_CHECK_C).
>> Aside from what I'm proposing here doing such "undef at the end"
>> seems
>> like a good idea in any case, because if there is any macro leakage here
>> you're e.g. re-defining "XDL_BUG" and other things that aren't clearly
>> in the libgit2 namespace now, no?
>>
>>>> Now, I think that's obviously worth adjusting, e.g. the series I've got
>>>> here could make this easier for libgit2 by including st_mult() at least,
>>>> I'm not sure what you want to do about regexec_buf().
>>>> For the monkeypatching you do now of creating a "git-xdiff.h" I
>>>> think it
>>>> would be very good to get a patch like what I managed to get
>>>> sha1collisiondetection.git to accept for our own use-case, which allows
>>>> us to use their upstream code as-is from a submodule:
>>>> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
>>>
>>> Thanks for the link, That's a really good example where all the
>>> identifiers are namespaced and the public include file does not
>>> pollute the code that is including it. If you come up with something
>>> like that where the client code can set up same #defines for malloc,
>>> calloc, realloc and free that would be a good way forward.
>> I don't need to come up with that, you've got the linker to do that.
>> I.e. for almost any "normal" use of xdiff you'd simply compile it
>> with
>> its reference to malloc(), and then you either link that library that
>> already links to libc into your program.
>> So if you use a custom malloc the xdiff code might still use libc
>> malloc, or you'd link the two together and link the resulting program
>> with your own custom malloc.
>> As explained in the previous & linked-to threads this is how almost
>> everyone would include & use such a library, and indeed that's how git
>> itself can use malloc() and free() in its sources, but have options to
>> link to libc malloc, nedmalloc etc.
>> But instead of using the linker to resolve "malloc" to git__malloc
>> you'd
>> like to effectively include the upstream *.[ch] files directly, and
>> pretend as though the upstream source used git__malloc() or whatever to
>> begin with.
>> I don't really understand why you can't just do that at the linker
>> level, especially as doing so guards you better against namespace
>> pollution. But whatever, the suggestion(s) above assume you've got a
>> good reason, but show that we don't need to prefix every standard
>> function just to accommodate that.
>> Instead we can just provide a knob to include a header of yours
>> after
>> all others have been included (which the libgit2 monkeypatches to xdiff
>> already include), and have that header define "malloc" as "git__malloc",
>> "BUG" as "GIT_ASSERT" etc.
>> And yes, if you're in turn providing an interface where others will
>> then
>> include your header that's doing that you'll need to apply some
>> namespacing paranoia, namely to "#undef malloc" etc.
>> But none of that requires git to carry prefixed versions of standard
>> functions you'd wish to replace, which the patches here show.
>>
>>> I do not think we should require projects to be defining there own
>>> versions of [C]ALLOC_*() and BUG() just to use xdiff.
>> No, I don't think so either. I.e. the idea with these patches is
>> that we
>> could bundle up xdiff/* and git-shared-util.h into one distributed
>> libgit, which down the road we could even roll independent releases for
>> (as upstream seems to have forever gone away).
>> Whereas the proposals coming out of libgit2[1] for generalizing
>> xdiff/
>> for general use seem to stop just short of the specific hacks we need to
>> get it running for libgit2, but e.g. leave "xdl_malloc" defined as
>> "xmalloc".
>> That isn't a standard library function, and therefore the first
>> thing
>> you'd need to do when using it as a library is to find out how git.git
>> uses that, and copy/paste it or define your own replacement.
>> Whereas I think it should be the other way around, we should
>> e.g. ship a
>> shimmy BUG() that calls fprintf(stderr, ...) and abort(), but for
>> advanced users such as libgit2 guard those with "ifndef" or whatever, so
>> you can have it call GIT_ASSERT() and the like instead.
>> 1. https://lore.kernel.org/git/20220209013354.GB7@abe733c6e288/
^ I.e. this.
^ permalink raw reply [flat|nested] 52+ messages in thread