All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2 0/3] strbuf: improve API
@ 2016-06-06 15:13 William Duclot
  2016-06-06 15:13 ` [PATCH V2 1/3] strbuf: add tests William Duclot
                   ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: William Duclot @ 2016-06-06 15:13 UTC (permalink / raw)
  To: git
  Cc: antoine.queru, francois.beutin, mhagger, Johannes.Schindelin,
	peff, mh, gitster

This patch series implements an improvment of the strbuf API, allowing
strbuf to use preallocated memory. This makes strbuf fit to be used
in performance-critical operations.

* The first patch is simply a preparatory work, adding tests for
existing strbuf implementation.
* The second patch is also preparatory: rename a function for the third
patch.
* Most of the work is made in the third patch: handle pre-allocated
memory, extend the API, document it and test it.

As said in the third commit message, the idea comes from Michael Haggerty.

William Duclot (3):
      strbuf: add tests
      pretty.c: rename strbuf_wrap() function
      strbuf: allow to use preallocated memory

Makefile               |   1 +
pretty.c               |   7 ++++---
strbuf.c               |  82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
strbuf.h               |  44 ++++++++++++++++++++++++++++++++++++++--
t/helper/test-strbuf.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
t/t0082-strbuf.sh      |  48 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 394 insertions(+), 17 deletions(-)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH V2 1/3] strbuf: add tests
  2016-06-06 15:13 [PATCH V2 0/3] strbuf: improve API William Duclot
@ 2016-06-06 15:13 ` William Duclot
  2016-06-06 16:11   ` Matthieu Moy
  2016-06-07  8:44   ` Johannes Schindelin
  2016-06-06 15:13 ` [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function William Duclot
  2016-06-06 15:13 ` [PATCH V2 3/3] strbuf: allow to use preallocated memory William Duclot
  2 siblings, 2 replies; 32+ messages in thread
From: William Duclot @ 2016-06-06 15:13 UTC (permalink / raw)
  To: git
  Cc: antoine.queru, francois.beutin, mhagger, Johannes.Schindelin,
	peff, mh, gitster, Simon Rabourg, Matthieu Moy

Test the strbuf API. Being used throughout all Git the API could be
considered tested, but adding specific tests makes it easier to improve
and extend the API.

Signed-off-by: William Duclot <william.duclot@ensimag.grenoble-inp.fr>
Signed-off-by: Simon Rabourg <simon.rabourg@ensimag.grenoble-inp.fr>
Signed-off-by: Matthieu Moy <Matthieu.Moy@grenoble-inp.fr>
---
Changes since V1:
    * Use the parser API
    * Coding style fix
    * New test for string size

 Makefile               |   1 +
 t/helper/test-strbuf.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++
 t/t0082-strbuf.sh      |  19 ++++++++++
 3 files changed, 121 insertions(+)
 create mode 100644 t/helper/test-strbuf.c
 create mode 100755 t/t0082-strbuf.sh

diff --git a/Makefile b/Makefile
index 3f03366..dc84f43 100644
--- a/Makefile
+++ b/Makefile
@@ -613,6 +613,7 @@ TEST_PROGRAMS_NEED_X += test-scrap-cache-tree
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sha1-array
 TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-strbuf
 TEST_PROGRAMS_NEED_X += test-string-list
 TEST_PROGRAMS_NEED_X += test-submodule-config
 TEST_PROGRAMS_NEED_X += test-subprocess
diff --git a/t/helper/test-strbuf.c b/t/helper/test-strbuf.c
new file mode 100644
index 0000000..271592e
--- /dev/null
+++ b/t/helper/test-strbuf.c
@@ -0,0 +1,101 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "parse-options.h"
+#include "builtin.h"
+
+/*
+ * Check behavior on usual use cases
+ */
+static int strbuf_check_behavior(struct strbuf *sb)
+{
+	char *str_test = xstrdup("test"), *res, *old_buf;
+	size_t size, old_alloc;
+
+	strbuf_grow(sb, 1);
+	old_alloc = sb->alloc;
+	strbuf_grow(sb, sb->alloc - sb->len + 1000);
+	if (old_alloc == sb->alloc)
+		die("strbuf_grow does not realloc the buffer as expected");
+	old_buf = sb->buf;
+	res = strbuf_detach(sb, &size);
+	if (res != old_buf)
+		die("strbuf_detach does not return the expected buffer");
+	free(res);
+
+	str_test = xstrdup("test");
+	strbuf_attach(sb, str_test, strlen(str_test), strlen(str_test) + 1);
+	res = strbuf_detach(sb, &size);
+	if (size != strlen(str_test)) 
+		die ("size is not as expected");
+
+	if (res != str_test)
+		die("strbuf_detach does not return the expected buffer");
+	free(res);
+	strbuf_release(sb);
+
+	return 0;
+}
+
+static int basic_grow(struct strbuf *sb) 
+{
+	strbuf_init(sb, 0);
+	strbuf_grow(sb, 0);
+	if (sb->buf == strbuf_slopbuf)
+		die("strbuf_grow failed to alloc memory");
+	strbuf_release(sb);
+	if (sb->buf != strbuf_slopbuf)
+		die("strbuf_release does not reinitialize the strbuf");
+
+	return 0;
+}
+
+static void grow_overflow(struct strbuf *sb)
+{
+	strbuf_init(sb, 1000);
+	strbuf_grow(sb, maximum_unsigned_value_of_type((size_t)1));
+}
+
+int main(int argc, const char *argv[])
+{
+	struct strbuf sb;
+	enum {
+		MODE_UNSPECIFIED = 0,
+		MODE_BASIC_GROW ,
+		MODE_STRBUF_CHECK,
+		MODE_GROW_OVERFLOW
+	} cmdmode = MODE_UNSPECIFIED;
+	struct option options[] = {
+		OPT_CMDMODE(0, "basic_grow", &cmdmode,
+			    N_(" basic grow test"),
+			    MODE_BASIC_GROW),
+		OPT_CMDMODE(0, "strbuf_check_behavior", &cmdmode,
+			    N_("check the strbuf's behavior"),
+			    MODE_STRBUF_CHECK),
+		OPT_CMDMODE(0, "grow_overflow", &cmdmode,
+			    N_("test grow expecting overflow"),
+			    MODE_GROW_OVERFLOW),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, NULL, options, NULL, 0);
+
+	if (cmdmode == MODE_BASIC_GROW) {
+		/*
+		 * Check if strbuf_grow(0) allocate a new NUL-terminated buffer
+		 */
+		return basic_grow(&sb);
+	} else if (cmdmode == MODE_STRBUF_CHECK) {
+		strbuf_init(&sb, 0);
+		return strbuf_check_behavior(&sb);
+	} else if (cmdmode == MODE_GROW_OVERFLOW) {
+		/*
+		 * size_t overflow: should die().
+		 * If this does not die(), fall through to returning success.
+		 */
+		grow_overflow(&sb);
+	} else {
+		usage("test-strbuf mode");
+	}
+
+	return 0;
+}
diff --git a/t/t0082-strbuf.sh b/t/t0082-strbuf.sh
new file mode 100755
index 0000000..6a579a3
--- /dev/null
+++ b/t/t0082-strbuf.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+test_description='Test the strbuf API.'
+
+. ./test-lib.sh
+
+test_expect_success 'basic test on strbuf_grow()' '
+	test-strbuf --basic_grow
+'
+
+test_expect_success 'check strbuf behavior in usual use cases ' '
+	test-strbuf --strbuf_check_behavior
+'
+
+test_expect_success 'overflow while calling strbuf_grow' '
+	test_must_fail test-strbuf --grow_overflow
+'
+
+test_done
-- 
2.9.0.rc1.1.geac644e

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function
  2016-06-06 15:13 [PATCH V2 0/3] strbuf: improve API William Duclot
  2016-06-06 15:13 ` [PATCH V2 1/3] strbuf: add tests William Duclot
@ 2016-06-06 15:13 ` William Duclot
  2016-06-06 16:12   ` Matthieu Moy
  2016-06-07  9:04   ` Johannes Schindelin
  2016-06-06 15:13 ` [PATCH V2 3/3] strbuf: allow to use preallocated memory William Duclot
  2 siblings, 2 replies; 32+ messages in thread
From: William Duclot @ 2016-06-06 15:13 UTC (permalink / raw)
  To: git
  Cc: antoine.queru, francois.beutin, mhagger, Johannes.Schindelin,
	peff, mh, gitster, Simon Rabourg, Matthieu Moy

The function strbuf_wrap() is not part of the strbuf API, yet prevent to
extend the API to include wrapping functions. Renaming it to something
more specific allow to use "strbuf_wrap" for the strbut API.

Signed-off-by: William Duclot <william.duclot@ensimag.grenoble-inp.fr>
Signed-off-by: Simon Rabourg <simon.rabourg@ensimag.grenoble-inp.fr>
Signed-off-by: Matthieu Moy <Matthieu.Moy@grenoble-inp.fr>
---
 pretty.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pretty.c b/pretty.c
index 87c4497..2b9e89a 100644
--- a/pretty.c
+++ b/pretty.c
@@ -904,8 +904,8 @@ static void parse_commit_message(struct format_commit_context *c)
 	c->commit_message_parsed = 1;
 }
 
-static void strbuf_wrap(struct strbuf *sb, size_t pos,
-			size_t width, size_t indent1, size_t indent2)
+static void strbuf_wrap_message(struct strbuf *sb, size_t pos,
+				size_t width, size_t indent1, size_t indent2)
 {
 	struct strbuf tmp = STRBUF_INIT;
 
@@ -926,7 +926,8 @@ static void rewrap_message_tail(struct strbuf *sb,
 	    c->indent2 == new_indent2)
 		return;
 	if (c->wrap_start < sb->len)
-		strbuf_wrap(sb, c->wrap_start, c->width, c->indent1, c->indent2);
+		strbuf_wrap_message(sb, c->wrap_start, c->width, c->indent1,
+				    c->indent2);
 	c->wrap_start = sb->len;
 	c->width = new_width;
 	c->indent1 = new_indent1;
-- 
2.9.0.rc1.1.geac644e

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 15:13 [PATCH V2 0/3] strbuf: improve API William Duclot
  2016-06-06 15:13 ` [PATCH V2 1/3] strbuf: add tests William Duclot
  2016-06-06 15:13 ` [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function William Duclot
@ 2016-06-06 15:13 ` William Duclot
  2016-06-06 16:17   ` Matthieu Moy
  2016-06-06 17:19   ` Junio C Hamano
  2 siblings, 2 replies; 32+ messages in thread
From: William Duclot @ 2016-06-06 15:13 UTC (permalink / raw)
  To: git
  Cc: antoine.queru, francois.beutin, mhagger, Johannes.Schindelin,
	peff, mh, gitster

When working with strbufs (usually for dates or paths), the
malloc()/free() overhead could be easily avoided: as a sensible initial
buffer size is already known, it could be allocated on the stack. This
could avoid workarounds such as

    void f()
    {
        static struct strbuf path;
        strbuf_add(&path, ...);
        ...
        strbuf_setlen(&path, 0);
    }

which, by using a static variable, avoid most of the malloc()/free()
overhead, but makes the function unsafe to use recursively or from
multiple threads.

THE IDEA
--------

The idea here is to enhance strbuf to allow it to use memory that it
doesn't own (for example, stack-allocated memory), while (optionally)
allowing it to switch over to using allocated memory if the string grows
past the size of the pre-allocated buffer.

API ENHANCEMENT
---------------

All functions of the API can still be reliably called without
knowledge of the initialization (normal/preallocated/fixed) with the
exception that strbuf_grow() may die() if the string tries to overflow a
fixed buffer.

The API contract is still respected:

- The API users may peek strbuf.buf in-place until they perform an
  operation that makes its size change (at which point the .buf pointer
  may point at a new piece of memory).

- The API users may strbuf_detach() to obtain a piece of memory that
  belongs to them (at which point the strbuf becomes empty), hence
  needs to be freed by the callers.

- The API users letting a strbuf go out of scope without taking
  ownership of the string with strbuf_detach() _must_ release the
  resource by calling strbuf_release().

Full credit to Michael Haggerty for the idea and some of the wording of
this commit (http://mid.gmane.org/53512DB6.1070600@alum.mit.edu). The
implementation and bugs are all mine.

Signed-off by: William Duclot <william.duclot@ensimag.grenoble-inp.fr>
Signed-off by: Simon Rabourg <simon.rabourg@ensimag.grenoble-inp.fr>
Signed-off by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
---
Changes since V1:
    * Refactoring: use a `strbuf_wrap_internal()` static function
    * Use bit fields for flags
    * Completing documentation & commit message
    * Rename `strbuf_wrap_preallocated()` to `strbuf_wrap()`
    * Introduce two initialization macros for wrapping

 strbuf.c               |  82 ++++++++++++++++++++++++++-----
 strbuf.h               |  44 ++++++++++++++++-
 t/helper/test-strbuf.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++-
 t/t0082-strbuf.sh      |  29 +++++++++++
 4 files changed, 270 insertions(+), 15 deletions(-)

diff --git a/strbuf.c b/strbuf.c
index 1ba600b..689469e 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -1,6 +1,9 @@
 #include "cache.h"
 #include "refs.h"
 #include "utf8.h"

+#define MAX_ALLOC(a, b) (((a)>(b))?(a):(b))
 
 int starts_with(const char *str, const char *prefix)
 {
@@ -20,16 +23,47 @@ char strbuf_slopbuf[1];
 
 void strbuf_init(struct strbuf *sb, size_t hint)
 {
+	sb->owns_memory = 0;
+	sb->fixed_memory = 0;
 	sb->alloc = sb->len = 0;
 	sb->buf = strbuf_slopbuf;
 	if (hint)
 		strbuf_grow(sb, hint);
 }
 
+static void strbuf_wrap_internal(struct strbuf *sb, char *buf,
+				 size_t buf_len, size_t alloc_len)
+{
+	if (!buf)
+		die("the buffer of a strbuf cannot be NULL");
+
+	strbuf_release(sb);
+	sb->len = buf_len;
+	sb->alloc = alloc_len;
+	sb->buf = buf;
+}
+
+void strbuf_wrap(struct strbuf *sb, char *buf,
+		 size_t buf_len, size_t alloc_len)
+{
+	strbuf_wrap_internal(sb, buf, buf_len, alloc_len);
+	sb->owns_memory = 0;
+	sb->fixed_memory = 0;
+}
+
+void strbuf_wrap_fixed(struct strbuf *sb, char *buf,
+		       size_t buf_len, size_t alloc_len)
+{
+	strbuf_wrap_internal(sb, buf, buf_len, alloc_len);
+	sb->owns_memory = 0;
+	sb->fixed_memory = 1;
+}
+
 void strbuf_release(struct strbuf *sb)
 {
 	if (sb->alloc) {
-		free(sb->buf);
+		if (sb->owns_memory)
+			free(sb->buf);
 		strbuf_init(sb, 0);
 	}
 }
@@ -37,8 +71,13 @@ void strbuf_release(struct strbuf *sb)
 char *strbuf_detach(struct strbuf *sb, size_t *sz)
 {
 	char *res;
+
 	strbuf_grow(sb, 0);
-	res = sb->buf;
+	if (sb->owns_memory)
+		res = sb->buf;
+	else
+		res = xmemdupz(sb->buf, sb->len);
+
 	if (sz)
 		*sz = sb->len;
 	strbuf_init(sb, 0);
@@ -47,25 +86,44 @@ char *strbuf_detach(struct strbuf *sb, size_t *sz)
 
 void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
 {
-	strbuf_release(sb);
-	sb->buf   = buf;
-	sb->len   = len;
-	sb->alloc = alloc;
+	strbuf_wrap_internal(sb, buf, len, alloc);
+	sb->owns_memory = 1;
+	sb->fixed_memory = 0;
 	strbuf_grow(sb, 0);
 	sb->buf[sb->len] = '\0';
 }
 
 void strbuf_grow(struct strbuf *sb, size_t extra)
 {
-	int new_buf = !sb->alloc;
 	if (unsigned_add_overflows(extra, 1) ||
 	    unsigned_add_overflows(sb->len, extra + 1))
 		die("you want to use way too much memory");
-	if (new_buf)
-		sb->buf = NULL;
-	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
-	if (new_buf)
-		sb->buf[0] = '\0';
+	if ((sb->fixed_memory) &&
+	    sb->len + extra + 1 > sb->alloc)
+		die("you try to overflow the buffer of a fixed strbuf");
+
+	/*
+	 * ALLOC_GROW may do a realloc() if needed, so we must not use it on
+	 * a buffer the strbuf doesn't own
+	 */
+	if (sb->owns_memory) {
+		ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+	} else {
+		/*
+		 * The strbuf doesn't own the buffer: to avoid to realloc it,
+		 * the strbuf needs to use a new buffer without freeing the old
+		 */
+		if (sb->len + extra + 1 > sb->alloc) {
+			size_t new_alloc =
+				MAX_ALLOC(sb->len + extra + 1,
+					  alloc_nr(sb->alloc));
+			char *new_buf = xmalloc(new_alloc);
+			memcpy(new_buf, sb->buf, sb->len + 1);
+			sb->buf = new_buf;
+			sb->alloc = new_alloc;
+			sb->owns_memory = 1;
+		}
+	}
 }
 
 void strbuf_trim(struct strbuf *sb)
diff --git a/strbuf.h b/strbuf.h
index 7987405..57b5cd1 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -11,11 +11,16 @@
  * A strbuf is NUL terminated for convenience, but no function in the
  * strbuf API actually relies on the string being free of NULs.
  *
+ * You can avoid the malloc/free overhead of `strbuf_init()`, `strbuf_add()` and
+ * `strbuf_release()` by wrapping pre-allocated memory (stack-allocated for
+ * example) using `strbuf_wrap()` or `strbuf_wrap_fixed()`.
+ *
  * strbufs have some invariants that are very important to keep in mind:
  *
  *  - The `buf` member is never NULL, so it can be used in any usual C
  *    string operations safely. strbuf's _have_ to be initialized either by
- *    `strbuf_init()` or by `= STRBUF_INIT` before the invariants, though.
+ *    `strbuf_init()`, `= STRBUF_INIT`, `strbuf_wrap()` or `strbuf_wrap_fixed()`
+ *    before the invariants, though.
  *
  *    Do *not* assume anything on what `buf` really is (e.g. if it is
  *    allocated memory or not), use `strbuf_detach()` to unwrap a memory
@@ -45,12 +50,16 @@
  *    Doing so is safe, though if it has to be done in many places, adding the
  *    missing API to the strbuf module is the way to go.
  *
+ *    NOTE: strbuf_release can always be safely called, whether the strbuf is
+ *    wrapping a preallocated buffer or not.
+ *
  *    WARNING: Do _not_ assume that the area that is yours is of size `alloc
  *    - 1` even if it's true in the current implementation. Alloc is somehow a
  *    "private" member that should not be messed with. Use `strbuf_avail()`
  *    instead.
 */
 
+
 /**
  * Data Structures
  * ---------------
@@ -62,13 +71,23 @@
  * access to the string itself.
  */
 struct strbuf {
+	/*
+	 * The `owns_memory` flag is unset by default as strbuf_slopbuf is
+	 * shared by all buffers, thus owned by none.
+	 * The `fixed_memory` guarantees that the buffer will not be moved by
+	 * realloc().
+	 */
+	unsigned owns_memory:1, fixed_memory:1;
 	size_t alloc;
 	size_t len;
 	char *buf;
 };
 
 extern char strbuf_slopbuf[];
-#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+#define STRBUF_INIT  { 0, 0, 0, 0, strbuf_slopbuf }
+#define STRBUF_WRAP(buf, buf_len, alloc_len) { 0, 0, alloc_len, buf_len, buf }
+#define STRBUF_WRAP_FIXED(buf, buf_len, alloc_len) \
+	{ 0, 1, alloc_len, buf_len, buf }
 
 /**
  * Life Cycle Functions
@@ -81,6 +100,25 @@ extern char strbuf_slopbuf[];
  */
 extern void strbuf_init(struct strbuf *, size_t);
 
+/**
+ * Allow the caller to give a pre-allocated piece of memory for the strbuf
+ * to use. It is possible then to strbuf_grow() the string past the size of the
+ * pre-allocated buffer: a new buffer will then be allocated. The pre-allocated
+ * buffer will never be freed.
+ */
+void strbuf_wrap(struct strbuf *sb, char *buf,
+		 size_t buf_len, size_t alloc_len);
+
+/**
+ * Allow the caller to give a pre-allocated piece of memory for the strbuf
+ * to use and indicate that the strbuf must use exclusively this buffer,
+ * never realloc() it or allocate a new one. It means that the string can
+ * grow or shrink but cannot overflow the pre-allocated buffer. The
+ * pre-allocated buffer will never be freed.
+ */
+void strbuf_wrap_fixed(struct strbuf *sb, char *buf,
+		       size_t buf_len, size_t alloc_len);
+
 /**
  * Release a string buffer and the memory it used. You should not use the
  * string buffer after using this function, unless you initialize it again.
@@ -91,6 +129,8 @@ extern void strbuf_release(struct strbuf *);
  * Detach the string from the strbuf and returns it; you now own the
  * storage the string occupies and it is your responsibility from then on
  * to release it with `free(3)` when you are done with it.
+ * Must allocate a copy of the buffer in case of a wrapped buffer.
+ * Performance-critical operations have to be aware of this.
  */
 extern char *strbuf_detach(struct strbuf *, size_t *);
 
diff --git a/t/helper/test-strbuf.c b/t/helper/test-strbuf.c
index 271592e..87628d9 100644
--- a/t/helper/test-strbuf.c
+++ b/t/helper/test-strbuf.c
@@ -55,6 +55,75 @@ static void grow_overflow(struct strbuf *sb)
 	strbuf_grow(sb, maximum_unsigned_value_of_type((size_t)1));
 }
 
+static void preallocated_NULL(struct strbuf *sb)
+{
+	char str_test[5] = "test";
+
+	strbuf_wrap(sb, NULL, 0, sizeof(str_test));
+}
+
+static void grow_fixed_overflow(struct strbuf *sb)
+{
+	char str_foo[7] = "foo";
+
+	strbuf_wrap_fixed(sb, (void *)str_foo,
+			  strlen(str_foo), sizeof(str_foo));
+	strbuf_grow(sb, 3);
+	strbuf_grow(sb, 1000);
+}
+
+static void grow_fixed_overflow_min(struct strbuf *sb)
+{
+	char str_foo[7] = "foo";
+
+	strbuf_wrap_fixed(sb, (void *)str_foo,
+			  strlen(str_foo), sizeof(str_foo));
+	strbuf_grow(sb, 4);
+}
+
+static int grow_fixed_success(struct strbuf *sb)
+{
+	char str_foo[7] = "foo";
+
+	strbuf_wrap_fixed(sb, (void *)str_foo,
+			  strlen(str_foo), sizeof(str_foo));
+	strbuf_grow(sb, 3);
+
+	return 0;
+}
+
+static int detach_fixed(struct strbuf *sb)
+{
+	size_t size = 1;
+	char str_test[5] = "test";
+	char *buf;
+
+	strbuf_wrap_fixed(sb, (void *)str_test,
+			  strlen(str_test), sizeof(str_test));
+	buf = strbuf_detach(sb, &size);
+	if (size != strlen(str_test))
+		die ("size is not as expected");
+
+	if (str_test == buf)
+		die("strbuf_detach does not copy the buffer");
+	free(buf);
+
+	return 0;
+}
+
+static int release_fixed(struct strbuf *sb)
+{
+	char str_test[5] = "test";
+
+	strbuf_wrap_fixed(sb, (void *)str_test, strlen(str_test),
+			  sizeof(sb) + 1);
+	strbuf_release(sb);
+	if (sb->buf != strbuf_slopbuf)
+		die("strbuf_release does not reinitialize the strbuf");
+
+	return 0;
+}
+
 int main(int argc, const char *argv[])
 {
 	struct strbuf sb;
@@ -62,7 +131,14 @@ int main(int argc, const char *argv[])
 		MODE_UNSPECIFIED = 0,
 		MODE_BASIC_GROW ,
 		MODE_STRBUF_CHECK,
-		MODE_GROW_OVERFLOW
+		MODE_GROW_OVERFLOW,
+		MODE_PREALLOC_CHECK,
+		MODE_PREALLOC_NULL,
+		MODE_GROW_FIXED_OVERFLOW,
+		MODE_GROW_FIXED_OVERFLOW_MIN,
+		MODE_GROW_FIXED_SUCCESS,
+		MODE_DETACH_FIXED,
+		MODE_RELEASE_FIXED
 	} cmdmode = MODE_UNSPECIFIED;
 	struct option options[] = {
 		OPT_CMDMODE(0, "basic_grow", &cmdmode,
@@ -74,6 +150,27 @@ int main(int argc, const char *argv[])
 		OPT_CMDMODE(0, "grow_overflow", &cmdmode,
 			    N_("test grow expecting overflow"),
 			    MODE_GROW_OVERFLOW),
+		OPT_CMDMODE(0, "preallocated_check_behavior", &cmdmode,
+			    N_("check the wrap's behavior"),
+			    MODE_PREALLOC_CHECK),
+		OPT_CMDMODE(0, "preallocated_NULL", &cmdmode,
+			    N_("initializing wrap with NULL"),
+			    MODE_PREALLOC_NULL),
+		OPT_CMDMODE(0, "grow_fixed_overflow", &cmdmode,
+			    N_("check grow, fixed memory expecting overflow"),
+			    MODE_GROW_FIXED_OVERFLOW),
+		OPT_CMDMODE(0, "grow_fixed_overflow_min", &cmdmode,
+			    N_("check grow, fixed memory and lower_bound for an overflow"),
+			    MODE_GROW_FIXED_OVERFLOW_MIN),
+		OPT_CMDMODE(0, "grow_fixed_success", &cmdmode,
+			    N_("check grow, fixed memory"),
+			    MODE_GROW_FIXED_SUCCESS),
+		OPT_CMDMODE(0, "detach_fixed", &cmdmode,
+			    N_("check detach, fixed memory"),
+			    MODE_DETACH_FIXED),
+		OPT_CMDMODE(0, "release_fixed", &cmdmode,
+			    N_("check release, fixed memory"),
+			    MODE_RELEASE_FIXED),
 		OPT_END()
 	};
 
@@ -93,6 +190,37 @@ int main(int argc, const char *argv[])
 		 * If this does not die(), fall through to returning success.
 		 */
 		grow_overflow(&sb);
+	} else if (cmdmode == MODE_PREALLOC_CHECK) {
+		char str_test[5] = "test";
+
+		strbuf_wrap(&sb, (void *)str_test,
+			    strlen(str_test), sizeof(str_test));
+		return strbuf_check_behavior(&sb);
+	} else if (cmdmode == MODE_PREALLOC_NULL) {
+		 /*
+		 * Violation of invariant "strbuf can't be NULL": should die().
+		 * If this does not die(), fall through to returning success.
+		 */
+		preallocated_NULL(&sb);
+	} else if (cmdmode == MODE_GROW_FIXED_OVERFLOW) {
+		/*
+		 * Overflowing the buffer of a fixed strbuf: should die().
+		 * If this does not die(), fall through to returning success.
+		 */
+		grow_fixed_overflow(&sb);
+	} else if (cmdmode == MODE_GROW_FIXED_OVERFLOW_MIN) {
+		/*
+		 * Minimum strbuf_grow() for overflowing a fixed strbuf: should
+		 * die().
+		 * If this does not die(), fall through to returning success.
+		 */
+		grow_fixed_overflow_min(&sb);
+	} else if (cmdmode == MODE_GROW_FIXED_SUCCESS) {
+		return grow_fixed_success(&sb);
+	} else if (cmdmode == MODE_DETACH_FIXED) {
+		return detach_fixed(&sb);
+	} else if (cmdmode == MODE_RELEASE_FIXED) {
+		return release_fixed(&sb);
 	} else {
 		usage("test-strbuf mode");
 	}
diff --git a/t/t0082-strbuf.sh b/t/t0082-strbuf.sh
index 6a579a3..6bea747 100755
--- a/t/t0082-strbuf.sh
+++ b/t/t0082-strbuf.sh
@@ -16,4 +16,33 @@ test_expect_success 'overflow while calling strbuf_grow' '
 	test_must_fail test-strbuf --grow_overflow
 '
 
+test_expect_success 'check preallocated strbuf behavior in usual use cases' '
+
+	test-strbuf --preallocated_check_behavior
+'
+
+test_expect_success 'strbuf_wrap_preallocated NULL initialization' '
+	test_must_fail test-strbuf --preallocated_NULL
+'
+
+test_expect_success 'strbuf_grow with wrap_fixed overflow' '
+	test_must_fail test-strbuf --grow_fixed_overflow
+'
+
+test_expect_success 'strbuf_grow with wrap_fixed minimum overflow' '
+	test_must_fail test-strbuf --grow_fixed_overflow_min
+'
+
+test_expect_success 'strbuf_grow with wrap_fixed in a successful case' '
+	test-strbuf --grow_fixed_success
+'
+
+test_expect_success 'stbuf_detach with wrap_fixed memory' '
+	test-strbuf --detach_fixed
+'
+
+test_expect_success 'stbuf_release with wrap_fixed memory' '
+	test-strbuf --release_fixed
+'
+
 test_done
-- 
2.9.0.rc1.1.geac644e

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 1/3] strbuf: add tests
  2016-06-06 15:13 ` [PATCH V2 1/3] strbuf: add tests William Duclot
@ 2016-06-06 16:11   ` Matthieu Moy
  2016-06-07  8:44   ` Johannes Schindelin
  1 sibling, 0 replies; 32+ messages in thread
From: Matthieu Moy @ 2016-06-06 16:11 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, peff, mh, gitster, Simon Rabourg

William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:

> --- a/Makefile
> +++ b/Makefile
> @@ -613,6 +613,7 @@ TEST_PROGRAMS_NEED_X += test-scrap-cache-tree
>  TEST_PROGRAMS_NEED_X += test-sha1
>  TEST_PROGRAMS_NEED_X += test-sha1-array
>  TEST_PROGRAMS_NEED_X += test-sigchain
> +TEST_PROGRAMS_NEED_X += test-strbuf
>  TEST_PROGRAMS_NEED_X += test-string-list
>  TEST_PROGRAMS_NEED_X += test-submodule-config
>  TEST_PROGRAMS_NEED_X += test-subprocess
> diff --git a/t/helper/test-strbuf.c b/t/helper/test-strbuf.c
> new file mode 100644
> index 0000000..271592e
> --- /dev/null
> +++ b/t/helper/test-strbuf.c
> @@ -0,0 +1,101 @@
> +#include "git-compat-util.h"
> +#include "strbuf.h"
> +#include "parse-options.h"
> +#include "builtin.h"
> +
> +/*
> + * Check behavior on usual use cases
> + */
> +static int strbuf_check_behavior(struct strbuf *sb)
> +{
> +	char *str_test = xstrdup("test"), *res, *old_buf;
> +	size_t size, old_alloc;
> +
> +	strbuf_grow(sb, 1);
> +	old_alloc = sb->alloc;
> +	strbuf_grow(sb, sb->alloc - sb->len + 1000);
> +	if (old_alloc == sb->alloc)
> +		die("strbuf_grow does not realloc the buffer as expected");
> +	old_buf = sb->buf;
> +	res = strbuf_detach(sb, &size);
> +	if (res != old_buf)
> +		die("strbuf_detach does not return the expected buffer");
> +	free(res);
> +
> +	str_test = xstrdup("test");
> +	strbuf_attach(sb, str_test, strlen(str_test), strlen(str_test) + 1);
> +	res = strbuf_detach(sb, &size);
> +	if (size != strlen(str_test)) 
> +		die ("size is not as expected");

Space before '('. Also, it's nice for the guy debugging that to say
"incorrect size. Expected %d, got %d" or so.

> diff --git a/t/t0082-strbuf.sh b/t/t0082-strbuf.sh
> new file mode 100755
> index 0000000..6a579a3
> --- /dev/null
> +++ b/t/t0082-strbuf.sh
> @@ -0,0 +1,19 @@
> +#!/bin/sh
> +
> +test_description='Test the strbuf API.'

The '.' is usually omitted.

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function
  2016-06-06 15:13 ` [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function William Duclot
@ 2016-06-06 16:12   ` Matthieu Moy
  2016-06-07  9:04   ` Johannes Schindelin
  1 sibling, 0 replies; 32+ messages in thread
From: Matthieu Moy @ 2016-06-06 16:12 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, peff, mh, gitster, Simon Rabourg

William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:

> The function strbuf_wrap() is not part of the strbuf API, yet prevent to
> extend the API to include wrapping functions. Renaming it to something
> more specific allow to use "strbuf_wrap" for the strbut API.

s/strbut/strbuf/

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 15:13 ` [PATCH V2 3/3] strbuf: allow to use preallocated memory William Duclot
@ 2016-06-06 16:17   ` Matthieu Moy
  2016-06-06 17:19   ` Junio C Hamano
  1 sibling, 0 replies; 32+ messages in thread
From: Matthieu Moy @ 2016-06-06 16:17 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, peff, mh, gitster

I'm waiting for the discussion "is this useful" to settle before I do a
final review, but I went quickly through the code and it seems OK.

Just to show I read till the end:

William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:

> +test_expect_success 'check preallocated strbuf behavior in usual use cases' '
> +
> +	test-strbuf --preallocated_check_behavior

Useless blank line.

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 15:13 ` [PATCH V2 3/3] strbuf: allow to use preallocated memory William Duclot
  2016-06-06 16:17   ` Matthieu Moy
@ 2016-06-06 17:19   ` Junio C Hamano
  2016-06-06 20:39     ` William Duclot
  1 sibling, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2016-06-06 17:19 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, peff, mh

William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:

> +#define MAX_ALLOC(a, b) (((a)>(b))?(a):(b))

I do not see why this macro is called MAX_ALLOC(); is there anything
"alloc" specific to what this does?  You may happen to use it only
for "alloc" related things, but that is not a good reason for such a
name (if the implementation can only be used for "alloc" specific
purpose for some reason, then the name is appropriate).

> @@ -20,16 +23,47 @@ char strbuf_slopbuf[1];
>  
>  void strbuf_init(struct strbuf *sb, size_t hint)
>  {
> +	sb->owns_memory = 0;
> +	sb->fixed_memory = 0;
>  	sb->alloc = sb->len = 0;
>  	sb->buf = strbuf_slopbuf;
>  	if (hint)
>  		strbuf_grow(sb, hint);
>  }

I'll complain about _grow() again later, but if the implementation
is updated to address that complaint, slopbuf-using strbuf could
start it as a special case of "starts as fixed_memory".

> +static void strbuf_wrap_internal(struct strbuf *sb, char *buf,
> +				 size_t buf_len, size_t alloc_len)
> +{
> +	if (!buf)
> +		die("the buffer of a strbuf cannot be NULL");
> +
> +	strbuf_release(sb);
> +	sb->len = buf_len;
> +	sb->alloc = alloc_len;
> +	sb->buf = buf;
> +}
> +
> +void strbuf_wrap(struct strbuf *sb, char *buf,
> +		 size_t buf_len, size_t alloc_len)
> +{
> +	strbuf_wrap_internal(sb, buf, buf_len, alloc_len);
> +	sb->owns_memory = 0;
> +	sb->fixed_memory = 0;
> +}
>
> +void strbuf_wrap_fixed(struct strbuf *sb, char *buf,
> +		       size_t buf_len, size_t alloc_len)
> +{
> +	strbuf_wrap_internal(sb, buf, buf_len, alloc_len);
> +	sb->owns_memory = 0;
> +	sb->fixed_memory = 1;
> +}

After seeing 2/3, I would say that the pretty side deserves the use
of word "wrap" more.  What the above two does is an enhancement of
what strbuf API has called "attach".  You are introducing a few new
and different ways to attach a piece of memory to a strbuf.

> @@ -37,8 +71,13 @@ void strbuf_release(struct strbuf *sb)
>  char *strbuf_detach(struct strbuf *sb, size_t *sz)
>  {
>  	char *res;
> +
>  	strbuf_grow(sb, 0);
> -	res = sb->buf;
> +	if (sb->owns_memory)
> +		res = sb->buf;
> +	else
> +		res = xmemdupz(sb->buf, sb->len);
> +
>  	if (sz)
>  		*sz = sb->len;
>  	strbuf_init(sb, 0);

Note that the user of the API does not have to care if the memory is
held by the strbuf or if the strbuf is merely borrowing it from the
initial allocation made from elsewhere (e.g. via strbuf_wrap_*) when
finalizing its use of the strbuf API to build the contents and
utilize the resulting buffer.  Theoretically, if the caller knows it
let the piece memory _it_ owns (i.e. !owns_memory) and the piece of
memory it originally gave the strbuf hasn't been moved (i.e. your
fixed_memory, which I will complain again soon), the caller
shouldn't have to get a copy via xmemdupz(), which would both save
an allocation here and free() in the caller once it is done.

But that is not done, and as the API it is a good thing.  It frees
the caller from having to worry about _where_ the memory originally
came from.

>  void strbuf_grow(struct strbuf *sb, size_t extra)
>  {
> -	int new_buf = !sb->alloc;
>  	if (unsigned_add_overflows(extra, 1) ||
>  	    unsigned_add_overflows(sb->len, extra + 1))
>  		die("you want to use way too much memory");
> -	if (new_buf)
> -		sb->buf = NULL;
> -	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
> -	if (new_buf)
> -		sb->buf[0] = '\0';
> +	if ((sb->fixed_memory) &&
> +	    sb->len + extra + 1 > sb->alloc)
> +		die("you try to overflow the buffer of a fixed strbuf");

Having praised how _detach() frees the user of the API from having
to worry about minute details of allocation, this is an eyesore.  I
really think this "fixed_memory" support MUST GO.  This "die()" here
makes the API useless _unless_ the caller always makes sure that any
operation it performs does not overflow the originally allocated
size, which makes it no better than using xsnprintf() and friends.

A better design would be to get rid of fixed_memory bit and replace
it with "owns_memory" bit.  Its meaning would be "The ->buf pointer
points at something that is not owned by this strbuf, and must not
be freed when resizing or releasing ->buf."

And when you detect that a strbuf with unfreeable memory needs to be
extended (the above condition you call "die()" on), you use the
"strbuf doesn't own the buffer" logic below.

The only difference between your version and an improved one that
gets rid of "fixed_memory" from the API user's point of view is that
they must pre-allocate absolute maximum amount of memory expected to
be consumed with your version in order not to crash, while a "start
with fixed but dynamically allocate from heap if needed" approach
lets the API user preallocate just sufficient amount of memory
expected to be consumed in a typical workload and go without calling
malloc()/free() most of the time.  Oh, and when a rare data comes in
that exceeds your initial estimate, "fixed_memory" version will just
die.

So, as I said in the previous round, I really do not see a point in
this fixed_memory business.  It does not make the implementation of
the API any easier as far as I can tell, either.

> +	/*
> +	 * ALLOC_GROW may do a realloc() if needed, so we must not use it on
> +	 * a buffer the strbuf doesn't own
> +	 */
> +	if (sb->owns_memory) {
> +		ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
> +	} else {
> +		/*
> +		 * The strbuf doesn't own the buffer: to avoid to realloc it,
> +		 * the strbuf needs to use a new buffer without freeing the old
> +		 */
> +		if (sb->len + extra + 1 > sb->alloc) {
> +			size_t new_alloc =
> +				MAX_ALLOC(sb->len + extra + 1,
> +					  alloc_nr(sb->alloc));

It is true that the you cannot simply realloc(->buf), but otherwise
the growth pattern of the buffer is the same between ->owns_memory
case and this case.  And I think the above computes it like so, but
do you really need a one-shot macro MAX_ALLOC that is named in a
misleading way?

I'd find it far easier to read if you wrote the core of ALLOC_GROW()
logic, like this:

	size_t new_nr = sb->len + extra + 1;
	if (new_nr > sb->alloc) {
		size_t new_alloc;
		if (alloc_nr(sb->alloc) < new_nr)
                	new_alloc = new_nr;
		else
			new_alloc = alloc_nr(sb->alloc);
		... copy and make it "owned" ...
	}

A better way would be to introduce a new macro ALLOC_GROW_COUNT() to
cache.h immediately before ALLOC_GROW() is defined and use it to
redo ALLOC_GROW(), perhaps like this:

#define ALLOC_GROW_COUNT(nr, alloc) \
        ((alloc_nr(alloc) < (nr)) ? (nr) : alloc_nr(alloc))

#define ALLOC_GROW(x, nr, alloc) \
	do { \
                if ((nr) > alloc) { \
                	alloc = ALLOC_GROW_COUNT(nr, alloc); \
			REALLOC_ARRAY(x, alloc); \
		} \
	} while (0)			

Then you can do

	if (sb->len + extra + 1 > sb->alloc) {
		size_t new_alloc;
                char *new_buf;

        	new_alloc = ALLOC_GROW_COUNT(sb->len + extra + 1, sb->alloc);
		new_buf = xmalloc(new_alloc);
		memcpy(...);
		...

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 17:19   ` Junio C Hamano
@ 2016-06-06 20:39     ` William Duclot
  2016-06-06 22:44       ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: William Duclot @ 2016-06-06 20:39 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, peff, mh

On Mon, Jun 06, 2016 at 10:19:07AM -0700, Junio C Hamano wrote:
> William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:
> 
>> +#define MAX_ALLOC(a, b) (((a)>(b))?(a):(b))
> 
> I do not see why this macro is called MAX_ALLOC(); is there anything
> "alloc" specific to what this does?  You may happen to use it only
> for "alloc" related things, but that is not a good reason for such a
> name (if the implementation can only be used for "alloc" specific
> purpose for some reason, then the name is appropriate).

Absolutely right. This macro won't be needed anyway, as you pointed out
in the end of this review
 
>> @@ -20,16 +23,47 @@ char strbuf_slopbuf[1];
>>  
>>  void strbuf_init(struct strbuf *sb, size_t hint)
>>  {
>> +	sb->owns_memory = 0;
>> +	sb->fixed_memory = 0;
>>  	sb->alloc = sb->len = 0;
>>  	sb->buf = strbuf_slopbuf;
>>  	if (hint)
>>  		strbuf_grow(sb, hint);
>>  }
> 
> I'll complain about _grow() again later, but if the implementation
> is updated to address that complaint, slopbuf-using strbuf could
> start it as a special case of "starts as fixed_memory".
> 
>> +static void strbuf_wrap_internal(struct strbuf *sb, char *buf,
>> +				 size_t buf_len, size_t alloc_len)
>> +{
>> +	if (!buf)
>> +		die("the buffer of a strbuf cannot be NULL");
>> +
>> +	strbuf_release(sb);
>> +	sb->len = buf_len;
>> +	sb->alloc = alloc_len;
>> +	sb->buf = buf;
>> +}
>> +
>> +void strbuf_wrap(struct strbuf *sb, char *buf,
>> +		 size_t buf_len, size_t alloc_len)
>> +{
>> +	strbuf_wrap_internal(sb, buf, buf_len, alloc_len);
>> +	sb->owns_memory = 0;
>> +	sb->fixed_memory = 0;
>> +}
>>
>> +void strbuf_wrap_fixed(struct strbuf *sb, char *buf,
>> +		       size_t buf_len, size_t alloc_len)
>> +{
>> +	strbuf_wrap_internal(sb, buf, buf_len, alloc_len);
>> +	sb->owns_memory = 0;
>> +	sb->fixed_memory = 1;
>> +}
> 
> After seeing 2/3, I would say that the pretty side deserves the use
> of word "wrap" more.  What the above two does is an enhancement of
> what strbuf API has called "attach".  You are introducing a few new
> and different ways to attach a piece of memory to a strbuf.

The naming is still debatable indeed. If there is a consensus on using
"attach", fine with me.
That being said, I'm still hesitant on being able to initialize a strbuf
with an "attach" function, as the user shouldn't have to use
`strbuf_init()` or `= STRBUF_INIT` beforhand. The users can't use
`strbuf_attach()` with a non-initialized strbuf, is that a good idea to
allow them to use `strbuf_attach_preallocated()` on a non-initialized
buffer?

>> @@ -37,8 +71,13 @@ void strbuf_release(struct strbuf *sb)
>>  char *strbuf_detach(struct strbuf *sb, size_t *sz)
>>  {
>>  	char *res;
>> +
>>  	strbuf_grow(sb, 0);
>> -	res = sb->buf;
>> +	if (sb->owns_memory)
>> +		res = sb->buf;
>> +	else
>> +		res = xmemdupz(sb->buf, sb->len);
>> +
>>  	if (sz)
>>  		*sz = sb->len;
>>  	strbuf_init(sb, 0);
> 
> Note that the user of the API does not have to care if the memory is
> held by the strbuf or if the strbuf is merely borrowing it from the
> initial allocation made from elsewhere (e.g. via strbuf_wrap_*) when
> finalizing its use of the strbuf API to build the contents and
> utilize the resulting buffer.  Theoretically, if the caller knows it
> let the piece memory _it_ owns (i.e. !owns_memory) and the piece of
> memory it originally gave the strbuf hasn't been moved (i.e. your
> fixed_memory, which I will complain again soon), the caller
> shouldn't have to get a copy via xmemdupz(), which would both save
> an allocation here and free() in the caller once it is done.
> 
> But that is not done, and as the API it is a good thing.  It frees
> the caller from having to worry about _where_ the memory originally
> came from.
> 
>>  void strbuf_grow(struct strbuf *sb, size_t extra)
>>  {
>> -	int new_buf = !sb->alloc;
>>  	if (unsigned_add_overflows(extra, 1) ||
>>  	    unsigned_add_overflows(sb->len, extra + 1))
>>  		die("you want to use way too much memory");
>> -	if (new_buf)
>> -		sb->buf = NULL;
>> -	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
>> -	if (new_buf)
>> -		sb->buf[0] = '\0';
>> +	if ((sb->fixed_memory) &&
>> +	    sb->len + extra + 1 > sb->alloc)
>> +		die("you try to overflow the buffer of a fixed strbuf");
> 
> Having praised how _detach() frees the user of the API from having
> to worry about minute details of allocation, this is an eyesore.  I
> really think this "fixed_memory" support MUST GO.  This "die()" here
> makes the API useless _unless_ the caller always makes sure that any
> operation it performs does not overflow the originally allocated
> size, which makes it no better than using xsnprintf() and friends.

I'm not a fan of this die() either.

> A better design would be to get rid of fixed_memory bit and replace
> it with "owns_memory" bit.  Its meaning would be "The ->buf pointer
> points at something that is not owned by this strbuf, and must not
> be freed when resizing or releasing ->buf."
> 
> And when you detect that a strbuf with unfreeable memory needs to be
> extended (the above condition you call "die()" on), you use the
> "strbuf doesn't own the buffer" logic below.
> 
> The only difference between your version and an improved one that
> gets rid of "fixed_memory" from the API user's point of view is that
> they must pre-allocate absolute maximum amount of memory expected to
> be consumed with your version in order not to crash, while a "start
> with fixed but dynamically allocate from heap if needed" approach
> lets the API user preallocate just sufficient amount of memory
> expected to be consumed in a typical workload and go without calling
> malloc()/free() most of the time.  Oh, and when a rare data comes in
> that exceeds your initial estimate, "fixed_memory" version will just
> die.
> 
> So, as I said in the previous round, I really do not see a point in
> this fixed_memory business.  It does not make the implementation of
> the API any easier as far as I can tell, either.

I'm not sure to follow you. I agree that the "fixed strbuf" feature is
flawed by the presence of this `die()`. But (unless misunderstanding)
the "owns_memory" bit you talk about does exist in this patch, and allow
the exact behavior you describe.
The patch allow to use a preallocated strbuf OR a preallocated-and-fixed
strbuf. Does the "fixed" part is useful, that's very debatable.

>> +	/*
>> +	 * ALLOC_GROW may do a realloc() if needed, so we must not use it on
>> +	 * a buffer the strbuf doesn't own
>> +	 */
>> +	if (sb->owns_memory) {
>> +		ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
>> +	} else {
>> +		/*
>> +		 * The strbuf doesn't own the buffer: to avoid to realloc it,
>> +		 * the strbuf needs to use a new buffer without freeing the old
>> +		 */
>> +		if (sb->len + extra + 1 > sb->alloc) {
>> +			size_t new_alloc =
>> +				MAX_ALLOC(sb->len + extra + 1,
>> +					  alloc_nr(sb->alloc));
> 
> It is true that the you cannot simply realloc(->buf), but otherwise
> the growth pattern of the buffer is the same between ->owns_memory
> case and this case.  And I think the above computes it like so, but
> do you really need a one-shot macro MAX_ALLOC that is named in a
> misleading way?
> 
> I'd find it far easier to read if you wrote the core of ALLOC_GROW()
> logic, like this:
> 
> 	size_t new_nr = sb->len + extra + 1;
> 	if (new_nr > sb->alloc) {
> 		size_t new_alloc;
> 		if (alloc_nr(sb->alloc) < new_nr)
>                 	new_alloc = new_nr;
> 		else
> 			new_alloc = alloc_nr(sb->alloc);
> 		... copy and make it "owned" ...
> 	}
> 
> A better way would be to introduce a new macro ALLOC_GROW_COUNT() to
> cache.h immediately before ALLOC_GROW() is defined and use it to
> redo ALLOC_GROW(), perhaps like this:
> 
> #define ALLOC_GROW_COUNT(nr, alloc) \
>         ((alloc_nr(alloc) < (nr)) ? (nr) : alloc_nr(alloc))
> 
> #define ALLOC_GROW(x, nr, alloc) \
> 	do { \
>                 if ((nr) > alloc) { \
>                 	alloc = ALLOC_GROW_COUNT(nr, alloc); \
> 			REALLOC_ARRAY(x, alloc); \
> 		} \
> 	} while (0)			
> 
> Then you can do
> 
> 	if (sb->len + extra + 1 > sb->alloc) {
> 		size_t new_alloc;
>                 char *new_buf;
> 
>         	new_alloc = ALLOC_GROW_COUNT(sb->len + extra + 1, sb->alloc);
> 		new_buf = xmalloc(new_alloc);
> 		memcpy(...);
> 		...

The second way seems better to me too, I don't know why I were hesitant
to edit the ALLOC_GROW macro

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 20:39     ` William Duclot
@ 2016-06-06 22:44       ` Junio C Hamano
  2016-06-06 22:58         ` Jeff King
  0 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2016-06-06 22:44 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, peff, mh

William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:

> I'm not sure to follow you. I agree that the "fixed strbuf" feature is
> flawed by the presence of this `die()`. But (unless misunderstanding)
> the "owns_memory" bit you talk about does exist in this patch, and allow
> the exact behavior you describe.

Imagine that I know most of my input lines are shorter than 80 bytes
and definitely shorter than 128 bytes.  I may want to say:

	/* allocate initial buffer ch[128] and attach it to line */
	struct strbuf line = STRBUF_INIT_ON_STACK(128);

	while (!strbuf_getline(&line, stdin)) {
		... use contents of &line ...
	}
        strbuf_release(&line);

knowing that I won't waste too much cycles and memory from heap most
of the time.  Further imagine that one line in the input happened to
be 200 bytes long.  After processing that line, the next call to
strbuf_getline() will call strbuf_reset(&line).

I think that call should reset line.buf to the original buffer on
the stack, instead of saying "Ok, I'll ignore the original memory
not owned by us and instead keep pointing at the allocated memory",
as the allocation was done as a fallback measure.  The reason why
strbuf_reset() is different from strbuf_release() is because we
anticipate that a buffer that is reset is going to be used again
very soon and want to avoid freeing only to call another alloc
immediately.  We just set its logical length to 0 without shrinking
what has already been allocated in the heap, because there is no
good hint that tells us what the optimal size of an empty
buffer in the particular caller that is waiting to be used before
this "improve API" effort.

But that "lack of hint" no longer is true with this "imporve API"
thing.  We _know_ that the caller thinks the anticipated typical
workload should fit well within the 128 bytes buffer that it
originally gave us.  That is what I'd expect from an "initialize a
reasonably sized piece of memory on stack and use it most of the
time in strbuf" feature.

> The patch allow to use a preallocated strbuf OR a preallocated-and-fixed
> strbuf. Does the "fixed" part is useful, that's very debatable.

"Fixed and non-extendable" is useless; there is no room for
debating.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 22:44       ` Junio C Hamano
@ 2016-06-06 22:58         ` Jeff King
  2016-06-06 23:24           ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: Jeff King @ 2016-06-06 22:58 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: William Duclot, git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, mh

On Mon, Jun 06, 2016 at 03:44:07PM -0700, Junio C Hamano wrote:

> William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:
> 
> > I'm not sure to follow you. I agree that the "fixed strbuf" feature is
> > flawed by the presence of this `die()`. But (unless misunderstanding)
> > the "owns_memory" bit you talk about does exist in this patch, and allow
> > the exact behavior you describe.
> 
> Imagine that I know most of my input lines are shorter than 80 bytes
> and definitely shorter than 128 bytes.  I may want to say:
> 
> 	/* allocate initial buffer ch[128] and attach it to line */
> 	struct strbuf line = STRBUF_INIT_ON_STACK(128);
> 
> 	while (!strbuf_getline(&line, stdin)) {
> 		... use contents of &line ...
> 	}
>         strbuf_release(&line);
> 
> knowing that I won't waste too much cycles and memory from heap most
> of the time.  Further imagine that one line in the input happened to
> be 200 bytes long.  After processing that line, the next call to
> strbuf_getline() will call strbuf_reset(&line).
> 
> I think that call should reset line.buf to the original buffer on
> the stack, instead of saying "Ok, I'll ignore the original memory
> not owned by us and instead keep pointing at the allocated memory",
> as the allocation was done as a fallback measure.

I am not sure I agree. Do we think accessing the stack buffer is somehow
cheaper than the heap buffer (perhaps because of cache effects)? If so,
how much cheaper?

I think you can model reusing an already-allocated heap buffer as a
hit/miss type of scenario. A "hit" means we see a larger-than-128 line
and can avoid the allocation cost by reusing the heap buffer. A "miss"
means the line is less than 128, and we pay the cost to use the heap
instead of the stack, whatever that is.

My suspicion is that the cost of a miss is essentially zero, so the best
strategy is to optimize for as many hits as possible (once the cost of
the initial allocation has been paid, though I am still not even
convinced that is a meaningful amount, especially in a loop like this
where we can so easily reuse a heap buffer).

-Peff

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 22:58         ` Jeff King
@ 2016-06-06 23:24           ` Junio C Hamano
  2016-06-06 23:25             ` Junio C Hamano
                               ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Junio C Hamano @ 2016-06-06 23:24 UTC (permalink / raw)
  To: Jeff King
  Cc: William Duclot, git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, mh

Jeff King <peff@peff.net> writes:

>> I think that call should reset line.buf to the original buffer on
>> the stack, instead of saying "Ok, I'll ignore the original memory
>> not owned by us and instead keep pointing at the allocated memory",
>> as the allocation was done as a fallback measure.
>
> I am not sure I agree. Do we think accessing the stack buffer is somehow
> cheaper than the heap buffer (perhaps because of cache effects)? If so,
> how much cheaper?

This is not about stack vs heap or even "cheaper" (whatever your
definition of cheap is).  The principle applies equally if the
original buffer came from BSS.

Perhaps I made it clearer by using a more exaggerated example e.g. a
typical expected buffer size of 128 bytes, but the third line of
1000 line input file was an anomaly that is 200k bytes long.  I do
not want to keep that 200k bytes after finishing to process that
third line while using its initial 80 bytes for the remaining 977
lines.

By the way, William seemed to be unhappy with die(), but I actually
think having a die() in the API may not be a bad thing if the check
were about something more sensible.  For example, even if a strbuf
that can grow dynamically, capping the maximum size and say "Hey
this is a one-lne-at-a-time text interface; if we need to grow the
buffer to 10MB, there is something wrong and a producer of such an
input does not even deserve a nice error message" could be an
entirely sensible attitude.  But that does not mean an initial
allocation should be 10MB.  If the expected typical workload fits
within a lot lower bound, starting from there and allowing it to
grow up to that maximum would be the more natural thing to do.

And the problem I have with the proposed "fixed" is that it does not
allow us to do that.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 23:24           ` Junio C Hamano
@ 2016-06-06 23:25             ` Junio C Hamano
  2016-06-06 23:30             ` Jeff King
  2016-06-07  9:06             ` William Duclot
  2 siblings, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2016-06-06 23:25 UTC (permalink / raw)
  To: Jeff King
  Cc: William Duclot, git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, mh

Junio C Hamano <gitster@pobox.com> writes:

> Jeff King <peff@peff.net> writes:
>
>>> I think that call should reset line.buf to the original buffer on
>>> the stack, instead of saying "Ok, I'll ignore the original memory
>>> not owned by us and instead keep pointing at the allocated memory",
>>> as the allocation was done as a fallback measure.
>>
>> I am not sure I agree. Do we think accessing the stack buffer is somehow
>> cheaper than the heap buffer (perhaps because of cache effects)? If so,
>> how much cheaper?
>
> This is not about stack vs heap or even "cheaper" (whatever your
> definition of cheap is).  The principle applies equally if the
> original buffer came from BSS.
>
> Perhaps I made it clearer by using a more exaggerated example e.g. a

s/I made/I should have made/; sorry but I have a horrible wifi
around here today...

> typical expected buffer size of 128 bytes, but the third line of
> 1000 line input file was an anomaly that is 200k bytes long.  I do
> not want to keep that 200k bytes after finishing to process that
> third line while using its initial 80 bytes for the remaining 977
> lines.
>
> By the way, William seemed to be unhappy with die(), but I actually
> think having a die() in the API may not be a bad thing if the check
> were about something more sensible.  For example, even if a strbuf
> that can grow dynamically, capping the maximum size and say "Hey
> this is a one-lne-at-a-time text interface; if we need to grow the
> buffer to 10MB, there is something wrong and a producer of such an
> input does not even deserve a nice error message" could be an
> entirely sensible attitude.  But that does not mean an initial
> allocation should be 10MB.  If the expected typical workload fits
> within a lot lower bound, starting from there and allowing it to
> grow up to that maximum would be the more natural thing to do.
>
> And the problem I have with the proposed "fixed" is that it does not
> allow us to do that.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 23:24           ` Junio C Hamano
  2016-06-06 23:25             ` Junio C Hamano
@ 2016-06-06 23:30             ` Jeff King
  2016-06-07  9:06             ` William Duclot
  2 siblings, 0 replies; 32+ messages in thread
From: Jeff King @ 2016-06-06 23:30 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: William Duclot, git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, mh

On Mon, Jun 06, 2016 at 04:24:53PM -0700, Junio C Hamano wrote:

> This is not about stack vs heap or even "cheaper" (whatever your
> definition of cheap is).  The principle applies equally if the
> original buffer came from BSS.
> 
> Perhaps I made it clearer by using a more exaggerated example e.g. a
> typical expected buffer size of 128 bytes, but the third line of
> 1000 line input file was an anomaly that is 200k bytes long.  I do
> not want to keep that 200k bytes after finishing to process that
> third line while using its initial 80 bytes for the remaining 977
> lines.

Ah, I see. Yes, I can see that argument, though I'd counter that since
we _did_ see a 200k entry, perhaps the hint given in the code is not
actually very sensible. Having seen one big line, might we expect to see
more?

I dunno. I still feel like this whole thing is just micro-optimization
that is not even really going to be measurable outside of pathological
cases.

> By the way, William seemed to be unhappy with die(), but I actually
> think having a die() in the API may not be a bad thing if the check
> were about something more sensible.  For example, even if a strbuf
> that can grow dynamically, capping the maximum size and say "Hey
> this is a one-lne-at-a-time text interface; if we need to grow the
> buffer to 10MB, there is something wrong and a producer of such an
> input does not even deserve a nice error message" could be an
> entirely sensible attitude.  But that does not mean an initial
> allocation should be 10MB.  If the expected typical workload fits
> within a lot lower bound, starting from there and allowing it to
> grow up to that maximum would be the more natural thing to do.

Yes, I very much agree that "sensible limits" should not be the same
thing as "initial allocated sizes". It is easy to conflate the two when
using static buffers (because it lets you skip allocation entirely,
which is much simpler!), but that usually leads to "sensible limits"
being set way too low as a compromise.

-Peff

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 1/3] strbuf: add tests
  2016-06-06 15:13 ` [PATCH V2 1/3] strbuf: add tests William Duclot
  2016-06-06 16:11   ` Matthieu Moy
@ 2016-06-07  8:44   ` Johannes Schindelin
  1 sibling, 0 replies; 32+ messages in thread
From: Johannes Schindelin @ 2016-06-07  8:44 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger, peff, mh, gitster,
	Simon Rabourg, Matthieu Moy

Hi William,

On Mon, 6 Jun 2016, William Duclot wrote:

> +	enum {
> +		MODE_UNSPECIFIED = 0,
> +		MODE_BASIC_GROW ,
> +		MODE_STRBUF_CHECK,
> +		MODE_GROW_OVERFLOW
> +	} cmdmode = MODE_UNSPECIFIED;
> +	struct option options[] = {
> +		OPT_CMDMODE(0, "basic_grow", &cmdmode,
> +			    N_(" basic grow test"),
> +			    MODE_BASIC_GROW),
> +		OPT_CMDMODE(0, "strbuf_check_behavior", &cmdmode,
> +			    N_("check the strbuf's behavior"),
> +			    MODE_STRBUF_CHECK),
> +		OPT_CMDMODE(0, "grow_overflow", &cmdmode,
> +			    N_("test grow expecting overflow"),
> +			    MODE_GROW_OVERFLOW),
> +		OPT_END()

I found it to be really helpful to keep as much consistency as possible
(Git's source code is complicated enough, no need to make it even more
complicated).

In this particular instance, I would suggest to rename either
MODE_STRBUF_CHECK or strbuf_check_behavior to match the other one.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function
  2016-06-06 15:13 ` [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function William Duclot
  2016-06-06 16:12   ` Matthieu Moy
@ 2016-06-07  9:04   ` Johannes Schindelin
  1 sibling, 0 replies; 32+ messages in thread
From: Johannes Schindelin @ 2016-06-07  9:04 UTC (permalink / raw)
  To: William Duclot
  Cc: git, antoine.queru, francois.beutin, mhagger, peff, mh, gitster,
	Simon Rabourg, Matthieu Moy

Hi William,

On Mon, 6 Jun 2016, William Duclot wrote:

> diff --git a/pretty.c b/pretty.c
> index 87c4497..2b9e89a 100644
> --- a/pretty.c
> +++ b/pretty.c
> @@ -904,8 +904,8 @@ static void parse_commit_message(struct format_commit_context *c)
>  	c->commit_message_parsed = 1;
>  }
>  
> -static void strbuf_wrap(struct strbuf *sb, size_t pos,
> -			size_t width, size_t indent1, size_t indent2)
> +static void strbuf_wrap_message(struct strbuf *sb, size_t pos,
> +				size_t width, size_t indent1, size_t indent2)

A better name might be strbuf_wrap_at_column().

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-06 23:24           ` Junio C Hamano
  2016-06-06 23:25             ` Junio C Hamano
  2016-06-06 23:30             ` Jeff King
@ 2016-06-07  9:06             ` William Duclot
  2016-06-07 18:10               ` Junio C Hamano
  2016-06-08 16:20               ` Michael Haggerty
  2 siblings, 2 replies; 32+ messages in thread
From: William Duclot @ 2016-06-07  9:06 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jeff King, git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, mh

On Mon, Jun 06, 2016 at 04:24:53PM -0700, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
> 
>>> I think that call should reset line.buf to the original buffer on
>>> the stack, instead of saying "Ok, I'll ignore the original memory
>>> not owned by us and instead keep pointing at the allocated memory",
>>> as the allocation was done as a fallback measure.
>>
>> I am not sure I agree. Do we think accessing the stack buffer is somehow
>> cheaper than the heap buffer (perhaps because of cache effects)? If so,
>> how much cheaper?
> 
> This is not about stack vs heap or even "cheaper" (whatever your
> definition of cheap is).  The principle applies equally if the
> original buffer came from BSS.
> 
> Perhaps I made it clearer by using a more exaggerated example e.g. a
> typical expected buffer size of 128 bytes, but the third line of
> 1000 line input file was an anomaly that is 200k bytes long.  I do
> not want to keep that 200k bytes after finishing to process that
> third line while using its initial 80 bytes for the remaining 977
> lines.

"its initial 128 bytes", rather than "its initial 80 bytes", no?
Or else I'm lost :)
 
> By the way, William seemed to be unhappy with die(), but I actually
> think having a die() in the API may not be a bad thing if the check
> were about something more sensible.  For example, even if a strbuf
> that can grow dynamically, capping the maximum size and say "Hey
> this is a one-lne-at-a-timve text interface; if we need to grow the
> buffer to 10MB, there is something wrong and a producer of such an
> input does not even deserve a nice error message" could be an
> entirely sensible attitude.  But that does not mean an initial
> allocation should be 10MB.  If the expected typical workload fits
> within a lot lower bound, starting from there and allowing it to
> grow up to that maximum would be the more natural thing to do.
> 
> And the problem I have with the proposed "fixed" is that it does not
> allow us to do that.

The "fixed" feature was aimed to allow the users to use strbuf with
strings that they doesn't own themselves (a function parameter for
example). From Michael example in the original mail:

void f(char *path_buf, size_t path_buf_len)
{
    struct strbuf path;
    strbuf_wrap_fixed(&path, path_buf,
    strlen(path_buf),
    path_buf_len);
    ...
    /*
     * no strbuf_release() required here, but if called it
     * is a NOOP
     */
}

I don't have enough knowledge of the codebase to judge if this is
useful, you seem to think it's not.

About this capping, I have troubles to understand if this is something
you'd like to see in this patch (assuming I include your changes)? Or is
this theoretical?


To sum up:
* Rename "wrap" to "attach"
* Forget about this "fixed" feature
* Make the strbuf reuse the preallocated buffer after a reset() (and a
detach() probably?)
* Introduce a more practical macro STRBUF_INIT_ON_STACK() (maybe the
name is too technical?)
* A few code corrections

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-07  9:06             ` William Duclot
@ 2016-06-07 18:10               ` Junio C Hamano
  2016-06-08 16:20               ` Michael Haggerty
  1 sibling, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2016-06-07 18:10 UTC (permalink / raw)
  To: William Duclot
  Cc: Jeff King, git, antoine.queru, francois.beutin, mhagger,
	Johannes.Schindelin, mh

William Duclot <william.duclot@ensimag.grenoble-inp.fr> writes:

>> Perhaps I made it clearer by using a more exaggerated example e.g. a
>> typical expected buffer size of 128 bytes, but the third line of
>> 1000 line input file was an anomaly that is 200k bytes long.  I do
>> not want to keep that 200k bytes after finishing to process that
>> third line while using its initial 80 bytes for the remaining 977
>> lines.
>
> "its initial 128 bytes", rather than "its initial 80 bytes", no?

Either way would work, but 80 is closer to what I had in mind, as
the set-up of the example is "I know 99% of my input will fit within
80, but I'll allocate 128 to avoid realloc cost when there are rare
ones that bust 80.  I do not want to die only because there is an
occasional oddball that needs 200".

> The "fixed" feature was aimed to allow the users to use strbuf with
> strings that they doesn't own themselves (a function parameter for
> example). From Michael example in the original mail:
>
> void f(char *path_buf, size_t path_buf_len)
> {
>     struct strbuf path;
>     strbuf_wrap_fixed(&path, path_buf,
>     strlen(path_buf),
>     path_buf_len);
>     ...
>     /*
>      * no strbuf_release() required here, but if called it
>      * is a NOOP
>      */
> }

Think what does the "..." part would do using the "path" strbuf.

If 'f()' is meant to take the "dying is perfectly fine if the data
we have to process exceeds the area we were given even by one byte"
attitude, then the "capped to the same length as allocated" is
perfectly fine, but if 'f()' cannot afford to die() and instead has
to signal an error condition to its caller, then this function has
to check the length currently in use (i.e. path.len) and how much
more memory it can still use, before making each call to strbuf_*()
functions, no?

If we were to add "fixed" feature, we'd want to see it to help a
function like f() that cannot afford to die() and does not want to
malloc()/realloc().  I do not think what was in this series was it.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-07  9:06             ` William Duclot
  2016-06-07 18:10               ` Junio C Hamano
@ 2016-06-08 16:20               ` Michael Haggerty
  2016-06-08 18:07                 ` Junio C Hamano
  2016-06-08 19:19                 ` Jeff King
  1 sibling, 2 replies; 32+ messages in thread
From: Michael Haggerty @ 2016-06-08 16:20 UTC (permalink / raw)
  To: William Duclot, Junio C Hamano
  Cc: Jeff King, git, antoine.queru, francois.beutin, Johannes.Schindelin, mh

On 06/07/2016 11:06 AM, William Duclot wrote:
> [...]
> The "fixed" feature was aimed to allow the users to use strbuf with
> strings that they doesn't own themselves (a function parameter for
> example). From Michael example in the original mail:
> 
> void f(char *path_buf, size_t path_buf_len)
> {
>     struct strbuf path;
>     strbuf_wrap_fixed(&path, path_buf,
>     strlen(path_buf),
>     path_buf_len);
>     ...

I also thought that "fixed" strbufs would be useful in cases where you
*know* what size string you need and only want a strbuf wrapper because
it offers a lot of convenience functions. Nowadays we would do that
using something like

> static int feed_object(const unsigned char *sha1, int fd, int negative)
> {
> 	char buf[42];
> 
> 	if (negative && !has_sha1_file(sha1))
> 		return 1;
> 
> 	memcpy(buf + negative, sha1_to_hex(sha1), 40);
> 	if (negative)
> 		buf[0] = '^';
> 	buf[40 + negative] = '\n';
> 	return write_or_whine(fd, buf, 41 + negative, "send-pack: send refs");
> }

Instead, one could write

> static int feed_object(const unsigned char *sha1, int fd, int negative)
> {
> 	char buf[GIT_SHA1_HEXSZ + 2];
> 	struct strbuf line = WRAPPED_FIXED_STRBUF(buf);
> 
> 	if (negative && !has_sha1_file(sha1))
> 		return 1;
> 
> 	if (negative)
> 		strbuf_addch(&line, '^');
> 	strbuf_add(&line, sha1_to_hex(sha1), GIT_SHA1_HEXSZ);
> 	strbuf_addch(&line, '\n');
> 	return write_or_whine(fd, line.buf, line.len, "send-pack: send refs");
> }

* It's a little less manual bookkeeping, and thus less error-prone,
  than the current code.

* If somebody decides to add another character to the line but
  forgets to increase the allocation size, the code dies in testing
  rather than (a) overflowing the buffer, like the current
  code, or (b) silently becoming less performant, as if it used a
  preallocated but non-fixed strbuf.

* There's no need to strbuf_release() (which can be convenient in
  a function with multiple exit paths).

I don't know whether this particular function should be rewritten; I'm
just giving an example of the type of scenario where I think it could be
useful.

In a world without fixed strbufs, what would one use in this situation?

Michael

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-08 16:20               ` Michael Haggerty
@ 2016-06-08 18:07                 ` Junio C Hamano
  2016-06-08 19:19                 ` Jeff King
  1 sibling, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2016-06-08 18:07 UTC (permalink / raw)
  To: Michael Haggerty
  Cc: William Duclot, Jeff King, git, antoine.queru, francois.beutin,
	Johannes.Schindelin, mh

Michael Haggerty <mhagger@alum.mit.edu> writes:

> * It's a little less manual bookkeeping, and thus less error-prone,
>   than the current code.
>
> * If somebody decides to add another character to the line but
>   forgets to increase the allocation size, the code dies in testing
>   rather than (a) overflowing the buffer, like the current
>   code, or (b) silently becoming less performant, as if it used a
>   preallocated but non-fixed strbuf.

Yeah, thanks.  A relization of pretty much the same came to me after
I wrote my last message on this topic.  The above two are
attractive.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-08 16:20               ` Michael Haggerty
  2016-06-08 18:07                 ` Junio C Hamano
@ 2016-06-08 19:19                 ` Jeff King
  2016-06-08 19:42                   ` [PATCH] send-pack: use buffered I/O to talk to pack-objects Jeff King
  2016-06-08 19:48                   ` [PATCH V2 3/3] strbuf: allow to use preallocated memory Junio C Hamano
  1 sibling, 2 replies; 32+ messages in thread
From: Jeff King @ 2016-06-08 19:19 UTC (permalink / raw)
  To: Michael Haggerty
  Cc: William Duclot, Junio C Hamano, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

On Wed, Jun 08, 2016 at 06:20:41PM +0200, Michael Haggerty wrote:

> Instead, one could write
> 
> > static int feed_object(const unsigned char *sha1, int fd, int negative)
> > {
> > 	char buf[GIT_SHA1_HEXSZ + 2];
> > 	struct strbuf line = WRAPPED_FIXED_STRBUF(buf);
> > 
> > 	if (negative && !has_sha1_file(sha1))
> > 		return 1;
> > 
> > 	if (negative)
> > 		strbuf_addch(&line, '^');
> > 	strbuf_add(&line, sha1_to_hex(sha1), GIT_SHA1_HEXSZ);
> > 	strbuf_addch(&line, '\n');
> > 	return write_or_whine(fd, line.buf, line.len, "send-pack: send refs");
> > }

Hmm. I'm not sure that just replacing that with a regular heap-allocated
strbuf is so bad. It additionally gets rid of the SHA1_HEXSZ math in the
allocation.

So from your list of advantages:

> * It's a little less manual bookkeeping, and thus less error-prone,
>   than the current code.

We have this, but better.

> * If somebody decides to add another character to the line but
>   forgets to increase the allocation size, the code dies in testing
>   rather than (a) overflowing the buffer, like the current
>   code, or (b) silently becoming less performant, as if it used a
>   preallocated but non-fixed strbuf.

Instead of overflowing, it just silently works.

> * There's no need to strbuf_release() (which can be convenient in
>   a function with multiple exit paths).

Same.

The downside, obviously, is the cost of malloc/free. It may even be
noticeable here here because this really is a tight loop of strbuf
allocation (OTOH, we immediately make a syscall; how expensive is
write() compared to malloc()?).

We can hack around that by reusing the same strbuf.

Unfortunately the usual trick of:

  struct strbuf buf = STRBUF_INIT;
  for (...) {
	strbuf_reset(&buf);
	...
  }
  strbuf_release(&buf);

does not work, because we are in a sub-function. We can pass in the
buffer as scratch space, but that makes the function interface a little
uglier than it needs to be.

Likewise, we could make the strbuf static inside feed_object().  It's
not so bad here, where we know there aren't re-entrancy issues, but it's
not a very safe pattern in general (and it leaks the memory when we're
done with the function).

That made me wonder if we could repeatedly reuse a buffer attached to
the file descriptor. And indeed, isn't that what stdio is? The whole
reason this buffer exists is because we are using a direct descriptor
write. If we switched this function to use fprintf(), we'd avoid the
whole buffer question, have a fixed cap on our memory use (since we just
flush anytime the buffer is full) _and_ we'd reduce the number of
write syscalls we're making by almost a factor of 100.

> I don't know whether this particular function should be rewritten; I'm
> just giving an example of the type of scenario where I think it could be
> useful.
>
> In a world without fixed strbufs, what would one use in this situation?

I know I've done the exact opposite of what you wanted here and talked
about this specific function. But I _do_ think this is a pattern I've
seen several times, where we format into a buffer only to write() it
out. I think they may comprise a reasonable number of our buffer-using
loops.

-Peff

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-08 19:19                 ` Jeff King
@ 2016-06-08 19:42                   ` Jeff King
  2016-06-09 12:10                     ` Matthieu Moy
  2016-06-08 19:48                   ` [PATCH V2 3/3] strbuf: allow to use preallocated memory Junio C Hamano
  1 sibling, 1 reply; 32+ messages in thread
From: Jeff King @ 2016-06-08 19:42 UTC (permalink / raw)
  To: Michael Haggerty
  Cc: William Duclot, Junio C Hamano, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

On Wed, Jun 08, 2016 at 03:19:18PM -0400, Jeff King wrote:

> That made me wonder if we could repeatedly reuse a buffer attached to
> the file descriptor. And indeed, isn't that what stdio is? The whole
> reason this buffer exists is because we are using a direct descriptor
> write. If we switched this function to use fprintf(), we'd avoid the
> whole buffer question, have a fixed cap on our memory use (since we just
> flush anytime the buffer is full) _and_ we'd reduce the number of
> write syscalls we're making by almost a factor of 100.

So all of this strbuf discussion aside, I think it is worth doing
something like this for this particular case.

-- >8 --
Subject: send-pack: use buffered I/O to talk to pack-objects

We start a pack-objects process and then write all of the
positive and negative sha1s to it over a pipe. We do so by
formatting each item into a fixed-size buffer and then
writing each individually. This has two drawbacks:

  1. There's some manual computation of the buffer size,
     which is not immediately obvious is correct (though it
     is).

  2. We write() once per sha1, which means a lot more system
     calls than are necessary.

We can solve both by wrapping the pipe descriptor in a stdio
handle; this is the same technique used by upload-pack when
serving fetches.

Note that we can also simplify and improve the error
handling here. The original detected a single write error
and broke out of the loop (presumably to avoid writing the
error message over and over), but never actually acted on
seeing an error; we just fed truncated input and took
whatever pack-objects returned.

In practice, this probably didn't matter, as the likely
errors would be caused by pack-objects dying (and we'd
probably just die with SIGPIPE anyway). But we can easily
make this simpler and more robust; the stdio handle keeps an
error flag, which we can check at the end.

Signed-off-by: Jeff King <peff@peff.net>
---
 send-pack.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/send-pack.c b/send-pack.c
index 37ee04e..299d303 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -36,18 +36,15 @@ int option_parse_push_signed(const struct option *opt,
 	die("bad %s argument: %s", opt->long_name, arg);
 }
 
-static int feed_object(const unsigned char *sha1, int fd, int negative)
+static void feed_object(const unsigned char *sha1, FILE *fh, int negative)
 {
-	char buf[42];
-
 	if (negative && !has_sha1_file(sha1))
-		return 1;
+		return;
 
-	memcpy(buf + negative, sha1_to_hex(sha1), 40);
 	if (negative)
-		buf[0] = '^';
-	buf[40 + negative] = '\n';
-	return write_or_whine(fd, buf, 41 + negative, "send-pack: send refs");
+		putc('^', fh);
+	fputs(sha1_to_hex(sha1), fh);
+	putc('\n', fh);
 }
 
 /*
@@ -73,6 +70,7 @@ static int pack_objects(int fd, struct ref *refs, struct sha1_array *extra, stru
 		NULL,
 	};
 	struct child_process po = CHILD_PROCESS_INIT;
+	FILE *po_in;
 	int i;
 
 	i = 4;
@@ -97,21 +95,22 @@ static int pack_objects(int fd, struct ref *refs, struct sha1_array *extra, stru
 	 * We feed the pack-objects we just spawned with revision
 	 * parameters by writing to the pipe.
 	 */
+	po_in = xfdopen(po.in, "w");
 	for (i = 0; i < extra->nr; i++)
-		if (!feed_object(extra->sha1[i], po.in, 1))
-			break;
+		feed_object(extra->sha1[i], po_in, 1);
 
 	while (refs) {
-		if (!is_null_oid(&refs->old_oid) &&
-		    !feed_object(refs->old_oid.hash, po.in, 1))
-			break;
-		if (!is_null_oid(&refs->new_oid) &&
-		    !feed_object(refs->new_oid.hash, po.in, 0))
-			break;
+		if (!is_null_oid(&refs->old_oid))
+			feed_object(refs->old_oid.hash, po_in, 1);
+		if (!is_null_oid(&refs->new_oid))
+			feed_object(refs->new_oid.hash, po_in, 0);
 		refs = refs->next;
 	}
 
-	close(po.in);
+	fflush(po_in);
+	if (ferror(po_in))
+		die_errno("error writing to pack-objects");
+	fclose(po_in);
 
 	if (args->stateless_rpc) {
 		char *buf = xmalloc(LARGE_PACKET_MAX);
-- 
2.9.0.rc2.138.geb72a36

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-08 19:19                 ` Jeff King
  2016-06-08 19:42                   ` [PATCH] send-pack: use buffered I/O to talk to pack-objects Jeff King
@ 2016-06-08 19:48                   ` Junio C Hamano
  2016-06-08 19:52                     ` Jeff King
  1 sibling, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2016-06-08 19:48 UTC (permalink / raw)
  To: Jeff King
  Cc: Michael Haggerty, William Duclot, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

Jeff King <peff@peff.net> writes:

> That made me wonder if we could repeatedly reuse a buffer attached to
> the file descriptor. And indeed, isn't that what stdio is? The whole
> reason this buffer exists is because we are using a direct descriptor
> write. If we switched this function to use fprintf(), we'd avoid the
> whole buffer question, have a fixed cap on our memory use (since we just
> flush anytime the buffer is full) _and_ we'd reduce the number of
> write syscalls we're making by almost a factor of 100.

The primary reason why we avoid stdio in the lower level part of I/O
is that the error reporting and handling is horrible.

e.g. c.f. http://article.gmane.org/gmane.comp.version-control.git/27019

Otherwise, I'd agree with your "Why aren't we using stdio if
counting and avoiding overflow is so hard?".

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-08 19:48                   ` [PATCH V2 3/3] strbuf: allow to use preallocated memory Junio C Hamano
@ 2016-06-08 19:52                     ` Jeff King
  2016-06-08 23:05                       ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: Jeff King @ 2016-06-08 19:52 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Michael Haggerty, William Duclot, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

On Wed, Jun 08, 2016 at 12:48:36PM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > That made me wonder if we could repeatedly reuse a buffer attached to
> > the file descriptor. And indeed, isn't that what stdio is? The whole
> > reason this buffer exists is because we are using a direct descriptor
> > write. If we switched this function to use fprintf(), we'd avoid the
> > whole buffer question, have a fixed cap on our memory use (since we just
> > flush anytime the buffer is full) _and_ we'd reduce the number of
> > write syscalls we're making by almost a factor of 100.
> 
> The primary reason why we avoid stdio in the lower level part of I/O
> is that the error reporting and handling is horrible.
> 
> e.g. c.f. http://article.gmane.org/gmane.comp.version-control.git/27019
> 
> Otherwise, I'd agree with your "Why aren't we using stdio if
> counting and avoiding overflow is so hard?".

I agree it can be confusing (especially on the output side your errors
are likely deferred until the next flush). But in this particular case,
I think it's an improvement (see the patch I just sent and its
discussion of error handling).

I also think we could smooth over the rough edges by wrapping the
complexity (we already have fprintf_or_die, which arguably could be used
in this case, but I went with the solution that stayed closer to what
the original code was doing). And if stdio is truly too horrible, I'd
suggest we implement our own buffered I/O. Because that's effectively
what such call-sites are doing, but in a really ad-hoc and
non-performant way.

-Peff

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V2 3/3] strbuf: allow to use preallocated memory
  2016-06-08 19:52                     ` Jeff King
@ 2016-06-08 23:05                       ` Junio C Hamano
  0 siblings, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2016-06-08 23:05 UTC (permalink / raw)
  To: Jeff King
  Cc: Michael Haggerty, William Duclot, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

Jeff King <peff@peff.net> writes:

> I agree it can be confusing (especially on the output side your errors
> are likely deferred until the next flush). But in this particular case,
> I think it's an improvement (see the patch I just sent and its
> discussion of error handling).

Yes, for this one, the lifetime of the file descriptor is contained
within a single function and it is straight-forward to wrap it
correctly inside a FILE * via fdopen() and do the error check at the
end, so I think the patch is an improvement.

Will queue.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-08 19:42                   ` [PATCH] send-pack: use buffered I/O to talk to pack-objects Jeff King
@ 2016-06-09 12:10                     ` Matthieu Moy
  2016-06-09 14:34                       ` Ramsay Jones
  2016-06-09 16:40                       ` Junio C Hamano
  0 siblings, 2 replies; 32+ messages in thread
From: Matthieu Moy @ 2016-06-09 12:10 UTC (permalink / raw)
  To: Jeff King
  Cc: Michael Haggerty, William Duclot, Junio C Hamano, git,
	antoine.queru, francois.beutin, Johannes.Schindelin, mh

Jeff King <peff@peff.net> writes:

> --- a/send-pack.c
> +++ b/send-pack.c
> @@ -36,18 +36,15 @@ int option_parse_push_signed(const struct option *opt,
>  	die("bad %s argument: %s", opt->long_name, arg);
>  }
>  
> -static int feed_object(const unsigned char *sha1, int fd, int negative)
> +static void feed_object(const unsigned char *sha1, FILE *fh, int negative)
>  {
> -	char buf[42];
> -
>  	if (negative && !has_sha1_file(sha1))
> -		return 1;
> +		return;
[...]
> @@ -97,21 +95,22 @@ static int pack_objects(int fd, struct ref *refs, struct sha1_array *extra, stru
[...]
>  	for (i = 0; i < extra->nr; i++)
> -		if (!feed_object(extra->sha1[i], po.in, 1))
> -			break;
> +		feed_object(extra->sha1[i], po_in, 1);

I may have missed the obvious, but doesn't this change the behavior when
"negative && !has_sha1_file(sha1)" happens? I understand that you don't
need write_or_whine anymore, but don't understand how you get rid of the
"return 1" here.

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-09 12:10                     ` Matthieu Moy
@ 2016-06-09 14:34                       ` Ramsay Jones
  2016-06-09 17:12                         ` Jeff King
  2016-06-09 16:40                       ` Junio C Hamano
  1 sibling, 1 reply; 32+ messages in thread
From: Ramsay Jones @ 2016-06-09 14:34 UTC (permalink / raw)
  To: Matthieu Moy, Jeff King
  Cc: Michael Haggerty, William Duclot, Junio C Hamano, git,
	antoine.queru, francois.beutin, Johannes.Schindelin, mh



On 09/06/16 13:10, Matthieu Moy wrote:
> Jeff King <peff@peff.net> writes:
> 
>> --- a/send-pack.c
>> +++ b/send-pack.c
>> @@ -36,18 +36,15 @@ int option_parse_push_signed(const struct option *opt,
>>  	die("bad %s argument: %s", opt->long_name, arg);
>>  }
>>  
>> -static int feed_object(const unsigned char *sha1, int fd, int negative)
>> +static void feed_object(const unsigned char *sha1, FILE *fh, int negative)
>>  {
>> -	char buf[42];
>> -
>>  	if (negative && !has_sha1_file(sha1))
>> -		return 1;
>> +		return;
> [...]
>> @@ -97,21 +95,22 @@ static int pack_objects(int fd, struct ref *refs, struct sha1_array *extra, stru
> [...]
>>  	for (i = 0; i < extra->nr; i++)
>> -		if (!feed_object(extra->sha1[i], po.in, 1))
>> -			break;
>> +		feed_object(extra->sha1[i], po_in, 1);
> 
> I may have missed the obvious, but doesn't this change the behavior when
> "negative && !has_sha1_file(sha1)" happens? I understand that you don't
> need write_or_whine anymore, but don't understand how you get rid of the
> "return 1" here.

Just FYI, this patch removes the last use of write_or_whine() - should it
be removed?

ATB,
Ramsay Jones

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-09 12:10                     ` Matthieu Moy
  2016-06-09 14:34                       ` Ramsay Jones
@ 2016-06-09 16:40                       ` Junio C Hamano
  2016-06-09 17:14                         ` Jeff King
  2016-06-09 17:22                         ` Matthieu Moy
  1 sibling, 2 replies; 32+ messages in thread
From: Junio C Hamano @ 2016-06-09 16:40 UTC (permalink / raw)
  To: Matthieu Moy
  Cc: Jeff King, Michael Haggerty, William Duclot, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

Matthieu Moy <Matthieu.Moy@grenoble-inp.fr> writes:

> Jeff King <peff@peff.net> writes:
>
>> --- a/send-pack.c
>> +++ b/send-pack.c
>> @@ -36,18 +36,15 @@ int option_parse_push_signed(const struct option *opt,
>>  	die("bad %s argument: %s", opt->long_name, arg);
>>  }
>>  
>> -static int feed_object(const unsigned char *sha1, int fd, int negative)
>> +static void feed_object(const unsigned char *sha1, FILE *fh, int negative)
>>  {
>> -	char buf[42];
>> -
>>  	if (negative && !has_sha1_file(sha1))
>> -		return 1;
>> +		return;
> [...]
>> @@ -97,21 +95,22 @@ static int pack_objects(int fd, struct ref *refs, struct sha1_array *extra, stru
> [...]
>>  	for (i = 0; i < extra->nr; i++)
>> -		if (!feed_object(extra->sha1[i], po.in, 1))
>> -			break;
>> +		feed_object(extra->sha1[i], po_in, 1);
>
> I may have missed the obvious, but doesn't this change the behavior when
> "negative && !has_sha1_file(sha1)" happens? I understand that you don't
> need write_or_whine anymore, but don't understand how you get rid of the
> "return 1" here.

The original feed_object() has somewhat strange interface in that a
non-zero return from it is "Everything went alright!", and zero
means "Oops, something went wrong".  When the function actually
writes things out, it calls write_or_whine(), whose return value
also uses that (unusual) convention, which is the reason why it
behaves that way.

The "return 1" you noticed happens when the function is told not to
send history behind one object, but that object does not exist in
our repository.  This is a perfectly normal condition and the
function just ignores it and returns without feeding it to
pack-objects.  It reports to the caller "Everything went alright!".

The original caller checks for errors to break out the feeding of
the process early, with things like:

	if (!feed_object(...))
        	break;

IOW, the caller would have continued when hitting that "return 1"
codepath.

And the code with the patch, the caller continues unconditionally,
so there is no behaviour change, if I am reading the code correctly.

Thanks for carefully reading the change and pointing out hard-to-read
parts, as always.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-09 14:34                       ` Ramsay Jones
@ 2016-06-09 17:12                         ` Jeff King
  2016-06-09 22:40                           ` Ramsay Jones
  0 siblings, 1 reply; 32+ messages in thread
From: Jeff King @ 2016-06-09 17:12 UTC (permalink / raw)
  To: Ramsay Jones
  Cc: Matthieu Moy, Michael Haggerty, William Duclot, Junio C Hamano,
	git, antoine.queru, francois.beutin, Johannes.Schindelin, mh

On Thu, Jun 09, 2016 at 03:34:59PM +0100, Ramsay Jones wrote:

> Just FYI, this patch removes the last use of write_or_whine() - should it
> be removed?

That sounds reasonable. Want to do a patch on top?

-Peff

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-09 16:40                       ` Junio C Hamano
@ 2016-06-09 17:14                         ` Jeff King
  2016-06-09 17:22                         ` Matthieu Moy
  1 sibling, 0 replies; 32+ messages in thread
From: Jeff King @ 2016-06-09 17:14 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthieu Moy, Michael Haggerty, William Duclot, git,
	antoine.queru, francois.beutin, Johannes.Schindelin, mh

On Thu, Jun 09, 2016 at 09:40:42AM -0700, Junio C Hamano wrote:

> >>  	for (i = 0; i < extra->nr; i++)
> >> -		if (!feed_object(extra->sha1[i], po.in, 1))
> >> -			break;
> >> +		feed_object(extra->sha1[i], po_in, 1);
> >
> > I may have missed the obvious, but doesn't this change the behavior when
> > "negative && !has_sha1_file(sha1)" happens? I understand that you don't
> > need write_or_whine anymore, but don't understand how you get rid of the
> > "return 1" here.
> [...]
> The original caller checks for errors to break out the feeding of
> the process early, with things like:
> 
> 	if (!feed_object(...))
>         	break;
> 
> IOW, the caller would have continued when hitting that "return 1"
> codepath.
> 
> And the code with the patch, the caller continues unconditionally,
> so there is no behaviour change, if I am reading the code correctly.

Right, that's my reading as well (and IMHO another good motivation for
the patch, if it makes this all less confusing).

-Peff

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-09 16:40                       ` Junio C Hamano
  2016-06-09 17:14                         ` Jeff King
@ 2016-06-09 17:22                         ` Matthieu Moy
  1 sibling, 0 replies; 32+ messages in thread
From: Matthieu Moy @ 2016-06-09 17:22 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jeff King, Michael Haggerty, William Duclot, git, antoine.queru,
	francois.beutin, Johannes.Schindelin, mh

Junio C Hamano <gitster@pobox.com> writes:

> Matthieu Moy <Matthieu.Moy@grenoble-inp.fr> writes:
>
>> Jeff King <peff@peff.net> writes:
>>
>>> --- a/send-pack.c
>>> +++ b/send-pack.c
>>> @@ -36,18 +36,15 @@ int option_parse_push_signed(const struct option *opt,
>>>  	die("bad %s argument: %s", opt->long_name, arg);
>>>  }
>>>  
>>> -static int feed_object(const unsigned char *sha1, int fd, int negative)
>>> +static void feed_object(const unsigned char *sha1, FILE *fh, int negative)
>>>  {
>>> -	char buf[42];
>>> -
>>>  	if (negative && !has_sha1_file(sha1))
>>> -		return 1;
>>> +		return;
>> [...]
>>> @@ -97,21 +95,22 @@ static int pack_objects(int fd, struct ref *refs, struct sha1_array *extra, stru
>> [...]
>>>  	for (i = 0; i < extra->nr; i++)
>>> -		if (!feed_object(extra->sha1[i], po.in, 1))
>>> -			break;
>>> +		feed_object(extra->sha1[i], po_in, 1);
>>
>> I may have missed the obvious, but doesn't this change the behavior when
>> "negative && !has_sha1_file(sha1)" happens? I understand that you don't
>> need write_or_whine anymore, but don't understand how you get rid of the
>> "return 1" here.
>
> The original feed_object() has somewhat strange interface in that a
> non-zero return from it is "Everything went alright!", and zero
> means "Oops, something went wrong".

Indeed, this is the "obvious" I've missed. So, indeed, the new "return"
does the same thing as the old "return 1".

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] send-pack: use buffered I/O to talk to pack-objects
  2016-06-09 17:12                         ` Jeff King
@ 2016-06-09 22:40                           ` Ramsay Jones
  0 siblings, 0 replies; 32+ messages in thread
From: Ramsay Jones @ 2016-06-09 22:40 UTC (permalink / raw)
  To: Jeff King
  Cc: Matthieu Moy, Michael Haggerty, William Duclot, Junio C Hamano,
	git, antoine.queru, francois.beutin, Johannes.Schindelin, mh



On 09/06/16 18:12, Jeff King wrote:
> On Thu, Jun 09, 2016 at 03:34:59PM +0100, Ramsay Jones wrote:
> 
>> Just FYI, this patch removes the last use of write_or_whine() - should it
>> be removed?
> 
> That sounds reasonable. Want to do a patch on top?

OK, will do.

ATB,
Ramsay Jones

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2016-06-09 22:40 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-06 15:13 [PATCH V2 0/3] strbuf: improve API William Duclot
2016-06-06 15:13 ` [PATCH V2 1/3] strbuf: add tests William Duclot
2016-06-06 16:11   ` Matthieu Moy
2016-06-07  8:44   ` Johannes Schindelin
2016-06-06 15:13 ` [PATCH V2 2/3] pretty.c: rename strbuf_wrap() function William Duclot
2016-06-06 16:12   ` Matthieu Moy
2016-06-07  9:04   ` Johannes Schindelin
2016-06-06 15:13 ` [PATCH V2 3/3] strbuf: allow to use preallocated memory William Duclot
2016-06-06 16:17   ` Matthieu Moy
2016-06-06 17:19   ` Junio C Hamano
2016-06-06 20:39     ` William Duclot
2016-06-06 22:44       ` Junio C Hamano
2016-06-06 22:58         ` Jeff King
2016-06-06 23:24           ` Junio C Hamano
2016-06-06 23:25             ` Junio C Hamano
2016-06-06 23:30             ` Jeff King
2016-06-07  9:06             ` William Duclot
2016-06-07 18:10               ` Junio C Hamano
2016-06-08 16:20               ` Michael Haggerty
2016-06-08 18:07                 ` Junio C Hamano
2016-06-08 19:19                 ` Jeff King
2016-06-08 19:42                   ` [PATCH] send-pack: use buffered I/O to talk to pack-objects Jeff King
2016-06-09 12:10                     ` Matthieu Moy
2016-06-09 14:34                       ` Ramsay Jones
2016-06-09 17:12                         ` Jeff King
2016-06-09 22:40                           ` Ramsay Jones
2016-06-09 16:40                       ` Junio C Hamano
2016-06-09 17:14                         ` Jeff King
2016-06-09 17:22                         ` Matthieu Moy
2016-06-08 19:48                   ` [PATCH V2 3/3] strbuf: allow to use preallocated memory Junio C Hamano
2016-06-08 19:52                     ` Jeff King
2016-06-08 23:05                       ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.