All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC/PATCH 0/3]
@ 2011-06-22  7:33 David Barr
  2011-06-22  7:33 ` [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures David Barr
  2011-06-22  7:33 ` [RFC/PATCH 2/3] small-alloc: add allocator for small objects David Barr
  0 siblings, 2 replies; 12+ messages in thread
From: David Barr @ 2011-06-22  7:33 UTC (permalink / raw)
  To: GIT Mailing-list

This series is the early stages of adding infrastructure to git
to memory-effective but fast and scalable caching of object
graphs.
The inspiration comes from a desire to build a libfastimport and
also the outstanding FIXME in hash.c
Patches 1 and 2 are the first small components that have emerged
from my experiments with different ways of representing and
indexing graphs. Patch 3 doesn't exist at the time of composing
this summary but will be an alternate hash table implementation
the builds on the first two.

I'm putting this out for early feedback, to be incorporated into
a future complete series.

--
David Barr

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
  2011-06-22  7:33 [RFC/PATCH 0/3] David Barr
@ 2011-06-22  7:33 ` David Barr
  2011-06-22 19:42   ` Junio C Hamano
  2011-06-23 17:22   ` Junio C Hamano
  2011-06-22  7:33 ` [RFC/PATCH 2/3] small-alloc: add allocator for small objects David Barr
  1 sibling, 2 replies; 12+ messages in thread
From: David Barr @ 2011-06-22  7:33 UTC (permalink / raw)
  To: GIT Mailing-list; +Cc: David Barr

One struct to capture all types, just 4 methods:
decode_message, encode_message, sizeof_message, hash_field.

Signed-off-by: David Barr <davidbarr@google.com>
---

 This is the first in a series of small patches to introduce some higher-level
 constructs into the git toolbag.
 The motivation is to empower a libfastimport implementation that is frugal
 with memory, fast and scalable.

 This version lacks any driver or tests.

 Soon to come are:
 * an efficient allocator that provides a mapping from an integer handle to
  pointer and length
 * a hash table that is time and memory effective for very numerous entries

 protobuf.c |  193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 protobuf.h |   70 ++++++++++++++++++++++
 varint.h   |   59 ++++++++++++++++++
 3 files changed, 322 insertions(+), 0 deletions(-)
 create mode 100644 protobuf.c
 create mode 100644 protobuf.h
 create mode 100644 varint.h

diff --git a/protobuf.c b/protobuf.c
new file mode 100644
index 0000000..09223dd
--- /dev/null
+++ b/protobuf.c
@@ -0,0 +1,193 @@
+#include "git-compat-util.h"
+#include "protobuf.h"
+#include "varint.h"
+
+static int decode_binary(const char **buf, const char *end, const char **result, size_t len)
+{
+	if (end && *buf + len > end)
+		return 1;
+	*result = *buf;
+	*buf += len;
+	return 0;
+}
+
+static int encode_binary(char **buf, const char *end, const char *value, size_t len)
+{
+	if (end && *buf + len > end)
+		return 1;
+	memcpy(*buf, value, len);
+	*buf += len;
+	return 0;
+}
+
+static int decode_field(const char **buf, const char *end, struct protobuf_field *field)
+{
+	uint32_t u32;
+	uint64_t key;
+	bzero(field, sizeof(struct protobuf_field));
+	if (decode_varint(buf, end, &key))
+		return 1;
+	field->tag = key >> WT_BITS,
+	field->type = key & WT_MASK;
+	switch (field->type) {
+	case WT_VARINT:
+		if (decode_varint(buf, end, &field->val.num))
+			return 1;
+		return 0;
+	case WT_64BIT:
+		field->val.bin.len = sizeof(uint64_t);
+		break;
+	case WT_STRING:
+		if (decode_varint(buf, end, &field->val.bin.len))
+			return 1;
+		break;
+	case WT_SHA1:
+		field->val.bin.len = 20;
+		break;
+	case WT_32BIT:
+		field->val.bin.len = sizeof(uint32_t);
+		break;
+	default:
+		return 1;
+	}
+	if (decode_binary(buf, end, &field->val.bin.ptr, field->val.bin.len))
+		return 1;
+	switch (field->type) {
+	case WT_64BIT:
+		memcpy(&field->val.num, field->val.bin.ptr, field->val.bin.len);
+		break;
+	case WT_32BIT:
+		memcpy(&u32, field->val.bin.ptr, field->val.bin.len);
+		field->val.num = u32;
+	}
+	return 0;
+}
+
+int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields)
+{
+	struct protobuf_field current;
+	bzero(message, fields * sizeof(struct protobuf_field));
+	while (*buf != end) {
+		if (decode_field(buf, end, &current))
+			return 1;
+		if (current.tag < fields)
+			message[current.tag] = current;
+	}
+	return 0;
+}
+
+static size_t sizeof_field(const struct protobuf_field *field)
+{
+	size_t sizeof_key = sizeof_varint(field->tag << WT_BITS | field->type);
+	switch (field->type) {
+	case WT_VARINT:
+		if (field->val.num)
+			return sizeof_key + sizeof_varint(field->val.num);
+		break;
+	case WT_64BIT:
+		if (field->val.num)
+			return sizeof_key + sizeof(uint64_t);
+		break;
+	case WT_STRING:
+		if (field->val.bin.len && field->val.bin.ptr)
+			return sizeof_key + sizeof_varint(field->val.bin.len)
+			       + field->val.bin.len;
+		break;
+	case WT_SHA1:
+		if (field->val.bin.ptr)
+			return sizeof_key + 20;
+		break;
+	case WT_32BIT:
+		if (field->val.num)
+			return sizeof_key + sizeof(uint32_t);
+		break;
+	}
+	return 0;
+}
+
+uint64_t sizeof_message(const struct protobuf_field *message, size_t fields)
+{
+	uint64_t size = 0;
+	while (fields--)
+		size += sizeof_field(message++);
+	return size;
+}
+
+static int encode_field(char **buf, const struct protobuf_field *field, char *end)
+{
+	uint32_t u32;
+	uint64_t len;
+	const char *ptr;
+	uint64_t key = (field->tag << WT_BITS) | field->type;
+	if (!sizeof_field(field))
+		return 0;
+	if (encode_varint(buf, end, key))
+		return 1;
+	switch (field->type) {
+	case WT_VARINT:
+		if (encode_varint(buf, end, field->val.num))
+			return 1;
+		return 0;
+	case WT_64BIT:
+		ptr = (const char *)&field->val.num;
+		len = sizeof(uint64_t);
+		break;
+	case WT_STRING:
+		if (encode_varint(buf, end, field->val.bin.len))
+			return 1;
+		ptr = field->val.bin.ptr;
+		len = field->val.bin.len;
+		break;
+	case WT_SHA1:
+		ptr = field->val.bin.ptr;
+		len = 20;
+		break;
+	case WT_32BIT:
+		u32 = field->val.num;
+		ptr = (const char *)&u32;
+		len = sizeof(uint64_t);
+		break;
+	default:
+		return 1;
+	}
+	if (encode_binary(buf, end, ptr, len))
+		return 1;
+	return 0;
+}
+
+int encode_message(char **buf, char *end, const struct protobuf_field *message, size_t fields)
+{
+	while (*buf != end && fields--)
+		if (encode_field(buf, message++, end))
+			return 1;
+	return 0;
+}
+
+static uint32_t x65599(const char *s, uint64_t len)
+{
+	uint32_t r = 0;
+	while (len--)
+		r = *s++ + (r << 6) + (r << 16) - r;
+	return r;
+}
+
+uint32_t hash_field(const struct protobuf_field *field)
+{
+	uint32_t hc = 0;
+	switch (field->type) {
+	case WT_VARINT:
+	case WT_64BIT:
+		hc = (0x9e3779b97f4a7c15ull * field->val.num) >> 32;
+		break;
+	case WT_SHA1:
+		memcpy(&hc, field->val.bin.ptr, sizeof(hc));
+		break;
+	case WT_STRING:
+		hc = x65599(field->val.bin.ptr, field->val.bin.len);
+		break;
+	case WT_32BIT:
+		hc = 0x9e3779b9ul * (uint32_t)field->val.num;
+		break;
+	}
+	return hc;
+}
diff --git a/protobuf.h b/protobuf.h
new file mode 100644
index 0000000..89b70a4
--- /dev/null
+++ b/protobuf.h
@@ -0,0 +1,70 @@
+#ifndef PROTOBUF_H_
+#define PROTOBUF_H_
+
+#define WT_BITS 3
+#define WT_MASK 0x7
+
+enum wire_type {
+	WT_VARINT = 0,
+	WT_64BIT  = 1,
+	WT_STRING = 2,
+	/* Custom wire type */
+	WT_SHA1   = 3,
+	WT_32BIT  = 5
+};
+
+struct protobuf_field
+{
+	uint64_t tag : 64 - WT_BITS,
+		 type : WT_BITS;
+	union protobuf_value {
+		uint64_t num;
+		struct protobuf_binary {
+			uint64_t len;
+			const char *ptr;
+		} bin;
+	} val;
+};
+
+#define PROTOBUF_KEY(p, f, wt) do { \
+	(p)->f.tag = &((p)->f) - (struct protobuf_field*)(p); \
+	(p)->f.type = wt; \
+} while(0)
+
+#define WT_VARINT(p, f, v) do { \
+	PROTOBUF_KEY(p, f, WT_VARINT); \
+	(p)->f.val.num = v; \
+} while(0)
+
+#define WT_64BIT(p, f, v) do { \
+	PROTOBUF_KEY(p, f, WT_64BIT); \
+	(p)->f.val.num = v; \
+} while(0)
+
+#define WT_STRING(p, f, s, l) do { \
+	PROTOBUF_KEY(p, f, WT_STRING); \
+	(p)->f.val.bin.ptr = s; \
+	(p)->f.val.bin.len = l; \
+} while(0)
+
+#define WT_SHA1(p, f, v) do { \
+	PROTOBUF_KEY(p, f, WT_SHA1); \
+	(p)->f.val.bin.ptr = s; \
+	(p)->f.val.bin.len = 20; \
+} while(0)
+
+#define WT_32BIT(p, f, v) do { \
+	PROTOBUF_KEY(p, f, WT_32BIT); \
+	(p)->f.val.num = v; \
+} while(0)
+
+#define PROTOBUF_CAST(p) \
+	(struct protobuf_field*)(p), \
+	sizeof(*(p)) / sizeof(struct protobuf_field)
+
+int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields);
+uint64_t sizeof_message(const struct protobuf_field *message, size_t fields);
+int encode_message(char **buf, char *end, const struct protobuf_field *message, size_t fields);
+uint32_t hash_field(const struct protobuf_field *field);
+
+#endif
diff --git a/varint.h b/varint.h
new file mode 100644
index 0000000..48a3547
--- /dev/null
+++ b/varint.h
@@ -0,0 +1,59 @@
+#ifndef VARINT_H_
+#define VARINT_H_
+
+#define VLI_CONTINUE	0x80
+#define VLI_DIGIT_MASK	0x7f
+#define VLI_BITS_PER_DIGIT 7
+
+static int decode_varint(const char **buf, const char *end, uint64_t *result)
+{
+	uint64_t rv = 0;
+	int shift = 0;
+	const char *pos;
+	for (pos = *buf; pos != end && shift < 64; pos++) {
+		unsigned char ch = *pos;
+
+		rv |= (ch & VLI_DIGIT_MASK) << shift;
+		shift += VLI_BITS_PER_DIGIT;
+		if (ch & VLI_CONTINUE)
+			continue;
+
+		*result = rv;
+		*buf = pos + 1;
+		return 0;
+	}
+	return 1;
+}
+
+static int encode_varint(char **buf, const char *end, uint64_t value)
+{
+	char *pos;
+	for (pos = *buf; pos != end; pos++) {
+		unsigned char ch = value & VLI_DIGIT_MASK;
+
+		value >>= VLI_BITS_PER_DIGIT;
+		if (value)
+			ch |= VLI_CONTINUE;
+		*pos = ch;
+		if (ch & VLI_CONTINUE)
+			continue;
+
+		*buf = pos + 1;
+		return 0;
+	}
+	return 1;
+}
+
+static size_t sizeof_varint(uint64_t value) {
+	size_t size = 0;
+	int shift = VLI_BITS_PER_DIGIT;
+	while (shift < 64) {
+		size++;
+		if (value < (1ull << shift))
+			break;
+		shift += VLI_BITS_PER_DIGIT;
+	}
+	return size;
+}
+
+#endif
-- 
1.7.5.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC/PATCH 2/3] small-alloc: add allocator for small objects
  2011-06-22  7:33 [RFC/PATCH 0/3] David Barr
  2011-06-22  7:33 ` [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures David Barr
@ 2011-06-22  7:33 ` David Barr
  2011-06-22 20:49   ` Junio C Hamano
  2011-06-23 17:17   ` Junio C Hamano
  1 sibling, 2 replies; 12+ messages in thread
From: David Barr @ 2011-06-22  7:33 UTC (permalink / raw)
  To: GIT Mailing-list; +Cc: David Barr

This allocator assigns an integer handle to each allocation which
can be used to retrieve the pointer to the start of the allocation
and its length.
On average, the per-allocation memory overhead is twice the length
of the variable-length-encoding of the allocation size. For objects
less than 128 bytes in size, this equates to 2 bytes of overhead.

Signed-off-by: David Barr <davidbarr@google.com>
---

 This is the second in a series of patches to enable libfastimport.
 The theme of series is memory-effective, fast, scalable data structures.

 small-alloc.c |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 small-alloc.h |   18 ++++++++++++
 2 files changed, 100 insertions(+), 0 deletions(-)
 create mode 100644 small-alloc.c
 create mode 100644 small-alloc.h

diff --git a/small-alloc.c b/small-alloc.c
new file mode 100644
index 0000000..936884e
--- /dev/null
+++ b/small-alloc.c
@@ -0,0 +1,82 @@
+#include "git-compat-util.h"
+#include "cache.h"
+#include "varint.h"
+#include "small-alloc.h"
+
+static const size_t chunk_size = 2 * 1024 * 1024;
+
+void *pool_alloc(struct mem_pool *pool, size_t len, size_t *id_out)
+{
+	static size_t id = 1;
+	size_t n;
+	void *r;
+
+	if ((pool->end - pool->next_free >= len) &&
+	    (pool->len_free >= sizeof_varint(len)))
+		n = pool->nr - 1;
+	else {
+		if ((pool->end - pool->next_free < len)) {
+			size_t pool_size = chunk_size;
+			if (len >= (chunk_size/2))
+				pool_size = len;
+			pool->total_allocd += pool_size;
+			pool->next_free = malloc(pool_size);
+			pool->end = pool->next_free + pool_size;
+		}
+		pool->total_allocd += sizeof(*pool->first_id) +
+				sizeof(*pool->space) +
+				sizeof(*pool->len);
+		ALLOC_GROW(pool->first_id, pool->nr + 1, pool->f_alloc);
+		ALLOC_GROW(pool->len, pool->nr + 1, pool->l_alloc);
+		ALLOC_GROW(pool->space, pool->nr + 1, pool->s_alloc);
+		pool->first_id[pool->nr] = id;
+		pool->len_free = sizeof(*pool->len);
+		bzero(pool->len[pool->nr], sizeof(*pool->len));
+		pool->space[pool->nr] = pool->next_free;
+		n = pool->nr++;
+	}
+
+	if (id_out)
+		*id_out = id;
+	id++;
+
+	char *t = &pool->len[n][sizeof(*pool->len) - pool->len_free];
+	if (encode_varint(&t, pool->len[n] + sizeof(*pool->len), len))
+		return NULL;
+	pool->len_free = pool->len[n] + sizeof(*pool->len) - t;
+
+	r = pool->next_free;
+	pool->next_free += len;
+	return r;
+}
+
+void *pool_ptr(struct mem_pool *pool, size_t id, size_t *len_out)
+{
+	char *r;
+	const char *t;
+	uint64_t len = 0, cur;
+
+	if (!id || !pool->nr)
+		return NULL;
+
+	size_t n = pool->nr * id / pool->first_id[pool->nr - 1];
+	if (n >= pool->nr - 1)
+		n = pool->nr - 1;
+	while (n && pool->first_id[n] > id)
+		n--;
+	while (n + 1 < pool->nr && pool->first_id[n + 1] <= id)
+		n++;
+	if (pool->first_id[n] > id)
+		return NULL;
+
+	cur = pool->first_id[n];
+	for (r = pool->space[n], t = (const char*) pool->len[n];
+	     !decode_varint(&t, pool->len[n] + sizeof(*pool->len), &len);
+	     r += len, cur++)
+		if (cur == id) {
+			if (len_out)
+				*len_out = len;
+			return r;
+		}
+	return NULL;
+}
diff --git a/small-alloc.h b/small-alloc.h
new file mode 100644
index 0000000..eb77491
--- /dev/null
+++ b/small-alloc.h
@@ -0,0 +1,18 @@
+#ifndef SMALL_ALLOC_H_
+#define SMALL_ALLOC_H_
+
+struct mem_pool {
+	size_t *first_id;
+	char **space;
+	char (*len)[sizeof(size_t) + sizeof(char*)];
+	size_t f_alloc, s_alloc, l_alloc, nr;
+	char *next_free;
+	char *end;
+	int len_free;
+	size_t total_allocd;
+};
+
+void *pool_alloc(struct mem_pool *pool, size_t len, size_t *id_out);
+void *pool_ptr(struct mem_pool *pool, size_t id, size_t *len_out);
+
+#endif
-- 
1.7.5.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
  2011-06-22  7:33 ` [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures David Barr
@ 2011-06-22 19:42   ` Junio C Hamano
  2011-06-24 14:39     ` David Barr
  2011-06-24 16:04     ` David Barr
  2011-06-23 17:22   ` Junio C Hamano
  1 sibling, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-06-22 19:42 UTC (permalink / raw)
  To: David Barr; +Cc: GIT Mailing-list

David Barr <davidbarr@google.com> writes:

> One struct to capture all types, just 4 methods:
> decode_message, encode_message, sizeof_message, hash_field.
>
> Signed-off-by: David Barr <davidbarr@google.com>
> ---
>
>  This is the first in a series of small patches to introduce some higher-level
>  constructs into the git toolbag.
>  The motivation is to empower a libfastimport implementation that is frugal
>  with memory, fast and scalable.
>
>  This version lacks any driver or tests.
>
>  Soon to come are:
>  * an efficient allocator that provides a mapping from an integer handle to
>   pointer and length
>  * a hash table that is time and memory effective for very numerous entries
>
>  protobuf.c |  193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  protobuf.h |   70 ++++++++++++++++++++++

How does this relate to http://code.google.com/p/protobuf/ which has a
very similar name?  If we do not intend to have any interoperability with
it, we should avoid such a confusing name, I think.

> diff --git a/protobuf.c b/protobuf.c
> new file mode 100644
> index 0000000..09223dd
> --- /dev/null
> +++ b/protobuf.c
> @@ -0,0 +1,193 @@
> +#include "git-compat-util.h"
> +#include "protobuf.h"
> +#include "varint.h"
> +
> +static int decode_binary(const char **buf, const char *end, const char **result, size_t len)
> +{
> +	if (end && *buf + len > end)
> +		return 1;

In low-level library-ish code, we tend to signal an error with a negative
value and success with zero, unless there is a compelling reason not to.

When this error is triggered, what does it tell us?  Programming error
(i.e. BUG())?  Incoming data stream error?

> +	*result = *buf;
> +	*buf += len;
> +	return 0;
> +}
> +
> +static int encode_binary(char **buf, const char *end, const char *value, size_t len)
> +{
> +	if (end && *buf + len > end)
> +		return 1;
> +	memcpy(*buf, value, len);
> +	*buf += len;
> +	return 0;
> +}
> +
> +static int decode_field(const char **buf, const char *end, struct protobuf_field *field)
> +{
> +	uint32_t u32;
> +	uint64_t key;
> +	bzero(field, sizeof(struct protobuf_field));

Please use memset(..., '\0', ...), as we use functions from memXXX()
family declared in <string.h>, just like you chose to use memcpy() over
bcopy() in encode_binary() above.

> +	if (decode_varint(buf, end, &key))
> +		return 1;
> +	field->tag = key >> WT_BITS,
> +	field->type = key & WT_MASK;
> +	switch (field->type) {
> +	case WT_VARINT:
> +		if (decode_varint(buf, end, &field->val.num))
> +			return 1;
> +		return 0;
> +	case WT_64BIT:
> +		field->val.bin.len = sizeof(uint64_t);
> +		break;
> +	case WT_STRING:
> +		if (decode_varint(buf, end, &field->val.bin.len))
> +			return 1;
> +		break;
> +	case WT_SHA1:
> +		field->val.bin.len = 20;
> +		break;
> +	case WT_32BIT:
> +		field->val.bin.len = sizeof(uint32_t);
> +		break;
> +	default:
> +		return 1;
> +	}
> +	if (decode_binary(buf, end, &field->val.bin.ptr, field->val.bin.len))
> +		return 1;
> +	switch (field->type) {
> +	case WT_64BIT:
> +		memcpy(&field->val.num, field->val.bin.ptr, field->val.bin.len);
> +		break;
> +	case WT_32BIT:
> +		memcpy(&u32, field->val.bin.ptr, field->val.bin.len);
> +		field->val.num = u32;

Is there any need for byte-order considerations, or an encoded "message"
is defined (by this library) to use host encoding, and will never be sent
over the wire to other systems in the future?

> +int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields)
> +{
> +	struct protobuf_field current;
> +	bzero(message, fields * sizeof(struct protobuf_field));
> +	while (*buf != end) {
> +		if (decode_field(buf, end, &current))
> +			return 1;
> +		if (current.tag < fields)
> +			message[current.tag] = current;

Is there any need to check the incoming *buf for duplicated tags, whose
payload may overwrite an element in message[] that was populated in the
previous iteration of this loop?

Could decode_field() move *buf beyond end (IOW, would it be an improvement
if we said "while (*buf < end)" instead)?

> +static size_t sizeof_field(const struct protobuf_field *field)
> +{
> +	size_t sizeof_key = sizeof_varint(field->tag << WT_BITS | field->type);
> +	switch (field->type) {
> +	case WT_VARINT:
> +		if (field->val.num)
> +			return sizeof_key + sizeof_varint(field->val.num);
> +		break;
> +	case WT_64BIT:
> +		if (field->val.num)
> +			return sizeof_key + sizeof(uint64_t);
> +		break;
> +	case WT_STRING:
> +		if (field->val.bin.len && field->val.bin.ptr)
> +			return sizeof_key + sizeof_varint(field->val.bin.len)
> +			       + field->val.bin.len;
> +		break;
> +	case WT_SHA1:
> +		if (field->val.bin.ptr)
> +			return sizeof_key + 20;
> +		break;
> +	case WT_32BIT:
> +		if (field->val.num)
> +			return sizeof_key + sizeof(uint32_t);
> +		break;
> +	}
> +	return 0;

Doesn't this function want to return an error when it does not understand
what is in field->type field? If yes, I would imagine that this function
needs to be of type ssize_t, you would return -1 here, and the callers
would check the return value.

> +static uint32_t x65599(const char *s, uint64_t len)
> +{
> +	uint32_t r = 0;
> +	while (len--)
> +		r = *s++ + (r << 6) + (r << 16) - r;
> +	return r;

Would it be an improvement if we made sure that each byte in s is taken as
unsigned (or signed if you really wanted to but I do not see why) char on
all platforms to guarantee reproducibility?

> +}
> +
> +uint32_t hash_field(const struct protobuf_field *field)
> +{
> +	uint32_t hc = 0;
> +	switch (field->type) {
> +	case WT_VARINT:
> +	case WT_64BIT:
> +		hc = (0x9e3779b97f4a7c15ull * field->val.num) >> 32;
> +		break;
> +	case WT_SHA1:
> +		memcpy(&hc, field->val.bin.ptr, sizeof(hc));
> +		break;
> +	case WT_STRING:
> +		hc = x65599(field->val.bin.ptr, field->val.bin.len);
> +		break;
> +	case WT_32BIT:
> +		hc = 0x9e3779b9ul * (uint32_t)field->val.num;
> +		break;
> +	}
> +	return hc;
> +}

Please make the magic Q64 and Q32 into symbolic constants somewhere in
this file, or at least give comment. Naming 65599 to SOMETHING_PRIME would
also be better.

> diff --git a/varint.h b/varint.h
> new file mode 100644
> index 0000000..48a3547
> --- /dev/null
> +++ b/varint.h
> @@ -0,0 +1,59 @@
> +#ifndef VARINT_H_
> +#define VARINT_H_
> +
> +#define VLI_CONTINUE	0x80
> +#define VLI_DIGIT_MASK	0x7f
> +#define VLI_BITS_PER_DIGIT 7
> +
> +static int decode_varint(const char **buf, const char *end, uint64_t *result)
> +{
> +	uint64_t rv = 0;
> +	int shift = 0;
> +	const char *pos;
> +	for (pos = *buf; pos != end && shift < 64; pos++) {
> +		unsigned char ch = *pos;
> +
> +		rv |= (ch & VLI_DIGIT_MASK) << shift;
> +		shift += VLI_BITS_PER_DIGIT;
> +		if (ch & VLI_CONTINUE)
> +			continue;
> +
> +		*result = rv;
> +		*buf = pos + 1;
> +		return 0;

This seems to be the same 7-bit-at-a-time encoding used to encode object
size in packfiles, which is slightly looser than the one used to express
the pack offset of base objects (Cf. eb32d23 -- introduce delta objects
with offset to base, 2006-09-21). Would it make more sense to use the
slightly tighter one?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 2/3] small-alloc: add allocator for small objects
  2011-06-22  7:33 ` [RFC/PATCH 2/3] small-alloc: add allocator for small objects David Barr
@ 2011-06-22 20:49   ` Junio C Hamano
  2011-06-24 14:38     ` David Barr
  2011-06-23 17:17   ` Junio C Hamano
  1 sibling, 1 reply; 12+ messages in thread
From: Junio C Hamano @ 2011-06-22 20:49 UTC (permalink / raw)
  To: David Barr; +Cc: GIT Mailing-list

David Barr <davidbarr@google.com> writes:

> This allocator assigns an integer handle to each allocation which
> can be used to retrieve the pointer to the start of the allocation
> and its length.
> On average, the per-allocation memory overhead is twice the length
> of the variable-length-encoding of the allocation size. For objects
> less than 128 bytes in size, this equates to 2 bytes of overhead.

> diff --git a/small-alloc.c b/small-alloc.c
> new file mode 100644
> index 0000000..936884e
> --- /dev/null
> +++ b/small-alloc.c
> @@ -0,0 +1,82 @@
> +#include "git-compat-util.h"
> +#include "cache.h"
> +#include "varint.h"
> +#include "small-alloc.h"
> +
> +static const size_t chunk_size = 2 * 1024 * 1024;
> +
> +void *pool_alloc(struct mem_pool *pool, size_t len, size_t *id_out)
> +{
> +	static size_t id = 1;

Does this mean that even though you can have more than one mem_pool object
active in the system, you won't have id collision throughout the system?
That is a nice property (e.g. given an ID that does not belong to a pool,
you won't risk returning a wrong chunk of <mem,len> pair from pool_ptr()),
but is there a downside using function-scope static like this, I wonder?

For example, if I have two pools A and B, and call pool_alloc on A and
then on B and then on A again, A's space[0] will have the first and the
third object, with id=1 and id=3. How does this interact with your
implementation of pool_ptr() which seems to assume that id's are
consecutive within a single pool->space[]?

> +	size_t n;
> +	void *r;
> +
> +	if ((pool->end - pool->next_free >= len) &&
> +	    (pool->len_free >= sizeof_varint(len)))
> +		n = pool->nr - 1;

Help me make sure I understand what is going on here.

A mem-pool has pool->nr chunks (and it can grow as more memory is asked
from this pool). A new memory request is satisfied by carving out of
pool->space[n] (where n is the "newest chunk" in the pool).
pool->len[n] is a fixed sized byte array and stores the length of each
memory block carved out of pool->space[n] as a sequence of varint.
If pool->space[n] has enough space to fit "len" bytes, and if pool->len[n]
still has enough space to record the length, you use the current chunk,
otherwise (i.e. the else clause) you add a new chunk.

Am I with you so far?

With the chunk_size of 2MB, you would fit roughly 16k allocation requests
for 128-byte memory, and you would need 2-bytes to express the length of
one piece of memory in your varint encoding, i.e. you would need to size
an element of pool->len[] to 32kB if you wanted to store 16k allocation of
128-byte for a chunk.

This would all depend on what the expected distribution of request size,
but it somehow feels wasteful to be limited by both space[] and len[]. If
you chose sizeof(*pool->len) that is too small for the workload, wouldn't
you end up allocating many 2MB space[], only to use potentially very
initial parts of them before len[] fills up?

> +	else {
> +		if ((pool->end - pool->next_free < len)) {
> +			size_t pool_size = chunk_size;
> +			if (len >= (chunk_size/2))
> +				pool_size = len;
> +			pool->total_allocd += pool_size;
> +			pool->next_free = malloc(pool_size);
> +			pool->end = pool->next_free + pool_size;
> +		}
> +		pool->total_allocd += sizeof(*pool->first_id) +
> +				sizeof(*pool->space) +
> +				sizeof(*pool->len);
> +		ALLOC_GROW(pool->first_id, pool->nr + 1, pool->f_alloc);
> +		ALLOC_GROW(pool->len, pool->nr + 1, pool->l_alloc);
> +		ALLOC_GROW(pool->space, pool->nr + 1, pool->s_alloc);
> +		pool->first_id[pool->nr] = id;
> +		pool->len_free = sizeof(*pool->len);
> +		bzero(pool->len[pool->nr], sizeof(*pool->len));
> +		pool->space[pool->nr] = pool->next_free;
> +		n = pool->nr++;
> +	}
> +
> +	if (id_out)
> +		*id_out = id;
> +	id++;
> +
> +	char *t = &pool->len[n][sizeof(*pool->len) - pool->len_free];

Please avoid decl_after_statement.

> +	if (encode_varint(&t, pool->len[n] + sizeof(*pool->len), len))
> +		return NULL;
> +	pool->len_free = pool->len[n] + sizeof(*pool->len) - t;
> +
> +	r = pool->next_free;
> +	pool->next_free += len;
> +	return r;
> +}
> +
> +void *pool_ptr(struct mem_pool *pool, size_t id, size_t *len_out)
> +{
> +	char *r;
> +	const char *t;
> +	uint64_t len = 0, cur;
> +
> +	if (!id || !pool->nr)
> +		return NULL;
> +
> +	size_t n = pool->nr * id / pool->first_id[pool->nr - 1];
> +	if (n >= pool->nr - 1)
> +		n = pool->nr - 1;
> +	while (n && pool->first_id[n] > id)
> +		n--;
> +	while (n + 1 < pool->nr && pool->first_id[n + 1] <= id)
> +		n++;

I was about to say "bsearch?", but perhaps it is not worth it.

> +	if (pool->first_id[n] > id)
> +		return NULL;
> +
> +	cur = pool->first_id[n];
> +	for (r = pool->space[n], t = (const char*) pool->len[n];
> +	     !decode_varint(&t, pool->len[n] + sizeof(*pool->len), &len);
> +	     r += len, cur++)
> +		if (cur == id) {
> +			if (len_out)
> +				*len_out = len;
> +			return r;
> +		}
> +	return NULL;
> +}
> diff --git a/small-alloc.h b/small-alloc.h
> new file mode 100644
> index 0000000..eb77491
> --- /dev/null
> +++ b/small-alloc.h
> @@ -0,0 +1,18 @@
> +#ifndef SMALL_ALLOC_H_
> +#define SMALL_ALLOC_H_
> +
> +struct mem_pool {
> +	size_t *first_id;
> +	char **space;
> +	char (*len)[sizeof(size_t) + sizeof(char*)];

Each element of pool->len[] is just a byte-array that stores varint, no?
It is very misleading to specify its size as sizeof(size_t)+sizeof(char*)
as if you would store a "struct { size_t some; char *thing; }" here.

Instead of having two independently depleted byte-buffer (space[] and
len[]), I wonder if it would be more space efficient (without being less
processing efficient) to use a single buffer space.  Your pool_ptr() would
start at the beginning of pool->space[n], decode a varint and take it as a
length, if that is not the object you are looking for, skip that many
bytes (i.e. payload immediately follows the length) to the next object,
and so on.

Also what kind of alignment guarantee would we _want_ to give the callers?
As far as I can tell, this implementation does not guarantee any alignment.

> +	size_t f_alloc, s_alloc, l_alloc, nr;
> +	char *next_free;
> +	char *end;
> +	int len_free;
> +	size_t total_allocd;
> +};
> +
> +void *pool_alloc(struct mem_pool *pool, size_t len, size_t *id_out);
> +void *pool_ptr(struct mem_pool *pool, size_t id, size_t *len_out);
> +
> +#endif

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 2/3] small-alloc: add allocator for small objects
  2011-06-22  7:33 ` [RFC/PATCH 2/3] small-alloc: add allocator for small objects David Barr
  2011-06-22 20:49   ` Junio C Hamano
@ 2011-06-23 17:17   ` Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-06-23 17:17 UTC (permalink / raw)
  To: David Barr; +Cc: GIT Mailing-list

David Barr <davidbarr@google.com> writes:

> This allocator assigns an integer handle to each allocation which can be
> used to retrieve the pointer to the start of the allocation and its
> length.

One more thing to add to yesterday's review. I think you would need to
include a "method" that initializes a mem_pool object, and possibly
another to destroy an existing one, freeing the resources (unless the API
is meant to replace something like obj_hash in object.c).

It was quite difficult to judge how good this API is as it took
imagination on the reviewer's part on how a typical caller would look
like.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
  2011-06-22  7:33 ` [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures David Barr
  2011-06-22 19:42   ` Junio C Hamano
@ 2011-06-23 17:22   ` Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-06-23 17:22 UTC (permalink / raw)
  To: David Barr; +Cc: GIT Mailing-list

David Barr <davidbarr@google.com> writes:

> One struct to capture all types, just 4 methods: decode_message,
> encode_message, sizeof_message, hash_field.

Adding to the review from yesterday, hash_field() looked quite out of
place. If you are going to implement a hash table that holds protobuf
objects in a separate file/module, I would imagine the function belongs
there, not here.

> +uint32_t hash_field(const struct protobuf_field *field)
> +{
> +	uint32_t hc = 0;
> +	switch (field->type) {
> +	case WT_VARINT:
> +	case WT_64BIT:
> +		hc = (0x9e3779b97f4a7c15ull * field->val.num) >> 32;
> +		break;
> +	case WT_SHA1:
> +		memcpy(&hc, field->val.bin.ptr, sizeof(hc));
> +		break;
> +	case WT_STRING:
> +		hc = x65599(field->val.bin.ptr, field->val.bin.len);
> +		break;
> +	case WT_32BIT:
> +		hc = 0x9e3779b9ul * (uint32_t)field->val.num;
> +		break;
> +	}
> +	return hc;
> +}

It all depends on how you envision a "hash table of protobuf objects" is
to be used, but what is the point of using a complex math for 64BIT/32BIT
integer values? If you plan to have different kinds of protobuf objects
thrown into a single hash table, it may make sense, but without a crystal
ball it was kind of hard to judge.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 2/3] small-alloc: add allocator for small objects
  2011-06-22 20:49   ` Junio C Hamano
@ 2011-06-24 14:38     ` David Barr
  2011-06-24 17:02       ` David Barr
  0 siblings, 1 reply; 12+ messages in thread
From: David Barr @ 2011-06-24 14:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: GIT Mailing-list

Junio,

Sorry for the repeat, accidentally sent as HTML, rejected by the list.

On Wednesday, June 22, 2011, Junio C Hamano wrote:
David Barr <davidbarr@google.com> writes:

> +void *pool_alloc(struct mem_pool *pool, size_t len, size_t *id_out)
> +{
> +     static size_t id = 1;

Does this mean that even though you can have more than one mem_pool object
active in the system, you won't have id collision throughout the system?
That is a nice property (e.g. given an ID that does not belong to a pool,
you won't risk returning a wrong chunk of <mem,len> pair from pool_ptr()),
but is there a downside using function-scope static like this, I wonder?

This is an artifact that I missed in refactoring my experimental code.
This ought to be a field in struct mem_pool.
My intent was for id's to be unique and contiguous within a pool.

For example, if I have two pools A and B, and call pool_alloc on A and
then on B and then on A again, A's space[0] will have the first and the
third object, with id=1 and id=3. How does this interact with your
implementation of pool_ptr() which seems to assume that id's are
consecutive within a single pool->space[]?

It would interact very poorly.

> +     size_t n;

> +     void *r;
> +
> +     if ((pool->end - pool->next_free >= len) &&
> +         (pool->len_free >= sizeof_varint(len)))
> +             n = pool->nr - 1;

Help me make sure I understand what is going on here.

A mem-pool has pool->nr chunks (and it can grow as more memory is asked
from this pool). A new memory request is satisfied by carving out of
pool->space[n] (where n is the "newest chunk" in the pool).
pool->len[n] is a fixed sized byte array and stores the length of each
memory block carved out of pool->space[n] as a sequence of varint.
If pool->space[n] has enough space to fit "len" bytes, and if pool->len[n]
still has enough space to record the length, you use the current chunk,
otherwise (i.e. the else clause) you add a new chunk.

Am I with you so far?

Absolutely.

With the chunk_size of 2MB, you would fit roughly 16k allocation requests
for 128-byte memory, and you would need 2-bytes to express the length of
one piece of memory in your varint encoding, i.e. you would need to size
an element of pool->len[] to 32kB if you wanted to store 16k allocation of
128-byte for a chunk.

This would all depend on what the expected distribution of request size,
but it somehow feels wasteful to be limited by both space[] and len[]. If
you chose sizeof(*pool->len) that is too small for the workload, wouldn't
you end up allocating many 2MB space[], only to use potentially very
initial parts of them before len[] fills up?

Yes, this is a weakness of this iteration of the design.

> +     if (id_out)

> +             *id_out = id;
> +     id++;
> +
> +     char *t = &pool->len[n][sizeof(*pool->len) - pool->len_free];

Please avoid decl_after_statement.

Thanks for the reminder, will clean up some more.

> +     size_t n = pool->nr * id / pool->first_id[pool->nr - 1];

> +     if (n >= pool->nr - 1)
> +             n = pool->nr - 1;
> +     while (n && pool->first_id[n] > id)
> +             n--;
> +     while (n + 1 < pool->nr && pool->first_id[n + 1] <= id)
> +             n++;

I was about to say "bsearch?", but perhaps it is not worth it.

A linear guesstimate is typically off by a small amount, so linear
search is fine.
Also, the index of id's is contiguous, so linear search has good cache locality.
If I address the next concern in this review, this search will no
longer be necessary.

> +struct mem_pool {
> +     size_t *first_id;
> +     char **space;
> +     char (*len)[sizeof(size_t) + sizeof(char*)];

Each element of pool->len[] is just a byte-array that stores varint, no?
It is very misleading to specify its size as sizeof(size_t)+sizeof(char*)
as if you would store a "struct { size_t some; char *thing; }" here.

Instead of having two independently depleted byte-buffer (space[] and
len[]), I wonder if it would be more space efficient (without being less
processing efficient) to use a single buffer space.  Your pool_ptr() would
start at the beginning of pool->space[n], decode a varint and take it as a
length, if that is not the object you are looking for, skip that many
bytes (i.e. payload immediately follows the length) to the next object,
and so on.

I have already investigated this arrangement, it has very poor
locality of access.
For objects <32 bytes long, its not too bad since typically 2 bytes of a 64 byte
cache line would be read consecutively. For larger objects this is pathological
cache behavior. On the other hand, the current design means that the entire
sequence of lengths will fit on a single >=16 byte cache line.

Also what kind of alignment guarantee would we _want_ to give the callers?
As far as I can tell, this implementation does not guarantee any alignment.

No alignment guarantee provided. An interesting possibility is to provide a
guarantee and use the padding bytes for metadata.

Although on second reading I think I must have mis-read, I've been investigating
fixing the number of objects per chunk rather than fixing the space for lengths.
I have an inkling that I already considered this and found that it introduced a
nasty corner case. This investigation is ongoing.

--
David Barr.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
  2011-06-22 19:42   ` Junio C Hamano
@ 2011-06-24 14:39     ` David Barr
  2011-06-24 16:04     ` David Barr
  1 sibling, 0 replies; 12+ messages in thread
From: David Barr @ 2011-06-24 14:39 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: GIT Mailing-list

Junio,

Sorry for the repeat, accidentally sent as HTML, rejected by the list.

On Thursday, June 23, 2011, Junio C Hamano wrote:
David Barr <davidbarr@google.com> writes:

> One struct to capture all types, just 4 methods: decode_message,
> encode_message, sizeof_message, hash_field.

Adding to the review from yesterday, hash_field() looked quite out of
place. If you are going to implement a hash table that holds protobuf
objects in a separate file/module, I would imagine the function belongs
there, not here.

I agree completely, another artifact of refactoring from experimental code.

--
David Barr

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
  2011-06-22 19:42   ` Junio C Hamano
  2011-06-24 14:39     ` David Barr
@ 2011-06-24 16:04     ` David Barr
  2011-06-24 16:48       ` Junio C Hamano
  1 sibling, 1 reply; 12+ messages in thread
From: David Barr @ 2011-06-24 16:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: GIT Mailing-list

Hi,

On Wed, Jun 22, 2011 at 12:42 PM, Junio C Hamano <gitster@pobox.com> wrote:
> David Barr <davidbarr@google.com> writes:
>>  protobuf.c |  193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  protobuf.h |   70 ++++++++++++++++++++++
>
> How does this relate to http://code.google.com/p/protobuf/ which has a
> very similar name?  If we do not intend to have any interoperability with
> it, we should avoid such a confusing name, I think.

The relationship is that the design is shamelessly copied.
The reason for going with this particular design is that it very closely mirrors
my though experiments on the subject but is supported by heavy use for
real workloads. This is of course offset by introducing git-specific tweaks
and removing network order constraints.

>> diff --git a/protobuf.c b/protobuf.c
>> new file mode 100644
>> index 0000000..09223dd
>> --- /dev/null
>> +++ b/protobuf.c
>> @@ -0,0 +1,193 @@
>> +#include "git-compat-util.h"
>> +#include "protobuf.h"
>> +#include "varint.h"
>> +
>> +static int decode_binary(const char **buf, const char *end, const char **result, size_t len)
>> +{
>> +     if (end && *buf + len > end)
>> +             return 1;
>
> In low-level library-ish code, we tend to signal an error with a negative
> value and success with zero, unless there is a compelling reason not to.
>
> When this error is triggered, what does it tell us?  Programming error
> (i.e. BUG())?  Incoming data stream error?

This would be a programming error, since the interface provides a method
to calculate the required buffer size before persisting it.

>> +static int decode_field(const char **buf, const char *end, struct protobuf_field *field)
>> +{
>> +     uint32_t u32;
>> +     uint64_t key;
>> +     bzero(field, sizeof(struct protobuf_field));
>
> Please use memset(..., '\0', ...), as we use functions from memXXX()
> family declared in <string.h>, just like you chose to use memcpy() over
> bcopy() in encode_binary() above.

Will do.
>> +     case WT_64BIT:
>> +             memcpy(&field->val.num, field->val.bin.ptr, field->val.bin.len);
>> +             break;
>> +     case WT_32BIT:
>> +             memcpy(&u32, field->val.bin.ptr, field->val.bin.len);
>> +             field->val.num = u32;
>
> Is there any need for byte-order considerations, or an encoded "message"
> is defined (by this library) to use host encoding, and will never be sent
> over the wire to other systems in the future?

Host encoding is intentional but only because network encoding is a little
messy to write for 64-bit values. I don't think its prohibitive to implement.

>> +int decode_message(const char **buf, const char *end, struct protobuf_field *message, size_t fields)
>> +{
>> +     struct protobuf_field current;
>> +     bzero(message, fields * sizeof(struct protobuf_field));
>> +     while (*buf != end) {
>> +             if (decode_field(buf, end, &current))
>> +                     return 1;
>> +             if (current.tag < fields)
>> +                     message[current.tag] = current;
>
> Is there any need to check the incoming *buf for duplicated tags, whose
> payload may overwrite an element in message[] that was populated in the
> previous iteration of this loop?

This feature comes from the original protobuf design.
It allows values to be updated by appending to the buffer.

> Could decode_field() move *buf beyond end (IOW, would it be an improvement
> if we said "while (*buf < end)" instead)?

The contract is that decode_field() will not move *buf beyond end.
However for clarity, "while (*buf < end)" is better.

>> +static size_t sizeof_field(const struct protobuf_field *field)
>> +{
>> +     size_t sizeof_key = sizeof_varint(field->tag << WT_BITS | field->type);
>> +     switch (field->type) {
>> +     case WT_VARINT:
>> +             if (field->val.num)
>> +                     return sizeof_key + sizeof_varint(field->val.num);
>> +             break;
>> +     case WT_64BIT:
>> +             if (field->val.num)
>> +                     return sizeof_key + sizeof(uint64_t);
>> +             break;
>> +     case WT_STRING:
>> +             if (field->val.bin.len && field->val.bin.ptr)
>> +                     return sizeof_key + sizeof_varint(field->val.bin.len)
>> +                            + field->val.bin.len;
>> +             break;
>> +     case WT_SHA1:
>> +             if (field->val.bin.ptr)
>> +                     return sizeof_key + 20;
>> +             break;
>> +     case WT_32BIT:
>> +             if (field->val.num)
>> +                     return sizeof_key + sizeof(uint32_t);
>> +             break;
>> +     }
>> +     return 0;
>
> Doesn't this function want to return an error when it does not understand
> what is in field->type field? If yes, I would imagine that this function
> needs to be of type ssize_t, you would return -1 here, and the callers
> would check the return value.

Yes, just to be clear, something like:
  break;
+ default:
+   return -1;
  }

>> +static uint32_t x65599(const char *s, uint64_t len)
>> +{
>> +     uint32_t r = 0;
>> +     while (len--)
>> +             r = *s++ + (r << 6) + (r << 16) - r;
>> +     return r;
>
> Would it be an improvement if we made sure that each byte in s is taken as
> unsigned (or signed if you really wanted to but I do not see why) char on
> all platforms to guarantee reproducibility?

Although it shouldn't affect hash distribution, reproducibility is good.

>> +}
>> +
>> +uint32_t hash_field(const struct protobuf_field *field)
>> +{
>> +     uint32_t hc = 0;
>> +     switch (field->type) {
>> +     case WT_VARINT:
>> +     case WT_64BIT:
>> +             hc = (0x9e3779b97f4a7c15ull * field->val.num) >> 32;
>> +             break;
>> +     case WT_SHA1:
>> +             memcpy(&hc, field->val.bin.ptr, sizeof(hc));
>> +             break;
>> +     case WT_STRING:
>> +             hc = x65599(field->val.bin.ptr, field->val.bin.len);
>> +             break;
>> +     case WT_32BIT:
>> +             hc = 0x9e3779b9ul * (uint32_t)field->val.num;
>> +             break;
>> +     }
>> +     return hc;
>> +}
>
> Please make the magic Q64 and Q32 into symbolic constants somewhere in
> this file, or at least give comment. Naming 65599 to SOMETHING_PRIME would
> also be better.

Maybe some documentation ought to be attached describing why 65599.
Its not just any prime [1], it has very good mixing characteristics mod 2^32.
Also its form, 2^a +/- 2^b +/- 1, enables multiplication using just 2
shift operations.
It produces less collisions than 31 for short strings.

>> +#define VLI_CONTINUE 0x80
>> +#define VLI_DIGIT_MASK       0x7f
>> +#define VLI_BITS_PER_DIGIT 7
>> +
>> +static int decode_varint(const char **buf, const char *end, uint64_t *result)
>> +{
>> +     uint64_t rv = 0;
>> +     int shift = 0;
>> +     const char *pos;
>> +     for (pos = *buf; pos != end && shift < 64; pos++) {
>> +             unsigned char ch = *pos;
>> +
>> +             rv |= (ch & VLI_DIGIT_MASK) << shift;
>> +             shift += VLI_BITS_PER_DIGIT;
>> +             if (ch & VLI_CONTINUE)
>> +                     continue;
>> +
>> +             *result = rv;
>> +             *buf = pos + 1;
>> +             return 0;
>
> This seems to be the same 7-bit-at-a-time encoding used to encode object
> size in packfiles, which is slightly looser than the one used to express
> the pack offset of base objects (Cf. eb32d23 -- introduce delta objects
> with offset to base, 2006-09-21). Would it make more sense to use the
> slightly tighter one?

I'm inclined to use the tighter encoding but it does have implications for the
complexity of encoding. Decoding remains simple.

--
David Barr

[1] http://www.cse.yorku.ca/~oz/hash.html#sdbm

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures
  2011-06-24 16:04     ` David Barr
@ 2011-06-24 16:48       ` Junio C Hamano
  0 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2011-06-24 16:48 UTC (permalink / raw)
  To: David Barr; +Cc: GIT Mailing-list

David Barr <davidbarr@google.com> writes:

>> How does this relate to http://code.google.com/p/protobuf/ which has a
>> very similar name?  If we do not intend to have any interoperability with
>> it, we should avoid such a confusing name, I think.
>
> The relationship is that the design is shamelessly copied.

... and this will be API and bytestream compatible with that other
protobuf?

If not, please don't use that name. It confuses people, and if somebody
wants to take libified part of our codebase into their application and
link with the real protobuf, it will get even more confusing ;-).

You can (and should) still state that the design was inspired by the other
work in the comment at the beginning of the file or something.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH 2/3] small-alloc: add allocator for small objects
  2011-06-24 14:38     ` David Barr
@ 2011-06-24 17:02       ` David Barr
  0 siblings, 0 replies; 12+ messages in thread
From: David Barr @ 2011-06-24 17:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: GIT Mailing-list

Junio wrote:
> Instead of having two independently depleted byte-buffer (space[] and
> len[]), I wonder if it would be more space efficient (without being less
> processing efficient) to use a single buffer space.  Your pool_ptr() would
> start at the beginning of pool->space[n], decode a varint and take it as a
> length, if that is not the object you are looking for, skip that many
> bytes (i.e. payload immediately follows the length) to the next object,
> and so on.

David Barr wrote:
> I have already investigated this arrangement, it has very poor
> locality of access.
> For objects <32 bytes long, its not too bad since typically 2 bytes of a 64 byte
> cache line would be read consecutively. For larger objects this is pathological
> cache behavior. On the other hand, the current design means that the entire
> sequence of lengths will fit on a single >=16 byte cache line.

Another approach is to keep the buffers separate but interleave
pointers and lengths.
I'll give this a go and see if it's an overall improvement.

--
David Barr

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-06-24 17:02 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-22  7:33 [RFC/PATCH 0/3] David Barr
2011-06-22  7:33 ` [RFC/PATCH 1/3] protobuf: minimal implementation for compact in-memory structures David Barr
2011-06-22 19:42   ` Junio C Hamano
2011-06-24 14:39     ` David Barr
2011-06-24 16:04     ` David Barr
2011-06-24 16:48       ` Junio C Hamano
2011-06-23 17:22   ` Junio C Hamano
2011-06-22  7:33 ` [RFC/PATCH 2/3] small-alloc: add allocator for small objects David Barr
2011-06-22 20:49   ` Junio C Hamano
2011-06-24 14:38     ` David Barr
2011-06-24 17:02       ` David Barr
2011-06-23 17:17   ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.