All of lore.kernel.org
 help / color / mirror / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Jeff King <peff@peff.net>, Duy Nguyen <pclouds@gmail.com>
Cc: Junio C Hamano <gitster@pobox.com>,
	Git List <git@vger.kernel.org>,
	Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: Re: [PATCH] fast-export: avoid NULL pointer arithmetic
Date: Sat, 12 May 2018 10:45:16 +0200	[thread overview]
Message-ID: <80397e16-8667-e0cd-4049-aad453d35e6f@web.de> (raw)
In-Reply-To: <20180511174237.GA19670@sigill.intra.peff.net>

Am 11.05.2018 um 19:42 schrieb Jeff King:
> On Fri, May 11, 2018 at 03:34:19PM +0200, Duy Nguyen wrote:
> 
>> On Fri, May 11, 2018 at 03:11:46PM +0200, Duy Nguyen wrote:
>>> Back to fast-export, can we just allocate a new int on heap and point
>>> it there? Allocating small pieces becomes quite cheap and fast with
>>> mem-pool.h and we can avoid this storing integer in pointer business.
>>
>> Something like this seems to work, but we use 4-ish more bytes per
>> object, or 100MB overhead on a repo with 25M objects. I think it's a
>> reasonable trade off.
> 
> I'm not sure I agree. 4 bytes per object certainly isn't the end of the
> world, but what was the problem we were solving in the first place? Just
> that we weren't comfortable with the round-trip from uintptr_t to void
> and back? Is this actually a problem on real platforms? If not, it seems
> silly to incur a run-time cost.

Storing integer values in pointers is a trick that seems to have worked
so far for fast-export.  A portable way to avoid that trick without
requiring more memory would be to use a union.

Or we could roll our own custom hash map, as I mused in an earlier post.
That would duplicate quite a bit of code; are there reusable pieces
hidden within that could be extracted into common functions?

---
 builtin/fast-export.c | 105 ++++++++++++++++++++++++++++++++----------
 1 file changed, 81 insertions(+), 24 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 530df12f05..627b0032f3 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -14,7 +14,6 @@
 #include "diffcore.h"
 #include "log-tree.h"
 #include "revision.h"
-#include "decorate.h"
 #include "string-list.h"
 #include "utf8.h"
 #include "parse-options.h"
@@ -71,9 +70,65 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt,
 	return 0;
 }
 
-static struct decoration idnums;
+struct object_mark_entry {
+	const struct object *base;
+	uint32_t mark;
+};
+
+struct object_marks {
+	unsigned int size;
+	unsigned int nr;
+	struct object_mark_entry *entries;
+};
+
+static struct object_marks idnums;
 static uint32_t last_idnum;
 
+static unsigned int hash_obj(const struct object *obj, unsigned int n)
+{
+	return sha1hash(obj->oid.hash) % n;
+}
+
+static void set_object_mark(struct object_marks *n, const struct object *base,
+			    uint32_t mark)
+{
+	unsigned int size = n->size;
+	struct object_mark_entry *entries = n->entries;
+	unsigned int j = hash_obj(base, size);
+
+	while (entries[j].base) {
+		if (entries[j].base == base) {
+			entries[j].mark = mark;
+			return;
+		}
+		if (++j >= size)
+			j = 0;
+	}
+	entries[j].base = base;
+	entries[j].mark = mark;
+	n->nr++;
+}
+
+static void grow_object_marks(struct object_marks *n)
+{
+	unsigned int i;
+	unsigned int old_size = n->size;
+	struct object_mark_entry *old_entries = n->entries;
+
+	n->size = (old_size + 1000) * 3 / 2;
+	n->entries = xcalloc(n->size, sizeof(n->entries[0]));
+	n->nr = 0;
+
+	for (i = 0; i < old_size; i++) {
+		const struct object *base = old_entries[i].base;
+		uint32_t mark = old_entries[i].mark;
+
+		if (mark)
+			set_object_mark(n, base, mark);
+	}
+	free(old_entries);
+}
+
 static int has_unshown_parent(struct commit *commit)
 {
 	struct commit_list *parent;
@@ -156,20 +211,13 @@ static void anonymize_path(struct strbuf *out, const char *path,
 	}
 }
 
-/* Since intptr_t is C99, we do not use it here */
-static inline uint32_t *mark_to_ptr(uint32_t mark)
-{
-	return ((uint32_t *)NULL) + mark;
-}
-
-static inline uint32_t ptr_to_mark(void * mark)
-{
-	return (uint32_t *)mark - (uint32_t *)NULL;
-}
-
 static inline void mark_object(struct object *object, uint32_t mark)
 {
-	add_decoration(&idnums, object, mark_to_ptr(mark));
+	unsigned int nr = idnums.nr + 1;
+
+	if (nr > idnums.size * 2 / 3)
+		grow_object_marks(&idnums);
+	return set_object_mark(&idnums, object, mark);
 }
 
 static inline void mark_next_object(struct object *object)
@@ -179,10 +227,21 @@ static inline void mark_next_object(struct object *object)
 
 static int get_object_mark(struct object *object)
 {
-	void *decoration = lookup_decoration(&idnums, object);
-	if (!decoration)
+	unsigned int j;
+
+	/* nothing to lookup */
+	if (!idnums.size)
 		return 0;
-	return ptr_to_mark(decoration);
+	j = hash_obj(object, idnums.size);
+	for (;;) {
+		struct object_mark_entry *ref = idnums.entries + j;
+		if (ref->base == object)
+			return ref->mark;
+		if (!ref->base)
+			return 0;
+		if (++j == idnums.size)
+			j = 0;
+	}
 }
 
 static void show_progress(void)
@@ -897,8 +956,7 @@ static void handle_tags_and_duplicates(void)
 static void export_marks(char *file)
 {
 	unsigned int i;
-	uint32_t mark;
-	struct decoration_entry *deco = idnums.entries;
+	struct object_mark_entry *entry = idnums.entries;
 	FILE *f;
 	int e = 0;
 
@@ -907,15 +965,14 @@ static void export_marks(char *file)
 		die_errno("Unable to open marks file %s for writing.", file);
 
 	for (i = 0; i < idnums.size; i++) {
-		if (deco->base && deco->base->type == 1) {
-			mark = ptr_to_mark(deco->decoration);
-			if (fprintf(f, ":%"PRIu32" %s\n", mark,
-				oid_to_hex(&deco->base->oid)) < 0) {
+		if (entry->base && entry->base->type == 1) {
+			if (fprintf(f, ":%"PRIu32" %s\n", entry->mark,
+				    oid_to_hex(&entry->base->oid)) < 0) {
 			    e = 1;
 			    break;
 			}
 		}
-		deco++;
+		entry++;
 	}
 
 	e |= ferror(f);
-- 
2.17.0

  reply	other threads:[~2018-05-12  8:45 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09 21:06 [PATCH] fast-export: avoid NULL pointer arithmetic René Scharfe
2018-05-09 21:43 ` Johannes Schindelin
2018-05-10  9:24 ` René Scharfe
2018-05-10 10:51   ` Junio C Hamano
2018-05-10 19:47     ` René Scharfe
2018-05-11  2:16       ` Junio C Hamano
2018-05-11  4:49         ` Junio C Hamano
2018-05-11  6:19           ` Duy Nguyen
2018-05-11  8:56             ` Jeff King
2018-05-11 13:11               ` Duy Nguyen
2018-05-11 13:34                 ` Duy Nguyen
2018-05-11 17:42                   ` Jeff King
2018-05-12  8:45                     ` René Scharfe [this message]
2018-05-12  8:49                       ` René Scharfe
2018-05-14  1:37                       ` Junio C Hamano
2018-05-15 19:36                         ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=80397e16-8667-e0cd-4049-aad453d35e6f@web.de \
    --to=l.s.r@web.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=johannes.schindelin@gmx.de \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.