All of lore.kernel.org
 help / color / mirror / Atom feed
* fast-import: use struct hash_table
@ 2011-03-31 11:59 David Barr
  2011-03-31 11:59 ` [PATCH 1/2] fast-import: use struct hash_table for atom strings David Barr
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: David Barr @ 2011-03-31 11:59 UTC (permalink / raw)
  To: Git List; +Cc: Jonathan Nieder

The current custom hash tables in fast-import.c do not grow.
This causes poor performance for very large imports.
Fortunately, we have struct hash_table and friends so there's
no need to write cumbersome hash table growth code.

If anyone is interested, I think the hash API documentation
could use an example or two.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/2] fast-import: use struct hash_table for atom strings
  2011-03-31 11:59 fast-import: use struct hash_table David Barr
@ 2011-03-31 11:59 ` David Barr
  2011-04-02  2:42   ` Jonathan Nieder
  2011-03-31 11:59 ` [PATCH 2/2] fast-import: use struct hash_table for objects David Barr
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: David Barr @ 2011-03-31 11:59 UTC (permalink / raw)
  To: Git List; +Cc: Jonathan Nieder, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
---
 fast-import.c |   17 ++++++++++-------
 1 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 65d65bf..0592b21 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -300,9 +300,8 @@ static size_t total_allocd;
 static struct mem_pool *mem_pool;
 
 /* Atom management */
-static unsigned int atom_table_sz = 4451;
 static unsigned int atom_cnt;
-static struct atom_str **atom_table;
+static struct hash_table atom_table;
 
 /* The .pack file being generated */
 static unsigned int pack_id;
@@ -680,10 +679,11 @@ static struct object_entry *find_mark(uintmax_t idnum)
 
 static struct atom_str *to_atom(const char *s, unsigned short len)
 {
-	unsigned int hc = hc_str(s, len) % atom_table_sz;
+	unsigned int hc = hc_str(s, len);
 	struct atom_str *c;
+	void **pos;
 
-	for (c = atom_table[hc]; c; c = c->next_atom)
+	for (c = lookup_hash(hc, &atom_table); c; c = c->next_atom)
 		if (c->str_len == len && !strncmp(s, c->str_dat, len))
 			return c;
 
@@ -691,8 +691,12 @@ static struct atom_str *to_atom(const char *s, unsigned short len)
 	c->str_len = len;
 	strncpy(c->str_dat, s, len);
 	c->str_dat[len] = 0;
-	c->next_atom = atom_table[hc];
-	atom_table[hc] = c;
+	c->next_atom = NULL;
+	pos = insert_hash(hc, c, &atom_table);
+	if (pos) {
+		c->next_atom = *pos;
+		*pos = c;
+	}
 	atom_cnt++;
 	return c;
 }
@@ -3263,7 +3267,6 @@ int main(int argc, const char **argv)
 
 	alloc_objects(object_entry_alloc);
 	strbuf_init(&command_buf, 0);
-	atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
 	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
 	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
 	marks = pool_calloc(1, sizeof(struct mark_set));
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] fast-import: use struct hash_table for objects
  2011-03-31 11:59 fast-import: use struct hash_table David Barr
  2011-03-31 11:59 ` [PATCH 1/2] fast-import: use struct hash_table for atom strings David Barr
@ 2011-03-31 11:59 ` David Barr
  2011-04-02  2:46   ` Jonathan Nieder
  2011-04-02  2:48 ` fast-import: use struct hash_table Jonathan Nieder
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: David Barr @ 2011-03-31 11:59 UTC (permalink / raw)
  To: Git List; +Cc: Jonathan Nieder, David Barr

Signed-off-by: David Barr <david.barr@cordelta.com>
---
 fast-import.c |   19 ++++++++++++-------
 1 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 0592b21..8fd8ea9 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -313,7 +313,7 @@ static off_t pack_size;
 /* Table of objects we've written. */
 static unsigned int object_entry_alloc = 5000;
 static struct object_entry_pool *blocks;
-static struct object_entry *object_table[1 << 16];
+static struct hash_table object_table;
 static struct mark_set *marks;
 static const char *export_marks_file;
 static const char *import_marks_file;
@@ -555,9 +555,9 @@ static struct object_entry *new_object(unsigned char *sha1)
 
 static struct object_entry *find_object(unsigned char *sha1)
 {
-	unsigned int h = sha1[0] << 8 | sha1[1];
+	unsigned int h = sha1[0] << 24 | sha1[1] << 16 | sha1[2] << 8 | sha1[3];
 	struct object_entry *e;
-	for (e = object_table[h]; e; e = e->next)
+	for (e = lookup_hash(h, &object_table); e; e = e->next)
 		if (!hashcmp(sha1, e->idx.sha1))
 			return e;
 	return NULL;
@@ -565,8 +565,9 @@ static struct object_entry *find_object(unsigned char *sha1)
 
 static struct object_entry *insert_object(unsigned char *sha1)
 {
-	unsigned int h = sha1[0] << 8 | sha1[1];
-	struct object_entry *e = object_table[h];
+	unsigned int h = sha1[0] << 24 | sha1[1] << 16 | sha1[2] << 8 | sha1[3];
+	struct object_entry *e = lookup_hash(h, &object_table);
+	void **pos;
 
 	while (e) {
 		if (!hashcmp(sha1, e->idx.sha1))
@@ -575,9 +576,13 @@ static struct object_entry *insert_object(unsigned char *sha1)
 	}
 
 	e = new_object(sha1);
-	e->next = object_table[h];
+	e->next = NULL;
 	e->idx.offset = 0;
-	object_table[h] = e;
+	pos = insert_hash(h, e, &object_table);
+	if (pos) {
+		e->next = *pos;
+		*pos = e;
+	}
 	return e;
 }
 
-- 
1.7.3.2.846.gf4b062

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] fast-import: use struct hash_table for atom strings
  2011-03-31 11:59 ` [PATCH 1/2] fast-import: use struct hash_table for atom strings David Barr
@ 2011-04-02  2:42   ` Jonathan Nieder
  2011-04-02  3:33     ` Jonathan Nieder
  0 siblings, 1 reply; 13+ messages in thread
From: Jonathan Nieder @ 2011-04-02  2:42 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Shawn O. Pearce, Stephen Boyd

Hi,

David Barr wrote:

> Signed-off-by: David Barr <david.barr@cordelta.com>

Thanks, this is a welcome change.  But perhaps it would be nice to
explain why, here? :)

E.g., what is stored in the atom table? does it tend to get big?  does
the existing code allow it to grow? this change will allow it to grow,
right? what is the downside to this change (if any)?

Especially, numbers (timings) illustrating the effect on typical
use and effect on scalability would be interesting.

> ---
>  fast-import.c |   17 ++++++++++-------
>  1 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/fast-import.c b/fast-import.c
> index 65d65bf..0592b21 100644
> --- a/fast-import.c
> +++ b/fast-import.c
> @@ -300,9 +300,8 @@ static size_t total_allocd;
>  static struct mem_pool *mem_pool;
>  
>  /* Atom management */
> -static unsigned int atom_table_sz = 4451;
>  static unsigned int atom_cnt;
> -static struct atom_str **atom_table;
> +static struct hash_table atom_table;
>  
>  /* The .pack file being generated */
>  static unsigned int pack_id;
> @@ -680,10 +679,11 @@ static struct object_entry *find_mark(uintmax_t idnum)
>  
>  static struct atom_str *to_atom(const char *s, unsigned short len)
>  {
> -	unsigned int hc = hc_str(s, len) % atom_table_sz;
> +	unsigned int hc = hc_str(s, len);
>  	struct atom_str *c;
> +	void **pos;
>  
> -	for (c = atom_table[hc]; c; c = c->next_atom)
> +	for (c = lookup_hash(hc, &atom_table); c; c = c->next_atom)
>  		if (c->str_len == len && !strncmp(s, c->str_dat, len))
>  			return c;
>  
> @@ -691,8 +691,12 @@ static struct atom_str *to_atom(const char *s, unsigned short len)
>  	c->str_len = len;
>  	strncpy(c->str_dat, s, len);
>  	c->str_dat[len] = 0;
> -	c->next_atom = atom_table[hc];
> -	atom_table[hc] = c;
> +	c->next_atom = NULL;
> +	pos = insert_hash(hc, c, &atom_table);
> +	if (pos) {
> +		c->next_atom = *pos;
> +		*pos = c;
> +	}

If I understand correctly, this puts new atoms at the start of the
chain, just like v1.7.4-rc0~40^2 (fast-import: insert new object
entries at start of hash bucket, 2010-11-23) did for objects.  Did you
measure and find this faster, or is it just for simplicity or
consistency?  (I'd personally be fine with it either way, but it seems
prudent to ask.)

>  	atom_cnt++;
>  	return c;
>  }
> @@ -3263,7 +3267,6 @@ int main(int argc, const char **argv)
>  
>  	alloc_objects(object_entry_alloc);
>  	strbuf_init(&command_buf, 0);
> -	atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
>  	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
>  	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
>  	marks = pool_calloc(1, sizeof(struct mark_set));

We never call init_hash.  That's technically safe because init_hash
just zeroes out the table, but I think I'd rather see us using it
anyway or documenting in api-hash.txt that it's safe not to use.

Looks good.  Will queue to give it some testing.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] fast-import: use struct hash_table for objects
  2011-03-31 11:59 ` [PATCH 2/2] fast-import: use struct hash_table for objects David Barr
@ 2011-04-02  2:46   ` Jonathan Nieder
  0 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2011-04-02  2:46 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Shawn O. Pearce, Stephen Boyd

David Barr wrote:

> Signed-off-by: David Barr <david.barr@cordelta.com>

Thanks, this one is even more welcome. :)  Same comments as the other
patch apply.  Keeping the patch in full below so others can comment.

One comment below (search for object.c to find it; sorry).

> ---
>  fast-import.c |   19 ++++++++++++-------
>  1 files changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/fast-import.c b/fast-import.c
> index 0592b21..8fd8ea9 100644
> --- a/fast-import.c
> +++ b/fast-import.c
> @@ -313,7 +313,7 @@ static off_t pack_size;
>  /* Table of objects we've written. */
>  static unsigned int object_entry_alloc = 5000;
>  static struct object_entry_pool *blocks;
> -static struct object_entry *object_table[1 << 16];
> +static struct hash_table object_table;
>  static struct mark_set *marks;
>  static const char *export_marks_file;
>  static const char *import_marks_file;
> @@ -555,9 +555,9 @@ static struct object_entry *new_object(unsigned char *sha1)
>  
>  static struct object_entry *find_object(unsigned char *sha1)
>  {
> -	unsigned int h = sha1[0] << 8 | sha1[1];
> +	unsigned int h = sha1[0] << 24 | sha1[1] << 16 | sha1[2] << 8 | sha1[3];
>  	struct object_entry *e;
> -	for (e = object_table[h]; e; e = e->next)
> +	for (e = lookup_hash(h, &object_table); e; e = e->next)
>  		if (!hashcmp(sha1, e->idx.sha1))
>  			return e;
>  	return NULL;
> @@ -565,8 +565,9 @@ static struct object_entry *find_object(unsigned char *sha1)
>  
>  static struct object_entry *insert_object(unsigned char *sha1)
>  {
> -	unsigned int h = sha1[0] << 8 | sha1[1];
> -	struct object_entry *e = object_table[h];
> +	unsigned int h = sha1[0] << 24 | sha1[1] << 16 | sha1[2] << 8 | sha1[3];
> +	struct object_entry *e = lookup_hash(h, &object_table);
> +	void **pos;

object.c uses memcpy for this, like so:

	memcpy(&h, sha1, sizeof(unsigned int));

which strikes me as sensible (to avoid fighting with the machine about
endianness since this table is only in memory).

>  
>  	while (e) {
>  		if (!hashcmp(sha1, e->idx.sha1))
> @@ -575,9 +576,13 @@ static struct object_entry *insert_object(unsigned char *sha1)
>  	}
>  
>  	e = new_object(sha1);
> -	e->next = object_table[h];
> +	e->next = NULL;
>  	e->idx.offset = 0;
> -	object_table[h] = e;
> +	pos = insert_hash(h, e, &object_table);
> +	if (pos) {
> +		e->next = *pos;
> +		*pos = e;
> +	}
>  	return e;
>  }
>  
> -- 
> 1.7.3.2.846.gf4b062
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: fast-import: use struct hash_table
  2011-03-31 11:59 fast-import: use struct hash_table David Barr
  2011-03-31 11:59 ` [PATCH 1/2] fast-import: use struct hash_table for atom strings David Barr
  2011-03-31 11:59 ` [PATCH 2/2] fast-import: use struct hash_table for objects David Barr
@ 2011-04-02  2:48 ` Jonathan Nieder
  2012-04-11 12:11 ` [PATCH/RFC v2 0/4] " Jonathan Nieder
  2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
  4 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2011-04-02  2:48 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Stephen Boyd

David Barr wrote:

> If anyone is interested, I think the hash API documentation
> could use an example or two.

Yes, please. :)  At the very least, a "here is a quick checklist
for how to use it" tutorial would be very nice.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] fast-import: use struct hash_table for atom strings
  2011-04-02  2:42   ` Jonathan Nieder
@ 2011-04-02  3:33     ` Jonathan Nieder
  0 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2011-04-02  3:33 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Shawn O. Pearce, Stephen Boyd

Jonathan Nieder wrote:

>> @@ -691,8 +691,12 @@ static struct atom_str *to_atom(const char *s, unsigned short len)
>>  	c->str_len = len;
>>  	strncpy(c->str_dat, s, len);
>>  	c->str_dat[len] = 0;
>> -	c->next_atom = atom_table[hc];
>> -	atom_table[hc] = c;
>> +	c->next_atom = NULL;
>> +	pos = insert_hash(hc, c, &atom_table);
>> +	if (pos) {
>> +		c->next_atom = *pos;
>> +		*pos = c;
>> +	}
>
> If I understand correctly, this puts new atoms at the start of the
> chain, just like v1.7.4-rc0~40^2 (fast-import: insert new object
> entries at start of hash bucket, 2010-11-23) did for objects.  Did you
> measure and find this faster, or is it just for simplicity or
> consistency?  (I'd personally be fine with it either way, but it seems
> prudent to ask.)

Agh.  Too-quick reading on my part (or rather, I lazily made an
assumption and didn't pay much attention to the old code at all).  I
have no reason to believe inserting at the end of the bucket would be
better, and it would certainly be more complex.

Sorry, folks.  Don't mind me.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH/RFC v2 0/4] Re: fast-import: use struct hash_table
  2011-03-31 11:59 fast-import: use struct hash_table David Barr
                   ` (2 preceding siblings ...)
  2011-04-02  2:48 ` fast-import: use struct hash_table Jonathan Nieder
@ 2012-04-11 12:11 ` Jonathan Nieder
  2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
  4 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2012-04-11 12:11 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Thomas Rast, Dmitry Ivankov, Sverre Rabbelier

Hi David,

David Barr wrote:

> The current custom hash tables in fast-import.c do not grow.
> This causes poor performance for very large imports.
> Fortunately, we have struct hash_table and friends so there's
> no need to write cumbersome hash table growth code.

Thanks for these patches.  I've tentatively queued the following
patches at

  git://repo.or.cz/git/jrn.git fast-import-pu

and would be happy to ask Junio to pull them if they look sane to you.

Sorry for the long delay.

David Barr (2):
  fast-import: allow object_table to grow dynamically
  fast-import: allow atom_table to grow dynamically

Jonathan Nieder (2):
  fast-import: allow branch_table to grow dynamically
  fast-import: use DIV_ROUND_UP

 fast-import.c |  153 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 102 insertions(+), 51 deletions(-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH/RFC v2 0/4 resend] Re: fast-import: use struct hash_table
  2011-03-31 11:59 fast-import: use struct hash_table David Barr
                   ` (3 preceding siblings ...)
  2012-04-11 12:11 ` [PATCH/RFC v2 0/4] " Jonathan Nieder
@ 2012-04-11 12:12 ` Jonathan Nieder
  2012-04-11 12:13   ` [PATCH 1/4] fast-import: allow object_table to grow dynamically Jonathan Nieder
                     ` (3 more replies)
  4 siblings, 4 replies; 13+ messages in thread
From: Jonathan Nieder @ 2012-04-11 12:12 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Thomas Rast, Dmitry Ivankov, Sverre Rabbelier

(resending with newer address for David.  Sorry for the noise)
Hi David,

David Barr wrote:

> The current custom hash tables in fast-import.c do not grow.
> This causes poor performance for very large imports.
> Fortunately, we have struct hash_table and friends so there's
> no need to write cumbersome hash table growth code.

Thanks for these patches.  I've tentatively queued the following
patches at

  git://repo.or.cz/git/jrn.git fast-import-pu

and would be happy to ask Junio to pull them if they look sane to you.

Sorry for the long delay.

David Barr (2):
  fast-import: allow object_table to grow dynamically
  fast-import: allow atom_table to grow dynamically

Jonathan Nieder (2):
  fast-import: allow branch_table to grow dynamically
  fast-import: use DIV_ROUND_UP

 fast-import.c |  153 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 102 insertions(+), 51 deletions(-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/4] fast-import: allow object_table to grow dynamically
  2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
@ 2012-04-11 12:13   ` Jonathan Nieder
  2012-04-11 12:14   ` [PATCH 2/4] fast-import: allow atom_table " Jonathan Nieder
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2012-04-11 12:13 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Thomas Rast, Dmitry Ivankov, Sverre Rabbelier

From: David Barr <david.barr@cordelta.com>
Date: Thu, 31 Mar 2011 22:59:58 +1100

The current custom hash tables in fast-import.c do not grow.  This
causes poor performance for very large imports.

Shawn O. Pearce writes:

> I can tell you why its fixed size... when I wrote fast-import to
> support the Mozilla repository import, we had an estimate on the
> number of objects that we needed fast-import to handle in a given run.
> From that estimate we concluded that a table of 2^16 was sufficiently
> large enough, as the hash chains would only be some small N long given
> the total number of objects we needed to handle for Mozilla.  Doubling
> that into 2^17 or larger wasn't useful, and using a smaller table like
> 2^14 produced too long of a chain.
>
> Once this code was working, we moved on to other features, and never
> reconsidered the table size.  This table should be growing if the
> initial 2^16 isn't big enough.  Its just that nobody has ever noticed
> that I hardcoded the size.  :-)

Fortunately, we have struct hash_table and friends so there's no need
to write cumbersome hash table growth code.

[jn: using native endianness for hash, new commit message; with
 init_hash call, even though it's not currently needed]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fast-import.c |   27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 78d97868..a79a1260 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -313,7 +313,7 @@ static off_t pack_size;
 /* Table of objects we've written. */
 static unsigned int object_entry_alloc = 5000;
 static struct object_entry_pool *blocks;
-static struct object_entry *object_table[1 << 16];
+static struct hash_table object_table;
 static struct mark_set *marks;
 static const char *export_marks_file;
 static const char *import_marks_file;
@@ -553,11 +553,18 @@ static struct object_entry *new_object(unsigned char *sha1)
 	return e;
 }
 
+static unsigned int hash_sha1(const unsigned char *sha1)
+{
+	unsigned int h;
+	memcpy(&h, sha1, sizeof(unsigned int));
+	return h;
+}
+
 static struct object_entry *find_object(unsigned char *sha1)
 {
-	unsigned int h = sha1[0] << 8 | sha1[1];
+	unsigned int h = hash_sha1(sha1);
 	struct object_entry *e;
-	for (e = object_table[h]; e; e = e->next)
+	for (e = lookup_hash(h, &object_table); e; e = e->next)
 		if (!hashcmp(sha1, e->idx.sha1))
 			return e;
 	return NULL;
@@ -565,8 +572,9 @@ static struct object_entry *find_object(unsigned char *sha1)
 
 static struct object_entry *insert_object(unsigned char *sha1)
 {
-	unsigned int h = sha1[0] << 8 | sha1[1];
-	struct object_entry *e = object_table[h];
+	unsigned int h = hash_sha1(sha1);
+	struct object_entry *e = lookup_hash(h, &object_table);
+	void **pos;
 
 	while (e) {
 		if (!hashcmp(sha1, e->idx.sha1))
@@ -575,9 +583,13 @@ static struct object_entry *insert_object(unsigned char *sha1)
 	}
 
 	e = new_object(sha1);
-	e->next = object_table[h];
+	e->next = NULL;
 	e->idx.offset = 0;
-	object_table[h] = e;
+	pos = insert_hash(h, e, &object_table);
+	if (pos) {
+		e->next = *pos;
+		*pos = e;
+	}
 	return e;
 }
 
@@ -3261,6 +3273,7 @@ int main(int argc, const char **argv)
 	atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
 	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
 	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
+	init_hash(&object_table);
 	marks = pool_calloc(1, sizeof(struct mark_set));
 
 	global_argc = argc;
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/4] fast-import: allow atom_table to grow dynamically
  2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
  2012-04-11 12:13   ` [PATCH 1/4] fast-import: allow object_table to grow dynamically Jonathan Nieder
@ 2012-04-11 12:14   ` Jonathan Nieder
  2012-04-11 12:15   ` [PATCH 3/4] fast-import: allow branch_table " Jonathan Nieder
  2012-04-11 12:15   ` [PATCH 4/4] fast-import: use DIV_ROUND_UP Jonathan Nieder
  3 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2012-04-11 12:14 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Thomas Rast, Dmitry Ivankov, Sverre Rabbelier

From: David Barr <david.barr@cordelta.com>
Date: Thu, 31 Mar 2011 22:59:57 +1100

Use a struct hash_table to allow the table for atom strings to
grow.  See the previous commit for explanations.

[jn: with init_hash call, even though it's technically not needed]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fast-import.c |   18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index a79a1260..67769573 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -299,9 +299,8 @@ static size_t total_allocd;
 static struct mem_pool *mem_pool;
 
 /* Atom management */
-static unsigned int atom_table_sz = 4451;
 static unsigned int atom_cnt;
-static struct atom_str **atom_table;
+static struct hash_table atom_table;
 
 /* The .pack file being generated */
 static unsigned int pack_id;
@@ -691,10 +690,11 @@ static struct object_entry *find_mark(uintmax_t idnum)
 
 static struct atom_str *to_atom(const char *s, unsigned short len)
 {
-	unsigned int hc = hc_str(s, len) % atom_table_sz;
+	unsigned int hc = hc_str(s, len);
 	struct atom_str *c;
+	void **pos;
 
-	for (c = atom_table[hc]; c; c = c->next_atom)
+	for (c = lookup_hash(hc, &atom_table); c; c = c->next_atom)
 		if (c->str_len == len && !strncmp(s, c->str_dat, len))
 			return c;
 
@@ -702,8 +702,12 @@ static struct atom_str *to_atom(const char *s, unsigned short len)
 	c->str_len = len;
 	strncpy(c->str_dat, s, len);
 	c->str_dat[len] = 0;
-	c->next_atom = atom_table[hc];
-	atom_table[hc] = c;
+	c->next_atom = NULL;
+	pos = insert_hash(hc, c, &atom_table);
+	if (pos) {
+		c->next_atom = *pos;
+		*pos = c;
+	}
 	atom_cnt++;
 	return c;
 }
@@ -3270,7 +3274,7 @@ int main(int argc, const char **argv)
 
 	alloc_objects(object_entry_alloc);
 	strbuf_init(&command_buf, 0);
-	atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
+	init_hash(&atom_table);
 	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
 	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
 	init_hash(&object_table);
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/4] fast-import: allow branch_table to grow dynamically
  2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
  2012-04-11 12:13   ` [PATCH 1/4] fast-import: allow object_table to grow dynamically Jonathan Nieder
  2012-04-11 12:14   ` [PATCH 2/4] fast-import: allow atom_table " Jonathan Nieder
@ 2012-04-11 12:15   ` Jonathan Nieder
  2012-04-11 12:15   ` [PATCH 4/4] fast-import: use DIV_ROUND_UP Jonathan Nieder
  3 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2012-04-11 12:15 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Thomas Rast, Dmitry Ivankov, Sverre Rabbelier

Date: Mon, 30 May 2011 22:25:35 -0500

Use a struct hash_table to allow the table for branches to grow.  The
main benefit is to make the code more self-consistent and avoid a
magic number for table size.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 fast-import.c |  104 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 69 insertions(+), 35 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 67769573..ebb27006 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -334,8 +334,7 @@ static struct strbuf new_tree = STRBUF_INIT;
 /* Branch data */
 static unsigned long max_active_branches = 5;
 static unsigned long cur_active_branches;
-static unsigned long branch_table_sz = 1039;
-static struct branch **branch_table;
+static struct hash_table branch_table;
 static struct branch *active_branches;
 
 /* Tag data */
@@ -364,8 +363,10 @@ static void parse_argv(void);
 static void parse_cat_blob(void);
 static void parse_ls(struct branch *b);
 
-static void write_branch_report(FILE *rpt, struct branch *b)
+static int write_branch_report(struct branch *b, void *cb)
 {
+	FILE *rpt = cb;
+
 	fprintf(rpt, "%s:\n", b->name);
 
 	fprintf(rpt, "  status      :");
@@ -388,6 +389,36 @@ static void write_branch_report(FILE *rpt, struct branch *b)
 	fputc('\n', rpt);
 
 	fputc('\n', rpt);
+
+	return 0;
+}
+
+struct for_each_branch_data {
+	int (*fn)(struct branch *, void *);
+	void *data;
+};
+
+static int for_each_branch_helper(void *bucket, void *helper_data)
+{
+	struct for_each_branch_data *cb = helper_data;
+	struct branch *b;
+	int sum = 0;
+
+	for (b = bucket; b; b = b->table_next_branch) {
+		int val = cb->fn(b, cb->data);
+		if (val < 0)
+			return val;
+		sum += val;
+	}
+	return sum;
+}
+
+static int for_each_branch(int (*fn)(struct branch *, void *), void *data)
+{
+	struct for_each_branch_data cb;
+	cb.fn = fn;
+	cb.data = data;
+	return for_each_hash(&branch_table, for_each_branch_helper, &cb);
 }
 
 static void dump_marks_helper(FILE *, uintmax_t, struct mark_set *);
@@ -445,10 +476,7 @@ static void write_crash_report(const char *err)
 	fputc('\n', rpt);
 	fputs("Inactive Branches\n", rpt);
 	fputs("-----------------\n", rpt);
-	for (lu = 0; lu < branch_table_sz; lu++) {
-		for (b = branch_table[lu]; b; b = b->table_next_branch)
-			write_branch_report(rpt, b);
-	}
+	for_each_branch(write_branch_report, rpt);
 
 	if (first_tag) {
 		struct tag *tg;
@@ -714,10 +742,10 @@ static struct atom_str *to_atom(const char *s, unsigned short len)
 
 static struct branch *lookup_branch(const char *name)
 {
-	unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
+	unsigned int hc = hc_str(name, strlen(name));
 	struct branch *b;
 
-	for (b = branch_table[hc]; b; b = b->table_next_branch)
+	for (b = lookup_hash(hc, &branch_table); b; b = b->table_next_branch)
 		if (!strcmp(name, b->name))
 			return b;
 	return NULL;
@@ -725,8 +753,9 @@ static struct branch *lookup_branch(const char *name)
 
 static struct branch *new_branch(const char *name)
 {
-	unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
+	unsigned int hc = hc_str(name, strlen(name));
 	struct branch *b = lookup_branch(name);
+	void **pos;
 
 	if (b)
 		die("Invalid attempt to create duplicate branch: %s", name);
@@ -740,13 +769,16 @@ static struct branch *new_branch(const char *name)
 
 	b = pool_calloc(1, sizeof(struct branch));
 	b->name = pool_strdup(name);
-	b->table_next_branch = branch_table[hc];
 	b->branch_tree.versions[0].mode = S_IFDIR;
 	b->branch_tree.versions[1].mode = S_IFDIR;
 	b->num_notes = 0;
 	b->active = 0;
 	b->pack_id = MAX_PACK_ID;
-	branch_table[hc] = b;
+	pos = insert_hash(hc, b, &branch_table);
+	if (pos) {
+		b->table_next_branch = *pos;
+		*pos = b;
+	}
 	branch_count++;
 	return b;
 }
@@ -956,6 +988,15 @@ static void unkeep_all_packs(void)
 	}
 }
 
+static int print_sha1_if_same_pack(struct branch *b, void *cb)
+{
+	const unsigned int *pack_id = cb;
+
+	if (b->pack_id == *pack_id)
+		fprintf(pack_edges, " %s", sha1_to_hex(b->sha1));
+	return 0;
+}
+
 static void end_packfile(void)
 {
 	struct packed_git *old_p = pack_data, *new_p;
@@ -964,8 +1005,6 @@ static void end_packfile(void)
 	if (object_count) {
 		unsigned char cur_pack_sha1[20];
 		char *idx_name;
-		int i;
-		struct branch *b;
 		struct tag *t;
 
 		close_pack_windows(pack_data);
@@ -986,12 +1025,7 @@ static void end_packfile(void)
 		/* Print the boundary */
 		if (pack_edges) {
 			fprintf(pack_edges, "%s:", new_p->pack_name);
-			for (i = 0; i < branch_table_sz; i++) {
-				for (b = branch_table[i]; b; b = b->table_next_branch) {
-					if (b->pack_id == pack_id)
-						fprintf(pack_edges, " %s", sha1_to_hex(b->sha1));
-				}
-			}
+			for_each_branch(print_sha1_if_same_pack, &pack_id);
 			for (t = first_tag; t; t = t->next_tag) {
 				if (t->pack_id == pack_id)
 					fprintf(pack_edges, " %s", sha1_to_hex(t->sha1));
@@ -1667,7 +1701,8 @@ static int tree_content_get(
 	return 0;
 }
 
-static int update_branch(struct branch *b)
+/* 1 means failure; -1 means stop. */
+static int update_branch(struct branch *b, void *unused)
 {
 	static const char *msg = "fast-import";
 	struct ref_lock *lock;
@@ -1678,8 +1713,10 @@ static int update_branch(struct branch *b)
 	if (read_ref(b->name, old_sha1))
 		hashclr(old_sha1);
 	lock = lock_any_ref_for_update(b->name, old_sha1, 0);
-	if (!lock)
-		return error("Unable to lock %s", b->name);
+	if (!lock) {
+		error("Unable to lock %s", b->name);
+		return 1;
+	}
 	if (!force_update && !is_null_sha1(old_sha1)) {
 		struct commit *old_cmit, *new_cmit;
 
@@ -1687,7 +1724,8 @@ static int update_branch(struct branch *b)
 		new_cmit = lookup_commit_reference_gently(b->sha1, 0);
 		if (!old_cmit || !new_cmit) {
 			unlock_ref(lock);
-			return error("Branch %s is missing commits.", b->name);
+			error("Branch %s is missing commits.", b->name);
+			return 1;
 		}
 
 		if (!in_merge_bases(old_cmit, &new_cmit, 1)) {
@@ -1695,23 +1733,19 @@ static int update_branch(struct branch *b)
 			warning("Not updating %s"
 				" (new tip %s does not contain %s)",
 				b->name, sha1_to_hex(b->sha1), sha1_to_hex(old_sha1));
-			return -1;
+			return 1;
 		}
 	}
-	if (write_ref_sha1(lock, b->sha1, msg) < 0)
-		return error("Unable to update %s", b->name);
+	if (write_ref_sha1(lock, b->sha1, msg) < 0) {
+		error("Unable to update %s", b->name);
+		return 1;
+	}
 	return 0;
 }
 
 static void dump_branches(void)
 {
-	unsigned int i;
-	struct branch *b;
-
-	for (i = 0; i < branch_table_sz; i++) {
-		for (b = branch_table[i]; b; b = b->table_next_branch)
-			failure |= update_branch(b);
-	}
+	failure |= for_each_branch(update_branch, NULL);
 }
 
 static void dump_tags(void)
@@ -3275,7 +3309,7 @@ int main(int argc, const char **argv)
 	alloc_objects(object_entry_alloc);
 	strbuf_init(&command_buf, 0);
 	init_hash(&atom_table);
-	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
+	init_hash(&branch_table);
 	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
 	init_hash(&object_table);
 	marks = pool_calloc(1, sizeof(struct mark_set));
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/4] fast-import: use DIV_ROUND_UP
  2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
                     ` (2 preceding siblings ...)
  2012-04-11 12:15   ` [PATCH 3/4] fast-import: allow branch_table " Jonathan Nieder
@ 2012-04-11 12:15   ` Jonathan Nieder
  3 siblings, 0 replies; 13+ messages in thread
From: Jonathan Nieder @ 2012-04-11 12:15 UTC (permalink / raw)
  To: David Barr; +Cc: Git List, Thomas Rast, Dmitry Ivankov, Sverre Rabbelier

Date: Mon, 30 May 2011 22:45:43 -0500

fast-import keeps tree structures for reuse in pools arranged by size:
one for trees with 0 entries, one for 8-entry trees, one for 16-entry
trees, and so on up to 784-entry trees, plus another pool for larger
trees.  Use the DIV_ROUND_UP macro to determine which pool a
given-sized tree belongs in to avoid some confusing bit-twiddling.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
Thanks for reading.

 fast-import.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index ebb27006..fc1b549d 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -785,7 +785,7 @@ static struct branch *new_branch(const char *name)
 
 static unsigned int hc_entries(unsigned int cnt)
 {
-	cnt = cnt & 7 ? (cnt / 8) + 1 : cnt / 8;
+	cnt = DIV_ROUND_UP(cnt, 8);
 	return cnt < avail_tree_table_sz ? cnt : avail_tree_table_sz - 1;
 }
 
@@ -805,7 +805,7 @@ static struct tree_content *new_tree_content(unsigned int cnt)
 		else
 			avail_tree_table[hc] = f->next_avail;
 	} else {
-		cnt = cnt & 7 ? ((cnt / 8) + 1) * 8 : cnt;
+		cnt = DIV_ROUND_UP(cnt, 8) * 8;
 		f = pool_alloc(sizeof(*t) + sizeof(t->entries[0]) * cnt);
 		f->entry_capacity = cnt;
 	}
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-04-11 12:15 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-31 11:59 fast-import: use struct hash_table David Barr
2011-03-31 11:59 ` [PATCH 1/2] fast-import: use struct hash_table for atom strings David Barr
2011-04-02  2:42   ` Jonathan Nieder
2011-04-02  3:33     ` Jonathan Nieder
2011-03-31 11:59 ` [PATCH 2/2] fast-import: use struct hash_table for objects David Barr
2011-04-02  2:46   ` Jonathan Nieder
2011-04-02  2:48 ` fast-import: use struct hash_table Jonathan Nieder
2012-04-11 12:11 ` [PATCH/RFC v2 0/4] " Jonathan Nieder
2012-04-11 12:12 ` [PATCH/RFC v2 0/4 resend] " Jonathan Nieder
2012-04-11 12:13   ` [PATCH 1/4] fast-import: allow object_table to grow dynamically Jonathan Nieder
2012-04-11 12:14   ` [PATCH 2/4] fast-import: allow atom_table " Jonathan Nieder
2012-04-11 12:15   ` [PATCH 3/4] fast-import: allow branch_table " Jonathan Nieder
2012-04-11 12:15   ` [PATCH 4/4] fast-import: use DIV_ROUND_UP Jonathan Nieder

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.