All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix
@ 2013-09-12 10:38 Nguyễn Thái Ngọc Duy
  2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-09-12 10:38 UTC (permalink / raw)
  To: git; +Cc: Nicolas Pitre, Nguyễn Thái Ngọc Duy

We do know the length of the path name of a tree entry from the tree
dictionary. On an unoptimized build, this cuts down "git rev-list
--objects v1.8.4"'s time from 6.2s to 5.8s. This difference is less on
optimized builds.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 packv4-parse.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/packv4-parse.c b/packv4-parse.c
index c00a014..ae3e6a5 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -50,15 +50,17 @@ struct packv4_dict *pv4_create_dict(const unsigned char *data, int dict_size)
 		return NULL;
 	}
 
-	dict = xmalloc(sizeof(*dict) + nb_entries * sizeof(dict->offsets[0]));
+	dict = xmalloc(sizeof(*dict) +
+		       (nb_entries + 1) * sizeof(dict->offsets[0]));
 	dict->data = data;
 	dict->nb_entries = nb_entries;
 
+	dict->offsets[0] = 0;
 	cp = data;
 	for (i = 0; i < nb_entries; i++) {
-		dict->offsets[i] = cp - data;
 		cp += 2;
 		cp += strlen((const char *)cp) + 1;
+		dict->offsets[i + 1] = cp - data;
 	}
 
 	return dict;
@@ -163,7 +165,8 @@ static void load_path_dict(struct packed_git *p)
 	p->path_dict = paths;
 }
 
-const unsigned char *get_pathref(struct packed_git *p, unsigned int index)
+const unsigned char *get_pathref(struct packed_git *p, unsigned int index,
+				 int *len)
 {
 	if (!p->path_dict)
 		load_path_dict(p);
@@ -172,6 +175,9 @@ const unsigned char *get_pathref(struct packed_git *p, unsigned int index)
 		error("%s: index overflow", __func__);
 		return NULL;
 	}
+	if (len)
+		*len = p->path_dict->offsets[index + 1] -
+			p->path_dict->offsets[index];
 	return p->path_dict->data + p->path_dict->offsets[index];
 }
 
@@ -373,9 +379,9 @@ static int copy_canonical_tree_entries(struct packed_git *p, off_t offset,
 }
 
 static int tree_entry_prefix(unsigned char *buf, unsigned long size,
-			     const unsigned char *path, unsigned mode)
+			     const unsigned char *path, int path_len,
+			     unsigned mode)
 {
-	int path_len = strlen((const char *)path) + 1;
 	int mode_len = 0;
 	int len;
 	unsigned char mode_buf[8];
@@ -463,14 +469,15 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			 */
 			const unsigned char *path, *sha1;
 			unsigned mode;
-			int len;
+			int len, pathlen;
 
-			path = get_pathref(p, what >> 1);
+			path = get_pathref(p, what >> 1, &pathlen);
 			sha1 = get_sha1ref(p, &scp);
 			if (!path || !sha1)
 				return -1;
 			mode = (path[0] << 8) | path[1];
-			len = tree_entry_prefix(*dstp, *sizep, path + 2, mode);
+			len = tree_entry_prefix(*dstp, *sizep,
+						path + 2, pathlen - 2, mode);
 			if (!len || len + 20 > *sizep)
 				return -1;
 			hashcpy(*dstp + len, sha1);
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
@ 2013-09-12 10:38 ` Nguyễn Thái Ngọc Duy
  2013-09-13 13:27   ` Nicolas Pitre
  2013-09-12 10:38 ` [PATCH 3/4] pack v4: cache flattened v4 trees in delta base cache Nguyễn Thái Ngọc Duy
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-09-12 10:38 UTC (permalink / raw)
  To: git; +Cc: Nicolas Pitre, Nguyễn Thái Ngọc Duy

The intention is to store flat v4 trees in delta base cache to avoid
repeatedly expanding copy sequences in v4 trees. When the user needs
to unpack a v4 tree and the tree is found in the cache, the tree will
be converted back to canonical format. Future tree_desc interface may
skip canonical format and read v4 trees directly.

For that to work we need to keep track of v4 tree size after all copy
sequences are expanded, which is the purpose of this new field.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 sha1_file.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 038e22e..03c66bb 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1934,6 +1934,7 @@ static struct delta_base_cache_entry {
 	struct packed_git *p;
 	off_t base_offset;
 	unsigned long size;
+	unsigned long v4_size;
 	enum object_type type;
 } delta_base_cache[MAX_DELTA_CACHE];
 
@@ -2015,7 +2016,8 @@ void clear_delta_base_cache(void)
 }
 
 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
-	void *base, unsigned long base_size, enum object_type type)
+	void *base, unsigned long base_size, unsigned long v4_size,
+	enum object_type type)
 {
 	unsigned long hash = pack_entry_hash(p, base_offset);
 	struct delta_base_cache_entry *ent = delta_base_cache + hash;
@@ -2045,6 +2047,7 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
 	ent->type = type;
 	ent->data = base;
 	ent->size = base_size;
+	ent->v4_size = v4_size;
 	ent->lru.next = &delta_base_cache_lru;
 	ent->lru.prev = delta_base_cache_lru.prev;
 	delta_base_cache_lru.prev->next = &ent->lru;
@@ -2208,7 +2211,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 		data = NULL;
 
 		if (base)
-			add_delta_base_cache(p, obj_offset, base, base_size, type);
+			add_delta_base_cache(p, obj_offset, base, base_size, 0, type);
 
 		if (!base) {
 			/*
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] pack v4: cache flattened v4 trees in delta base cache
  2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
  2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
@ 2013-09-12 10:38 ` Nguyễn Thái Ngọc Duy
  2013-09-12 10:38 ` [PATCH 4/4] pack v4: make use of cached v4 trees when unpacking Nguyễn Thái Ngọc Duy
  2013-09-12 13:29 ` [PATCH 5/4] pack v4: convert v4 tree to canonical format if found in base cache Nguyễn Thái Ngọc Duy
  3 siblings, 0 replies; 12+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-09-12 10:38 UTC (permalink / raw)
  To: git; +Cc: Nicolas Pitre, Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 The memmove in pv4_get_tree() may look inefficient. I added a
 heuristics to avoid moving if nb_entries takes 2 bytes (most common,
 I think), but it does not improve much. So memmove() is probably ok.

 packv4-parse.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 packv4-parse.h |  3 ++-
 sha1_file.c    |  8 +++++++-
 3 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/packv4-parse.c b/packv4-parse.c
index ae3e6a5..5002f42 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -406,7 +406,10 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size,
 
 static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			  off_t offset, unsigned int start, unsigned int count,
-			  unsigned char **dstp, unsigned long *sizep, int hdr)
+			  unsigned char **dstp, unsigned long *sizep,
+			  unsigned char **v4_dstp, unsigned long *v4_sizep,
+			  unsigned int *v4_entries,
+			  int hdr)
 {
 	unsigned long avail;
 	unsigned int nb_entries;
@@ -422,10 +425,18 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			if (++scp - src >= avail - 20)
 				return -1;
 		/* is this a canonical tree object? */
-		if ((*scp & 0xf) == OBJ_TREE)
+		if ((*scp & 0xf) == OBJ_TREE) {
+			/*
+			 * we could try to convert to v4 tree before
+			 * giving up, provided that the number of
+			 * inconvertible trees is small. But that's
+			 * for later.
+			 */
+			*v4_dstp = NULL;
 			return copy_canonical_tree_entries(p, offset,
 							   start, count,
 							   dstp, sizep);
+		}
 		/* let's still make sure this is actually a pv4 tree */
 		if ((*scp++ & 0xf) != OBJ_PV4_TREE)
 			return -1;
@@ -484,6 +495,16 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			*dstp += len + 20;
 			*sizep -= len + 20;
 			count--;
+			if (*v4_dstp) {
+				if (scp - src > *v4_sizep)
+					*v4_dstp = NULL;
+				else {
+					memcpy(*v4_dstp, src, scp - src);
+					*v4_dstp += scp - src;
+					*v4_sizep -= scp - src;
+					(*v4_entries)++;
+				}
+			}
 		} else if (what & 1) {
 			/*
 			 * Copy from another tree object.
@@ -537,7 +558,8 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 				count -= copy_count;
 				ret = decode_entries(p, w_curs,
 					copy_objoffset, copy_start, copy_count,
-					dstp, sizep, 1);
+					dstp, sizep, v4_dstp, v4_sizep,
+					v4_entries, 1);
 				if (ret)
 					return ret;
 				/* force pack window readjustment */
@@ -554,11 +576,13 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 }
 
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size)
+		   off_t offset, unsigned long size,
+		   void **v4_data, unsigned long *v4_size)
 {
-	unsigned long avail;
-	unsigned int nb_entries;
+	unsigned long avail, v4_max_size;
+	unsigned int nb_entries, v4_entries;
 	unsigned char *dst, *dcp;
+	unsigned char *v4_dst, *v4_dcp;
 	const unsigned char *src, *scp;
 	int ret;
 
@@ -570,11 +594,33 @@ void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
 
 	dst = xmallocz(size);
 	dcp = dst;
-	ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0);
+	if (v4_data) {
+		/*
+		 * v4 can't be larger than canonical, so "size" should
+		 * be enough
+		 */
+		v4_max_size = size;
+		v4_dst = v4_dcp = xmallocz(v4_max_size);
+		v4_entries = 0;
+	}
+	ret = decode_entries(p, w_curs, offset, 0, nb_entries,
+			     &dcp, &size,
+			     v4_data ? &v4_dcp : NULL, &v4_max_size,
+			     &v4_entries, 0);
 	if (ret < 0 || size != 0) {
 		free(dst);
+		free(v4_dst);
 		return NULL;
 	}
+	if (v4_data && v4_dcp) {
+		unsigned char hdr[10];
+		int len = encode_varint(v4_entries, hdr);
+		memmove(v4_dst + len, v4_dst, v4_dcp - v4_dst);
+		memcpy(v4_dst, hdr, len);
+		*v4_data = v4_dst;
+		*v4_size = len + v4_dcp - v4_dst;
+	} else
+		free(v4_dst);
 	return dst;
 }
 
diff --git a/packv4-parse.h b/packv4-parse.h
index d674a3f..647b73c 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -20,6 +20,7 @@ const unsigned char *get_sha1ref(struct packed_git *p,
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 		     off_t offset, unsigned long size);
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size);
+		   off_t offset, unsigned long size,
+		   void **v4_data, unsigned long *v4_size);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 03c66bb..b176316 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2103,6 +2103,8 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 	struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
 	int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
 	int base_from_cache = 0;
+	void *v4_data;
+	unsigned long v4_size;
 
 	if (log_pack_access != no_log_pack_access)
 		write_pack_access_log(p, obj_offset);
@@ -2181,7 +2183,11 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 		type -= 8;
 		break;
 	case OBJ_PV4_TREE:
-		data = pv4_get_tree(p, &w_curs, curpos, size);
+		v4_data = NULL;
+		data = pv4_get_tree(p, &w_curs, curpos, size, &v4_data, &v4_size);
+		if (v4_data)
+			add_delta_base_cache(p, obj_offset, v4_data,
+					     size, v4_size, type);
 		type -= 8;
 		break;
 	case OBJ_COMMIT:
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] pack v4: make use of cached v4 trees when unpacking
  2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
  2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
  2013-09-12 10:38 ` [PATCH 3/4] pack v4: cache flattened v4 trees in delta base cache Nguyễn Thái Ngọc Duy
@ 2013-09-12 10:38 ` Nguyễn Thái Ngọc Duy
  2013-09-12 13:29 ` [PATCH 5/4] pack v4: convert v4 tree to canonical format if found in base cache Nguyễn Thái Ngọc Duy
  3 siblings, 0 replies; 12+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-09-12 10:38 UTC (permalink / raw)
  To: git; +Cc: Nicolas Pitre, Nguyễn Thái Ngọc Duy

"git rev-list --objects v1.8.4" time is reduced from 29s to 10s with
this patch. But it is still a long way to catch up with v2: 4s.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 The problem I see with decode_entries() is that given n copy
 sequences, it re-reads the same base n times. 30+ copy sequences are
 not unusual at all with git.git.

 I'm thinking of adding a cache to deal with one-base trees, which is
 all we have now. If we know in advance what base a tree needs without
 parsing the tree, we could unpack from base up like we do with
 ref-deltas. Because in this case we know the base is always flat, we
 could have a more efficient decode_entries that only goes through the
 base once. I want to get the timing down to as close as possible to
 v2 before adding v4-aware interface.

 Pack cache is an idea being cooked for a while by Jeff. Maybe we
 could merge his work to pack v4 or require it when pack v4 is finally
 merged to 'next'.

 packv4-parse.c | 17 +++++++++++++++--
 packv4-parse.h |  2 ++
 sha1_file.c    | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/packv4-parse.c b/packv4-parse.c
index 5002f42..b8855b0 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -415,8 +415,20 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 	unsigned int nb_entries;
 	const unsigned char *src, *scp;
 	off_t copy_objoffset = 0;
+	const void *cached = NULL;
+	unsigned long cached_size, cached_v4_size;
+
+	if (hdr)	      /* we need offset point at obj header */
+		cached = get_cached_v4_tree(p, offset,
+					    &cached_size, &cached_v4_size);
+
+	if (cached) {
+		src = cached;
+		avail = cached_v4_size;
+		hdr = 0;
+	} else
+		src = use_pack(p, w_curs, offset, &avail);
 
-	src = use_pack(p, w_curs, offset, &avail);
 	scp = src;
 
 	if (hdr) {
@@ -452,7 +464,8 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 	while (count) {
 		unsigned int what;
 
-		if (avail < 20) {
+		/* fixme: need to put back the out-of-bound check when cached == 1 */
+		if (!cached && avail < 20) {
 			src = use_pack(p, w_curs, offset, &avail);
 			if (avail < 20)
 				return -1;
diff --git a/packv4-parse.h b/packv4-parse.h
index 647b73c..f584c31 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -16,6 +16,8 @@ unsigned long pv4_unpack_object_header_buffer(const unsigned char *base,
 					      unsigned long *sizep);
 const unsigned char *get_sha1ref(struct packed_git *p,
 				 const unsigned char **bufp);
+const void *get_cached_v4_tree(struct packed_git *p, off_t base_offset,
+			 unsigned long *size, unsigned long *v4_size);
 
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 		     off_t offset, unsigned long size);
diff --git a/sha1_file.c b/sha1_file.c
index b176316..82570be 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1967,6 +1967,20 @@ static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
 	return eq_delta_base_cache_entry(ent, p, base_offset);
 }
 
+const void *get_cached_v4_tree(struct packed_git *p, off_t base_offset,
+			 unsigned long *size, unsigned long *v4_size)
+{
+	struct delta_base_cache_entry *ent;
+	ent = get_delta_base_cache_entry(p, base_offset);
+
+	if (!eq_delta_base_cache_entry(ent, p, base_offset) ||
+	    ent->type != OBJ_PV4_TREE)
+		return NULL;
+	*size = ent->size;
+	*v4_size = ent->v4_size;
+	return ent->data;
+}
+
 static void clear_delta_base_cache_entry(struct delta_base_cache_entry *ent)
 {
 	ent->data = NULL;
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/4] pack v4: convert v4 tree to canonical format if found in base cache
  2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
                   ` (2 preceding siblings ...)
  2013-09-12 10:38 ` [PATCH 4/4] pack v4: make use of cached v4 trees when unpacking Nguyễn Thái Ngọc Duy
@ 2013-09-12 13:29 ` Nguyễn Thái Ngọc Duy
  3 siblings, 0 replies; 12+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-09-12 13:29 UTC (permalink / raw)
  To: git; +Cc: Nicolas Pitre, Nguyễn Thái Ngọc Duy

"git log --stat -10000 v1.4.8 >/dev/null" takes 13s with v4 (8s with
v2). Of course we could do better when v4-aware tree-diff interface is
in place..

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Oops.. forgot this and broke git.

 Another option is change cache_or_unpack_entry() to force
 OBJ_PV4_TREE to go through unpack_entry(), then update pv4_get_tree() to
 lookup the base cache at the first decode_entries() call. Right now
 it does not (hdr == 0) so we need more processing.

 packv4-parse.c | 22 ++++++++++++++++++++++
 packv4-parse.h |  2 ++
 sha1_file.c    | 11 +++++++++++
 3 files changed, 35 insertions(+)

diff --git a/packv4-parse.c b/packv4-parse.c
index b8855b0..448c91e 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -461,6 +461,10 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 	avail -= scp - src;
 	src = scp;
 
+	/* special case for pv4_cached_tree_to_canonical() */
+	if (!count && cached)
+		count = nb_entries;
+
 	while (count) {
 		unsigned int what;
 
@@ -648,3 +652,21 @@ unsigned long pv4_unpack_object_header_buffer(const unsigned char *base,
 	*sizep = val >> 4;
 	return cp - base;
 }
+
+/* offset must already be cached! */
+void *pv4_cached_tree_to_canonical(struct packed_git *p, off_t offset,
+				   unsigned long size)
+{
+	int ret;
+	unsigned char *dst, *dcp;
+	unsigned char *v4_dstp = NULL;
+	dst = xmallocz(size);
+	dcp = dst;
+	ret = decode_entries(p, NULL, offset, 0, 0,
+			     &dcp, &size, &v4_dstp, NULL, NULL, 1);
+	if (ret < 0 || size != 0) {
+		free(dst);
+		return NULL;
+	}
+	return dst;
+}
diff --git a/packv4-parse.h b/packv4-parse.h
index f584c31..ad21e19 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -24,5 +24,7 @@ void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
 		   off_t offset, unsigned long size,
 		   void **v4_data, unsigned long *v4_size);
+void *pv4_cached_tree_to_canonical(struct packed_git *p, off_t offset,
+				   unsigned long size);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 82570be..0944ef6 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2000,6 +2000,17 @@ static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
 	if (!eq_delta_base_cache_entry(ent, p, base_offset))
 		return unpack_entry(p, base_offset, type, base_size);
 
+	if (ent->type == OBJ_PV4_TREE) {
+		ret = pv4_cached_tree_to_canonical(p, base_offset, ent->size);
+		if (!ret)
+			return NULL;
+		if (!keep_cache)
+			clear_delta_base_cache_entry(ent);
+		*type = OBJ_TREE;
+		*base_size = ent->size;
+		return ret;
+	}
+
 	ret = ent->data;
 
 	if (!keep_cache)
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
@ 2013-09-13 13:27   ` Nicolas Pitre
  2013-09-13 13:59     ` Duy Nguyen
  0 siblings, 1 reply; 12+ messages in thread
From: Nicolas Pitre @ 2013-09-13 13:27 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 7991 bytes --]

On Thu, 12 Sep 2013, Nguyễn Thái Ngọc Duy wrote:

> The intention is to store flat v4 trees in delta base cache to avoid
> repeatedly expanding copy sequences in v4 trees. When the user needs
> to unpack a v4 tree and the tree is found in the cache, the tree will
> be converted back to canonical format. Future tree_desc interface may
> skip canonical format and read v4 trees directly.
> 
> For that to work we need to keep track of v4 tree size after all copy
> sequences are expanded, which is the purpose of this new field.

Hmmm.... I think this is going in a wrong direction.

If we want a cache for pv4 objects, it would be best to keep it separate 
from the current delta cache in sha1_file.c for code maintainability 
reasons.  I'd suggest creating another cache in packv4-parse.c instead.

Yet, packv4 tree walking shouldn't need a cache since there is nothing 
to expand in the end.  Entries should be advanced one by one as they are 
needed.  Granted when converting back to a canonical object we need all 
of them, but eventually this shouldn't be the main mode of operation.

However I can see that, as you say, the same base object is repeatedly 
referenced.  This means that we need to parse it over and over again 
just to find the right offset where the needed entries start.  We 
probably end up skipping over the first entries in a tree object 
multiple times.  And that would be true even when the core code learns 
to walk pv4 trees directly.

So here's the beginning of a tree offset cache to mitigate this problem.  
It is incomplete as the cache function is not implemented, etc.  But 
that should give you the idea.


diff --git a/packv4-parse.c b/packv4-parse.c
index ae3e6a5..39b4f31 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -339,9 +339,9 @@ void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 	return dst;
 }
 
-static int copy_canonical_tree_entries(struct packed_git *p, off_t offset,
-				       unsigned int start, unsigned int count,
-				       unsigned char **dstp, unsigned long *sizep)
+static off_t copy_canonical_tree_entries(struct packed_git *p, off_t offset,
+				unsigned int start, unsigned int count,
+				unsigned char **dstp, unsigned long *sizep)
 {
 	void *data;
 	const unsigned char *from, *end;
@@ -369,13 +369,19 @@ static int copy_canonical_tree_entries(struct packed_git *p, off_t offset,
 
 	if (end - from > *sizep) {
 		free(data);
-		return -1;
+		return 0;
 	}
 	memcpy(*dstp, from, end - from);
 	*dstp += end - from;
 	*sizep -= end - from;
 	free(data);
-	return 0;
+
+
+	/*
+	 * We won't cache this tree's current offset so let's just
+	 * indicate success with a non-zero return value.
+	 */
+	return 1;
 }
 
 static int tree_entry_prefix(unsigned char *buf, unsigned long size,
@@ -404,39 +410,56 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size,
 	return len;
 }
 
-static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
-			  off_t offset, unsigned int start, unsigned int count,
-			  unsigned char **dstp, unsigned long *sizep, int hdr)
+static off_t decode_entries(struct packed_git *p, struct pack_window **w_curs,
+			off_t offset, unsigned int start, unsigned int count,
+			unsigned char **dstp, unsigned long *sizep, int hdr)
 {
-	unsigned long avail;
-	unsigned int nb_entries;
+	unsigned long avail = 0;
 	const unsigned char *src, *scp;
 	off_t copy_objoffset = 0;
 
-	src = use_pack(p, w_curs, offset, &avail);
-	scp = src;
-
 	if (hdr) {
+		unsigned int nb_entries;
+
+		src = use_pack(p, w_curs, offset, &avail);
+		scp = src;
+
 		/* we need to skip over the object header */
 		while (*scp & 128)
 			if (++scp - src >= avail - 20)
-				return -1;
+				return 0;
+
 		/* is this a canonical tree object? */
 		if ((*scp & 0xf) == OBJ_TREE)
 			return copy_canonical_tree_entries(p, offset,
 							   start, count,
 							   dstp, sizep);
+
 		/* let's still make sure this is actually a pv4 tree */
 		if ((*scp++ & 0xf) != OBJ_PV4_TREE)
-			return -1;
-	}
+			return 0;
+
+		nb_entries = decode_varint(&scp);
+		if (scp == src || start > nb_entries ||
+		    count > nb_entries - start)
+			return 0;
+
+		/*
+		 * If this is a partial copy, let's create a cache entry
+		 * to speed things up if the remaining of the tree is needed in
+		 * the future.
+		 */
+		if (start + count < nb_entries) {
+			c = get_tree_offset_cache(p, offset);
+			c->offset = offset + scp - src;
+			c->pos = 0;
+			c->nb_entries = nb_entries;
+		}
 
-	nb_entries = decode_varint(&scp);
-	if (scp == src || start > nb_entries || count > nb_entries - start)
-		return -1;
-	offset += scp - src;
-	avail -= scp - src;
-	src = scp;
+		offset += scp - src;
+		avail -= scp - src;
+		src = scp;
+	}
 
 	while (count) {
 		unsigned int what;
@@ -444,13 +467,13 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 		if (avail < 20) {
 			src = use_pack(p, w_curs, offset, &avail);
 			if (avail < 20)
-				return -1;
+				return 0;
 		}
 		scp = src;
 
 		what = decode_varint(&scp);
 		if (scp == src)
-			return -1;
+			return 0;
 
 		if (!(what & 1) && start != 0) {
 			/*
@@ -474,12 +497,12 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			path = get_pathref(p, what >> 1, &pathlen);
 			sha1 = get_sha1ref(p, &scp);
 			if (!path || !sha1)
-				return -1;
+				return 0;
 			mode = (path[0] << 8) | path[1];
 			len = tree_entry_prefix(*dstp, *sizep,
 						path + 2, pathlen - 2, mode);
 			if (!len || len + 20 > *sizep)
-				return -1;
+				return 0;
 			hashcpy(*dstp + len, sha1);
 			*dstp += len + 20;
 			*sizep -= len + 20;
@@ -493,7 +516,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			copy_start = what >> 1;
 			copy_count = decode_varint(&scp);
 			if (!copy_count)
-				return -1;
+				return 0;
 
 			/*
 			 * The LSB of copy_count is a flag indicating if
@@ -522,24 +545,41 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 				}
 			}
 			if (!copy_objoffset)
-				return -1;
+				return 0;
 			copy_count >>= 1;
 
 			if (start >= copy_count) {
 				start -= copy_count;
 			} else {
-				int ret;
+				off_t ret;
 				copy_count -= start;
 				copy_start += start;
 				start = 0;
 				if (copy_count > count)
 					copy_count = count;
 				count -= copy_count;
-				ret = decode_entries(p, w_curs,
-					copy_objoffset, copy_start, copy_count,
-					dstp, sizep, 1);
-				if (ret)
-					return ret;
+
+				c = get_tree_offset_cache(p, copy_objoffset);
+				if (c->offset &&
+				    copy_start >= c->pos &&
+				    copy_start < c->nb_entries &&
+				    copy_count <= c->nb_entries - copy_start) {
+					copy_start -= c->pos;
+					ret = decode_entries(
+							p, w_curs, c->offset,
+							copy_start, copy_count,
+							dstp, sizep, 0);
+					if (ret && c->pos + copy_count < c->nb_entries) {
+						c->offset = ret;
+						c->pos += copy_start + copy_count;
+					}
+				} else
+					ret = decode_entries(
+							p, w_curs, copy_objoffset,
+							copy_start, copy_count,
+							dstp, sizep, 1);
+				if (!ret)
+					return 0;
 				/* force pack window readjustment */
 				avail = scp - src;
 			}
@@ -550,7 +590,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 		src = scp;
 	}
 
-	return 0;
+	return offset;
 }
 
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
@@ -560,7 +600,7 @@ void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
 	unsigned int nb_entries;
 	unsigned char *dst, *dcp;
 	const unsigned char *src, *scp;
-	int ret;
+	off_t ret;
 
 	src = use_pack(p, w_curs, offset, &avail);
 	scp = src;
@@ -571,7 +611,7 @@ void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
 	dst = xmallocz(size);
 	dcp = dst;
 	ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0);
-	if (ret < 0 || size != 0) {
+	if (ret == 0 || size != 0) {
 		free(dst);
 		return NULL;
 	}

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-13 13:27   ` Nicolas Pitre
@ 2013-09-13 13:59     ` Duy Nguyen
  2013-09-14  2:06       ` Nicolas Pitre
  0 siblings, 1 reply; 12+ messages in thread
From: Duy Nguyen @ 2013-09-13 13:59 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Git Mailing List

On Fri, Sep 13, 2013 at 8:27 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> On Thu, 12 Sep 2013, Nguyễn Thái Ngọc Duy wrote:
>
>> The intention is to store flat v4 trees in delta base cache to avoid
>> repeatedly expanding copy sequences in v4 trees. When the user needs
>> to unpack a v4 tree and the tree is found in the cache, the tree will
>> be converted back to canonical format. Future tree_desc interface may
>> skip canonical format and read v4 trees directly.
>>
>> For that to work we need to keep track of v4 tree size after all copy
>> sequences are expanded, which is the purpose of this new field.
>
> Hmmm.... I think this is going in a wrong direction.

Good thing you caught me early. I was planning to implement a better
version of this on the weekend. And you are not wrong about code
maintainability, unpack_entry() so far looks very close to a real
mess.

> Yet, packv4 tree walking shouldn't need a cache since there is nothing
> to expand in the end.  Entries should be advanced one by one as they are
> needed.  Granted when converting back to a canonical object we need all
> of them, but eventually this shouldn't be the main mode of operation.

There's another case where one of the base trees is not v4 (the packer
is inefficient, like my index-pack --fix-thin). For trees with leading
zeros in entry mode, we can just do a lossy conversion to v4, but I
wonder if there is a case where we can't even convert to v4 and the v4
treewalk interface has to fall back to canonical format.. I guess that
can't happen.

> However I can see that, as you say, the same base object is repeatedly
> referenced.  This means that we need to parse it over and over again
> just to find the right offset where the needed entries start.  We
> probably end up skipping over the first entries in a tree object
> multiple times.  And that would be true even when the core code learns
> to walk pv4 trees directly.
>
> So here's the beginning of a tree offset cache to mitigate this problem.
> It is incomplete as the cache function is not implemented, etc.  But
> that should give you the idea.

Thanks. I'll have a closer look and maybe complete your patch.
-- 
Duy

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-13 13:59     ` Duy Nguyen
@ 2013-09-14  2:06       ` Nicolas Pitre
  2013-09-14  4:22         ` Nicolas Pitre
  0 siblings, 1 reply; 12+ messages in thread
From: Nicolas Pitre @ 2013-09-14  2:06 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Git Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3007 bytes --]

On Fri, 13 Sep 2013, Duy Nguyen wrote:

> On Fri, Sep 13, 2013 at 8:27 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> > On Thu, 12 Sep 2013, Nguyễn Thái Ngọc Duy wrote:
> >
> >> The intention is to store flat v4 trees in delta base cache to avoid
> >> repeatedly expanding copy sequences in v4 trees. When the user needs
> >> to unpack a v4 tree and the tree is found in the cache, the tree will
> >> be converted back to canonical format. Future tree_desc interface may
> >> skip canonical format and read v4 trees directly.
> >>
> >> For that to work we need to keep track of v4 tree size after all copy
> >> sequences are expanded, which is the purpose of this new field.
> >
> > Hmmm.... I think this is going in a wrong direction.
> 
> Good thing you caught me early. I was planning to implement a better
> version of this on the weekend. And you are not wrong about code
> maintainability, unpack_entry() so far looks very close to a real
> mess.

Yes.  We might have to break it into sub-functions.  However this has 
the potential to recurse rather deeply so it is necessary to limit its 
stack footprint as well.

> > Yet, packv4 tree walking shouldn't need a cache since there is nothing
> > to expand in the end.  Entries should be advanced one by one as they are
> > needed.  Granted when converting back to a canonical object we need all
> > of them, but eventually this shouldn't be the main mode of operation.
> 
> There's another case where one of the base trees is not v4 (the packer
> is inefficient, like my index-pack --fix-thin). For trees with leading
> zeros in entry mode, we can just do a lossy conversion to v4, but I
> wonder if there is a case where we can't even convert to v4 and the v4
> treewalk interface has to fall back to canonical format.. I guess that
> can't happen.

Well... There is little gain in converting loose tree objects into pv4 
format so there will always be a place for the canonical tree parser.

Eventually the base trees added by index-pack should be pv4 trees.  The 
only case where this might not work is for zero padded mode trees and we 
don't have to optimize for that i.e. a fallback to the canonical tree 
format will be good enough.

> > However I can see that, as you say, the same base object is repeatedly
> > referenced.  This means that we need to parse it over and over again
> > just to find the right offset where the needed entries start.  We
> > probably end up skipping over the first entries in a tree object
> > multiple times.  And that would be true even when the core code learns
> > to walk pv4 trees directly.
> >
> > So here's the beginning of a tree offset cache to mitigate this problem.
> > It is incomplete as the cache function is not implemented, etc.  But
> > that should give you the idea.
> 
> Thanks. I'll have a closer look and maybe complete your patch.

I've committed an almost final patch and pushed it out.  It is disabled 
right now due to some bug I've not found yet.  But you can have a look.


Nicolas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-14  2:06       ` Nicolas Pitre
@ 2013-09-14  4:22         ` Nicolas Pitre
  2013-09-15  7:35           ` Duy Nguyen
  0 siblings, 1 reply; 12+ messages in thread
From: Nicolas Pitre @ 2013-09-14  4:22 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Git Mailing List

On Fri, 13 Sep 2013, Nicolas Pitre wrote:

> On Fri, 13 Sep 2013, Duy Nguyen wrote:
> 
> > On Fri, Sep 13, 2013 at 8:27 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> > > However I can see that, as you say, the same base object is repeatedly
> > > referenced.  This means that we need to parse it over and over again
> > > just to find the right offset where the needed entries start.  We
> > > probably end up skipping over the first entries in a tree object
> > > multiple times.  And that would be true even when the core code learns
> > > to walk pv4 trees directly.
> > >
> > > So here's the beginning of a tree offset cache to mitigate this problem.
> > > It is incomplete as the cache function is not implemented, etc.  But
> > > that should give you the idea.
> > 
> > Thanks. I'll have a closer look and maybe complete your patch.
> 
> I've committed an almost final patch and pushed it out.  It is disabled 
> right now due to some bug I've not found yet.  But you can have a look.

Found the bug.

The cache is currently updated by the caller.  The caller may ask for a 
copy of 2 entries from a base object, but that base object may itself 
copy those objects from somewhere else in a larger chunk.

Let's consider this example:

tree A
------
0 (0) copy 2 entries from tree B starting at entry 0
1 (2) copy 1 entry from tree B starting at entry 3

tree B
------
0 (0) copy 6 entries from tree C starting at entry 0
1 (6) entry "foo.txt"
2 (7) entry "bar.txt"

Right now, the code calls decode_entries() to decode 2 entries from tree 
B but those entries are part of a copy from tree C.  When that call 
returns, the cache is updated as if tree B entry #2 would start at 
offset 1 but this is wrong because offset 0 in tree B covers 6 entries 
and therefore offset 1 is for entry #6.

So this needs a rethink.

I won't be able to work on this for a few days.


Nicolas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-14  4:22         ` Nicolas Pitre
@ 2013-09-15  7:35           ` Duy Nguyen
  2013-09-16  4:42             ` Nicolas Pitre
  0 siblings, 1 reply; 12+ messages in thread
From: Duy Nguyen @ 2013-09-15  7:35 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Git Mailing List

On Sat, Sep 14, 2013 at 11:22 AM, Nicolas Pitre <nico@fluxnic.net> wrote:
> The cache is currently updated by the caller.  The caller may ask for a
> copy of 2 entries from a base object, but that base object may itself
> copy those objects from somewhere else in a larger chunk.
>
> Let's consider this example:
>
> tree A
> ------
> 0 (0) copy 2 entries from tree B starting at entry 0
> 1 (2) copy 1 entry from tree B starting at entry 3
>
> tree B
> ------
> 0 (0) copy 6 entries from tree C starting at entry 0
> 1 (6) entry "foo.txt"
> 2 (7) entry "bar.txt"
>
> Right now, the code calls decode_entries() to decode 2 entries from tree
> B but those entries are part of a copy from tree C.  When that call
> returns, the cache is updated as if tree B entry #2 would start at
> offset 1 but this is wrong because offset 0 in tree B covers 6 entries
> and therefore offset 1 is for entry #6.
>
> So this needs a rethink.

I've given it some thought and see no simple/efficient way to do it when
2+ depth is involved. Ideally tree A should refer to tree C directly
for the first two entries, but in general we can't enforce that a copy
sequence must refer to non-copy sequences only. Caching flattened tree
B up until the 6th entry may help, but then there's no need to cache
offsets anymore because we could just cache tree A..
--
Duy

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-15  7:35           ` Duy Nguyen
@ 2013-09-16  4:42             ` Nicolas Pitre
  2013-09-16  5:24               ` Duy Nguyen
  0 siblings, 1 reply; 12+ messages in thread
From: Nicolas Pitre @ 2013-09-16  4:42 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Git Mailing List

On Sun, 15 Sep 2013, Duy Nguyen wrote:

> On Sat, Sep 14, 2013 at 11:22 AM, Nicolas Pitre <nico@fluxnic.net> wrote:
> > The cache is currently updated by the caller.  The caller may ask for a
> > copy of 2 entries from a base object, but that base object may itself
> > copy those objects from somewhere else in a larger chunk.
> >
> > Let's consider this example:
> >
> > tree A
> > ------
> > 0 (0) copy 2 entries from tree B starting at entry 0
> > 1 (2) copy 1 entry from tree B starting at entry 3
> >
> > tree B
> > ------
> > 0 (0) copy 6 entries from tree C starting at entry 0
> > 1 (6) entry "foo.txt"
> > 2 (7) entry "bar.txt"
> >
> > Right now, the code calls decode_entries() to decode 2 entries from tree
> > B but those entries are part of a copy from tree C.  When that call
> > returns, the cache is updated as if tree B entry #2 would start at
> > offset 1 but this is wrong because offset 0 in tree B covers 6 entries
> > and therefore offset 1 is for entry #6.
> >
> > So this needs a rethink.
> 
> I've given it some thought and see no simple/efficient way to do it when
> 2+ depth is involved. Ideally tree A should refer to tree C directly
> for the first two entries, but in general we can't enforce that a copy
> sequence must refer to non-copy sequences only. Caching flattened tree
> B up until the 6th entry may help, but then there's no need to cache
> offsets anymore because we could just cache tree A..

Well... for the case where tree A should refer to tree C directly, that 
should be optimized by packv4-create/pack-objects.

But as far as my offset cache is concerned, I came around to rethink it.
I managed to write decent and logical code this time.
The speed-up is significant even without any tuning.  Here it is:

commit aa43ec18956a2c835112f086a0a59d7fbc35a021
Author: Nicolas Pitre <nico@fluxnic.net>
Date:   Fri Sep 13 20:56:31 2013 -0400

    packv4-parse.c: add tree offset caching
    
    It is a common pattern to see a tree object copy a few entries from another
    tree object, possibly from a non zero offset, then provide a few entries
    of its own just to go back to the previous tree object to copy some more
    entries.  Each time this happens, the tree object being copied has to be
    parsed from the beginning over again up to the desired offset which is
    rather wasteful.
    
    Let's introduce a tree offset cache to record the entry position and offset
    when tree parsing stops so a subsequent copy from the same tree object
    can be resumed without having to start from the beginning all the time.
    
    Without doing further tuning on this cache, performing "git rev-list --all
    --objects | wc -l" on my copy of git.git went from 14.5s down to 6.6s.
    
    Signed-off-by: Nicolas Pitre <nico@fluxnic.net>

diff --git a/packv4-parse.c b/packv4-parse.c
index 38c22b0..b8af702 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -378,6 +378,40 @@ static int copy_canonical_tree_entries(struct packed_git *p, off_t offset,
 	return 0;
 }
 
+/* ordering is so that member alignment takes the least amount of space */
+struct pv4_tree_cache {
+	off_t base_offset;
+	off_t offset;
+	off_t last_copy_base;
+	struct packed_git *p;
+	unsigned int pos;
+	unsigned int nb_entries;
+};
+
+#define CACHE_SIZE 256
+static struct pv4_tree_cache pv4_tree_cache[CACHE_SIZE];
+
+static struct pv4_tree_cache *get_tree_offset_cache(struct packed_git *p, off_t base_offset)
+{
+	struct pv4_tree_cache *c;
+	unsigned long hash;
+
+	hash = (unsigned long)p + (unsigned long)base_offset;
+	hash += (hash >> 8) + (hash >> 16);
+	hash %= CACHE_SIZE;
+
+	c = &pv4_tree_cache[hash];
+	if (c->p != p || c->base_offset != base_offset) {
+		c->p = p;
+		c->base_offset = base_offset;
+		c->offset = 0;
+		c->last_copy_base = 0;
+		c->pos = 0;
+		c->nb_entries = 0;
+	}
+	return c;
+}
+
 static int tree_entry_prefix(unsigned char *buf, unsigned long size,
 			     const unsigned char *path, int path_len,
 			     unsigned mode)
@@ -405,39 +439,72 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size,
 }
 
 static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
-			  off_t offset, unsigned int start, unsigned int count,
-			  unsigned char **dstp, unsigned long *sizep,
-			  int parse_hdr)
+			  off_t obj_offset, unsigned int start, unsigned int count,
+			  unsigned char **dstp, unsigned long *sizep)
 {
 	unsigned long avail;
-	unsigned int nb_entries;
 	const unsigned char *src, *scp;
-	off_t copy_objoffset = 0;
+	unsigned int curpos;
+	off_t offset, copy_objoffset;
+	struct pv4_tree_cache *c;
+
+	c = get_tree_offset_cache(p, obj_offset);
+	if (count && start < c->nb_entries && start >= c->pos &&
+	    count <= c->nb_entries - start) {
+		offset = c->offset;
+		copy_objoffset = c->last_copy_base;
+		curpos = c->pos;
+		start -= curpos;
+		src = NULL;
+		avail = 0;
+	} else {
+		unsigned int nb_entries;
 
-	src = use_pack(p, w_curs, offset, &avail);
-	scp = src;
+		src = use_pack(p, w_curs, obj_offset, &avail);
+		scp = src;
 
-	if (parse_hdr) {
 		/* we need to skip over the object header */
 		while (*scp & 128)
 			if (++scp - src >= avail - 20)
 				return -1;
+
 		/* is this a canonical tree object? */
-		if ((*scp & 0xf) == OBJ_TREE)
+		if ((*scp & 0xf) == OBJ_TREE) {
+			offset = obj_offset + (scp - src);
 			return copy_canonical_tree_entries(p, offset,
 							   start, count,
 							   dstp, sizep);
+		}
+
 		/* let's still make sure this is actually a pv4 tree */
 		if ((*scp++ & 0xf) != OBJ_PV4_TREE)
 			return -1;
-	}
 
-	nb_entries = decode_varint(&scp);
-	if (scp == src || start > nb_entries || count > nb_entries - start)
-		return -1;
-	offset += scp - src;
-	avail -= scp - src;
-	src = scp;
+		nb_entries = decode_varint(&scp);
+		if (!count)
+			count = nb_entries;
+		if (!nb_entries || start > nb_entries ||
+		    count > nb_entries - start)
+			return -1;
+
+		curpos = 0;
+		copy_objoffset = 0;
+		offset = obj_offset + scp - src;
+		avail -= scp - src;
+		src = scp;
+
+		/*
+		 * If this is a partial copy, let's (re)initialize a cache
+		 * entry to speed things up if the remaining of this tree
+		 * is needed in the future.
+		 */
+		if (start + count < nb_entries) {
+			c->offset = offset;
+			c->pos = 0;
+			c->nb_entries = nb_entries;
+			c->last_copy_base = 0;
+		}
+	}
 
 	while (count) {
 		unsigned int what;
@@ -464,6 +531,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			else
 				while (*scp++ & 128);
 			start--;
+			curpos++;
 		} else if (!(what & 1) && start == 0) {
 			/*
 			 * This is an actual tree entry to recreate.
@@ -485,6 +553,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			*dstp += len + 20;
 			*sizep -= len + 20;
 			count--;
+			curpos++;
 		} else if (what & 1) {
 			/*
 			 * Copy from another tree object.
@@ -522,25 +591,34 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 						nth_packed_object_offset(p, index - 1);
 				}
 			}
-			if (!copy_objoffset)
-				return -1;
 			copy_count >>= 1;
+			if (!copy_count || !copy_objoffset)
+				return -1;
 
 			if (start >= copy_count) {
 				start -= copy_count;
+				curpos += copy_count;
 			} else {
 				int ret;
+
 				copy_count -= start;
 				copy_start += start;
-				start = 0;
-				if (copy_count > count)
+				if (copy_count > count) {
 					copy_count = count;
-				count -= copy_count;
-				ret = decode_entries(p, w_curs,
-					copy_objoffset, copy_start, copy_count,
-					dstp, sizep, 1);
+					count = 0;
+					scp = src;
+				} else {
+					count -= copy_count;
+					curpos += start + copy_count;
+					start = 0;
+				}
+
+				ret = decode_entries(p, w_curs, copy_objoffset,
+						     copy_start, copy_count,
+						     dstp, sizep);
 				if (ret)
 					return ret;
+
 				/* force pack window readjustment */
 				avail = scp - src;
 			}
@@ -551,27 +629,30 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 		src = scp;
 	}
 
+	/*
+	 * Update the cache if we didn't run through the entire tree.
+	 * We have to "get" it again as a recursion into decode_entries()
+	 * could have invalidated what we obtained initially.
+	 */
+	c = get_tree_offset_cache(p, obj_offset);
+	if (curpos < c->nb_entries) {
+		c->pos = curpos;
+		c->offset = offset;
+		c->last_copy_base = copy_objoffset;
+	}
+						
 	return 0;
 }
 
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size)
+		   off_t obj_offset, unsigned long size)
 {
-	unsigned long avail;
-	unsigned int nb_entries;
 	unsigned char *dst, *dcp;
-	const unsigned char *src, *scp;
 	int ret;
 
-	src = use_pack(p, w_curs, offset, &avail);
-	scp = src;
-	nb_entries = decode_varint(&scp);
-	if (scp == src)
-		return NULL;
-
 	dst = xmallocz(size);
 	dcp = dst;
-	ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0);
+	ret = decode_entries(p, w_curs, obj_offset, 0, 0, &dcp, &size);
 	if (ret < 0 || size != 0) {
 		free(dst);
 		return NULL;
diff --git a/packv4-parse.h b/packv4-parse.h
index d674a3f..b437159 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -20,6 +20,6 @@ const unsigned char *get_sha1ref(struct packed_git *p,
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 		     off_t offset, unsigned long size);
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size);
+		   off_t obj_offset, unsigned long size);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 038e22e..e98eb8b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2178,7 +2178,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 		type -= 8;
 		break;
 	case OBJ_PV4_TREE:
-		data = pv4_get_tree(p, &w_curs, curpos, size);
+		data = pv4_get_tree(p, &w_curs, obj_offset, size);
 		type -= 8;
 		break;
 	case OBJ_COMMIT:


Nicolas

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry
  2013-09-16  4:42             ` Nicolas Pitre
@ 2013-09-16  5:24               ` Duy Nguyen
  0 siblings, 0 replies; 12+ messages in thread
From: Duy Nguyen @ 2013-09-16  5:24 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Git Mailing List

On Mon, Sep 16, 2013 at 11:42 AM, Nicolas Pitre <nico@fluxnic.net> wrote:
> On Sun, 15 Sep 2013, Duy Nguyen wrote:
>
>> On Sat, Sep 14, 2013 at 11:22 AM, Nicolas Pitre <nico@fluxnic.net> wrote:
>> > The cache is currently updated by the caller.  The caller may ask for a
>> > copy of 2 entries from a base object, but that base object may itself
>> > copy those objects from somewhere else in a larger chunk.
>> >
>> > Let's consider this example:
>> >
>> > tree A
>> > ------
>> > 0 (0) copy 2 entries from tree B starting at entry 0
>> > 1 (2) copy 1 entry from tree B starting at entry 3
>> >
>> > tree B
>> > ------
>> > 0 (0) copy 6 entries from tree C starting at entry 0
>> > 1 (6) entry "foo.txt"
>> > 2 (7) entry "bar.txt"
>> >
>> > Right now, the code calls decode_entries() to decode 2 entries from tree
>> > B but those entries are part of a copy from tree C.  When that call
>> > returns, the cache is updated as if tree B entry #2 would start at
>> > offset 1 but this is wrong because offset 0 in tree B covers 6 entries
>> > and therefore offset 1 is for entry #6.
>> >
>> > So this needs a rethink.
>>
>> I've given it some thought and see no simple/efficient way to do it when
>> 2+ depth is involved. Ideally tree A should refer to tree C directly
>> for the first two entries, but in general we can't enforce that a copy
>> sequence must refer to non-copy sequences only. Caching flattened tree
>> B up until the 6th entry may help, but then there's no need to cache
>> offsets anymore because we could just cache tree A..
>
> Well... for the case where tree A should refer to tree C directly, that
> should be optimized by packv4-create/pack-objects.
>
> But as far as my offset cache is concerned, I came around to rethink it.
> I managed to write decent and logical code this time.

OK if I read your patch correctly, in the example above, after
processing tree A entry #0, the position of tree B entry #0 is cached
(instead of #6). When tree A #2 is processed, we check out the cache
and parse tree B's copy sequence again, which leads to a cached
position of tree C, correct? We still have to jump through tree B
unnnecessarily, but that can be dealt with later. Good thing that it
works, and the perf improvement is impressive too. I think we now have
the core of "struct pv4_tree_desc" :)
-- 
Duy

> The speed-up is significant even without any tuning.  Here it is:
>
> commit aa43ec18956a2c835112f086a0a59d7fbc35a021
> Author: Nicolas Pitre <nico@fluxnic.net>
> Date:   Fri Sep 13 20:56:31 2013 -0400
>
>     packv4-parse.c: add tree offset caching
>
>     It is a common pattern to see a tree object copy a few entries from another
>     tree object, possibly from a non zero offset, then provide a few entries
>     of its own just to go back to the previous tree object to copy some more
>     entries.  Each time this happens, the tree object being copied has to be
>     parsed from the beginning over again up to the desired offset which is
>     rather wasteful.
>
>     Let's introduce a tree offset cache to record the entry position and offset
>     when tree parsing stops so a subsequent copy from the same tree object
>     can be resumed without having to start from the beginning all the time.
>
>     Without doing further tuning on this cache, performing "git rev-list --all
>     --objects | wc -l" on my copy of git.git went from 14.5s down to 6.6s.
>
>     Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
>
> diff --git a/packv4-parse.c b/packv4-parse.c
> index 38c22b0..b8af702 100644
> --- a/packv4-parse.c
> +++ b/packv4-parse.c
> @@ -378,6 +378,40 @@ static int copy_canonical_tree_entries(struct packed_git *p, off_t offset,
>         return 0;
>  }
>
> +/* ordering is so that member alignment takes the least amount of space */
> +struct pv4_tree_cache {
> +       off_t base_offset;
> +       off_t offset;
> +       off_t last_copy_base;
> +       struct packed_git *p;
> +       unsigned int pos;
> +       unsigned int nb_entries;
> +};
> +
> +#define CACHE_SIZE 256
> +static struct pv4_tree_cache pv4_tree_cache[CACHE_SIZE];
> +
> +static struct pv4_tree_cache *get_tree_offset_cache(struct packed_git *p, off_t base_offset)
> +{
> +       struct pv4_tree_cache *c;
> +       unsigned long hash;
> +
> +       hash = (unsigned long)p + (unsigned long)base_offset;
> +       hash += (hash >> 8) + (hash >> 16);
> +       hash %= CACHE_SIZE;
> +
> +       c = &pv4_tree_cache[hash];
> +       if (c->p != p || c->base_offset != base_offset) {
> +               c->p = p;
> +               c->base_offset = base_offset;
> +               c->offset = 0;
> +               c->last_copy_base = 0;
> +               c->pos = 0;
> +               c->nb_entries = 0;
> +       }
> +       return c;
> +}
> +
>  static int tree_entry_prefix(unsigned char *buf, unsigned long size,
>                              const unsigned char *path, int path_len,
>                              unsigned mode)
> @@ -405,39 +439,72 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size,
>  }
>
>  static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
> -                         off_t offset, unsigned int start, unsigned int count,
> -                         unsigned char **dstp, unsigned long *sizep,
> -                         int parse_hdr)
> +                         off_t obj_offset, unsigned int start, unsigned int count,
> +                         unsigned char **dstp, unsigned long *sizep)
>  {
>         unsigned long avail;
> -       unsigned int nb_entries;
>         const unsigned char *src, *scp;
> -       off_t copy_objoffset = 0;
> +       unsigned int curpos;
> +       off_t offset, copy_objoffset;
> +       struct pv4_tree_cache *c;
> +
> +       c = get_tree_offset_cache(p, obj_offset);
> +       if (count && start < c->nb_entries && start >= c->pos &&
> +           count <= c->nb_entries - start) {
> +               offset = c->offset;
> +               copy_objoffset = c->last_copy_base;
> +               curpos = c->pos;
> +               start -= curpos;
> +               src = NULL;
> +               avail = 0;
> +       } else {
> +               unsigned int nb_entries;
>
> -       src = use_pack(p, w_curs, offset, &avail);
> -       scp = src;
> +               src = use_pack(p, w_curs, obj_offset, &avail);
> +               scp = src;
>
> -       if (parse_hdr) {
>                 /* we need to skip over the object header */
>                 while (*scp & 128)
>                         if (++scp - src >= avail - 20)
>                                 return -1;
> +
>                 /* is this a canonical tree object? */
> -               if ((*scp & 0xf) == OBJ_TREE)
> +               if ((*scp & 0xf) == OBJ_TREE) {
> +                       offset = obj_offset + (scp - src);
>                         return copy_canonical_tree_entries(p, offset,
>                                                            start, count,
>                                                            dstp, sizep);
> +               }
> +
>                 /* let's still make sure this is actually a pv4 tree */
>                 if ((*scp++ & 0xf) != OBJ_PV4_TREE)
>                         return -1;
> -       }
>
> -       nb_entries = decode_varint(&scp);
> -       if (scp == src || start > nb_entries || count > nb_entries - start)
> -               return -1;
> -       offset += scp - src;
> -       avail -= scp - src;
> -       src = scp;
> +               nb_entries = decode_varint(&scp);
> +               if (!count)
> +                       count = nb_entries;
> +               if (!nb_entries || start > nb_entries ||
> +                   count > nb_entries - start)
> +                       return -1;
> +
> +               curpos = 0;
> +               copy_objoffset = 0;
> +               offset = obj_offset + scp - src;
> +               avail -= scp - src;
> +               src = scp;
> +
> +               /*
> +                * If this is a partial copy, let's (re)initialize a cache
> +                * entry to speed things up if the remaining of this tree
> +                * is needed in the future.
> +                */
> +               if (start + count < nb_entries) {
> +                       c->offset = offset;
> +                       c->pos = 0;
> +                       c->nb_entries = nb_entries;
> +                       c->last_copy_base = 0;
> +               }
> +       }
>
>         while (count) {
>                 unsigned int what;
> @@ -464,6 +531,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
>                         else
>                                 while (*scp++ & 128);
>                         start--;
> +                       curpos++;
>                 } else if (!(what & 1) && start == 0) {
>                         /*
>                          * This is an actual tree entry to recreate.
> @@ -485,6 +553,7 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
>                         *dstp += len + 20;
>                         *sizep -= len + 20;
>                         count--;
> +                       curpos++;
>                 } else if (what & 1) {
>                         /*
>                          * Copy from another tree object.
> @@ -522,25 +591,34 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
>                                                 nth_packed_object_offset(p, index - 1);
>                                 }
>                         }
> -                       if (!copy_objoffset)
> -                               return -1;
>                         copy_count >>= 1;
> +                       if (!copy_count || !copy_objoffset)
> +                               return -1;
>
>                         if (start >= copy_count) {
>                                 start -= copy_count;
> +                               curpos += copy_count;
>                         } else {
>                                 int ret;
> +
>                                 copy_count -= start;
>                                 copy_start += start;
> -                               start = 0;
> -                               if (copy_count > count)
> +                               if (copy_count > count) {
>                                         copy_count = count;
> -                               count -= copy_count;
> -                               ret = decode_entries(p, w_curs,
> -                                       copy_objoffset, copy_start, copy_count,
> -                                       dstp, sizep, 1);
> +                                       count = 0;
> +                                       scp = src;
> +                               } else {
> +                                       count -= copy_count;
> +                                       curpos += start + copy_count;
> +                                       start = 0;
> +                               }
> +
> +                               ret = decode_entries(p, w_curs, copy_objoffset,
> +                                                    copy_start, copy_count,
> +                                                    dstp, sizep);
>                                 if (ret)
>                                         return ret;
> +
>                                 /* force pack window readjustment */
>                                 avail = scp - src;
>                         }
> @@ -551,27 +629,30 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
>                 src = scp;
>         }
>
> +       /*
> +        * Update the cache if we didn't run through the entire tree.
> +        * We have to "get" it again as a recursion into decode_entries()
> +        * could have invalidated what we obtained initially.
> +        */
> +       c = get_tree_offset_cache(p, obj_offset);
> +       if (curpos < c->nb_entries) {
> +               c->pos = curpos;
> +               c->offset = offset;
> +               c->last_copy_base = copy_objoffset;
> +       }
> +
>         return 0;
>  }
>
>  void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
> -                  off_t offset, unsigned long size)
> +                  off_t obj_offset, unsigned long size)
>  {
> -       unsigned long avail;
> -       unsigned int nb_entries;
>         unsigned char *dst, *dcp;
> -       const unsigned char *src, *scp;
>         int ret;
>
> -       src = use_pack(p, w_curs, offset, &avail);
> -       scp = src;
> -       nb_entries = decode_varint(&scp);
> -       if (scp == src)
> -               return NULL;
> -
>         dst = xmallocz(size);
>         dcp = dst;
> -       ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0);
> +       ret = decode_entries(p, w_curs, obj_offset, 0, 0, &dcp, &size);
>         if (ret < 0 || size != 0) {
>                 free(dst);
>                 return NULL;
> diff --git a/packv4-parse.h b/packv4-parse.h
> index d674a3f..b437159 100644
> --- a/packv4-parse.h
> +++ b/packv4-parse.h
> @@ -20,6 +20,6 @@ const unsigned char *get_sha1ref(struct packed_git *p,
>  void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
>                      off_t offset, unsigned long size);
>  void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
> -                  off_t offset, unsigned long size);
> +                  off_t obj_offset, unsigned long size);
>
>  #endif
> diff --git a/sha1_file.c b/sha1_file.c
> index 038e22e..e98eb8b 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2178,7 +2178,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
>                 type -= 8;
>                 break;
>         case OBJ_PV4_TREE:
> -               data = pv4_get_tree(p, &w_curs, curpos, size);
> +               data = pv4_get_tree(p, &w_curs, obj_offset, size);
>                 type -= 8;
>                 break;
>         case OBJ_COMMIT:
>
>
> Nicolas

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-09-16  5:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
2013-09-13 13:27   ` Nicolas Pitre
2013-09-13 13:59     ` Duy Nguyen
2013-09-14  2:06       ` Nicolas Pitre
2013-09-14  4:22         ` Nicolas Pitre
2013-09-15  7:35           ` Duy Nguyen
2013-09-16  4:42             ` Nicolas Pitre
2013-09-16  5:24               ` Duy Nguyen
2013-09-12 10:38 ` [PATCH 3/4] pack v4: cache flattened v4 trees in delta base cache Nguyễn Thái Ngọc Duy
2013-09-12 10:38 ` [PATCH 4/4] pack v4: make use of cached v4 trees when unpacking Nguyễn Thái Ngọc Duy
2013-09-12 13:29 ` [PATCH 5/4] pack v4: convert v4 tree to canonical format if found in base cache Nguyễn Thái Ngọc Duy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.