git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 22/32] split-index: the reading part
Date: Mon, 28 Apr 2014 17:55:43 +0700	[thread overview]
Message-ID: <1398682553-11634-23-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1398682553-11634-1-git-send-email-pclouds@gmail.com>

CE_REMOVE'd entries are removed here because only parts of the code
base (unpack_trees in fact) test this bit when they look for the
presence of an entry. Leaving them may confuse the code ignores this
bit and expects to see a real entry.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 read-cache.c  |  2 --
 split-index.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 split-index.h |  2 ++
 3 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index a717171..a5517bf 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1569,8 +1569,6 @@ int read_index_from(struct index_state *istate, const char *path)
 
 	if (is_null_sha1(split_index->base_sha1))
 		return ret;
-	if (istate->cache_nr)
-		die("index in split-index mode must contain no entries");
 
 	if (split_index->base)
 		discard_index(split_index->base);
diff --git a/split-index.c b/split-index.c
index 5708807..b03a250 100644
--- a/split-index.c
+++ b/split-index.c
@@ -16,13 +16,27 @@ int read_link_extension(struct index_state *istate,
 {
 	const unsigned char *data = data_;
 	struct split_index *si;
+	int ret;
+
 	if (sz < 20)
 		return error("corrupt link extension (too short)");
 	si = init_split_index(istate);
 	hashcpy(si->base_sha1, data);
 	data += 20;
 	sz -= 20;
-	if (sz)
+	if (!sz)
+		return 0;
+	si->delete_bitmap = ewah_new();
+	ret = ewah_read_mmap(si->delete_bitmap, data, sz);
+	if (ret < 0)
+		return error("corrupt delete bitmap in link extension");
+	data += ret;
+	sz -= ret;
+	si->replace_bitmap = ewah_new();
+	ret = ewah_read_mmap(si->replace_bitmap, data, sz);
+	if (ret < 0)
+		return error("corrupt replace bitmap in link extension");
+	if (ret != sz)
 		return error("garbage at the end of link extension");
 	return 0;
 }
@@ -60,15 +74,81 @@ static void mark_base_index_entries(struct index_state *base)
 		base->cache[i]->index = i + 1;
 }
 
+static void mark_entry_for_delete(size_t pos, void *data)
+{
+	struct index_state *istate = data;
+	if (pos >= istate->cache_nr)
+		die("position for delete %d exceeds base index size %d",
+		    (int)pos, istate->cache_nr);
+	istate->cache[pos]->ce_flags |= CE_REMOVE;
+	istate->split_index->nr_deletions = 1;
+}
+
+static void replace_entry(size_t pos, void *data)
+{
+	struct index_state *istate = data;
+	struct split_index *si = istate->split_index;
+	struct cache_entry *dst, *src;
+	if (pos >= istate->cache_nr)
+		die("position for replacement %d exceeds base index size %d",
+		    (int)pos, istate->cache_nr);
+	if (si->nr_replacements >= si->saved_cache_nr)
+		die("too many replacements (%d vs %d)",
+		    si->nr_replacements, si->saved_cache_nr);
+	dst = istate->cache[pos];
+	if (dst->ce_flags & CE_REMOVE)
+		die("entry %d is marked as both replaced and deleted",
+		    (int)pos);
+	src = si->saved_cache[si->nr_replacements];
+	src->index = pos + 1;
+	src->ce_flags |= CE_UPDATE_IN_BASE;
+	free(dst);
+	dst = src;
+	si->nr_replacements++;
+}
+
 void merge_base_index(struct index_state *istate)
 {
 	struct split_index *si = istate->split_index;
+	unsigned int i;
 
 	mark_base_index_entries(si->base);
-	istate->cache_nr = si->base->cache_nr;
+
+	si->saved_cache	    = istate->cache;
+	si->saved_cache_nr  = istate->cache_nr;
+	istate->cache_nr    = si->base->cache_nr;
+	istate->cache	    = NULL;
+	istate->cache_alloc = 0;
 	ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
 	memcpy(istate->cache, si->base->cache,
 	       sizeof(*istate->cache) * istate->cache_nr);
+
+	si->nr_deletions = 0;
+	si->nr_replacements = 0;
+	ewah_each_bit(si->replace_bitmap, replace_entry, istate);
+	ewah_each_bit(si->delete_bitmap, mark_entry_for_delete, istate);
+	if (si->nr_deletions)
+		remove_marked_cache_entries(istate);
+
+	for (i = si->nr_replacements; i < si->saved_cache_nr; i++) {
+		add_index_entry(istate, si->saved_cache[i],
+				ADD_CACHE_OK_TO_ADD |
+				/*
+				 * we may have to replay what
+				 * merge-recursive.c:update_stages()
+				 * does, which has this flag on
+				 */
+				ADD_CACHE_SKIP_DFCHECK);
+		si->saved_cache[i] = NULL;
+	}
+
+	ewah_free(si->delete_bitmap);
+	ewah_free(si->replace_bitmap);
+	free(si->saved_cache);
+	si->delete_bitmap  = NULL;
+	si->replace_bitmap = NULL;
+	si->saved_cache	   = NULL;
+	si->saved_cache_nr = 0;
 }
 
 void prepare_to_write_split_index(struct index_state *istate)
diff --git a/split-index.h b/split-index.h
index 53b778f..c1324f5 100644
--- a/split-index.h
+++ b/split-index.h
@@ -12,6 +12,8 @@ struct split_index {
 	struct ewah_bitmap *replace_bitmap;
 	struct cache_entry **saved_cache;
 	unsigned int saved_cache_nr;
+	unsigned int nr_deletions;
+	unsigned int nr_replacements;
 	int refcount;
 };
 
-- 
1.9.1.346.ga2b5940

  parent reply	other threads:[~2014-04-28 10:56 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-28 10:55 [PATCH 00/32] Split index mode for very large indexes Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 01/32] ewah: fix constness of ewah_read_mmap Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 02/32] ewah: delete unused ewah_read_mmap_native declaration Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 03/32] sequencer: do not update/refresh index if the lock cannot be held Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 04/32] read-cache: new API write_locked_index instead of write_index/write_cache Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 05/32] read-cache: relocate and unexport commit_locked_index() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 06/32] read-cache: store in-memory flags in the first 12 bits of ce_flags Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 07/32] read-cache: be strict about "changed" in remove_marked_cache_entries() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 08/32] read-cache: be specific what part of the index has changed Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 09/32] update-index: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 10/32] resolve-undo: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 11/32] unpack-trees: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 12/32] cache-tree: mark istate->cache_changed on cache tree invalidation Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 13/32] cache-tree: mark istate->cache_changed on cache tree update Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 14/32] cache-tree: mark istate->cache_changed on prime_cache_tree() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 15/32] entry.c: update cache_changed if refresh_cache is set in checkout_entry() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 16/32] read-cache: save index SHA-1 after reading Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 17/32] read-cache: split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 22:46   ` Junio C Hamano
2014-04-29  1:43     ` Duy Nguyen
2014-04-29 17:23       ` Junio C Hamano
2014-04-29 22:45         ` Duy Nguyen
2014-04-30 13:57           ` Junio C Hamano
2014-04-28 10:55 ` [PATCH 18/32] read-cache: mark new entries for split index Nguyễn Thái Ngọc Duy
2014-04-30 20:35   ` Eric Sunshine
2014-04-28 10:55 ` [PATCH 19/32] read-cache: save deleted entries in " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 20/32] read-cache: mark updated entries for " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 21/32] split-index: the writing part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` Nguyễn Thái Ngọc Duy [this message]
2014-04-28 10:55 ` [PATCH 23/32] split-index: do not invalidate cache-tree at read time Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 24/32] split-index: strip pathname of on-disk replaced entries Nguyễn Thái Ngọc Duy
2014-04-29 20:25   ` Junio C Hamano
2014-04-28 10:55 ` [PATCH 25/32] update-index: new options to enable/disable split index mode Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 26/32] update-index --split-index: do not split if $GIT_DIR is read only Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 27/32] rev-parse: add --shared-index-path to get shared index path Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 28/32] read-tree: force split-index mode off on --index-output Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 29/32] read-tree: note about dropping split-index mode or index version Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 30/32] read-cache: force split index mode with GIT_TEST_SPLIT_INDEX Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 31/32] t2104: make sure split index mode is off for the version test Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 32/32] t1700: new tests for split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 21:18 ` [PATCH 00/32] Split index mode for very large indexes Shawn Pearce
2014-04-29  1:52   ` Duy Nguyen
2014-05-09 10:27   ` Duy Nguyen
2014-05-09 17:55     ` Junio C Hamano
2014-05-13 11:15   ` [PATCH 0/8] Speed up cache loading time Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 1/8] read-cache: allow to keep mmap'd memory after reading Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 2/3] Add read-cache--daemon Nguyễn Thái Ngọc Duy
2014-05-13 11:52       ` Erik Faye-Lund
2014-05-13 12:01         ` Duy Nguyen
2014-05-13 13:01         ` Duy Nguyen
2014-05-13 13:37           ` Erik Faye-Lund
2014-05-13 13:49             ` Duy Nguyen
2014-05-13 14:06               ` Erik Faye-Lund
2014-05-13 14:10                 ` Duy Nguyen
2014-05-13 14:16                   ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 2/8] unix-socket: stub impl. for platforms with no unix socket support Nguyễn Thái Ngọc Duy
2014-05-13 11:59       ` Erik Faye-Lund
2014-05-13 12:03         ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 3/8] daemonize: set a flag before exiting the main process Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 3/3] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 4/8] Add read-cache--daemon for caching index and related stuff Nguyễn Thái Ngọc Duy
2014-05-13 11:56       ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 5/8] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 12:13       ` Erik Faye-Lund
2014-05-13 11:15     ` [PATCH 6/8] read-cache--daemon: do not read index " Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 7/8] read-cache: skip verifying trailing SHA-1 on cached index Nguyễn Thái Ngọc Duy
2014-05-13 11:15     ` [PATCH 8/8] read-cache: inform the daemon that the index has been updated Nguyễn Thái Ngọc Duy
2014-05-13 12:17       ` Erik Faye-Lund
2014-05-22 16:38       ` David Turner
2014-05-13 14:24     ` [PATCH 0/8] Speed up cache loading time Stefan Beller
2014-05-13 14:35       ` Duy Nguyen
2014-05-13 11:20   ` [PATCH 9/8] even faster loading time with index version 254 Nguyễn Thái Ngọc Duy
2014-04-28 22:23 ` [PATCH 00/32] Split index mode for very large indexes Junio C Hamano
2014-04-30 20:48 ` Richard Hansen
2014-05-01  0:09   ` Duy Nguyen
2014-06-13 12:19 [PATCH 00/32] Split index resend Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 22/32] split-index: the reading part Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1398682553-11634-23-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).