All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiang Xin <worldhello.net@gmail.com>
To: Git List <git@vger.kernel.org>
Cc: "Sun Chao" <sunchao9@huawei.com>,
	"Lukas Sandström" <lukass@etek.chalmers.se>,
	"Jiang Xin" <worldhello.net@gmail.com>
Subject: [PATCH 1/2] pack-redundant: new algorithm to find min packs
Date: Tue, 18 Dec 2018 17:58:28 +0800	[thread overview]
Message-ID: <20181218095829.20092-1-worldhello.net@gmail.com> (raw)

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_uniqe packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 116 ++++++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 43 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..19dcf74750 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static void minimize(struct pack_list **min)
+static int cmp_pack_list_reverse(const void *a, const void *b)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
+static void minimize(struct pack_list **min, struct llist *ignore)
+{
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,41 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove all the ignored objects and unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while ((pl = non_unique)) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		while ((pl = pl->next)) {
+			if (pl->all_objects->size == 0)
+				break;
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		}
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +633,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -667,7 +697,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 		pl = pl->next;
 	}
 
-	minimize(&min);
+	minimize(&min, ignore);
 
 	if (verbose) {
 		fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
-- 
2.20.0.2.g660e9286fc


             reply	other threads:[~2018-12-18  9:58 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-18  9:58 Jiang Xin [this message]
2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-09 12:56       ` SZEDER Gábor
2019-01-09 16:47         ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 " Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-02-01 19:42                   ` Eric Sunshine
2019-02-01 21:03                     ` Junio C Hamano
2019-02-01 21:49                       ` Eric Sunshine
2019-02-02 13:30                         ` [PATCH v10 0/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 3/6] pack-redundant: delete redundant code Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 6/6] pack-redundant: consistent sort method Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 3/6] pack-redundant: delete redundant code Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 6/6] pack-redundant: consistent sort method Jiang Xin
2019-01-30 11:47               ` [PATCH v7 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-31 21:44                 ` Junio C Hamano
2019-02-01  5:44                   ` Jiang Xin
2019-02-01  6:11                     ` Eric Sunshine
2019-02-01  7:23                       ` Jiang Xin
2019-02-01  7:25                         ` Jiang Xin
2019-02-01  9:51                       ` Jiang Xin
2019-01-30 11:47               ` [PATCH v7 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-01-30 11:47               ` [PATCH v7 3/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-31 19:30                 ` Junio C Hamano
2019-02-01  9:55                   ` Jiang Xin
2019-01-30 11:47               ` [PATCH v7 4/6] pack-redundant: remove unused functions Jiang Xin
2019-01-30 15:03                 ` [PATCH v8 1/1] pack-redundant: delete redundant code 16657101987
2019-01-30 11:47               ` [PATCH v7 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-30 11:47               ` [PATCH v7 6/6] pack-redundant: consistent sort method Jiang Xin
2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10 21:11             ` Junio C Hamano
2019-01-11  1:59               ` Jiang Xin
2019-01-11 18:00                 ` Junio C Hamano
2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-11  1:19             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 20:05             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10  7:11           ` Johannes Sixt
2019-01-10 11:57           ` SZEDER Gábor
2019-01-10 12:25             ` Torsten Bögershausen
2019-01-10 17:36             ` Junio C Hamano
2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
2019-01-15 21:09               ` Eric Sunshine
2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
2019-01-22 19:47               ` Junio C Hamano
2019-01-22 20:00                 ` Torsten Bögershausen
2019-01-22 21:15                   ` Eric Sunshine
2019-01-23  6:35                     ` Torsten Bögershausen
2019-01-23 17:54                       ` Junio C Hamano
2019-01-25 19:12                         ` Torsten Bögershausen
2019-01-27 22:34                           ` Junio C Hamano
2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
2019-01-08 19:30         ` Junio C Hamano
2019-01-09  0:29           ` 16657101987
2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181218095829.20092-1-worldhello.net@gmail.com \
    --to=worldhello.net@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=lukass@etek.chalmers.se \
    --cc=sunchao9@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.