git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [PATCH 1/2] pack-redundant: new algorithm to find min packs
@ 2018-12-18  9:58 Jiang Xin
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
  0 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2018-12-18  9:58 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Lukas Sandström, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the iteration of
`get_permutations` goes too deep and exhausts all the resources, and the
`git pack-redundant` process will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the number of objects left in each, most
   objects first, and add the first non_unique pack to the `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 116 ++++++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 43 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..19dcf74750 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static void minimize(struct pack_list **min)
+static int cmp_pack_list_reverse(const void *a, const void *b)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
+static void minimize(struct pack_list **min, struct llist *ignore)
+{
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,41 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove all the ignored objects and unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while ((pl = non_unique)) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		while ((pl = pl->next)) {
+			if (pl->all_objects->size == 0)
+				break;
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		}
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +633,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -667,7 +697,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 		pl = pl->next;
 	}
 
-	minimize(&min);
+	minimize(&min, ignore);
 
 	if (verbose) {
 		fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
-- 
2.20.0.2.g660e9286fc


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH 2/2] pack-redundant: remove unused functions
  2018-12-18  9:58 [PATCH 1/2] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2018-12-18  9:58 ` Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
                     ` (3 more replies)
  0 siblings, 4 replies; 83+ messages in thread
From: Jiang Xin @ 2018-12-18  9:58 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Lukas Sandström, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 81 ----------------------------------------
 1 file changed, 81 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 19dcf74750..d0ff2377f3 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -63,15 +63,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -285,78 +276,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.0.2.g660e9286fc


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v2 0/3] pack-redundant: new algorithm to find min packs
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
                       ` (3 more replies)
  2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
                     ` (2 subsequent siblings)
  3 siblings, 4 replies; 83+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Jiang Xin, Sun Chao

Sun Chao is my former colleague at Huawei. He found a bug in git-pack-redundant.

When I was at Huawei, I developed a program to manage fork trees of repositories,
using alternate repositories for forks to save disk space.

Sun Chao found that if there are too many packs and many of them overlap each
other, running `git pack-redundant --all` will exhaust all memory and the
process will be killed by the kernel.

There is a script in the commit log of patch 2/3 that can be used to create a
repository with lots of redundant packs. Running `git pack-redundant
--all` in it reproduces this issue.

Updates of reroll v2:

* Add test cases in t5322.
* Fix a bug in patch 2/3.

--

Jiang Xin (1):
  t5322: test cases for git-pack-redundant

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 181 ++++++++++++++------------------------
 t/t5322-pack-redundant.sh |  69 +++++++++++++++
 2 files changed, 137 insertions(+), 113 deletions(-)
 create mode 100755 t/t5322-pack-redundant.sh

-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v2 1/3] t5322: test cases for git-pack-redundant
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate its new algorithm.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5322-pack-redundant.sh | 69 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100755 t/t5322-pack-redundant.sh

diff --git a/t/t5322-pack-redundant.sh b/t/t5322-pack-redundant.sh
new file mode 100755
index 0000000000..4add9c2bb1
--- /dev/null
+++ b/t/t5322-pack-redundant.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	set -e
+	parent=
+	for name in A B C D E F G H I J K L M
+	do
+		test_tick
+		T=$(git write-tree)
+		if test -z "$parent"
+		then
+			sha1=$(echo $name | git commit-tree $T)
+		else
+			sha1=$(echo $name | git commit-tree -p $parent $T)
+		fi
+		eval $name=$sha1
+		parent=$sha1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_redundant_packs()
+{
+	set -e
+	cd .git/objects/pack
+	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
+	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
+	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
+	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
+	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
+	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
+	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
+	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
+	cd -
+}
+
+# Create commits and packs
+create_commits
+create_redundant_packs
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
+'
+
+printf "$P1\n$P4\n$P5\n$P6\n" | sort >expected
+
+test_expect_success 'git pack-redundant --all' '
+	git pack-redundant --all | \
+		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
+		sort -u >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	test $(git pack-redundant --all | wc -l) -eq 0
+'
+
+test_done
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v2 2/3] pack-redundant: new algorithm to find min packs
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the iteration of
`get_permutations` goes too deep and exhausts all the resources, and the
`git pack-redundant` process will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the number of objects left in each, most
   objects first, and add the first non_unique pack to the `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 109 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v2 3/3] pack-redundant: remove unused functions
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
                     ` (2 preceding siblings ...)
  2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  3 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 72 ----------------------------------------
 1 file changed, 72 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..9630117c90 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -285,78 +285,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v3 0/3] pack-redundant: new algorithm to find min packs
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-02  4:34     ` " Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

Sun Chao (my former colleague at Huawei) found a bug in
git-pack-redundant.  If there are too many packs and many of them overlap
each other, running `git pack-redundant --all` will exhaust all memory
and the process will be killed by the kernel.

There is a script in the commit log of patch 2/3 that can be used to
create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it reproduces this issue.

Updates of reroll v3:

* Rename the test case file from t5322 to t5323, because t5322 already
  exists in commit 404dead121: "pack-objects: add --sparse option".

Jiang Xin (1):
  t5323: test cases for git-pack-redundant

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 181 +++++++++++++++++-----------------------------
 t/t5323-pack-redundant.sh |  84 +++++++++++++++++++++
 2 files changed, 152 insertions(+), 113 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
@ 2019-01-02  4:34     ` Jiang Xin
  2019-01-09 12:56       ` SZEDER Gábor
  2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate its new algorithm.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5323-pack-redundant.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..ef6076f065
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	set -e
+	parent=
+	for name in A B C D E F G H I J K L M
+	do
+		test_tick
+		T=$(git write-tree)
+		if test -z "$parent"
+		then
+			sha1=$(echo $name | git commit-tree $T)
+		else
+			sha1=$(echo $name | git commit-tree -p $parent $T)
+		fi
+		eval $name=$sha1
+		parent=$sha1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_redundant_packs()
+{
+	set -e
+	cd .git/objects/pack
+	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
+	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
+	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
+	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
+	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
+	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
+	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
+	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
+	cd -
+	eval P$P1=P1:$P1
+	eval P$P2=P2:$P2
+	eval P$P3=P3:$P3
+	eval P$P4=P4:$P4
+	eval P$P5=P5:$P5
+	eval P$P6=P6:$P6
+	eval P$P7=P7:$P7
+	eval P$P8=P8:$P8
+}
+
+# Create commits and packs
+create_commits
+create_redundant_packs
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
+'
+
+cat >expected <<EOF
+P1:$P1
+P4:$P4
+P5:$P5
+P6:$P6
+EOF
+
+test_expect_success 'git pack-redundant --all' '
+	git pack-redundant --all | \
+		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort > actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	test $(git pack-redundant --all | wc -l) -eq 0
+'
+
+test_done
-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v3 2/3] pack-redundant: new algorithm to find min packs
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-02  4:34     ` Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the iteration of
`get_permutations` goes too deep and exhausts all the resources, and the
`git pack-redundant` process will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the number of objects left in each, most
   objects first, and add the first non_unique pack to the `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 109 +++++++++++++++++++++++++++++------------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v3 3/3] pack-redundant: remove unused functions
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
                       ` (2 preceding siblings ...)
  2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-02  4:34     ` Jiang Xin
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
                         ` (2 more replies)
  3 siblings, 3 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 72 ------------------------------------------------
 1 file changed, 72 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..9630117c90 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -285,78 +285,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
@ 2019-01-08 16:40       ` " 16657101987
  2019-01-08 19:30         ` Junio C Hamano
  2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
  2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
  2 siblings, 1 reply; 83+ messages in thread
From: 16657101987 @ 2019-01-08 16:40 UTC (permalink / raw)
  To: worldhello.net, git; +Cc: gitster, sunchao9

From: Sun Chao <sunchao9@huawei.com>

I'm particularly grateful to Junio and JiangXin for fixing the patches,
and I noticed that Junio sent a new commit to remove more unused code
and suggested squashing it.

So I created this new version of the patch to do this work. I have also
checked the remaining code and removed an unused struct, based on Junio's
last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.

--

Sun Chao (1):
  pack-redundant: remove unused functions

 builtin/pack-redundant.c | 86 ------------------------------------------------
 1 file changed, 86 deletions(-)

-- 
2.8.1



^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v4 1/1] pack-redundant: remove unused functions
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
@ 2019-01-08 16:43       ` " 16657101987
  2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
  2 siblings, 0 replies; 83+ messages in thread
From: 16657101987 @ 2019-01-08 16:43 UTC (permalink / raw)
  To: worldhello.net, git; +Cc: gitster, sunchao9

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ------------------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7..eac2350 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -285,78 +271,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.8.1



^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
  2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
@ 2019-01-08 16:45       ` " 16657101987
  2 siblings, 0 replies; 83+ messages in thread
From: 16657101987 @ 2019-01-08 16:45 UTC (permalink / raw)
  To: worldhello.net, git; +Cc: gitster, sunchao9

From: Sun Chao <sunchao9@huawei.com>

I'm particularly grateful to Junio and JiangXin for fixing the patches,
and I noticed that Junio sent a new commit to remove more unused code
and suggested squashing it.

So I created this new version of the patch to do this work. I have also
checked the remaining code and removed an unused struct, based on Junio's
last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.

--

Sun Chao (1):
  pack-redundant: remove unused functions

 builtin/pack-redundant.c | 86 ------------------------------------------------
 1 file changed, 86 deletions(-)

-- 
2.8.1



^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
@ 2019-01-08 19:30         ` Junio C Hamano
  2019-01-09  0:29           ` 16657101987
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-01-08 19:30 UTC (permalink / raw)
  To: 16657101987; +Cc: worldhello.net, git, sunchao9

16657101987@163.com writes:

> From: Sun Chao <sunchao9@huawei.com>
>
> I'm particularly grateful to Junio and JiangXin for fixing the patches,
> and I noticed Junio send a new commit to remove more unused codes and
> suggest to SQUASH it.
>
> So I create this new version of patches to do this work, I also have
> checked the left codes and remove a unused struct based on Junio's
> last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.
>
> --
>
> Sun Chao (1):
>   pack-redundant: remove unused functions

Is this meant to replace [v3 3/3]?


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-08 19:30         ` Junio C Hamano
@ 2019-01-09  0:29           ` 16657101987
  0 siblings, 0 replies; 83+ messages in thread
From: 16657101987 @ 2019-01-09  0:29 UTC (permalink / raw)
  To: gitster; +Cc: 16657101987, git, sunchao9, worldhello.net

> 16657101987@163.com writes:
> 
>> From: Sun Chao <sunchao9@huawei.com>
>>
>> I'm particularly grateful to Junio and JiangXin for fixing the patches,
>> and I noticed Junio send a new commit to remove more unused codes and
>> suggest to SQUASH it.
>>
>> So I create this new version of patches to do this work, I also have
>> checked the left codes and remove a unused struct based on Junio's
>> last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.
>>
>> --
>>
>> Sun Chao (1):
>>   pack-redundant: remove unused functions
> 
> Is this meant to replace [v3 3/3]?

I'm Sun Chao, and because my Huawei email account can't send
email outside the company, I used a 163 email account to
send the new patch from home. I'm sorry for not explaining that.

Yes, this is meant to replace [v3 3/3], and I have noticed that the
patch has been applied. Thanks very much.


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-09 12:56       ` SZEDER Gábor
  2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
  0 siblings, 2 replies; 83+ messages in thread
From: SZEDER Gábor @ 2019-01-09 12:56 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> 
> Add test cases for git pack-redundant to validate new algorithm for git
> pack-redundant.
> 
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> ---
>  t/t5323-pack-redundant.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 84 insertions(+)
>  create mode 100755 t/t5323-pack-redundant.sh
> 
> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> new file mode 100755
> index 0000000000..ef6076f065
> --- /dev/null
> +++ b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,84 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2018 Jiang Xin
> +#
> +
> +test_description='git redundant test'

s/redundant/pack-redundant/ ?

> +
> +. ./test-lib.sh
> +
> +create_commits()
> +{
> +	set -e
> +	parent=
> +	for name in A B C D E F G H I J K L M
> +	do
> +		test_tick
> +		T=$(git write-tree)
> +		if test -z "$parent"
> +		then
> +			sha1=$(echo $name | git commit-tree $T)

There is a considerable effort going on to switch from SHA-1 to a
different hash function, so please don't add any new $sha1 variable;
call it $oid or $commit instead.

> +		else
> +			sha1=$(echo $name | git commit-tree -p $parent $T)
> +		fi
> +		eval $name=$sha1
> +		parent=$sha1
> +	done
> +	git update-ref refs/heads/master $M
> +}
> +
> +create_redundant_packs()
> +{
> +	set -e
> +	cd .git/objects/pack
> +	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
> +	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
> +	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
> +	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
> +	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
> +	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
> +	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
> +	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
> +	cd -
> +	eval P$P1=P1:$P1
> +	eval P$P2=P2:$P2
> +	eval P$P3=P3:$P3
> +	eval P$P4=P4:$P4
> +	eval P$P5=P5:$P5
> +	eval P$P6=P6:$P6
> +	eval P$P7=P7:$P7
> +	eval P$P8=P8:$P8
> +}
> +
> +# Create commits and packs
> +create_commits
> +create_redundant_packs

Please perform all setup tasks in a test_expect_success block, so we
get verbose and trace output about what's going on.

Don't use 'set -e', use an &&-chain instead.  To fail the test if a
command in the for loop were to fail you could do something like this:

  for ....
  do
    do-this &&
    do-that ||
    return 1
  done

> +
> +test_expect_success 'clear loose objects' '
> +	git prune-packed &&
> +	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0

Use something like

  find .git/objects -type f | grep -v pack >out &&
  test_must_be_empty out

instead, so we get an informative error message on failure.

> +'
> +
> +cat >expected <<EOF
> +P1:$P1
> +P4:$P4
> +P5:$P5
> +P6:$P6
> +EOF
> +
> +test_expect_success 'git pack-redundant --all' '
> +	git pack-redundant --all | \

Don't run a git command (especially the particular command the test
script focuses on) upstream of a pipe, because it hides the command's
exit code.  Use an intermediate file instead.

> +		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \

This sed command doesn't seem to work on macOS (on Travis CI), and
causes the test to fail with:

  ++git pack-redundant --all
  ++sed -e 's#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g'
  ++sort -u
  ++read p
  ++sort
  ++eval echo '${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}'
  ./test-lib.sh: line 697: ${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}: bad substitution
  ++test_cmp expected actual
  ++diff -u expected actual
  --- expected    2019-01-09 01:53:45.000000000 +0000
  +++ actual      2019-01-09 01:53:45.000000000 +0000
  @@ -1,4 +0,0 @@
  -P1:24ee080366509364d04a138cd4e168dc4ff33354
  -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
  -P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
  -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
  error: last command exited with $?=1
  not ok 2 - git pack-redundant --all

I'm not sure what's wrong with it, though.

Minor nit: 'git pack-redundant' prints one filename per line, so the
'g' at the end of the 's###g' is not necessary.

> +		sort -u | \
> +		while read p; do eval echo "\${P$p}"; done | \
> +		sort > actual && \

Style nit: no space between redirection operator and filename

> +	test_cmp expected actual
> +'
> +
> +test_expect_success 'remove redundant packs' '
> +	git pack-redundant --all | xargs rm &&
> +	git fsck &&
> +	test $(git pack-redundant --all | wc -l) -eq 0
> +'
> +
> +test_done
> -- 
> 2.14.5.agit.2
> 

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-09 12:56       ` SZEDER Gábor
@ 2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                             ` (5 more replies)
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
  1 sibling, 6 replies; 83+ messages in thread
From: SZEDER Gábor @ 2019-01-09 16:47 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On Wed, Jan 09, 2019 at 01:56:28PM +0100, SZEDER Gábor wrote:
> On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> > +cat >expected <<EOF
> > +P1:$P1
> > +P4:$P4
> > +P5:$P5
> > +P6:$P6
> > +EOF

Creating the expected results could be moved into the
test_expect_success block as well.

> > +
> > +test_expect_success 'git pack-redundant --all' '
> > +	git pack-redundant --all | \
> 
> Don't run a git command (especially the particular command the test
> script focuses on) upstream of a pipe, because it hides the command's
> exit code.  Use an intermediate file instead.
> 
> > +		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
> 
> This sed command doesn't seem to work on macOS (on Travis CI), and
> causes the test to fail with:
> 
>   ++git pack-redundant --all
>   ++sed -e 's#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g'
>   ++sort -u
>   ++read p
>   ++sort
>   ++eval echo '${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}'
>   ./test-lib.sh: line 697: ${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}: bad substitution
>   ++test_cmp expected actual
>   ++diff -u expected actual
>   --- expected    2019-01-09 01:53:45.000000000 +0000
>   +++ actual      2019-01-09 01:53:45.000000000 +0000
>   @@ -1,4 +0,0 @@
>   -P1:24ee080366509364d04a138cd4e168dc4ff33354
>   -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
>   -P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
>   -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
>   error: last command exited with $?=1
>   not ok 2 - git pack-redundant --all
> 
> I'm not sure what's wrong with it, though.

So, it appears that 'sed' in macOS doesn't understand the
'\(idx\|pack\)' part of that regex.  Turning that command into

  sed -e "s#^.git/objects/pack/pack-\($OID_REGEX\)\..*#\1#" out | \

makes it work even on macOS, but note that those 40 hexdigits are not
actual OIDs but file content checksums, so using $OID_REGEX is not the
right thing to do here (though I'm not sure what is supposed to be
used instead, as $_x40 hardcodes the number of hexdigits).

Alas, the test as a whole still fails with the following on macOS:

  ++diff -u expected actual
  --- expected    2019-01-09 15:54:49.000000000 +0000
  +++ actual      2019-01-09 15:54:49.000000000 +0000
  @@ -1,4 +1,4 @@
   P1:24ee080366509364d04a138cd4e168dc4ff33354
  -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
  +P3:0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e
   P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
  -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
  +P7:4ecc1eb138516a26654cd4e3570b322c0820f170
  error: last command exited with $?=1


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-09 12:56       ` SZEDER Gábor
  2019-01-09 16:47         ` SZEDER Gábor
@ 2019-01-10  3:28         ` Jiang Xin
  2019-01-10  7:11           ` Johannes Sixt
  2019-01-10 11:57           ` SZEDER Gábor
  1 sibling, 2 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-10  3:28 UTC (permalink / raw)
  To: SZEDER Gábor; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>
> On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> > +test_description='git redundant test'
>
> s/redundant/pack-redundant/ ?

Yes, will correct it.

> > +
> > +. ./test-lib.sh
> > +
> > +create_commits()
> > +{
> > +     set -e
> > +     parent=
> > +     for name in A B C D E F G H I J K L M
> > +     do
> > +             test_tick
> > +             T=$(git write-tree)
> > +             if test -z "$parent"
> > +             then
> > +                     sha1=$(echo $name | git commit-tree $T)
>
> There is a considerable effort going on to switch from SHA-1 to a
> different hash function, so please don't add any new $sha1 variable;
> call it $oid or $commit instead.
>

Will do.

> > +
> > +# Create commits and packs
> > +create_commits
> > +create_redundant_packs
>
> Please perform all setup tasks in a test_expect_success block, so we
> get verbose and trace output about what's going on.

Will do like this:

    test_expect_success 'setup' '
            create_commits  &&
            create_redundant_packs
    '

> Don't use 'set -e', use an &&-chain instead.  To fail the test if a
> command in the for loop were to fail you could do something like this:
>
>   for ....
>   do
>     do-this &&
>     do-that ||
>     return 1
>   done

Will do.

> > +
> > +test_expect_success 'clear loose objects' '
> > +     git prune-packed &&
> > +     test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
>
> Use something like
>
>   find .git/objects -type f | grep -v pack >out &&
>   test_must_be_empty out
>
> instead, so we get an informative error message on failure.

If `grep -v pack` produces no output, it exits with a non-zero status,
so I will use `sed -e "/objects\/pack\//d" >out` instead.

>
> > +'
> > +
> > +cat >expected <<EOF
> > +P1:$P1
> > +P4:$P4
> > +P5:$P5
> > +P6:$P6
> > +EOF
> > +
> > +test_expect_success 'git pack-redundant --all' '
> > +     git pack-redundant --all | \
>
> Don't run a git command (especially the particular command the test
> script focuses on) upstream of a pipe, because it hides the command's
> exit code.  Use an intermediate file instead.
>
> > +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
>
> This sed command doesn't seem to work on macOS (on Travis CI), and
> causes the test to fail with:
>

It works if rewrite as follows:

    git pack-redundant --all >out &&
    sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \

Without `-E`, on macOS one has to write two separate sed commands, such as:

    git pack-redundant --all >out &&
    sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
    sed -e "s#.*/pack-\(.*\)\.pack#\1#"

Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
until 4.3), released on May 11 2009.  I prefer the `-E` version.

>
> Minor nit: 'git pack-redundant' prints one filename per line, so the
> 'g' at the end of the 's###g' is not necessary.
>
> > +             sort -u | \
> > +             while read p; do eval echo "\${P$p}"; done | \
> > +             sort > actual && \
>
> Style nit: no space between redirection operator and filename

Will do.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-10  7:11           ` Johannes Sixt
  2019-01-10 11:57           ` SZEDER Gábor
  1 sibling, 0 replies; 83+ messages in thread
From: Johannes Sixt @ 2019-01-10  7:11 UTC (permalink / raw)
  To: Jiang Xin
  Cc: SZEDER Gábor, Sun Chao, Git List, Junio C Hamano, Jiang Xin

Am 10.01.19 um 04:28 schrieb Jiang Xin:
> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>> Use something like
>>
>>    find .git/objects -type f | grep -v pack >out &&
>>    test_must_be_empty out
>>
>> instead, so we get an informative error message on failure.
> 
> if `grep -v pack` return empty output, it will return error, so
> I will use `sed -e "/objects\/pack\//d" >out` instead.

So, you could even write this as

	find .git/objects -type f >out &&
	! grep -v pack out	# must be empty
or
	! find .git/objects -type f | grep -v pack

if you want to be terse.

-- Hannes

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
  2019-01-10  7:11           ` Johannes Sixt
@ 2019-01-10 11:57           ` SZEDER Gábor
  2019-01-10 12:25             ` Torsten Bögershausen
                               ` (3 more replies)
  1 sibling, 4 replies; 83+ messages in thread
From: SZEDER Gábor @ 2019-01-10 11:57 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On Thu, Jan 10, 2019 at 11:28:34AM +0800, Jiang Xin wrote:
> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
> > > +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
> >
> > This sed command doesn't seem to work on macOS (on Travis CI), and
> > causes the test to fail with:
> >
> 
> It works if rewrite as follows:
> 
>     git pack-redundant --all >out &&
>     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
> 
> Without `-E`, on macOS one has to write two separate sed commands, such as:
> 
>     git pack-redundant --all >out &&
>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"
> 
> Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
> until 4.3), released on May 11 2009.  I prefer the `-E` version.

Is 'sed -E' portable enough, e.g. to the various BSDs, Solaris, and
whatnot?  I don't know, but POSIX doesn't mention it, there is not a
single instance of it in our current codebase, and it appears that
we've never used it before, either.  OTOH,
't/check-non-portable-shell.pl' doesn't catch it as non-portable
construct...



^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v5 0/5] pack-redundant: new algorithm to find min packs
  2019-01-09 16:47         ` SZEDER Gábor
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
                               ` (5 more replies)
  2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
                             ` (4 subsequent siblings)
  5 siblings, 6 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

> Sun Chao (my former colleague at Huawei) found a bug in
> git-pack-redundant.  If there are too many packs and many of them
> overlap each other, running `git pack-redundant --all` will exhaust
> all memory and the process will be killed by the kernel.
> 
> There is a script in commit log of commit 2/3, which can be used to
> create a repository with lots of redundant packs. Running `git
> pack-redundant --all` in it can reproduce this issue.

SZEDER reported that t5323 does not pass on macOS. See the solution in
patch 4/5.

Changes since reroll v4:

* Rewrite t5323, add more test cases.
* Add two new patches: one is a refactoring, and the other changes the
  sorting method and fixes t5323 for the new algorithm.

Range diff with sc/pack-redundant feature branch:

    1:  702267a888 < -:  ---------- t5323: test cases for git-pack-redundant
    -:  ---------- > 1:  40fea5d67f t5323: test cases for git-pack-redundant
    2:  c4b133d858 = 2:  50cd5a5b47 pack-redundant: new algorithm to find min packs
    -:  ---------- > 3:  6338c6fad4 pack-redundant: rename pack_list.all_objects
    -:  ---------- > 4:  734f4d8a8b pack-redundant: consistent sort method
    3:  2351d7e8b5 ! 5:  b7ccdea1ad pack-redundant: remove unused functions
        @@ -13,7 +13,7 @@
          --- a/builtin/pack-redundant.c
          +++ b/builtin/pack-redundant.c
         @@
        - 	struct llist *all_objects;
        + 	size_t all_objects_size;
          } *local_packs = NULL, *altodb_packs = NULL;
          
         -struct pll {
        @@ -105,7 +105,7 @@
         -	diff = llist_copy(list);
         -
         -	while (pl) {
        --		llist_sorted_difference_inplace(diff, pl->all_objects);
        +-		llist_sorted_difference_inplace(diff, pl->remaining_objects);
         -		if (diff->size == 0) { /* we're done */
         -			llist_free(diff);
         -			return 1;


Jiang Xin (3):
  t5323: test cases for git-pack-redundant
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 221 +++++++++++++++-----------------------
 t/t5323-pack-redundant.sh | 157 +++++++++++++++++++++++++++
 2 files changed, 242 insertions(+), 136 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh


-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-10 21:11             ` Junio C Hamano
  2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
                             ` (3 subsequent siblings)
  5 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
---
 t/t5323-pack-redundant.sh | 157 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..7410426dee
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,157 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git pack-redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	parent=
+	for name in A B C D E F G H I J K L M N O P Q R
+	do
+		test_tick &&
+		T=$(git write-tree) &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git commit-tree $T)
+		else
+			oid=$(echo $name | git commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid ||
+		return 1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_pack_1()
+{
+	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P1=P1:$P1
+}
+
+create_pack_2()
+{
+	P2=$(cd .git/objects/pack; printf "$B\n$C\n$D\n$E\n$G\n$H\n$I\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P2=P2:$P2
+}
+
+create_pack_3()
+{
+	P3=$(cd .git/objects/pack; printf "$F\n$I\n$J\n$K\n$L\n$M\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P3=P3:$P3
+}
+
+create_pack_4()
+{
+	P4=$(cd .git/objects/pack; printf "$J\n$K\n$L\n$M\n$P\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P4=P4:$P4
+}
+
+create_pack_5()
+{
+	P5=$(cd .git/objects/pack; printf "$G\n$H\n$N\n$O\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P5=P5:$P5
+}
+
+create_pack_6()
+{
+	P6=$(cd .git/objects/pack; printf "$N\n$O\n$Q\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P6=P6:$P6
+}
+
+create_pack_7()
+{
+	P7=$(cd .git/objects/pack; printf "$P\n$Q\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P7=P7:$P7
+}
+
+create_pack_8()
+{
+	P8=$(cd .git/objects/pack; printf "$A\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P8=P8:$P8
+}
+
+test_expect_success 'setup' '
+	create_commits
+'
+
+test_expect_success 'no redundant packs' '
+	create_pack_1 && create_pack_2 && create_pack_3 &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'create pack 4, 5' '
+	create_pack_4 && create_pack_5
+'
+
+cat >expected <<EOF
+P2:$P2
+EOF
+
+test_expect_success 'one of pack-2/pack-3 is redundant' '
+	git pack-redundant --all >out &&
+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort >actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 6, 7' '
+	create_pack_6 && create_pack_7
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+EOF
+
+test_expect_success 'pack 2, 4, and 6 are redundant' '
+	git pack-redundant --all >out &&
+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort >actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 8' '
+	create_pack_8
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+P8:$P8
+EOF
+
+test_expect_success 'pack-8, subset of pack-1, is also redundant' '
+	git pack-redundant --all >out &&
+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort >actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_done
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v5 2/5] pack-redundant: new algorithm to find min packs
  2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-11  1:19             ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
                             ` (2 subsequent siblings)
  5 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Johannes Sixt, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the overly deep
recursion of `get_permutations` will exhaust all the resources, and the
`git pack-redundant` process will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 109 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects
  2019-01-09 16:47         ` SZEDER Gábor
                             ` (2 preceding siblings ...)
  2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
  2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

New algorithm uses `pack_list.all_objects` to track remaining objects,
so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..56591d283f 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 struct pll {
@@ -346,7 +346,7 @@ static int is_superset(struct pack_list *pl, struct llist *list)
 	diff = llist_copy(list);
 
 	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
+		llist_sorted_difference_inplace(diff, pl->remaining_objects);
 		if (diff->size == 0) { /* we're done */
 			llist_free(diff);
 			return 1;
@@ -425,8 +425,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -436,7 +436,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -480,7 +480,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -498,20 +498,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -526,7 +526,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -537,7 +537,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -563,10 +563,10 @@ static void scan_alt_odb_packs(void)
 		local = local_packs;
 		while (local) {
 			llist_sorted_difference_inplace(local->unique_objects,
-							alt->all_objects);
+							alt->remaining_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
 		alt = alt->next;
 	}
 }
@@ -581,7 +581,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -590,11 +590,11 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v5 4/5] pack-redundant: consistent sort method
  2019-01-09 16:47         ` SZEDER Gábor
                             ` (3 preceding siblings ...)
  2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-10 20:05             ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
  5 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 gives a different result on macOS.
This is because `cmp_pack_list_reverse` cannot give a consistent result
when two packs being sorted have the same size of remaining_objects.

Change the sorting function so that t5323 gives consistent results.

The new algorithm to find redundant packs is a trade-off to save memory
resources; its result may differ from that of the old one, and may
sometimes not be the best result.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c  | 22 +++++++++++++++-------
 t/t5323-pack-redundant.sh |  2 +-
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 56591d283f..e9d2586e2e 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 struct pll {
@@ -421,16 +422,22 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	/* if have the same remaining_objects, big pack first */
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+
+	/* sort according to remaining objects, more remaining objects first */
+	if (pl_a->remaining_objects->size < pl_b->remaining_objects->size)
 		return 1;
 	else
 		return -1;
@@ -451,7 +458,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -593,6 +600,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	/* this list will be pruned in cmp_two_packs later */
 	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 7410426dee..2a09ff1bfb 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -90,7 +90,7 @@ test_expect_success 'create pack 4, 5' '
 '
 
 cat >expected <<EOF
-P2:$P2
+P3:$P3
 EOF
 
 test_expect_success 'one of pack-2/pack-3 is redundant' '
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v5 5/5] pack-redundant: remove unused functions
  2019-01-09 16:47         ` SZEDER Gábor
                             ` (4 preceding siblings ...)
  2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Johannes Sixt, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ----------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index e9d2586e2e..dd71fdd435 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -36,11 +36,6 @@ static struct pack_list {
 	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -64,15 +59,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -286,78 +272,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->remaining_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10 11:57           ` SZEDER Gábor
@ 2019-01-10 12:25             ` Torsten Bögershausen
  2019-01-10 17:36             ` Junio C Hamano
                               ` (2 subsequent siblings)
  3 siblings, 0 replies; 83+ messages in thread
From: Torsten Bögershausen @ 2019-01-10 12:25 UTC (permalink / raw)
  To: SZEDER Gábor, Jiang Xin
  Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On 10.01.19 12:57, SZEDER Gábor wrote:
> On Thu, Jan 10, 2019 at 11:28:34AM +0800, Jiang Xin wrote:
>> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>>>> +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
>>>
>>> This sed command doesn't seem to work on macOS (on Travis CI), and
>>> causes the test to fail with:
>>>
>>
>> It works if rewrite as follows:
>>
>>     git pack-redundant --all >out &&
>>     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
>>
>> Without `-E`, macOS needs two separate sed commands, such as:
>>
>>     git pack-redundant --all >out &&
>>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"
>>
>> Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
>> until 4.3), released on May 11 2009.  I prefer the `-E` version.
> 
> Is 'sed -E' portable enough, e.g. to the various BSDs, Solaris, and
> whatnot?  I don't know, but POSIX doesn't mention it, there is not a
> single instance of it in our current codebase, and it appears that
> we've never used it before, either.  OTOH,

If we can use "two separate sed commands", I would (really) prefer to do so,
to avoid "sed -E".
My conclusion is that it is not portable enough.
> 't/check-non-portable-shell.pl' doesn't catch it as non-portable
> construct...

Good point.
Actually that script only checks "known non-portable" options.
Every time somebody finds a non-portable option, we update it.
A growing blacklist, so to speak.
Maybe we should have a whitelist instead.




^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10 11:57           ` SZEDER Gábor
  2019-01-10 12:25             ` Torsten Bögershausen
@ 2019-01-10 17:36             ` Junio C Hamano
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
  2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
  3 siblings, 0 replies; 83+ messages in thread
From: Junio C Hamano @ 2019-01-10 17:36 UTC (permalink / raw)
  To: SZEDER Gábor; +Cc: Jiang Xin, Sun Chao, Git List, Jiang Xin

SZEDER Gábor <szeder.dev@gmail.com> writes:

>> Without `-E`, macOS needs two separate sed commands, such as:
>> 
>>     git pack-redundant --all >out &&
>>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"

Two commands, perhaps, but does it have to be two separate sed
processes piped together?  Why won't something like this work?

	sed -e 's|.*/pack-\([0-9a-f]*\)\.idx$|\1|' \
	-e 's|.*/pack-\([0-9a-f]*\)\.pack$|\1|'


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v5 4/5] pack-redundant: consistent sort method
  2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
@ 2019-01-10 20:05             ` SZEDER Gábor
  0 siblings, 0 replies; 83+ messages in thread
From: SZEDER Gábor @ 2019-01-10 20:05 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Junio C Hamano, Git List, Sun Chao, Jiang Xin, Johannes Sixt

On Thu, Jan 10, 2019 at 08:01:41PM +0800, Jiang Xin wrote:
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index 56591d283f..e9d2586e2e 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c

> @@ -421,16 +422,22 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
>  	return ret;
>  }
>  
> -static int cmp_pack_list_reverse(const void *a, const void *b)
> +static int cmp_remaining_objects(const void *a, const void *b)
>  {
>  	struct pack_list *pl_a = *((struct pack_list **)a);
>  	struct pack_list *pl_b = *((struct pack_list **)b);
> -	size_t sz_a = pl_a->remaining_objects->size;
> -	size_t sz_b = pl_b->remaining_objects->size;
>  
> -	if (sz_a == sz_b)
> -		return 0;
> -	else if (sz_a < sz_b)
> +	/* if have the same remaining_objects, big pack first */
> +	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
> +		if (pl_a->all_objects_size == pl_b->all_objects_size)
> +			return 0;
> +		else if (pl_a->all_objects_size < pl_b->all_objects_size)
> +			return 1;
> +		else
> +			return -1;

My compiler complains about the above nested if statements:

  builtin/pack-redundant.c: In function ‘cmp_remaining_objects’:
  builtin/pack-redundant.c:345:5: error: suggest explicit braces to avoid ambiguous ‘else’ [-Werror=parentheses]
    if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
       ^
  cc1: all warnings being treated as errors
  Makefile:2302: recipe for target 'builtin/pack-redundant.o' failed

After adding a pair of {} to the outer if statement
't5323-pack-redundant.sh' passed successfully even on macOS (on Travis
CI).


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-10 21:11             ` Junio C Hamano
  2019-01-11  1:59               ` Jiang Xin
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-01-10 21:11 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin, Johannes Sixt

Jiang Xin <worldhello.net@gmail.com> writes:

> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>
> Add test cases for git pack-redundant to validate new algorithm for git
> pack-redundant.
>
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
> ---
>  t/t5323-pack-redundant.sh | 157 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 157 insertions(+)
>  create mode 100755 t/t5323-pack-redundant.sh
>
> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> new file mode 100755
> index 0000000000..7410426dee
> --- /dev/null
> +++ b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,157 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2018 Jiang Xin
> +#
> +
> +test_description='git pack-redundant test'
> +
> +. ./test-lib.sh
> +
> +create_commits()
> +{

Style (see Documentation/CodingGuidelines).

> +	parent=
> +	for name in A B C D E F G H I J K L M N O P Q R
> +	do
> +		test_tick &&
> +		T=$(git write-tree) &&
> +		if test -z "$parent"
> +		then
> +			oid=$(echo $name | git commit-tree $T)
> +		else
> +			oid=$(echo $name | git commit-tree -p $parent $T)
> +		fi &&
> +		eval $name=$oid &&
> +		parent=$oid ||
> +		return 1
> +	done
> +	git update-ref refs/heads/master $M
> +}
> +
> +create_pack_1()
> +{
> +	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&

Yikes.  Can't "git pack-objects" get the input directly without
overlong printf, something along the lines of...

	P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
		$A
		$B
		$C
		...
		$R
		EOF
	)

> +	eval P$P1=P1:$P1
> +}
> ...
> +test_expect_success 'setup' '
> +	create_commits
> +'
> +
> +test_expect_success 'no redundant packs' '
> +	create_pack_1 && create_pack_2 && create_pack_3 &&
> +	git pack-redundant --all >out &&
> +	test_must_be_empty out
> +'
> +
> +test_expect_success 'create pack 4, 5' '
> +	create_pack_4 && create_pack_5
> +'
> +
> +cat >expected <<EOF
> +P2:$P2
> +EOF

Move this to the next "expect success" block?

> +test_expect_success 'one of pack-2/pack-3 is redundant' '
> +	git pack-redundant --all >out &&
> +	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \

How portable is "sed -E" (it is not even in POSIX.1)?  Wouldn't it
be easier to work with to have two "-e" fed to a single sed
invocation instead?

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v5 2/5] pack-redundant: new algorithm to find min packs
  2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-11  1:19             ` SZEDER Gábor
  0 siblings, 0 replies; 83+ messages in thread
From: SZEDER Gábor @ 2019-01-11  1:19 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Junio C Hamano, Git List, Sun Chao, Johannes Sixt

On Thu, Jan 10, 2019 at 08:01:39PM +0800, Jiang Xin wrote:
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index cf9a9aabd4..3655cc7dc6 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c

> @@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
>  		pl = pl->next;
>  	}
>  
> +	*min = unique;
> +
>  	/* return if there are no objects missing from the unique set */
>  	if (missing->size == 0) {
> -		*min = unique;
>  		free(missing);
>  		return;
>  	}
>  
> -	/* find the permutations which contain all missing objects */
> -	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
> -		perm_all = perm = get_permutations(non_unique, n);
> -		while (perm) {
> -			if (is_superset(perm->pl, missing)) {
> -				new_perm = xmalloc(sizeof(struct pll));
> -				memcpy(new_perm, perm, sizeof(struct pll));
> -				new_perm->next = perm_ok;
> -				perm_ok = new_perm;
> -			}
> -			perm = perm->next;
> -		}
> -		if (perm_ok)
> -			break;
> -		pll_free(perm_all);
> -	}

Please make sure that all commits in the patch series can be built
cleanly without any warnings (with '-Werror' or preferably with 'make
DEVELOPER=1') and pass the test suite.  This is important, because
unbuildable commits will cause trouble later on, when e.g. 'git
bisect' happens to pick such a commit.

In this case, the removal of the above loop removes all callsites of
the static functions get_permutations(), is_superset(), and
pll_free(), resulting in the following compiler errors:

  builtin/pack-redundant.c: At top level:
  builtin/pack-redundant.c:289:13: error: ‘pll_free’ defined but not used [-Werror=unused-function]
   static void pll_free(struct pll *l)
               ^
  builtin/pack-redundant.c:309:21: error: ‘get_permutations’ defined but not used [-Werror=unused-function]
   static struct pll * get_permutations(struct pack_list *list, int n)
                       ^
  builtin/pack-redundant.c:343:12: error: ‘is_superset’ defined but not used [-Werror=unused-function]
   static int is_superset(struct pack_list *pl, struct llist *list)
              ^

I see that the last patch in this series removes those three
unused functions, but that patch should be squashed into this one to
keep Git buildable with '-Werror' or DEVELOPER=1.

Furthermore, after building this patch (without '-Werror'), several
tests in 't5323-pack-redundant.sh' fail.  To avoid the test failure I
think the fourth patch ensuring a consistent sort order should be
squashed in as well.



^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-10 21:11             ` Junio C Hamano
@ 2019-01-11  1:59               ` Jiang Xin
  2019-01-11 18:00                 ` Junio C Hamano
  0 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-11  1:59 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin, Johannes Sixt

Junio C Hamano <gitster@pobox.com> 于2019年1月11日周五 上午5:11写道:
>
> Jiang Xin <worldhello.net@gmail.com> writes:
>
> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> > +create_commits()
> > +{
>
> Style (see Documentation/CodingGuidelines).

OK, parentheses after function name.
>
> > +create_pack_1()
> > +{
> > +     P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
>
> Yikes.  Can't "git pack-objects" get the input directly without
> overlong printf, something along the lines of...
>
>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
>                 $A
>                 $B
>                 $C
>                 ...
>                 $R
>                 EOF
>         )

I found that there must be no whitespace before <OID>, because
git-pack-objects does not allow that, and the matching parentheses
should be on the same line.  So I will write it like this:

    create_pack_1() {
            P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
    $T
    $A
    $B
    $R
    EOF
            eval P$P1=P1:$P1
    }

> > +test_expect_success 'no redundant packs' '
> > +     create_pack_1 && create_pack_2 && create_pack_3 &&
> > +     git pack-redundant --all >out &&
> > +     test_must_be_empty out
> > +'
> > +
> > +test_expect_success 'create pack 4, 5' '
> > +     create_pack_4 && create_pack_5
> > +'
> > +
> > +cat >expected <<EOF
> > +P2:$P2
> > +EOF
>
> Move this to the next "expect success" block?

$P4 and $P5 are only defined after calling `create_pack_4` and `create_pack_5`,
so the create-pack functions must be called before writing the `expected` file
if $P4 and/or $P5 appear in it.

In this case $P4 and $P5 are not in the expected file, so we could move
create_pack_4 and create_pack_5 into the following test_expect_success block,
but the new algorithm may change the expected file.
>
> > +test_expect_success 'one of pack-2/pack-3 is redundant' '
> > +     git pack-redundant --all >out &&
> > +     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
>
> How portable is "sed -E" (it is not even in POSIX.1)?  Wouldn't it
> be easier to work with to have two "-e" fed to a single sed
> invocation instead?

Will fix by using two '-e' commands.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-11  1:59               ` Jiang Xin
@ 2019-01-11 18:00                 ` Junio C Hamano
  0 siblings, 0 replies; 83+ messages in thread
From: Junio C Hamano @ 2019-01-11 18:00 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin, Johannes Sixt

Jiang Xin <worldhello.net@gmail.com> writes:

> Junio C Hamano <gitster@pobox.com> 于2019年1月11日周五 上午5:11写道:
>>
>> Jiang Xin <worldhello.net@gmail.com> writes:
>>
>> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>> > +create_commits()
>> > +{
>>
>> Style (see Documentation/CodingGuidelines).
>
> OK, parenthese after function name.
>>
>> > +create_pack_1()
>> > +{
>> > +     P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
>>
>> Yikes.  Can't "git pack-objects" get the input directly without
>> overlong printf, something along the lines of...
>>
>>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
>>                 $A
>>                 $B
>>                 $C
>>                 ...
>>                 $R
>>                 EOF
>>         )
>
> Find that no space before <OID>,  because git-pack-objects not allow that,
> and mached parentheses should in the same line.
> So Will write like this:
>
>     create_pack_1() {
>             P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
>     $T

Isn't the whole point of <<-EOF (notice the leading dash) to allow
us to indent the here-doc with horizontal tab?


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v6 0/5] pack-redundant: new algorithm to find min packs
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-12  9:17             ` " Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
                                 ` (6 more replies)
  2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
                               ` (4 subsequent siblings)
  5 siblings, 7 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor; +Cc: Jiang Xin, Sun Chao

> Sun Chao (my former colleague at Huawei) found a bug of
> git-pack-redundant.  If there are too many packs and many of them
> overlap each other, running `git pack-redundant --all` will
> exhaust all memories and the process will be killed by kernel.
> 
> There is a script in commit log of commit 2/5, which can be used to
> create a repository with lots of redundant packs. Running `git
> pack-redundant --all` in it can reproduce this issue.


Junio C Hamano <gitster@pobox.com> 于2019年1月12日周六 上午2:00写道:
> >> Yikes.  Can't "git pack-objects" get the input directly without
> >> overlong printf, something along the lines of...
> >>
> >>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
> >>                 $A
> >>                 $B
> >>                 $C
> >>                 ...
> >>                 $R
> >>                 EOF
> >>         )
> >
> > Find that no space before <OID>,  because git-pack-objects not allow that,
> > and mached parentheses should in the same line.
> > So Will write like this:
> >
> >     create_pack_1() {
> >             P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
> >     $T
>
> Isn't the whole point of <<-EOF (notice the leading dash) to allow
> us to indent the here-doc with horizontal tab?

The reason the indentation was not stripped even with `<<-EOF` is that I
mixed tabs and spaces for better alignment.

If the heredoc is put outside the parentheses, it fails on macOS, so I
use the syntax Junio previously suggested.


SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月11日周五 上午9:19写道:
> I see that the last patch in this series removes those three
> unused functions, but that patch should be squashed into this one to
> keep Git buildable with '-Werror' or DEVELOPER=1.
>
> Furthermore, after building this patch (without '-Werror'), several
> tests in 't5323-pack-redundant.sh' fail.  To avoid the test failure I
> think the fourth patch ensuring a consistent sort order should be
> squashed in as well.
Patches 3/5 to 5/5 can be squashed into patch 2/5.


## Changes since reroll v5


1:  40fea5d67f ! 1:  7e4e703083 t5323: test cases for git-pack-redundant
    @@ -22,8 +22,7 @@
     +
     +. ./test-lib.sh
     +
    -+create_commits()
    -+{
    ++create_commits() {
     +	parent=
     +	for name in A B C D E F G H I J K L M N O P Q R
     +	do
    @@ -39,54 +38,98 @@
     +		parent=$oid ||
     +		return 1
     +	done
    -+	git update-ref refs/heads/master $M
    ++	git update-ref refs/heads/master $R
     +}
     +
    -+create_pack_1()
    -+{
    -+	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_1() {
    ++	P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$T
    ++		$A
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$F
    ++		$R
    ++		EOF
    ++	) &&
     +	eval P$P1=P1:$P1
     +}
     +
    -+create_pack_2()
    -+{
    -+	P2=$(cd .git/objects/pack; printf "$B\n$C\n$D\n$E\n$G\n$H\n$I\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_2() {
    ++	P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$G
    ++		$H
    ++		$I
    ++		EOF
    ++	) &&
     +	eval P$P2=P2:$P2
     +}
     +
    -+create_pack_3()
    -+{
    -+	P3=$(cd .git/objects/pack; printf "$F\n$I\n$J\n$K\n$L\n$M\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_3() {
    ++	P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$F
    ++		$I
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		EOF
    ++	) &&
     +	eval P$P3=P3:$P3
     +}
     +
    -+create_pack_4()
    -+{
    -+	P4=$(cd .git/objects/pack; printf "$J\n$K\n$L\n$M\n$P\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_4() {
    ++	P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		$P
    ++		EOF
    ++	) &&
     +	eval P$P4=P4:$P4
     +}
     +
    -+create_pack_5()
    -+{
    -+	P5=$(cd .git/objects/pack; printf "$G\n$H\n$N\n$O\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_5() {
    ++	P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$G
    ++		$H
    ++		$N
    ++		$O
    ++		EOF
    ++	) &&
     +	eval P$P5=P5:$P5
     +}
     +
    -+create_pack_6()
    -+{
    -+	P6=$(cd .git/objects/pack; printf "$N\n$O\n$Q\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_6() {
    ++	P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$N
    ++		$O
    ++		$Q
    ++		EOF
    ++	) &&
     +	eval P$P6=P6:$P6
     +}
     +
    -+create_pack_7()
    -+{
    -+	P7=$(cd .git/objects/pack; printf "$P\n$Q\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_7() {
    ++	P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$P
    ++		$Q
    ++		EOF
    ++	) &&
     +	eval P$P7=P7:$P7
     +}
     +
    -+create_pack_8()
    -+{
    -+	P8=$(cd .git/objects/pack; printf "$A\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_8() {
    ++	P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$A
    ++		EOF
    ++	) &&
     +	eval P$P8=P8:$P8
     +}
     +
    @@ -110,10 +153,12 @@
     +
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -121,6 +166,7 @@
     +	create_pack_6 && create_pack_7
     +'
     +
    ++# Only after calling create_pack_6, we can use $P6 variable.
     +cat >expected <<EOF
     +P2:$P2
     +P4:$P4
    @@ -129,10 +175,12 @@
     +
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -147,24 +195,26 @@
     +P8:$P8
     +EOF
     +
    -+test_expect_success 'pack-8, subset of pack-1, is also redundant' '
    ++test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    -+test_expect_success 'clear loose objects' '
    ++test_expect_success 'clean loose objects' '
     +	git prune-packed &&
     +	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
     +	test_must_be_empty out
     +'
     +
    -+test_expect_success 'remove redundant packs' '
    ++test_expect_success 'remove redundant packs and pass fsck' '
     +	git pack-redundant --all | xargs rm &&
    -+	git fsck &&
    ++	git fsck --no-progress &&
     +	git pack-redundant --all >out &&
     +	test_must_be_empty out
     +'
2:  50cd5a5b47 ! 2:  51a9c2d8a5 pack-redundant: new algorithm to find min packs
    @@ -67,7 +67,7 @@
         Original PR and discussions: https://github.com/jiangxin/git/pull/25
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    -    Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
    +    Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
         Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
5:  b7ccdea1ad ! 3:  c5eb21c23c pack-redundant: remove unused functions
    @@ -6,14 +6,14 @@
         `pll_free`, etc.
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    -    Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
    +    Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
         Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
      --- a/builtin/pack-redundant.c
      +++ b/builtin/pack-redundant.c
     @@
    - 	size_t all_objects_size;
    + 	struct llist *all_objects;
      } *local_packs = NULL, *altodb_packs = NULL;
      
     -struct pll {
    @@ -105,7 +105,7 @@
     -	diff = llist_copy(list);
     -
     -	while (pl) {
    --		llist_sorted_difference_inplace(diff, pl->remaining_objects);
    +-		llist_sorted_difference_inplace(diff, pl->all_objects);
     -		if (diff->size == 0) { /* we're done */
     -			llist_free(diff);
     -			return 1;
3:  6338c6fad4 ! 4:  1acdd0af1e pack-redundant: rename pack_list.all_objects
    @@ -18,16 +18,7 @@
     +	struct llist *remaining_objects;
      } *local_packs = NULL, *altodb_packs = NULL;
      
    - struct pll {
    -@@
    - 	diff = llist_copy(list);
    - 
    - 	while (pl) {
    --		llist_sorted_difference_inplace(diff, pl->all_objects);
    -+		llist_sorted_difference_inplace(diff, pl->remaining_objects);
    - 		if (diff->size == 0) { /* we're done */
    - 			llist_free(diff);
    - 			return 1;
    + static struct llist_item *free_nodes;
     @@
      {
      	struct pack_list *pl_a = *((struct pack_list **)a);
4:  734f4d8a8b ! 5:  306d515cda pack-redundant: consistent sort method
    @@ -26,7 +26,7 @@
     +	size_t all_objects_size;
      } *local_packs = NULL, *altodb_packs = NULL;
      
    - struct pll {
    + static struct llist_item *free_nodes;
     @@
      	return ret;
      }
    @@ -42,20 +42,24 @@
     -	if (sz_a == sz_b)
     -		return 0;
     -	else if (sz_a < sz_b)
    -+	/* if have the same remaining_objects, big pack first */
    -+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
    ++	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
    ++		/* have the same remaining_objects, big pack first */
     +		if (pl_a->all_objects_size == pl_b->all_objects_size)
     +			return 0;
     +		else if (pl_a->all_objects_size < pl_b->all_objects_size)
     +			return 1;
     +		else
     +			return -1;
    -+
    -+	/* sort according to remaining objects, more remaining objects first */
    -+	if (pl_a->remaining_objects->size < pl_b->remaining_objects->size)
    ++	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
    ++		/* sort by remaining objects, more objects first */
      		return 1;
    - 	else
    +-	else
    ++	} else {
      		return -1;
    ++	}
    + }
    + 
    + /* Sort pack_list, greater size of remaining_objects first */
     @@
      	for (n = 0, p = *pl; p; p = p->next)
      		ary[n++] = p;

## This reroll has the following commits:

Jiang Xin (3):
  t5323: test cases for git-pack-redundant
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 221 +++++++++++++++-----------------------
 t/t5323-pack-redundant.sh | 207 +++++++++++++++++++++++++++++++++++
 2 files changed, 292 insertions(+), 136 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v6 1/5] t5323: test cases for git-pack-redundant
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
---
 t/t5323-pack-redundant.sh | 207 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..407838f0e8
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,207 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git pack-redundant test'
+
+. ./test-lib.sh
+
+create_commits() {
+	parent=
+	for name in A B C D E F G H I J K L M N O P Q R
+	do
+		test_tick &&
+		T=$(git write-tree) &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git commit-tree $T)
+		else
+			oid=$(echo $name | git commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid ||
+		return 1
+	done
+	git update-ref refs/heads/master $R
+}
+
+create_pack_1() {
+	P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$T
+		$A
+		$B
+		$C
+		$D
+		$E
+		$F
+		$R
+		EOF
+	) &&
+	eval P$P1=P1:$P1
+}
+
+create_pack_2() {
+	P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$B
+		$C
+		$D
+		$E
+		$G
+		$H
+		$I
+		EOF
+	) &&
+	eval P$P2=P2:$P2
+}
+
+create_pack_3() {
+	P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$F
+		$I
+		$J
+		$K
+		$L
+		$M
+		EOF
+	) &&
+	eval P$P3=P3:$P3
+}
+
+create_pack_4() {
+	P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$J
+		$K
+		$L
+		$M
+		$P
+		EOF
+	) &&
+	eval P$P4=P4:$P4
+}
+
+create_pack_5() {
+	P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$G
+		$H
+		$N
+		$O
+		EOF
+	) &&
+	eval P$P5=P5:$P5
+}
+
+create_pack_6() {
+	P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$N
+		$O
+		$Q
+		EOF
+	) &&
+	eval P$P6=P6:$P6
+}
+
+create_pack_7() {
+	P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$P
+		$Q
+		EOF
+	) &&
+	eval P$P7=P7:$P7
+}
+
+create_pack_8() {
+	P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$A
+		EOF
+	) &&
+	eval P$P8=P8:$P8
+}
+
+test_expect_success 'setup' '
+	create_commits
+'
+
+test_expect_success 'no redundant packs' '
+	create_pack_1 && create_pack_2 && create_pack_3 &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'create pack 4, 5' '
+	create_pack_4 && create_pack_5
+'
+
+cat >expected <<EOF
+P2:$P2
+EOF
+
+test_expect_success 'one of pack-2/pack-3 is redundant' '
+	git pack-redundant --all >out &&
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
+		sort -u |
+		while read p; do eval echo "\${P$p}"; done |
+		sort >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 6, 7' '
+	create_pack_6 && create_pack_7
+'
+
+# Only after calling create_pack_6, we can use $P6 variable.
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+EOF
+
+test_expect_success 'pack 2, 4, and 6 are redundant' '
+	git pack-redundant --all >out &&
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
+		sort -u |
+		while read p; do eval echo "\${P$p}"; done |
+		sort >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 8' '
+	create_pack_8
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+P8:$P8
+EOF
+
+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
+	git pack-redundant --all >out &&
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
+		sort -u |
+		while read p; do eval echo "\${P$p}"; done |
+		sort >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'clean loose objects' '
+	git prune-packed &&
+	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'remove redundant packs and pass fsck' '
+	git pack-redundant --all | xargs rm &&
+	git fsck --no-progress &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_done
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v6 2/5] pack-redundant: new algorithm to find min packs
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
                               ` (2 subsequent siblings)
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Sun Chao, Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by their number of objects, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 109 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v6 3/5] pack-redundant: remove unused functions
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (2 preceding siblings ...)
  2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Sun Chao, Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ----------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..eac23500ee 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -285,78 +271,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (3 preceding siblings ...)
  2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

New algorithm uses `pack_list.all_objects` to track remaining objects,
so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index eac23500ee..64eec3e297 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -339,8 +339,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -350,7 +350,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -394,7 +394,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -412,20 +412,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -440,7 +440,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -451,7 +451,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -477,10 +477,10 @@ static void scan_alt_odb_packs(void)
 		local = local_packs;
 		while (local) {
 			llist_sorted_difference_inplace(local->unique_objects,
-							alt->all_objects);
+							alt->remaining_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
 		alt = alt->next;
 	}
 }
@@ -495,7 +495,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -504,11 +504,11 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -604,7 +604,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v6 5/5] pack-redundant: consistent sort method
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (4 preceding siblings ...)
  2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  5 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 gives a different result on macOS.
This is because `cmp_pack_list_reverse` does not yield a consistent
order when two packs being sorted have the same number of
remaining_objects.

Change the sorting function so that packs with the same number of
remaining objects are ordered by their total object count (bigger pack
first), which gives a consistent test result for t5323.

The new algorithm to find redundant packs is a trade-off to save memory
resources; its result may differ from that of the old algorithm, and may
sometimes not be the best one.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c  | 24 ++++++++++++++++--------
 t/t5323-pack-redundant.sh |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 64eec3e297..4448e58a10 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -335,19 +336,25 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
+		/* have the same remaining_objects, big pack first */
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
+		/* sort by remaining objects, more objects first */
 		return 1;
-	else
+	} else {
 		return -1;
+	}
 }
 
 /* Sort pack_list, greater size of remaining_objects first */
@@ -365,7 +372,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -507,6 +514,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	/* this list will be pruned in cmp_two_packs later */
 	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 407838f0e8..663328ab30 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -133,7 +133,7 @@ test_expect_success 'create pack 4, 5' '
 '
 
 cat >expected <<EOF
-P2:$P2
+P3:$P3
 EOF
 
 test_expect_success 'one of pack-2/pack-3 is redundant' '
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable
  2019-01-10 11:57           ` SZEDER Gábor
  2019-01-10 12:25             ` Torsten Bögershausen
  2019-01-10 17:36             ` Junio C Hamano
@ 2019-01-15 20:30             ` tboegi
  2019-01-15 21:09               ` Eric Sunshine
  2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
  2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
  3 siblings, 2 replies; 83+ messages in thread
From: tboegi @ 2019-01-15 20:30 UTC (permalink / raw)
  To: git, szeder.dev, zhiyou.jx; +Cc: Torsten Bögershausen

From: Torsten Bögershausen <tboegi@web.de>

From `man sed` (on a Mac OS X box):
The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
on other operating systems.

From `man sed` on a Linux box:
REGULAR EXPRESSIONS
       POSIX.2 BREs should be supported, but they aren't completely because of
       performance problems.  The \n sequence in a regular expression matches
       the newline character,  and  similarly  for \a, \t, and other sequences.
       The -E option switches to using extended regular expressions instead;
       the -E option has been supported for years by GNU sed, and is now
       included in POSIX.

Well, there are still a lot of systems out there that don't support it.

Besides that, IEEE Std 1003.1-2017, see
http://pubs.opengroup.org/onlinepubs/9699919799/
does not mention -E either.

To be on the safe side, don't allow it.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---

I am somewhat unsure if we should disable all options except -e -f -n
instead ?
/\bsed\s+-[^efn]/ and err 'Not portable option with sed. Only -n -e -f are portable';

That would cause a false positive in t9001 here:
"--cc-cmd=./cccmd-sed --suppress-cc=self"

which could either be fixed by an anchor:
/^\s*sed\s+-[^efn]/

Or by allowing '--' like this:
/\bsed\s+-[^-efn]/

Any thoughts, please ?

t/check-non-portable-shell.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
index b45bdac688..96b6afdeb8 100755
--- a/t/check-non-portable-shell.pl
+++ b/t/check-non-portable-shell.pl
@@ -35,7 +35,7 @@ sub err {
 		chomp;
 	}

-	/\bsed\s+-i/ and err 'sed -i is not portable';
+	/\bsed\s+-[Eail]/ and err 'Not portable option with sed. Only -e -f -n are portable';
 	/\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
 	/^\s*declare\s+/ and err 'arrays/declare not portable';
 	/^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';
--
2.20.1.2.gb21ebb671


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
@ 2019-01-15 21:09               ` Eric Sunshine
  2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 83+ messages in thread
From: Eric Sunshine @ 2019-01-15 21:09 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Git List, SZEDER Gábor, zhiyou.jx

On Tue, Jan 15, 2019 at 3:31 PM <tboegi@web.de> wrote:
> From `man sed` (on a Mac OS X box):
> The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
> on other operating systems.
> [...]
> To be on the safe side, don't allow it.
>
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>
> ---
> diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
> @@ -35,7 +35,7 @@ sub err {
> -       /\bsed\s+-i/ and err 'sed -i is not portable';
> +       /\bsed\s+-[Eail]/ and err 'Not portable option with sed. Only -e -f -n are portable';
>         /\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
>         /^\s*declare\s+/ and err 'arrays/declare not portable';
>         /^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';

Please update the new message to be more consistent with existing
surrounding error messages. For instance:

    err 'sed -i/-a/-l/-E not portable (use only -e/-f/-n)'

or something. Thanks.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
  2019-01-15 21:09               ` Eric Sunshine
@ 2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 83+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-16 11:24 UTC (permalink / raw)
  To: tboegi; +Cc: git, szeder.dev, zhiyou.jx


On Tue, Jan 15 2019, tboegi@web.de wrote:

> From: Torsten Bögershausen <tboegi@web.de>
>
> From `man sed` (on a Mac OS X box):
> The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
> on other operating systems.
>
> From `man sed` on a Linux box:
> REGULAR EXPRESSIONS
>        POSIX.2 BREs should be supported, but they aren't completely because of
>        performance problems.  The \n sequence in a regular expression matches
>        the newline character,  and  similarly  for \a, \t, and other sequences.
>        The -E option switches to using extended regular expressions instead;
>        the -E option has been supported for years by GNU sed, and is now
>        included in POSIX.
>
> Well, there are still a lot of systems out there, which don't support it.
>
> Beside that, see IEEE Std 1003.1TM-2017
> http://pubs.opengroup.org/onlinepubs/9699919799/
> does not mention -E either.
>
> To be on the safe side, don't allow it.
>
> Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>
> ---
>
> I am somewhat unsure if we should disable all options except -e -f -n
> instead ?
> /\bsed\s+-[^efn]/ and err 'Not portable option with sed. Only -n -e -f are portable';
>
> That would cause a false positive in t9001 here:
> "--cc-cmd=./cccmd-sed --suppress-cc=self"
>
> which could either be fixed by an anchor:
> /^\s*sed\s+-[^efn]/
>
> Or by allowing '--' like this:
> /\bsed\s+-[^-efn]/
>
> Any thoughts, please ?
>
> t/check-non-portable-shell.pl | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
> index b45bdac688..96b6afdeb8 100755
> --- a/t/check-non-portable-shell.pl
> +++ b/t/check-non-portable-shell.pl
> @@ -35,7 +35,7 @@ sub err {
>  		chomp;
>  	}
>
> -	/\bsed\s+-i/ and err 'sed -i is not portable';
> +	/\bsed\s+-[Eail]/ and err 'Not portable option with sed. Only -e -f -n are portable';
>  	/\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
>  	/^\s*declare\s+/ and err 'arrays/declare not portable';
>  	/^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';

I'd just go for your /\bsed\s+-[^-efn]/ suggestion. Just a note if we do
go for the whitelist: According to GNU sed's manpage -E is also known as
-r, so /\bsed\s+-[Erail]/ would be better.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-10 11:57           ` SZEDER Gábor
                               ` (2 preceding siblings ...)
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
@ 2019-01-20  7:53             ` tboegi
  2019-01-22 19:47               ` Junio C Hamano
  3 siblings, 1 reply; 83+ messages in thread
From: tboegi @ 2019-01-20  7:53 UTC (permalink / raw)
  To: git, szeder.dev, zhiyou.jx, sunshine, avarab; +Cc: Torsten Bögershausen

From: Torsten Bögershausen <tboegi@web.de>

From `man sed` (on a Mac OS X box):
The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
on other operating systems.

From `man sed` on a Linux box:
REGULAR EXPRESSIONS
       POSIX.2 BREs should be supported, but they aren't completely because of
       performance problems.  The \n sequence in a regular expression matches the newline
       character,  and  similarly  for \a, \t, and other sequences.
       The -E option switches to using extended regular expressions instead; the -E option
       has been supported for years by GNU sed, and is now included in POSIX.

Well, there are still a lot of systems out there that don't support it.
Besides that, IEEE Std 1003.1-2017, see
http://pubs.opengroup.org/onlinepubs/9699919799/
does not mention -E either.

To be on the safe side, don't allow -E (or -r, which is GNU).
Change check-non-portable-shell.pl to only accept the portable options:
sed [-n] [-e command] [-f command_file]

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
 t/check-non-portable-shell.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
index b45bdac688..6c798608a9 100755
--- a/t/check-non-portable-shell.pl
+++ b/t/check-non-portable-shell.pl
@@ -35,7 +35,7 @@ sub err {
 		chomp;
 	}

-	/\bsed\s+-i/ and err 'sed -i is not portable';
+	/\bsed\s+-[^efn]\s+/ and err 'Not portable option with sed (use only [-n] [-e command] [-f command_file])';
 	/\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
 	/^\s*declare\s+/ and err 'arrays/declare not portable';
 	/^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';
--
2.20.1.2.gb21ebb671


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
@ 2019-01-22 19:47               ` Junio C Hamano
  2019-01-22 20:00                 ` Torsten Bögershausen
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-01-22 19:47 UTC (permalink / raw)
  To: tboegi; +Cc: git, szeder.dev, zhiyou.jx, sunshine, avarab

tboegi@web.de writes:

> From: Torsten Bögershausen <tboegi@web.de>
>
> From `man sed` (on a Mac OS X box):
> The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
> on other operating systems.
>
> -	/\bsed\s+-i/ and err 'sed -i is not portable';
> +	/\bsed\s+-[^efn]\s+/ and err 'Not portable option with sed (use only [-n] [-e command] [-f command_file])';

"sed -n -i -E -e 's/foo/bar/p'" won't be caught with this as an
error, but that's OK ;-).

Is this still an RFC patch?

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-22 19:47               ` Junio C Hamano
@ 2019-01-22 20:00                 ` Torsten Bögershausen
  2019-01-22 21:15                   ` Eric Sunshine
  0 siblings, 1 reply; 83+ messages in thread
From: Torsten Bögershausen @ 2019-01-22 20:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, szeder.dev, zhiyou.jx, sunshine, avarab

On 22.01.19 20:47, Junio C Hamano wrote:
> tboegi@web.de writes:
>
>> From: Torsten Bögershausen <tboegi@web.de>
>>
>> From `man sed` (on a Mac OS X box):
>> The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
>> on other operating systems.
>>
>> -	/\bsed\s+-i/ and err 'sed -i is not portable';
>> +	/\bsed\s+-[^efn]\s+/ and err 'Not portable option with sed (use only [-n] [-e command] [-f command_file])';
>
> "sed -n -i -E -e 's/foo/bar/p'" won't be caught with this as an
> error, but that's OK ;-).
>
> Is this still an RFC patch?
>

It seems as if everybody is happy with it,
so it may be ready for pu.


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-22 20:00                 ` Torsten Bögershausen
@ 2019-01-22 21:15                   ` Eric Sunshine
  2019-01-23  6:35                     ` Torsten Bögershausen
  0 siblings, 1 reply; 83+ messages in thread
From: Eric Sunshine @ 2019-01-22 21:15 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Junio C Hamano, Git List, SZEDER Gábor, zhiyou.jx,
	Ævar Arnfjörð Bjarmason

On Tue, Jan 22, 2019 at 3:00 PM Torsten Bögershausen <tboegi@web.de> wrote:
> On 22.01.19 20:47, Junio C Hamano wrote:
> > tboegi@web.de writes:
> >> -    /\bsed\s+-i/ and err 'sed -i is not portable';
> >> +    /\bsed\s+-[^efn]\s+/ and err 'Not portable option with sed (use only [-n] [-e command] [-f command_file])';
> >
> > "sed -n -i -E -e 's/foo/bar/p'" won't be caught with this as an
> > error, but that's OK ;-).
> > Is this still an RFC patch?
>
> It seems as if everybody is happy with it,
> so it may be ready for for pu.

I'd still prefer to see a more terse[1] (and not capitalized) message
to be consistent with existing error messages and to keep the reported
errors more compact overall to make them easier to digest[2,3]:

    err 'sed option not portable (use only -n, -e, -f)'

But that's just a very minor nit.

[1]: http://public-inbox.org/git/CAPig+cSeDNYFGYC2WznjW3zYMJCWZbZFY1KM5H5ir2L=Jxwy7w@mail.gmail.com/
[2]: http://public-inbox.org/git/20180713055205.32351-3-sunshine@sunshineco.com/
[3]: http://public-inbox.org/git/20180713055205.32351-4-sunshine@sunshineco.com/

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-22 21:15                   ` Eric Sunshine
@ 2019-01-23  6:35                     ` Torsten Bögershausen
  2019-01-23 17:54                       ` Junio C Hamano
  0 siblings, 1 reply; 83+ messages in thread
From: Torsten Bögershausen @ 2019-01-23  6:35 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: Junio C Hamano, Git List, SZEDER Gábor, zhiyou.jx,
	Ævar Arnfjörð Bjarmason

On 22.01.19 22:15, Eric Sunshine wrote:
> On Tue, Jan 22, 2019 at 3:00 PM Torsten Bögershausen <tboegi@web.de> wrote:
>> On 22.01.19 20:47, Junio C Hamano wrote:
>>> tboegi@web.de writes:
>>>> -    /\bsed\s+-i/ and err 'sed -i is not portable';
>>>> +    /\bsed\s+-[^efn]\s+/ and err 'Not portable option with sed (use only [-n] [-e command] [-f command_file])';
>>>
>>> "sed -n -i -E -e 's/foo/bar/p'" won't be caught with this as an
>>> error, but that's OK ;-).
>>> Is this still an RFC patch?
>>
>> It seems as if everybody is happy with it,
>> so it may be ready for for pu.
>
> I'd still prefer to see a more terse[1] (and not capitalized) message
> to be consistent with existing error messages and to keep the reported
> errors more compact overall to make them easier to digest[2,3]:
>
>     err 'sed option not portable (use only -n, -e, -f)'
>

That's OK for me - let's see if there are more comments.


> But that's just a very minor nit.
>
> [1]: http://public-inbox.org/git/CAPig+cSeDNYFGYC2WznjW3zYMJCWZbZFY1KM5H5ir2L=Jxwy7w@mail.gmail.com/
> [2]: http://public-inbox.org/git/20180713055205.32351-3-sunshine@sunshineco.com/
> [3]: http://public-inbox.org/git/20180713055205.32351-4-sunshine@sunshineco.com/
>


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-23  6:35                     ` Torsten Bögershausen
@ 2019-01-23 17:54                       ` Junio C Hamano
  2019-01-25 19:12                         ` Torsten Bögershausen
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-01-23 17:54 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Eric Sunshine, Git List, SZEDER Gábor, zhiyou.jx,
	Ævar Arnfjörð Bjarmason

Torsten Bögershausen <tboegi@web.de> writes:

>> I'd still prefer to see a more terse[1] (and not capitalized) message
>> to be consistent with existing error messages and to keep the reported
>> errors more compact overall to make them easier to digest[2,3]:
>>
>>     err 'sed option not portable (use only -n, -e, -f)'
>
> That's OK for me - let's see if there are more comments.

Thanks, both.  Eric's proposed message looks good to me too.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-23 17:54                       ` Junio C Hamano
@ 2019-01-25 19:12                         ` Torsten Bögershausen
  2019-01-27 22:34                           ` Junio C Hamano
  0 siblings, 1 reply; 83+ messages in thread
From: Torsten Bögershausen @ 2019-01-25 19:12 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, Git List, SZEDER Gábor, zhiyou.jx,
	Ævar Arnfjörð Bjarmason

On Wed, Jan 23, 2019 at 09:54:25AM -0800, Junio C Hamano wrote:
> Torsten Bögershausen <tboegi@web.de> writes:
>
> >> I'd still prefer to see a more terse[1] (and not capitalized) message
> >> to be consistent with existing error messages and to keep the reported
> >> errors more compact overall to make them easier to digest[2,3]:
> >>
> >>     err 'sed option not portable (use only -n, -e, -f)'
> >
> > That's OK for me - let's see if there are more comments.
>
> Thanks, both.  Eric's proposed message looks good to me too.

Do you want to amend the patch locally?
Or should I send a new version?

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-25 19:12                         ` Torsten Bögershausen
@ 2019-01-27 22:34                           ` Junio C Hamano
  0 siblings, 0 replies; 83+ messages in thread
From: Junio C Hamano @ 2019-01-27 22:34 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Eric Sunshine, Git List, SZEDER Gábor, zhiyou.jx,
	Ævar Arnfjörð Bjarmason

Torsten Bögershausen <tboegi@web.de> writes:

>> Thanks, both.  Eric's proposed message looks good to me too.
>
> Do you want to amend the patch locally?

I just amended it; thanks.



^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 0/6] pack-redundant: new algorithm to find min packs
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
@ 2019-01-30 11:47               ` " Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 " Jiang Xin
                                   ` (6 more replies)
  2019-01-30 11:47               ` [PATCH v7 1/6] t5323: test cases for git-pack-redundant Jiang Xin
                                 ` (5 subsequent siblings)
  6 siblings, 7 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

Sun Chao (my former colleague at Huawei) found a bug in
git-pack-redundant.  If there are too many packs and many of them
overlap each other, running `git pack-redundant --all` will
exhaust all memory and the process will be killed by the kernel.

There is a script in the commit message of patch 3/6, which can be used
to create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it reproduces this issue.

Derrick Stolee <stolee@gmail.com> 于2019年1月20日周日 上午9:08写道:
>
> Here is today's test coverage report.
>
> builtin/pack-redundant.c
> a338d10395 builtin/pack-redundant.c 339) static int cmp_remaining_objects(const void *a, const void *b)
> e4e2c2884e builtin/pack-redundant.c 341) struct pack_list *pl_a = *((struct pack_list **)a);
> e4e2c2884e builtin/pack-redundant.c 342) struct pack_list *pl_b = *((struct pack_list **)b);
> ...

Add new test cases in t5323 for better test coverage.

## Changes since reroll v6

* Add new test cases in t5323.

* Add new patch 2/6 (pack-redundant: delay creation of unique_objects),
  which fixes a bug where redundant packs are not found when alt-odb
  searching is turned on with the `--alt-odb` option.  This is because
  alt-odb objects are only removed from the unique_objects field but not
  from the all_objects field of pack_list.

## Range diff

1:  be6555ae60 ! 1:  799e804d5e t5323: test cases for git-pack-redundant
    @@ -43,7 +43,7 @@
     +}
     +
     +create_pack_1 () {
    -+	P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P1=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$T
     +		$A
     +		$B
    @@ -58,7 +58,7 @@
     +}
     +
     +create_pack_2 () {
    -+	P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P2=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$B
     +		$C
     +		$D
    @@ -72,7 +72,7 @@
     +}
     +
     +create_pack_3 () {
    -+	P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P3=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$F
     +		$I
     +		$J
    @@ -85,7 +85,7 @@
     +}
     +
     +create_pack_4 () {
    -+	P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P4=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$J
     +		$K
     +		$L
    @@ -97,7 +97,7 @@
     +}
     +
     +create_pack_5 () {
    -+	P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P5=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$G
     +		$H
     +		$N
    @@ -108,7 +108,7 @@
     +}
     +
     +create_pack_6 () {
    -+	P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P6=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$N
     +		$O
     +		$Q
    @@ -118,7 +118,7 @@
     +}
     +
     +create_pack_7 () {
    -+	P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P7=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$P
     +		$Q
     +		EOF
    @@ -127,18 +127,37 @@
     +}
     +
     +create_pack_8 () {
    -+	P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++	P8=$(git -C objects/pack pack-objects -q pack <<-EOF
     +		$A
     +		EOF
     +	) &&
     +	eval P$P8=P8:$P8
     +}
     +
    -+test_expect_success 'setup' '
    ++format_packfiles () {
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" |
    ++	sort -u |
    ++	while read p
    ++	do
    ++		if test -z "$(eval echo \${P$p})"
    ++		then
    ++			echo $p
    ++		else
    ++			eval echo "\${P$p}"
    ++		fi
    ++	done |
    ++	sort
    ++}
    ++
    ++test_expect_success 'setup master.git' '
    ++	git init --bare master.git &&
    ++	cd master.git &&
     +	create_commits
     +'
     +
    -+test_expect_success 'no redundant packs' '
    ++test_expect_success 'no redundant for pack 1, 2, 3' '
     +	create_pack_1 && create_pack_2 && create_pack_3 &&
     +	git pack-redundant --all >out &&
     +	test_must_be_empty out
    @@ -154,12 +173,7 @@
     +
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
     +	git pack-redundant --all >out &&
    -+	sed \
    -+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    -+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    -+		sort -u |
    -+		while read p; do eval echo "\${P$p}"; done |
    -+		sort >actual &&
    ++	format_packfiles <out >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -176,12 +190,7 @@
     +
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
     +	git pack-redundant --all >out &&
    -+	sed \
    -+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    -+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    -+		sort -u |
    -+		while read p; do eval echo "\${P$p}"; done |
    -+		sort >actual &&
    ++	format_packfiles <out >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -198,18 +207,13 @@
     +
     +test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
     +	git pack-redundant --all >out &&
    -+	sed \
    -+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    -+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    -+		sort -u |
    -+		while read p; do eval echo "\${P$p}"; done |
    -+		sort >actual &&
    ++	format_packfiles <out >actual &&
     +	test_cmp expected actual
     +'
     +
     +test_expect_success 'clean loose objects' '
     +	git prune-packed &&
    -+	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
    ++	find objects -type f | sed -e "/objects\/pack\//d" >out &&
     +	test_must_be_empty out
     +'
     +
    @@ -220,4 +224,115 @@
     +	test_must_be_empty out
     +'
     +
    ++test_expect_success 'setup shared.git' '
    ++	cd "$TRASH_DIRECTORY" &&
    ++	git clone -q --mirror master.git shared.git &&
    ++	cd shared.git &&
    ++	printf "../../master.git/objects" >objects/info/alternates
    ++'
    ++
    ++test_expect_success 'no redundant packs without --alt-odb' '
    ++	git pack-redundant --all >out &&
    ++	test_must_be_empty out
    ++'
    ++
    ++cat >expected <<EOF
    ++P1:$P1
    ++P3:$P3
    ++P5:$P5
    ++P7:$P7
    ++EOF
    ++
    ++test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    ++	git pack-redundant --all --verbose >out 2>out.err &&
    ++	test_must_be_empty out &&
    ++	grep "pack$" out.err | format_packfiles >actual &&
    ++	test_cmp expected actual
    ++'
    ++
    ++cat >expected <<EOF
    ++fatal: Zero packs found!
    ++EOF
    ++
    ++test_expect_success 'remove redundant packs by alt-odb, no packs left' '
    ++	git pack-redundant --all --alt-odb | xargs rm &&
    ++	git fsck --no-progress &&
    ++	test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
    ++	test_cmp expected actual
    ++'
    ++
    ++create_commits_others () {
    ++	parent=$(git rev-parse HEAD)
    ++	for name in X Y Z
    ++	do
    ++		test_tick &&
    ++		T=$(git write-tree) &&
    ++		if test -z "$parent"
    ++		then
    ++			oid=$(echo $name | git commit-tree $T)
    ++		else
    ++			oid=$(echo $name | git commit-tree -p $parent $T)
    ++		fi &&
    ++		eval $name=$oid &&
    ++		parent=$oid ||
    ++		return 1
    ++	done
    ++	git update-ref refs/heads/master $Z
    ++}
    ++
    ++create_pack_x1 () {
    ++	Px1=$(git -C objects/pack pack-objects -q pack <<-EOF
    ++		$X
    ++		$Y
    ++		$Z
    ++		$A
    ++		$B
    ++		$C
    ++		EOF
    ++	) &&
    ++	eval P${Px1}=Px1:${Px1}
    ++}
    ++
    ++create_pack_x2 () {
    ++	Px2=$(git -C objects/pack pack-objects -q pack <<-EOF
    ++		$X
    ++		$Y
    ++		$Z
    ++		$D
    ++		$E
    ++		$F
    ++		EOF
    ++	) &&
    ++	eval P${Px2}=Px2:${Px2}
    ++}
    ++
    ++test_expect_success 'new objects and packs in shared.git' '
    ++	create_commits_others &&
    ++	create_pack_x1 &&
    ++	create_pack_x2 &&
    ++	git pack-redundant --all >out &&
    ++	test_must_be_empty out
    ++'
    ++
    ++test_expect_success 'one pack is redundant' '
    ++	git pack-redundant --all --alt-odb >out &&
    ++	format_packfiles <out >actual &&
    ++	test_line_count = 1 actual
    ++'
    ++
    ++cat >expected <<EOF
    ++Px1:$Px1
    ++Px2:$Px2
    ++EOF
    ++
    ++test_expect_success 'set ignore objects and all two packs are redundant' '
    ++	git pack-redundant --all --alt-odb >out <<-EOF &&
    ++		$X
    ++		$Y
    ++		$Z
    ++		EOF
    ++	format_packfiles <out >actual &&
    ++	test_cmp expected actual
    ++'
    ++
     +test_done
-:  ---------- > 2:  520f6277fb pack-redundant: delay creation of unique_objects
2:  e4e2c2884e ! 3:  ab1c2c4950 pack-redundant: new algorithm to find min packs
    @@ -64,6 +64,9 @@
         4. Drop the duplicated objects from other packs in the ordered
            non_unique pack list, and repeat step 3.
     
    +    Some test cases will fail on Mac OS X. Mark them and will resolve in
    +    later commit.
    +
         Original PR and discussions: https://github.com/jiangxin/git/pull/25
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    @@ -213,11 +216,61 @@
      	struct llist *ignore;
      	struct object_id *oid;
      	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
    +
    + diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
    + --- a/t/t5323-pack-redundant.sh
    + +++ b/t/t5323-pack-redundant.sh
     @@
    - 	pl = local_packs;
    - 	while (pl) {
    - 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
    -+		llist_sorted_difference_inplace(pl->all_objects, ignore);
    - 		pl = pl->next;
    - 	}
    + P2:$P2
    + EOF
    + 
    +-test_expect_success 'one of pack-2/pack-3 is redundant' '
    ++test_expect_failure 'one of pack-2/pack-3 is redundant' '
    + 	git pack-redundant --all >out &&
    + 	format_packfiles <out >actual &&
    + 	test_cmp expected actual
    +@@
    + P6:$P6
    + EOF
    + 
    +-test_expect_success 'pack 2, 4, and 6 are redundant' '
    ++test_expect_failure 'pack 2, 4, and 6 are redundant' '
    + 	git pack-redundant --all >out &&
    + 	format_packfiles <out >actual &&
    + 	test_cmp expected actual
    +@@
    + P8:$P8
    + EOF
    + 
    +-test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    ++test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    + 	git pack-redundant --all >out &&
    + 	format_packfiles <out >actual &&
    + 	test_cmp expected actual
    +@@
    + 	test_must_be_empty out
    + '
    + 
    +-test_expect_success 'remove redundant packs and pass fsck' '
    ++test_expect_failure 'remove redundant packs and pass fsck' '
    + 	git pack-redundant --all | xargs rm &&
    + 	git fsck --no-progress &&
    + 	git pack-redundant --all >out &&
    +@@
    + 	printf "../../master.git/objects" >objects/info/alternates
    + '
    + 
    +-test_expect_success 'no redundant packs without --alt-odb' '
    ++test_expect_failure 'no redundant packs without --alt-odb' '
    + 	git pack-redundant --all >out &&
    + 	test_must_be_empty out
    + '
    +@@
    + P7:$P7
    + EOF
      
    +-test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    ++test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
    + 	git pack-redundant --all --verbose >out 2>out.err &&
    + 	test_must_be_empty out &&
    + 	grep "pack$" out.err | format_packfiles >actual &&
3:  e60b134e66 = 4:  3c3a7ea40f pack-redundant: remove unused functions
4:  cb7e0336fc ! 5:  bc4b681f40 pack-redundant: rename pack_list.all_objects
    @@ -20,6 +20,18 @@
      } *local_packs = NULL, *altodb_packs = NULL;
      
      static struct llist_item *free_nodes;
    +@@
    + 	const unsigned int hashsz = the_hash_algo->rawsz;
    + 
    + 	if (!p1->unique_objects)
    +-		p1->unique_objects = llist_copy(p1->all_objects);
    ++		p1->unique_objects = llist_copy(p1->remaining_objects);
    + 	if (!p2->unique_objects)
    +-		p2->unique_objects = llist_copy(p2->all_objects);
    ++		p2->unique_objects = llist_copy(p2->remaining_objects);
    + 
    + 	p1_base = p1->pack->index_data;
    + 	p2_base = p2->pack->index_data;
     @@
      {
      	struct pack_list *pl_a = *((struct pack_list **)a);
    @@ -94,10 +106,12 @@
      	}
      }
     @@
    + 	while (alt) {
      		local = local_packs;
      		while (local) {
    - 			llist_sorted_difference_inplace(local->unique_objects,
    +-			llist_sorted_difference_inplace(local->all_objects,
     -							alt->all_objects);
    ++			llist_sorted_difference_inplace(local->remaining_objects,
     +							alt->remaining_objects);
      			local = local->next;
      		}
    @@ -123,16 +137,11 @@
     +		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
      		off += step;
      	}
    - 	/* this list will be pruned in cmp_two_packs later */
    --	l.unique_objects = llist_copy(l.all_objects);
    -+	l.unique_objects = llist_copy(l.remaining_objects);
    - 	if (p->pack_local)
    - 		return pack_list_insert(&local_packs, &l);
    - 	else
    + 	l.unique_objects = NULL;
     @@
    + 	llist_sorted_difference_inplace(all_objects, ignore);
      	pl = local_packs;
      	while (pl) {
    - 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
     -		llist_sorted_difference_inplace(pl->all_objects, ignore);
     +		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
      		pl = pl->next;
5:  a338d10395 ! 6:  6cfba5b4b2 pack-redundant: consistent sort method
    @@ -75,9 +75,9 @@
      		off += step;
      	}
     +	l.all_objects_size = l.remaining_objects->size;
    - 	/* this list will be pruned in cmp_two_packs later */
    - 	l.unique_objects = llist_copy(l.remaining_objects);
    + 	l.unique_objects = NULL;
      	if (p->pack_local)
    + 		return pack_list_insert(&local_packs, &l);
     
      diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
      --- a/t/t5323-pack-redundant.sh
    @@ -90,4 +90,53 @@
     +P3:$P3
      EOF
      
    - test_expect_success 'one of pack-2/pack-3 is redundant' '
    +-test_expect_failure 'one of pack-2/pack-3 is redundant' '
    ++test_expect_success 'one of pack-2/pack-3 is redundant' '
    + 	git pack-redundant --all >out &&
    + 	format_packfiles <out >actual &&
    + 	test_cmp expected actual
    +@@
    + P6:$P6
    + EOF
    + 
    +-test_expect_failure 'pack 2, 4, and 6 are redundant' '
    ++test_expect_success 'pack 2, 4, and 6 are redundant' '
    + 	git pack-redundant --all >out &&
    + 	format_packfiles <out >actual &&
    + 	test_cmp expected actual
    +@@
    + P8:$P8
    + EOF
    + 
    +-test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    ++test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    + 	git pack-redundant --all >out &&
    + 	format_packfiles <out >actual &&
    + 	test_cmp expected actual
    +@@
    + 	test_must_be_empty out
    + '
    + 
    +-test_expect_failure 'remove redundant packs and pass fsck' '
    ++test_expect_success 'remove redundant packs and pass fsck' '
    + 	git pack-redundant --all | xargs rm &&
    + 	git fsck --no-progress &&
    + 	git pack-redundant --all >out &&
    +@@
    + 	printf "../../master.git/objects" >objects/info/alternates
    + '
    + 
    +-test_expect_failure 'no redundant packs without --alt-odb' '
    ++test_expect_success 'no redundant packs without --alt-odb' '
    + 	git pack-redundant --all >out &&
    + 	test_must_be_empty out
    + '
    +@@
    + P7:$P7
    + EOF
    + 
    +-test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
    ++test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    + 	git pack-redundant --all --verbose >out 2>out.err &&
    + 	test_must_be_empty out &&
    + 	grep "pack$" out.err | format_packfiles >actual &&

--
Jiang Xin (4):
  t5323: test cases for git-pack-redundant
  pack-redundant: delay creation of unique_objects
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 233 +++++++++++----------------
 t/t5323-pack-redundant.sh | 322 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 415 insertions(+), 140 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
@ 2019-01-30 11:47               ` Jiang Xin
  2019-01-31 21:44                 ` Junio C Hamano
  2019-01-30 11:47               ` [PATCH v7 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
                                 ` (4 subsequent siblings)
  6 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate its new algorithm.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5323-pack-redundant.sh | 322 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..710fe9884c
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,322 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git pack-redundant test'
+
+. ./test-lib.sh
+
+create_commits () {
+	parent=
+	for name in A B C D E F G H I J K L M N O P Q R
+	do
+		test_tick &&
+		T=$(git write-tree) &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git commit-tree $T)
+		else
+			oid=$(echo $name | git commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid ||
+		return 1
+	done
+	git update-ref refs/heads/master $R
+}
+
+create_pack_1 () {
+	P1=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$T
+		$A
+		$B
+		$C
+		$D
+		$E
+		$F
+		$R
+		EOF
+	) &&
+	eval P$P1=P1:$P1
+}
+
+create_pack_2 () {
+	P2=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$B
+		$C
+		$D
+		$E
+		$G
+		$H
+		$I
+		EOF
+	) &&
+	eval P$P2=P2:$P2
+}
+
+create_pack_3 () {
+	P3=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$F
+		$I
+		$J
+		$K
+		$L
+		$M
+		EOF
+	) &&
+	eval P$P3=P3:$P3
+}
+
+create_pack_4 () {
+	P4=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$J
+		$K
+		$L
+		$M
+		$P
+		EOF
+	) &&
+	eval P$P4=P4:$P4
+}
+
+create_pack_5 () {
+	P5=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$G
+		$H
+		$N
+		$O
+		EOF
+	) &&
+	eval P$P5=P5:$P5
+}
+
+create_pack_6 () {
+	P6=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$N
+		$O
+		$Q
+		EOF
+	) &&
+	eval P$P6=P6:$P6
+}
+
+create_pack_7 () {
+	P7=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$P
+		$Q
+		EOF
+	) &&
+	eval P$P7=P7:$P7
+}
+
+create_pack_8 () {
+	P8=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$A
+		EOF
+	) &&
+	eval P$P8=P8:$P8
+}
+
+format_packfiles () {
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" |
+	sort -u |
+	while read p
+	do
+		if test -z "$(eval echo \${P$p})"
+		then
+			echo $p
+		else
+			eval echo "\${P$p}"
+		fi
+	done |
+	sort
+}
+
+test_expect_success 'setup master.git' '
+	git init --bare master.git &&
+	cd master.git &&
+	create_commits
+'
+
+test_expect_success 'no redundant for pack 1, 2, 3' '
+	create_pack_1 && create_pack_2 && create_pack_3 &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'create pack 4, 5' '
+	create_pack_4 && create_pack_5
+'
+
+cat >expected <<EOF
+P2:$P2
+EOF
+
+test_expect_success 'one of pack-2/pack-3 is redundant' '
+	git pack-redundant --all >out &&
+	format_packfiles <out >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 6, 7' '
+	create_pack_6 && create_pack_7
+'
+
+# Only after calling create_pack_6, we can use $P6 variable.
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+EOF
+
+test_expect_success 'pack 2, 4, and 6 are redundant' '
+	git pack-redundant --all >out &&
+	format_packfiles <out >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 8' '
+	create_pack_8
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+P8:$P8
+EOF
+
+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
+	git pack-redundant --all >out &&
+	format_packfiles <out >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'clean loose objects' '
+	git prune-packed &&
+	find objects -type f | sed -e "/objects\/pack\//d" >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'remove redundant packs and pass fsck' '
+	git pack-redundant --all | xargs rm &&
+	git fsck --no-progress &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'setup shared.git' '
+	cd "$TRASH_DIRECTORY" &&
+	git clone -q --mirror master.git shared.git &&
+	cd shared.git &&
+	printf "../../master.git/objects" >objects/info/alternates
+'
+
+test_expect_success 'no redundant packs without --alt-odb' '
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+cat >expected <<EOF
+P1:$P1
+P3:$P3
+P5:$P5
+P7:$P7
+EOF
+
+test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
+	git pack-redundant --all --verbose >out 2>out.err &&
+	test_must_be_empty out &&
+	grep "pack$" out.err | format_packfiles >actual &&
+	test_cmp expected actual
+'
+
+cat >expected <<EOF
+fatal: Zero packs found!
+EOF
+
+test_expect_success 'remove redundant packs by alt-odb, no packs left' '
+	git pack-redundant --all --alt-odb | xargs rm &&
+	git fsck --no-progress &&
+	test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
+	test_cmp expected actual
+'
+
+create_commits_others () {
+	parent=$(git rev-parse HEAD)
+	for name in X Y Z
+	do
+		test_tick &&
+		T=$(git write-tree) &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git commit-tree $T)
+		else
+			oid=$(echo $name | git commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid ||
+		return 1
+	done
+	git update-ref refs/heads/master $Z
+}
+
+create_pack_x1 () {
+	Px1=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$X
+		$Y
+		$Z
+		$A
+		$B
+		$C
+		EOF
+	) &&
+	eval P${Px1}=Px1:${Px1}
+}
+
+create_pack_x2 () {
+	Px2=$(git -C objects/pack pack-objects -q pack <<-EOF
+		$X
+		$Y
+		$Z
+		$D
+		$E
+		$F
+		EOF
+	) &&
+	eval P${Px2}=Px2:${Px2}
+}
+
+test_expect_success 'new objects and packs in shared.git' '
+	create_commits_others &&
+	create_pack_x1 &&
+	create_pack_x2 &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'one pack is redundant' '
+	git pack-redundant --all --alt-odb >out &&
+	format_packfiles <out >actual &&
+	test_line_count = 1 actual
+'
+
+cat >expected <<EOF
+Px1:$Px1
+Px2:$Px2
+EOF
+
+test_expect_success 'set ignore objects and all two packs are redundant' '
+	git pack-redundant --all --alt-odb >out <<-EOF &&
+		$X
+		$Y
+		$Z
+		EOF
+	format_packfiles <out >actual &&
+	test_cmp expected actual
+'
+
+test_done
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 2/6] pack-redundant: delay creation of unique_objects
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 1/6] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-30 11:47               ` Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 3/6] pack-redundant: new algorithm to find min packs Jiang Xin
                                 ` (3 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Instead of initializing unique_objects in `add_pack()`, copy it from
all_objects in `cmp_two_packs()`, after unwanted objects have been
removed from all_objects.

This saves memory (nothing is allocated for alt-odb packs), and
`llist_sorted_difference_inplace()` runs only once when removing ignored
objects and when removing alt-odb objects in `scan_alt_odb_packs()`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..f7dab0ec60 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -254,6 +254,11 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	struct llist_item *p1_hint = NULL, *p2_hint = NULL;
 	const unsigned int hashsz = the_hash_algo->rawsz;
 
+	if (!p1->unique_objects)
+		p1->unique_objects = llist_copy(p1->all_objects);
+	if (!p2->unique_objects)
+		p2->unique_objects = llist_copy(p2->all_objects);
+
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
 	p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
@@ -536,7 +541,7 @@ static void scan_alt_odb_packs(void)
 	while (alt) {
 		local = local_packs;
 		while (local) {
-			llist_sorted_difference_inplace(local->unique_objects,
+			llist_sorted_difference_inplace(local->all_objects,
 							alt->all_objects);
 			local = local->next;
 		}
@@ -567,8 +572,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
-	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = NULL;
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -646,7 +650,6 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 
 	load_all_objects();
 
-	cmp_local_packs();
 	if (alt_odb)
 		scan_alt_odb_packs();
 
@@ -663,10 +666,12 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	llist_sorted_difference_inplace(all_objects, ignore);
 	pl = local_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
+	cmp_local_packs();
+
 	minimize(&min);
 
 	if (verbose) {
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 3/6] pack-redundant: new algorithm to find min packs
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
                                 ` (2 preceding siblings ...)
  2019-01-30 11:47               ` [PATCH v7 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
@ 2019-01-30 11:47               ` Jiang Xin
  2019-01-31 19:30                 ` Junio C Hamano
  2019-01-30 11:47               ` [PATCH v7 4/6] pack-redundant: remove unused functions Jiang Xin
                                 ` (2 subsequent siblings)
  6 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the overly deep
iteration of `get_permutations` will exhaust all resources, and the
`git pack-redundant` process will be killed.

The following script creates a repository with too many redundant
packs; running `git pack-redundant --all` in the resulting `test.git`
repo will soon die.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by number of objects, most objects first,
   and add the first non_unique pack to the `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Some test cases will fail on Mac OS X.  Mark them as expected failures;
they will be resolved in a later commit.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c  | 108 +++++++++++++++++++++++---------------
 t/t5323-pack-redundant.sh |  12 ++---
 2 files changed, 73 insertions(+), 47 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index f7dab0ec60..b3f3bb5e09 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -426,14 +426,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -451,49 +489,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -607,7 +633,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 710fe9884c..d176a6eb91 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -155,7 +155,7 @@ cat >expected <<EOF
 P2:$P2
 EOF
 
-test_expect_success 'one of pack-2/pack-3 is redundant' '
+test_expect_failure 'one of pack-2/pack-3 is redundant' '
 	git pack-redundant --all >out &&
 	format_packfiles <out >actual &&
 	test_cmp expected actual
@@ -172,7 +172,7 @@ P4:$P4
 P6:$P6
 EOF
 
-test_expect_success 'pack 2, 4, and 6 are redundant' '
+test_expect_failure 'pack 2, 4, and 6 are redundant' '
 	git pack-redundant --all >out &&
 	format_packfiles <out >actual &&
 	test_cmp expected actual
@@ -189,7 +189,7 @@ P6:$P6
 P8:$P8
 EOF
 
-test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
+test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
 	git pack-redundant --all >out &&
 	format_packfiles <out >actual &&
 	test_cmp expected actual
@@ -201,7 +201,7 @@ test_expect_success 'clean loose objects' '
 	test_must_be_empty out
 '
 
-test_expect_success 'remove redundant packs and pass fsck' '
+test_expect_failure 'remove redundant packs and pass fsck' '
 	git pack-redundant --all | xargs rm &&
 	git fsck --no-progress &&
 	git pack-redundant --all >out &&
@@ -215,7 +215,7 @@ test_expect_success 'setup shared.git' '
 	printf "../../master.git/objects" >objects/info/alternates
 '
 
-test_expect_success 'no redundant packs without --alt-odb' '
+test_expect_failure 'no redundant packs without --alt-odb' '
 	git pack-redundant --all >out &&
 	test_must_be_empty out
 '
@@ -227,7 +227,7 @@ P5:$P5
 P7:$P7
 EOF
 
-test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
+test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
 	git pack-redundant --all --verbose >out 2>out.err &&
 	test_must_be_empty out &&
 	grep "pack$" out.err | format_packfiles >actual &&
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 4/6] pack-redundant: remove unused functions
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
                                 ` (3 preceding siblings ...)
  2019-01-30 11:47               ` [PATCH v7 3/6] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-30 11:47               ` Jiang Xin
  2019-01-30 15:03                 ` [PATCH v8 1/1] pack-redundant: delete redundant code 16657101987
  2019-01-30 11:47               ` [PATCH v7 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 6/6] pack-redundant: consistent sort method Jiang Xin
  6 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ----------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index b3f3bb5e09..b83d55031a 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -290,78 +276,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 5/6] pack-redundant: rename pack_list.all_objects
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
                                 ` (4 preceding siblings ...)
  2019-01-30 11:47               ` [PATCH v7 4/6] pack-redundant: remove unused functions Jiang Xin
@ 2019-01-30 11:47               ` Jiang Xin
  2019-01-30 11:47               ` [PATCH v7 6/6] pack-redundant: consistent sort method Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

New algorithm uses `pack_list.all_objects` to track remaining objects,
so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index b83d55031a..c145852b8b 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -241,9 +241,9 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	const unsigned int hashsz = the_hash_algo->rawsz;
 
 	if (!p1->unique_objects)
-		p1->unique_objects = llist_copy(p1->all_objects);
+		p1->unique_objects = llist_copy(p1->remaining_objects);
 	if (!p2->unique_objects)
-		p2->unique_objects = llist_copy(p2->all_objects);
+		p2->unique_objects = llist_copy(p2->remaining_objects);
 
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
@@ -344,8 +344,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -355,7 +355,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -399,7 +399,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -417,20 +417,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -445,7 +445,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -456,7 +456,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -481,11 +481,11 @@ static void scan_alt_odb_packs(void)
 	while (alt) {
 		local = local_packs;
 		while (local) {
-			llist_sorted_difference_inplace(local->all_objects,
-							alt->all_objects);
+			llist_sorted_difference_inplace(local->remaining_objects,
+							alt->remaining_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
 		alt = alt->next;
 	}
 }
@@ -500,7 +500,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -509,7 +509,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	l.unique_objects = NULL;
@@ -606,7 +606,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	llist_sorted_difference_inplace(all_objects, ignore);
 	pl = local_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v7 6/6] pack-redundant: consistent sort method
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
                                 ` (5 preceding siblings ...)
  2019-01-30 11:47               ` [PATCH v7 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-01-30 11:47               ` Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-01-30 11:47 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 has different test results on MacOS.
This is because `cmp_pack_list_reverse` cannot give an identical result
when two packs being sorted have the same size of remaining_objects.

Changes to the sorting function make the test results of t5323
consistent.

The new algorithm to find redundant packs is a trade-off to save memory
resources; its result may differ from that of the old one, and may
sometimes not be the best result.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c  | 24 ++++++++++++++++--------
 t/t5323-pack-redundant.sh | 14 +++++++-------
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index c145852b8b..0316a400ad 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -340,19 +341,25 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
+		/* have the same remaining_objects, big pack first */
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
+		/* sort by remaining objects, more objects first */
 		return 1;
-	else
+	} else {
 		return -1;
+	}
 }
 
 /* Sort pack_list, greater size of remaining_objects first */
@@ -370,7 +377,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -512,6 +519,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	l.unique_objects = NULL;
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index d176a6eb91..4469128203 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -152,10 +152,10 @@ test_expect_success 'create pack 4, 5' '
 '
 
 cat >expected <<EOF
-P2:$P2
+P3:$P3
 EOF
 
-test_expect_failure 'one of pack-2/pack-3 is redundant' '
+test_expect_success 'one of pack-2/pack-3 is redundant' '
 	git pack-redundant --all >out &&
 	format_packfiles <out >actual &&
 	test_cmp expected actual
@@ -172,7 +172,7 @@ P4:$P4
 P6:$P6
 EOF
 
-test_expect_failure 'pack 2, 4, and 6 are redundant' '
+test_expect_success 'pack 2, 4, and 6 are redundant' '
 	git pack-redundant --all >out &&
 	format_packfiles <out >actual &&
 	test_cmp expected actual
@@ -189,7 +189,7 @@ P6:$P6
 P8:$P8
 EOF
 
-test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
 	git pack-redundant --all >out &&
 	format_packfiles <out >actual &&
 	test_cmp expected actual
@@ -201,7 +201,7 @@ test_expect_success 'clean loose objects' '
 	test_must_be_empty out
 '
 
-test_expect_failure 'remove redundant packs and pass fsck' '
+test_expect_success 'remove redundant packs and pass fsck' '
 	git pack-redundant --all | xargs rm &&
 	git fsck --no-progress &&
 	git pack-redundant --all >out &&
@@ -215,7 +215,7 @@ test_expect_success 'setup shared.git' '
 	printf "../../master.git/objects" >objects/info/alternates
 '
 
-test_expect_failure 'no redundant packs without --alt-odb' '
+test_expect_success 'no redundant packs without --alt-odb' '
 	git pack-redundant --all >out &&
 	test_must_be_empty out
 '
@@ -227,7 +227,7 @@ P5:$P5
 P7:$P7
 EOF
 
-test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
+test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
 	git pack-redundant --all --verbose >out 2>out.err &&
 	test_must_be_empty out &&
 	grep "pack$" out.err | format_packfiles >actual &&
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v8 1/1] pack-redundant: delete redundant code
  2019-01-30 11:47               ` [PATCH v7 4/6] pack-redundant: remove unused functions Jiang Xin
@ 2019-01-30 15:03                 ` 16657101987
  0 siblings, 0 replies; 83+ messages in thread
From: 16657101987 @ 2019-01-30 15:03 UTC (permalink / raw)
  To: worldhello.net; +Cc: git, gitster, sunchao9, szeder.dev, zhiyou.jx

From: Sun Chao <sunchao9@huawei.com>

The objects in alt-odb are removed from `all_objects` twice, once in
`load_all_objects` and once in `scan_alt_odb_packs`; drop the duplicate
removal from the latter function.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
---
 builtin/pack-redundant.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 0316a400ad..29ff5e99cb 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -492,7 +492,6 @@ static void scan_alt_odb_packs(void)
 							alt->remaining_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
 		alt = alt->next;
 	}
 }
-- 
2.20.1



^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 3/6] pack-redundant: new algorithm to find min packs
  2019-01-30 11:47               ` [PATCH v7 3/6] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-31 19:30                 ` Junio C Hamano
  2019-02-01  9:55                   ` Jiang Xin
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-01-31 19:30 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Jiang Xin <worldhello.net@gmail.com> writes:

> From: Sun Chao <sunchao9@huawei.com>
>
> When calling `git pack-redundant --all`, if there are too many local
> packs and too many redundant objects within them, the too deep iteration
> of `get_permutations` will exhaust all the resources, and the process of
> `git pack-redundant` will be killed.

Build each step of the series with "make DEVELOPER=YesPlease" and
your compiler hopefully would notice...

    CC builtin/pack-redundant.o
builtin/pack-redundant.c:347:12: error: 'is_superset' defined but not used [-Werror=unused-function]
 static int is_superset(struct pack_list *pl, struct llist *list)
            ^~~~~~~~~~~
builtin/pack-redundant.c:313:21: error: 'get_permutations' defined but not used [-Werror=unused-function]
 static struct pll * get_permutations(struct pack_list *list, int n)
                     ^~~~~~~~~~~~~~~~
builtin/pack-redundant.c:293:13: error: 'pll_free' defined but not used [-Werror=unused-function]
 static void pll_free(struct pll *l)
             ^~~~~~~~
cc1: all warnings being treated as errors
Makefile:2302: recipe for target 'builtin/pack-redundant.o' failed
make: *** [builtin/pack-redundant.o] Error 1


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-01-30 11:47               ` [PATCH v7 1/6] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-31 21:44                 ` Junio C Hamano
  2019-02-01  5:44                   ` Jiang Xin
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-01-31 21:44 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Jiang Xin <worldhello.net@gmail.com> writes:

> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> new file mode 100755
> index 0000000000..710fe9884c
> --- /dev/null
> +++ b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,322 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2018 Jiang Xin
> +#
> +
> +test_description='git pack-redundant test'
> +
> +. ./test-lib.sh
> +
> +create_commits () {
> +	parent=
> +	for name in A B C D E F G H I J K L M N O P Q R
> +	do
> +		test_tick &&
> +		T=$(git write-tree) &&

Move this outside loop, not for efficiency but for clarity. This
helper function creates a single empty tree and bunch of commits
that hold the same empty tree, arranged as a single strand of
pearls.

By the way, I had to draw a table like this to figure out ...

     T A B C D E F G H I J K L M N O P Q R
1    x x x x x x x                       x
2        x x x x   x x x
3                x     x x x x x
4                        x x x x     x
5                  x x           x x
6                                x x   x
7                                    x x
8      x

... what is going on.  Perhaps something like this would help other
readers near the top of the file (or in test_description)?


> +format_packfiles () {
> +	sed \
> +		-e "s#.*/pack-\(.*\)\.idx#\1#" \
> +		-e "s#.*/pack-\(.*\)\.pack#\1#" |
> +	sort -u |
> +	while read p
> +	do
> +		if test -z "$(eval echo \${P$p})"
> +		then
> +			echo $p

All the "expected output" below will expect P$n:${P$n} prepared by
various create_pack_$n helpers we saw earlier, so an unknown
packfile would be detected as a line that this emits.  Is that the
idea?

> +		else
> +			eval echo "\${P$p}"
> +		fi
> +	done |
> +	sort
> +}
> +
> +test_expect_success 'setup master.git' '
> +	git init --bare master.git &&
> +	cd master.git &&
> +	create_commits
> +'

Everything below will be done inside master.git?  Avoid cd'ing
around in random places in the test script, as a failure in any of
the steps that does cd would start later tests in an unexpected
place, if you can.

> +test_expect_success 'no redundant for pack 1, 2, 3' '
> +	create_pack_1 && create_pack_2 && create_pack_3 &&
> +	git pack-redundant --all >out &&
> +	test_must_be_empty out
> +'
> +
> +test_expect_success 'create pack 4, 5' '
> +	create_pack_4 && create_pack_5
> +'
> +

> +cat >expected <<EOF
> +P2:$P2
> +EOF
> +
> +test_expect_success 'one of pack-2/pack-3 is redundant' '
> +	git pack-redundant --all >out &&
> +	format_packfiles <out >actual &&
> +	test_cmp expected actual
> +'

Do the preparation of file "expect" (most of the tests compare
'expect' vs 'actual', not 'expected') _inside_ the next test that
uses it.  i.e.

	test_expect_success 'with 1 4 and 5, either 2 or 3 can be omitted' '
		cat >expect <<-EOF &&
		P2:$P2
		EOF
		git pack-redundant --all >out &&
		format ... >actual &&
		test_cmp expect actual
	'

Again, I needed to draw this to see if the "one of ... is redundant"
in the title is a valid claim.  Something like it would help future
readers.

     T A B C D E F G H I J K L M N O P Q R
1245 x x x x x x x x x x x x x x x x     x
3                x     x x x x x

     T A B C D E F G H I J K L M N O P Q R
1345 x x x x x x x x x x x x x x x x     x
2        x x x x   x x x

I won't repeat the same for tests that appear later in this file,
but they share the same issue.

> +test_expect_success 'setup shared.git' '
> +	cd "$TRASH_DIRECTORY" &&
> +	git clone -q --mirror master.git shared.git &&

Why "-q"?

> +	cd shared.git &&
> +	printf "../../master.git/objects" >objects/info/alternates
> +'

Why not echo?  I recall designing the alternates file to be a plain
text file.  Is it necessary to leave the line incomplete?

> +test_expect_success 'remove redundant packs by alt-odb, no packs left' '
> +	git pack-redundant --all --alt-odb | xargs rm &&
> +	git fsck --no-progress &&

Why "--no-progress"?

> +	test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
> +	test_cmp expected actual
> +'
> +
> +create_commits_others () {
> +	parent=$(git rev-parse HEAD)

If this fails, you'd still go ahead and enter the loop, which is not
what you want.

> +	for name in X Y Z
> +	do
> +		test_tick &&
> +		T=$(git write-tree) &&

Lift this outside the loop.

> +		if test -z "$parent"
> +		then
> +			oid=$(echo $name | git commit-tree $T)
> +		else
> +			oid=$(echo $name | git commit-tree -p $parent $T)
> +		fi &&
> +		eval $name=$oid &&
> +		parent=$oid ||
> +		return 1
> +	done
> +	git update-ref refs/heads/master $Z
> +}

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-01-31 21:44                 ` Junio C Hamano
@ 2019-02-01  5:44                   ` Jiang Xin
  2019-02-01  6:11                     ` Eric Sunshine
  0 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-02-01  5:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Junio C Hamano <gitster@pobox.com> 于2019年2月1日周五 上午5:44写道:
> > +create_commits () {
> > +     parent=
> > +     for name in A B C D E F G H I J K L M N O P Q R
> > +     do
> > +             test_tick &&
> > +             T=$(git write-tree) &&
>
> Move this outside loop, not for efficiency but for clarity. This
> helper function creates a single empty tree and bunch of commits
> that hold the same empty tree, arranged as a single strand of
> pearls.

Will rewrite as:

    create_commits () {
            parent=
            T=$(git write-tree) &&
            for name in A B C D E F G H I J K L M N O P Q R

>
> By the way, I had to draw a table like this to figure out ...
>
>      T A B C D E F G H I J K L M N O P Q R
> 1    x x x x x x x                       x
> 2        x x x x   x x x
> 3                x     x x x x x
> 4                        x x x x     x
> 5                  x x           x x
> 6                                x x   x
> 7                                    x x
> 8      x
>
> ... what is going on.  Perhaps something like this would help other
> readers near the top of the file (or in test_description)?

Nice chart, will edit test_description as follows:

    test_description='git pack-redundant test

    In order to test git-pack-redundant, we will create a number of redundant
    packs in the repository `master.git`. The relationship between packs (P1-P8)
    and objects (T,A-R) is shown in the following chart:

           | T A B C D E F G H I J K L M N O P Q R
        ---+--------------------------------------
        P1 | x x x x x x x                       x
        P2 |     x x x x   x x x
        P3 |             x     x x x x x
        P4 |                     x x x x     x
        P5 |               x x           x x
        P6 |                             x x   x
        P7 |                                 x x
        P8 |   x

    Another repoisitory `shared.git` has unique objects (X-Z), while share others
    objects through alt-odb (of `master.git`).  The relationship between packs
    and objects is as follows:

           | T A B C D E F G H I J K L M N O P Q R   X Y Z
        ---+----------------------------------------------
        Px1|   x x x                                 x x x
        Px2|         x x x                           x x x
    '

>
>
> > +format_packfiles () {
> > +     sed \
> > +             -e "s#.*/pack-\(.*\)\.idx#\1#" \
> > +             -e "s#.*/pack-\(.*\)\.pack#\1#" |
> > +     sort -u |
> > +     while read p
> > +     do
> > +             if test -z "$(eval echo \${P$p})"
> > +             then
> > +                     echo $p
>
> All the "expected output" below will expect P$n:${P$n} prepared by
> various create_pack_$n helpers we saw earlier, so an unknown
> packfile would be detected as a line that this emits.  Is that the
> idea?

Right.  During the reroll, a typo produced empty output, so I decided
to make this change.


> > +             else
> > +                     eval echo "\${P$p}"
> > +             fi
> > +     done |
> > +     sort
> > +}
> > +
> > +test_expect_success 'setup master.git' '
> > +     git init --bare master.git &&
> > +     cd master.git &&
> > +     create_commits
> > +'
>
> Everything below will be done inside master.git?  Avoid cd'ing
> around in random places in the test script, as a failure in any of
> the steps that does cd would start later tests in an unexpected
> place, if you can.

The first 10 test cases will run inside master.git, and others will
run inside shared.git.  Only run cd inside the two `setup` test cases.

> > +cat >expected <<EOF
> > +P2:$P2
> > +EOF
> > +
> > +test_expect_success 'one of pack-2/pack-3 is redundant' '
> > +     git pack-redundant --all >out &&
> > +     format_packfiles <out >actual &&
> > +     test_cmp expected actual
> > +'
>
> Do the preparation of file "expect" (most of the tests compare
> 'expect' vs 'actual', not 'expected') _inside_ the next test that
> uses it.  i.e.
>
>         test_expect_success 'with 1 4 and 5, either 2 or 3 can be omitted' '
>                 cat >expect <<-EOF &&
>                 P2:$P2
>                 EOF
>                 git pack-redundant --all >out &&
>                 format ... >actual &&
>                 test_cmp expect actual
>         '

Will do.

> > +test_expect_success 'setup shared.git' '
> > +     cd "$TRASH_DIRECTORY" &&
> > +     git clone -q --mirror master.git shared.git &&
>
> Why "-q"?

To make verbose output cleaner.

> > +     cd shared.git &&
> > +     printf "../../master.git/objects" >objects/info/alternates
> > +'
>
> Why not echo?  I recall designing the alternates file to be a plain
> text file.  Is it necessary to leave the line incomplete?

Forgot "\n", will append.

>
> > +test_expect_success 'remove redundant packs by alt-odb, no packs left' '
> > +     git pack-redundant --all --alt-odb | xargs rm &&
> > +     git fsck --no-progress &&
>
> Why "--no-progress"?

To make verbose output cleaner.

>
> > +     test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
> > +     test_cmp expected actual
> > +'
> > +
> > +create_commits_others () {
> > +     parent=$(git rev-parse HEAD)

Will append "&&".

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-02-01  5:44                   ` Jiang Xin
@ 2019-02-01  6:11                     ` Eric Sunshine
  2019-02-01  7:23                       ` Jiang Xin
  2019-02-01  9:51                       ` Jiang Xin
  0 siblings, 2 replies; 83+ messages in thread
From: Eric Sunshine @ 2019-02-01  6:11 UTC (permalink / raw)
  To: Jiang Xin
  Cc: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao, Jiang Xin

On Fri, Feb 1, 2019 at 12:44 AM Jiang Xin <worldhello.net@gmail.com> wrote:
>> Junio C Hamano <gitster@pobox.com> 于2019年2月1日周五 上午5:44写道:
> > Move this outside loop, not for efficiency but for clarity. This
> > helper function creates a single empty tree and bunch of commits
> > that hold the same empty tree, arranged as a single strand of
> > pearls.
>
> Will rewrite as:
>
>     create_commits () {
>             parent=
>             T=$(git write-tree) &&
>             for name in A B C D E F G H I J K L M N O P Q R

Don't forget the && at the end of the 'parent=' line to protect
against someone later adding code above that line. So:

    create_commits () {
        parent= &&
        T=$(git write-tree) &&
        ...

> Nice chart, will edit test_description as follows:
>
>     test_description='git pack-redundant test
>
>     In order to test git-pack-redundant, we will create a number of
> redundant
>     packs in the repository `master.git`. The relationship between
> packs (P1-P8)
>     and objects (T,A-R) is show in the following chart:
>
>            | T A B C D E F G H I J K L M N O P Q R
>         ---+--------------------------------------
>         P1 | x x x x x x x                       x
>         P2 |     x x x x   x x x
>         P3 |             x     x x x x x
>         P4 |                     x x x x     x
>         P5 |               x x           x x
>         P6 |                             x x   x
>         P7 |                                 x x
>         P8 |   x

test_description should be a meaningful one-liner; it should not
contain this other information, but this information should appear as
comments in the test script.

>     Another repoisitory `shared.git` has unique objects (X-Z), while
> share others

s/repoisitory/repository/

> > > +test_expect_success 'setup master.git' '
> > > +     git init --bare master.git &&
> > > +     cd master.git &&
> > > +     create_commits
> > > +'
> >
> > Everything below will be done inside master.git?  Avoid cd'ing
> > around in random places in the test script, as a failure in any of
> > the steps that does cd would start later tests in an unexpected
> > place, if you can.
>
> The first 10 test cases will run inside master.git, and others will
> run inside shared.git.  Only run cd inside the two `setup` test cases.

That's not what Junio meant. It's okay for tests to 'cd', but each
test which does so _must_ ensure that the 'cd' is undone at the end of
the test, even if the test fails. The correct way to do this within
each test is by using 'cd' in a subshell, like this:

    test_expect_success 'setup master.git' '
        git init --bare master.git &&
        (
            cd master.git &&
            create_commits
        )
    '

Then, each test which needs to use "master.git" would 'cd' itself, like this:

    test_expect_success 'some test' '
        (
            cd master.git &&
            ...
        )
    '

> > > +test_expect_success 'setup shared.git' '
> > > +     cd "$TRASH_DIRECTORY" &&
> > > +     git clone -q --mirror master.git shared.git &&
> >
> > Why "-q"?
>
> To make verbose output cleaner.

What Junio really meant by asking that question was that you should
not do this. When something goes wrong with a test, we want as much
output as possible to help diagnose the problem, so suppressing output
is undesirable. To summarize, don't use -q, --no-progress, or any
other such option and don't redirect to /dev/null.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-02-01  6:11                     ` Eric Sunshine
@ 2019-02-01  7:23                       ` Jiang Xin
  2019-02-01  7:25                         ` Jiang Xin
  2019-02-01  9:51                       ` Jiang Xin
  1 sibling, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-02-01  7:23 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Eric Sunshine <sunshine@sunshineco.com> 于2019年2月1日周五 下午2:11写道:
>
> On Fri, Feb 1, 2019 at 12:44 AM Jiang Xin <worldhello.net@gmail.com> wrote:
> >> Junio C Hamano <gitster@pobox.com> 于2019年2月1日周五 上午5:44写道:
> > > Move this outside loop, not for efficiency but for clarity. This
> > > helper function creates a single empty tree and bunch of commits
> > > that hold the same empty tree, arranged as a single strand of
> > > pearls.
> >
> > Will rewrite as:
> >
> >     create_commits () {
> >             parent=
> >             T=$(git write-tree) &&
> >             for name in A B C D E F G H I J K L M N O P Q R
>
> Don't forget the && at the end of the 'parent=' line to protect
> against someone later adding code above that line. So:
>
>     create_commits () {
>         parent= &&
>         T=$(git write-tree) &&
>         ...

Will do.

> > Nice chart, will edit test_description as follows:
> >
> >     test_description='git pack-redundant test
> >
> >     In order to test git-pack-redundant, we will create a number of
> > redundant
> >     packs in the repository `master.git`. The relationship between
> > packs (P1-P8)
> >     and objects (T,A-R) is show in the following chart:
> >
> >            | T A B C D E F G H I J K L M N O P Q R
> >         ---+--------------------------------------
> >         P1 | x x x x x x x                       x
> >         P2 |     x x x x   x x x
> >         P3 |             x     x x x x x
> >         P4 |                     x x x x     x
> >         P5 |               x x           x x
> >         P6 |                             x x   x
> >         P7 |                                 x x
> >         P8 |   x
>
> test_description should be a meaningful one-liner; it should not
> contain this other information, but this information should appear as
> comments in the test script.

In 't/t0000-basic.sh', there is also a very long test_description.
After reading 't/test-lib.sh', I found that the only use of
test_description is to show it as help when running:

    sh ./t0000-basic.sh

So writing a long test_description is OK, I think.

> >     Another repoisitory `shared.git` has unique objects (X-Z), while
> > share others
>
> s/repoisitory/repository/

Thanks, will fix.

> > > > +test_expect_success 'setup master.git' '
> > > > +     git init --bare master.git &&
> > > > +     cd master.git &&
> > > > +     create_commits
> > > > +'
> > >
> > > Everything below will be done inside master.git?  Avoid cd'ing
> > > around in random places in the test script, as a failure in any of
> > > the steps that does cd would start later tests in an unexpected
> > > place, if you can.
> >
> > The first 10 test cases will run inside master.git, and others will
> > run inside shared.git.  Only run cd inside the two `setup` test cases.
>
> That's not what Junio meant. It's okay for tests to 'cd', but each
> test which does so _must_ ensure that the 'cd' is undone at the end of
> the test, even if the test fails. The correct way to do this within
> each test is by using 'cd' in a subhsell, like this:
>
>     test_expect_success 'setup master.git' '
>         git init --bare master.git &&
>         (
>             cd master.git &&
>             create_commits
>         )
>     '
>
> Then, each test which needs to use "master.git" would 'cd' itself, like this:
>
>     test_expect_success 'some test' '
>         (
>             cd master.git &&
>             ...
>         )
>     '

Nice explanation, will do.

> > > > +test_expect_success 'setup shared.git' '
> > > > +     cd "$TRASH_DIRECTORY" &&
> > > > +     git clone -q --mirror master.git shared.git &&
> > >
> > > Why "-q"?
> >
> > To make verbose output cleaner.
>
> What Junio really meant by asking that question was that you should
> not do this. When something goes wrong with a test, we want as much
> output as possible to help diagnose the problem, so suppressing output
> is undesirable. To summarize, don't use -q, --no-progress, or any
> other such option and don't redirect to /dev/null.

Thanks.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-02-01  7:23                       ` Jiang Xin
@ 2019-02-01  7:25                         ` Jiang Xin
  0 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01  7:25 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Jiang Xin <worldhello.net@gmail.com> 于2019年2月1日周五 下午3:23写道:
>
> Eric Sunshine <sunshine@sunshineco.com> 于2019年2月1日周五 下午2:11写道:
> > > Nice chart, will edit test_description as follows:
> > >
> > >     test_description='git pack-redundant test
> > >
> > >     In order to test git-pack-redundant, we will create a number of
> > > redundant
> > >     packs in the repository `master.git`. The relationship between
> > > packs (P1-P8)
> > >     and objects (T,A-R) is show in the following chart:
> > >
> > >            | T A B C D E F G H I J K L M N O P Q R
> > >         ---+--------------------------------------
> > >         P1 | x x x x x x x                       x
> > >         P2 |     x x x x   x x x
> > >         P3 |             x     x x x x x
> > >         P4 |                     x x x x     x
> > >         P5 |               x x           x x
> > >         P6 |                             x x   x
> > >         P7 |                                 x x
> > >         P8 |   x
> >
> > test_description should be a meaningful one-liner; it should not
> > contain this other information, but this information should appear as
> > comments in the test script.
>
> In 't/t0000-basic.sh', there is also a very long test_description.
> After read 't/test-lib.sh', the only usage of test_description
> is showing it as help, when runing:
>
>     sh ./t0000-basic.sh

sh ./t0000-basic.sh --help

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 1/6] t5323: test cases for git-pack-redundant
  2019-02-01  6:11                     ` Eric Sunshine
  2019-02-01  7:23                       ` Jiang Xin
@ 2019-02-01  9:51                       ` Jiang Xin
  1 sibling, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01  9:51 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Eric Sunshine <sunshine@sunshineco.com> 于2019年2月1日周五 下午2:11写道:
> > Everything below will be done inside master.git?  Avoid cd'ing
> > > around in random places in the test script, as a failure in any of
> > > the steps that does cd would start later tests in an unexpected
> > > place, if you can.
> >
> > The first 10 test cases will run inside master.git, and others will
> > run inside shared.git.  Only run cd inside the two `setup` test cases.
>
> That's not what Junio meant. It's okay for tests to 'cd', but each
> test which does so _must_ ensure that the 'cd' is undone at the end of
> the test, even if the test fails. The correct way to do this within
> each test is by using 'cd' in a subhsell, like this:
>
>     test_expect_success 'setup master.git' '
>         git init --bare master.git &&
>         (
>             cd master.git &&
>             create_commits
>         )
>     '

create_commits should not run in a subshell, or the variables it sets
will be lost.  I wrote a create_commits_in function:
    # Usage: create_commits_in <repo> A B C ...
    # Note: DO NOT run it in sub shell, or variables are not set
    create_commits_in () {
           repo="$1" &&
            parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) || parent=
           T=$(git -C "$repo" write-tree) &&
            shift &&
            while test $# -gt 0
            do
                    name=$1 &&
                    test_tick &&
                    if test -z "$parent"
                    then
                           oid=$(echo $name | git -C "$repo" commit-tree $T)
                    else
                            oid=$(echo $name | git -C "$repo"
commit-tree -p $parent $T)
                    fi &&
                    eval $name=$oid &&
                    parent=$oid &&
                    shift ||
                    return 1
            done
            git -C "$repo" update-ref refs/heads/master $oid
    }

and use it to create commits like:

    create_commits_in master.git A B C D E F G ...

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v7 3/6] pack-redundant: new algorithm to find min packs
  2019-01-31 19:30                 ` Junio C Hamano
@ 2019-02-01  9:55                   ` Jiang Xin
  0 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01  9:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin

Junio C Hamano <gitster@pobox.com> 于2019年2月1日周五 上午3:30写道:
>
> Jiang Xin <worldhello.net@gmail.com> writes:
>
> > From: Sun Chao <sunchao9@huawei.com>
> >
> > When calling `git pack-redundant --all`, if there are too many local
> > packs and too many redundant objects within them, the too deep iteration
> > of `get_permutations` will exhaust all the resources, and the process of
> > `git pack-redundant` will be killed.
>
> Build each step of the series with "make DEVELOPER=YesPlease" and
> your compiler hopefully would notice...
>
>     CC builtin/pack-redundant.o
> builtin/pack-redundant.c:347:12: error: 'is_superset' defined but not used [-Werror=unused-function]
>  static int is_superset(struct pack_list *pl, struct llist *list)
>             ^~~~~~~~~~~
> builtin/pack-redundant.c:313:21: error: 'get_permutations' defined but not used [-Werror=unused-function]
>  static struct pll * get_permutations(struct pack_list *list, int n)
>                      ^~~~~~~~~~~~~~~~
> builtin/pack-redundant.c:293:13: error: 'pll_free' defined but not used [-Werror=unused-function]
>  static void pll_free(struct pll *l)
>              ^~~~~~~~
> cc1: all warnings being treated as errors
> Makefile:2302: recipe for target 'builtin/pack-redundant.o' failed
> make: *** [builtin/pack-redundant.o] Error 1
>

This unused code is deleted in another patch, which will be squashed
into the previous patch in the next reroll.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 0/6] pack-redundant: new algorithm to find min packs
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
@ 2019-02-01 16:21                 ` " Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 1/6] t5323: test cases for git-pack-redundant Jiang Xin
                                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin

Sun Chao (my former colleague at Huawei) found a bug in
git-pack-redundant.  If there are too many packs and many of them
overlap each other, running `git pack-redundant --all` will
exhaust all memory and the process will be killed by the kernel.

There is a script in commit log of commit 3/6, which can be used to
create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it can reproduce this issue.

## Changes since reroll v7

1. Rewrite [PATCH v9 1/6] (t5323: test cases for git-pack-redundant)

   * Add many tables for relationship of packs and objects.
   * Change dir in subshell and fixed other issues.

2. New patch file from Sun Chao: [PATCH v9 3/6] (pack-redundant: delete redundant code)

3. Squash patches (remove unused functions) to patch 4/6 (new algorithm to find min packs).

## Range diff

1:  799e804d5e < -:  ---------- t5323: test cases for git-pack-redundant
-:  ---------- > 1:  c8dbf8cef2 t5323: test cases for git-pack-redundant
2:  520f6277fb = 2:  a6300516d7 pack-redundant: delay creation of unique_objects
-:  ---------- > 3:  fb71973df5 pack-redundant: delete redundant code
3:  ab1c2c4950 ! 4:  9963d1c49f pack-redundant: new algorithm to find min packs
    @@ -76,6 +76,113 @@
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
      --- a/builtin/pack-redundant.c
      +++ b/builtin/pack-redundant.c
    +@@
    + 	struct llist *all_objects;
    + } *local_packs = NULL, *altodb_packs = NULL;
    + 
    +-struct pll {
    +-	struct pll *next;
    +-	struct pack_list *pl;
    +-};
    +-
    + static struct llist_item *free_nodes;
    + 
    + static inline void llist_item_put(struct llist_item *item)
    +@@
    + 	return new_item;
    + }
    + 
    +-static void llist_free(struct llist *list)
    +-{
    +-	while ((list->back = list->front)) {
    +-		list->front = list->front->next;
    +-		llist_item_put(list->back);
    +-	}
    +-	free(list);
    +-}
    +-
    + static inline void llist_init(struct llist **list)
    + {
    + 	*list = xmalloc(sizeof(struct llist));
    +@@
    + 	}
    + }
    + 
    +-static void pll_free(struct pll *l)
    +-{
    +-	struct pll *old;
    +-	struct pack_list *opl;
    +-
    +-	while (l) {
    +-		old = l;
    +-		while (l->pl) {
    +-			opl = l->pl;
    +-			l->pl = opl->next;
    +-			free(opl);
    +-		}
    +-		l = l->next;
    +-		free(old);
    +-	}
    +-}
    +-
    +-/* all the permutations have to be free()d at the same time,
    +- * since they refer to each other
    +- */
    +-static struct pll * get_permutations(struct pack_list *list, int n)
    +-{
    +-	struct pll *subset, *ret = NULL, *new_pll = NULL;
    +-
    +-	if (list == NULL || pack_list_size(list) < n || n == 0)
    +-		return NULL;
    +-
    +-	if (n == 1) {
    +-		while (list) {
    +-			new_pll = xmalloc(sizeof(*new_pll));
    +-			new_pll->pl = NULL;
    +-			pack_list_insert(&new_pll->pl, list);
    +-			new_pll->next = ret;
    +-			ret = new_pll;
    +-			list = list->next;
    +-		}
    +-		return ret;
    +-	}
    +-
    +-	while (list->next) {
    +-		subset = get_permutations(list->next, n - 1);
    +-		while (subset) {
    +-			new_pll = xmalloc(sizeof(*new_pll));
    +-			new_pll->pl = subset->pl;
    +-			pack_list_insert(&new_pll->pl, list);
    +-			new_pll->next = ret;
    +-			ret = new_pll;
    +-			subset = subset->next;
    +-		}
    +-		list = list->next;
    +-	}
    +-	return ret;
    +-}
    +-
    +-static int is_superset(struct pack_list *pl, struct llist *list)
    +-{
    +-	struct llist *diff;
    +-
    +-	diff = llist_copy(list);
    +-
    +-	while (pl) {
    +-		llist_sorted_difference_inplace(diff, pl->all_objects);
    +-		if (diff->size == 0) { /* we're done */
    +-			llist_free(diff);
    +-			return 1;
    +-		}
    +-		pl = pl->next;
    +-	}
    +-	llist_free(diff);
    +-	return 0;
    +-}
    +-
    + static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
    + {
    + 	size_t ret = 0;
     @@
      	return ret;
      }
    @@ -221,56 +328,56 @@
      --- a/t/t5323-pack-redundant.sh
      +++ b/t/t5323-pack-redundant.sh
     @@
    - P2:$P2
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x   x
    + #
    + #############################################################################
     -test_expect_success 'one of pack-2/pack-3 is redundant' '
    -+test_expect_failure 'one of pack-2/pack-3 is redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    ++test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - P6:$P6
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
     -test_expect_success 'pack 2, 4, and 6 are redundant' '
    -+test_expect_failure 'pack 2, 4, and 6 are redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    ++test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - P8:$P8
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
     -test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    -+test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    ++test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - 	test_must_be_empty out
    + 	)
      '
      
     -test_expect_success 'remove redundant packs and pass fsck' '
    -+test_expect_failure 'remove redundant packs and pass fsck' '
    - 	git pack-redundant --all | xargs rm &&
    - 	git fsck --no-progress &&
    - 	git pack-redundant --all >out &&
    ++test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		git pack-redundant --all | xargs rm &&
     @@
    - 	printf "../../master.git/objects" >objects/info/alternates
    + 	)
      '
      
     -test_expect_success 'no redundant packs without --alt-odb' '
    -+test_expect_failure 'no redundant packs without --alt-odb' '
    - 	git pack-redundant --all >out &&
    - 	test_must_be_empty out
    - '
    ++test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
    + 	(
    + 		cd "$shared_repo" &&
    + 		git pack-redundant --all >out &&
     @@
    - P7:$P7
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
     -test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    -+test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
    - 	git pack-redundant --all --verbose >out 2>out.err &&
    - 	test_must_be_empty out &&
    - 	grep "pack$" out.err | format_packfiles >actual &&
    ++test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
    + 	(
    + 		cd "$shared_repo" &&
    + 		cat >expect <<-EOF &&
4:  3c3a7ea40f < -:  ---------- pack-redundant: remove unused functions
5:  bc4b681f40 ! 5:  b8f80ad454 pack-redundant: rename pack_list.all_objects
    @@ -115,11 +115,7 @@
     +							alt->remaining_objects);
      			local = local->next;
      		}
    --		llist_sorted_difference_inplace(all_objects, alt->all_objects);
    -+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
      		alt = alt->next;
    - 	}
    - }
     @@
      		return NULL;
      
6:  6cfba5b4b2 ! 6:  8a12ad699e pack-redundant: consistent sort method
    @@ -83,60 +83,71 @@
      --- a/t/t5323-pack-redundant.sh
      +++ b/t/t5323-pack-redundant.sh
     @@
    - '
    - 
    - cat >expected <<EOF
    --P2:$P2
    -+P3:$P3
    - EOF
    - 
    --test_expect_failure 'one of pack-2/pack-3 is redundant' '
    + #         | T A B C D E F G H I J K L M N O P Q R
    + #     ----+--------------------------------------
    + #     P1  | x x x x x x x                       x
    +-#     P2* |     ! ! ! !   ! ! !
    +-#     P3  |             x     x x x x x
    ++#     P2  |     x x x x   x x x
    ++#     P3* |             !     ! ! ! ! !
    + #     P4  |                     x x x x     x
    + #     P5  |               x x           x x
    + #     ----+--------------------------------------
    + #     ALL | x x x x x x x x x x x x x x x x x   x
    + #
    + #############################################################################
    +-test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
    +-			P2:$P2
    ++			P3:$P3
    + 			EOF
    + 		git pack-redundant --all >out &&
    + 		format_packfiles <out >actual &&
     @@
    - P6:$P6
    - EOF
    - 
    --test_expect_failure 'pack 2, 4, and 6 are redundant' '
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
    +-test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - P8:$P8
    - EOF
    - 
    --test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
    +-test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
     +test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - 	test_must_be_empty out
    + 	)
      '
      
    --test_expect_failure 'remove redundant packs and pass fsck' '
    +-test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
     +test_expect_success 'remove redundant packs and pass fsck' '
    - 	git pack-redundant --all | xargs rm &&
    - 	git fsck --no-progress &&
    - 	git pack-redundant --all >out &&
    + 	(
    + 		cd "$master_repo" &&
    + 		git pack-redundant --all | xargs rm &&
     @@
    - 	printf "../../master.git/objects" >objects/info/alternates
    + 	)
      '
      
    --test_expect_failure 'no redundant packs without --alt-odb' '
    +-test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
     +test_expect_success 'no redundant packs without --alt-odb' '
    - 	git pack-redundant --all >out &&
    - 	test_must_be_empty out
    - '
    + 	(
    + 		cd "$shared_repo" &&
    + 		git pack-redundant --all >out &&
     @@
    - P7:$P7
    - EOF
    - 
    --test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
    +-test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
     +test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    - 	git pack-redundant --all --verbose >out 2>out.err &&
    - 	test_must_be_empty out &&
    - 	grep "pack$" out.err | format_packfiles >actual &&
    + 	(
    + 		cd "$shared_repo" &&
    + 		cat >expect <<-EOF &&


Jiang Xin (4):
  t5323: test cases for git-pack-redundant
  pack-redundant: delay creation of unique_objects
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: delete redundant code
  pack-redundant: new algorithm to find min packs

 builtin/pack-redundant.c  | 232 +++++++----------
 t/t5323-pack-redundant.sh | 510 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 602 insertions(+), 140 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 1/6] t5323: test cases for git-pack-redundant
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 " Jiang Xin
@ 2019-02-01 16:21                 ` Jiang Xin
  2019-02-01 19:42                   ` Eric Sunshine
  2019-02-01 16:21                 ` [PATCH v9 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
                                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao
  Cc: Jiang Xin, Jiang Xin, SZEDER Gábor, Eric Sunshine

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate its new algorithm.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Reviewed-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5323-pack-redundant.sh | 510 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 510 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..d224ff3c50
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,510 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='Test git pack-redundant
+
+In order to test git-pack-redundant, we will create a number of objects and
+packs in the repository `master.git`. The relationship between packs (P1-P8)
+and objects (T, A-R) is showed in the following chart. Objects of a pack will
+be marked with letter x, while objects of redundant packs will be marked with
+exclamation point, and redundant pack itself will be marked with asterisk.
+
+        | T A B C D E F G H I J K L M N O P Q R
+    ----+--------------------------------------
+    P1  | x x x x x x x                       x
+    P2* |     ! ! ! !   ! ! !
+    P3  |             x     x x x x x
+    P4* |                     ! ! ! !     !
+    P5  |               x x           x x
+    P6* |                             ! !   !
+    P7  |                                 x x
+    P8* |   !
+    ----+--------------------------------------
+    ALL | x x x x x x x x x x x x x x x x x x x
+
+Another repository `shared.git` has unique objects (X-Z), while other objects
+(marked with letter s) are shared through alt-odb (of `master.git`). The
+relationship between packs and objects is as follows:
+
+        | T A B C D E F G H I J K L M N O P Q R   X Y Z
+    ----+----------------------------------------------
+    Px1 |   s s s                                 x x x
+    Px2 |         s s s                           x x x
+'
+
+. ./test-lib.sh
+
+master_repo=master.git
+shared_repo=shared.git
+
+# Note: DO NOT run it in a subshell, otherwise the variables will not be set
+# Usage: create_commits_in <repo> A B C ...
+create_commits_in () {
+	repo="$1" &&
+	parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) || parent=
+	T=$(git -C "$repo" write-tree) &&
+	shift &&
+	while test $# -gt 0
+	do
+		name=$1 &&
+		test_tick &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git -C "$repo" commit-tree $T)
+		else
+			oid=$(echo $name | git -C "$repo" commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid &&
+		shift ||
+		return 1
+	done
+	git -C "$repo" update-ref refs/heads/master $oid
+}
+
+# Note: DO NOT run it in a subshell, otherwise the variables will not be set
+create_pack_1 () {
+	P1=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$T
+		$A
+		$B
+		$C
+		$D
+		$E
+		$F
+		$R
+		EOF
+	) &&
+	eval P$P1=P1:$P1
+}
+
+create_pack_2 () {
+	P2=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$B
+		$C
+		$D
+		$E
+		$G
+		$H
+		$I
+		EOF
+	) &&
+	eval P$P2=P2:$P2
+}
+
+create_pack_3 () {
+	P3=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$F
+		$I
+		$J
+		$K
+		$L
+		$M
+		EOF
+	) &&
+	eval P$P3=P3:$P3
+}
+
+create_pack_4 () {
+	P4=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$J
+		$K
+		$L
+		$M
+		$P
+		EOF
+	) &&
+	eval P$P4=P4:$P4
+}
+
+create_pack_5 () {
+	P5=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$G
+		$H
+		$N
+		$O
+		EOF
+	) &&
+	eval P$P5=P5:$P5
+}
+
+create_pack_6 () {
+	P6=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$N
+		$O
+		$Q
+		EOF
+	) &&
+	eval P$P6=P6:$P6
+}
+
+create_pack_7 () {
+	P7=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$P
+		$Q
+		EOF
+	) &&
+	eval P$P7=P7:$P7
+}
+
+create_pack_8 () {
+	P8=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
+		$A
+		EOF
+	) &&
+	eval P$P8=P8:$P8
+}
+
+format_packfiles () {
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" |
+	sort -u |
+	while read p
+	do
+		if test -z "$(eval echo \${P$p})"
+		then
+			echo $p
+		else
+			eval echo "\${P$p}"
+		fi
+	done |
+	sort
+}
+
+test_expect_success 'setup master repo' '
+	git init --bare "$master_repo" &&
+	create_commits_in "$master_repo" A B C D E F G H I J K L M N O P Q R
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2  |     x x x x   x x x
+#     P3  |             x     x x x x x
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x         x
+#
+#############################################################################
+test_expect_success 'no redundant for pack 1, 2, 3' '
+	create_pack_1 && create_pack_2 && create_pack_3 &&
+	(
+		cd "$master_repo" &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+test_expect_success 'create pack 4, 5' '
+	create_pack_4 && create_pack_5
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2* |     ! ! ! !   ! ! !
+#     P3  |             x     x x x x x
+#     P4  |                     x x x x     x
+#     P5  |               x x           x x
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x   x
+#
+#############################################################################
+test_expect_success 'one of pack-2/pack-3 is redundant' '
+	(
+		cd "$master_repo" &&
+		cat >expect <<-EOF &&
+			P2:$P2
+			EOF
+		git pack-redundant --all >out &&
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'create pack 6, 7' '
+	create_pack_6 && create_pack_7
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2* |     ! ! ! !   ! ! !
+#     P3  |             x     x x x x x
+#     P4* |                     ! ! ! !     !
+#     P5  |               x x           x x
+#     P6* |                             ! !   !
+#     P7  |                                 x x
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x x x
+#
+#############################################################################
+test_expect_success 'pack 2, 4, and 6 are redundant' '
+	(
+		cd "$master_repo" &&
+		cat >expect <<-EOF &&
+			P2:$P2
+			P4:$P4
+			P6:$P6
+			EOF
+		git pack-redundant --all >out &&
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'create pack 8' '
+	create_pack_8
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2* |     ! ! ! !   ! ! !
+#     P3  |             x     x x x x x
+#     P4* |                     ! ! ! !     !
+#     P5  |               x x           x x
+#     P6* |                             ! !   !
+#     P7  |                                 x x
+#     P8* |   !
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x x x
+#
+#############################################################################
+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
+	(
+		cd "$master_repo" &&
+		cat >expect <<-EOF &&
+			P2:$P2
+			P4:$P4
+			P6:$P6
+			P8:$P8
+			EOF
+		git pack-redundant --all >out &&
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'clean loose objects' '
+	(
+		cd "$master_repo" &&
+		git prune-packed &&
+		find objects -type f | sed -e "/objects\/pack\//d" >out &&
+		test_must_be_empty out
+	)
+'
+
+test_expect_success 'remove redundant packs and pass fsck' '
+	(
+		cd "$master_repo" &&
+		git pack-redundant --all | xargs rm &&
+		git fsck &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+# The following test cases will execute inside `shared.git`, instead of
+# inside `master.git`.
+test_expect_success 'setup shared.git' '
+	git clone --mirror "$master_repo" "$shared_repo" &&
+	(
+		cd "$shared_repo" &&
+		printf "../../$master_repo/objects\n" >objects/info/alternates
+	)
+'
+
+test_expect_success 'no redundant packs without --alt-odb' '
+	(
+		cd "$shared_repo" &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#     ================ master.git ===============
+#         | T A B C D E F G H I J K L M N O P Q R  <----------+
+#     ----+--------------------------------------             |
+#     P1  | x x x x x x x                       x             |
+#     P3  |             x     x x x x x                       |
+#     P5  |               x x           x x                   |
+#     P7  |                                 x x               |
+#     ----+--------------------------------------             |
+#     ALL | x x x x x x x x x x x x x x x x x x x             |
+#                                                             |
+#                                                             |
+#     ================ shared.git ===============             |
+#         | T A B C D E F G H I J K L M N O P Q R  <objects/info/alternates>
+#     ----+--------------------------------------
+#     P1* | s s s s s s s                       s
+#     P3* |             s     s s s s s
+#     P5* |               s s           s s
+#     P7* |                                 s s
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x x x
+#
+#############################################################################
+test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
+	(
+		cd "$shared_repo" &&
+		cat >expect <<-EOF &&
+			P1:$P1
+			P3:$P3
+			P5:$P5
+			P7:$P7
+			EOF
+		git pack-redundant --all --verbose >out 2>out.err &&
+		test_must_be_empty out &&
+		grep "pack$" out.err | format_packfiles >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'remove redundant packs by alt-odb, no packs left' '
+	(
+		cd "$shared_repo" &&
+		cat >expect <<-EOF &&
+			fatal: Zero packs found!
+			EOF
+		git pack-redundant --all --alt-odb | xargs rm &&
+		git fsck &&
+		test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
+		test_cmp expect actual
+	)
+'
+
+# Note: DO NOT run function `create_pack_*` in sub shell, or variables are not set
+create_pack_x1_in () {
+	repo="$1" &&
+	Px1=$(git -C "$repo/objects/pack" pack-objects -q pack <<-EOF
+		$X
+		$Y
+		$Z
+		$A
+		$B
+		$C
+		EOF
+	) &&
+	eval P${Px1}=Px1:${Px1}
+}
+
+create_pack_x2_in () {
+	repo="$1" &&
+	Px2=$(git -C "$repo/objects/pack" pack-objects -q pack <<-EOF
+		$X
+		$Y
+		$Z
+		$D
+		$E
+		$F
+		EOF
+	) &&
+	eval P${Px2}=Px2:${Px2}
+}
+
+test_expect_success 'create new objects and packs in shared.git' '
+	create_commits_in "$shared_repo" X Y Z &&
+	create_pack_x1_in "$shared_repo" &&
+	create_pack_x2_in "$shared_repo"
+'
+
+test_expect_success 'no redundant without --alt-odb' '
+	(
+		cd "$shared_repo" &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#     ================ master.git ===============
+#         | T A B C D E F G H I J K L M N O P Q R  <----------------+
+#     ----+--------------------------------------                   |
+#     P1  | x x x x x x x                       x                   |
+#     P3  |             x     x x x x x                             |
+#     P5  |               x x           x x                         |
+#     P7  |                                 x x                     |
+#     ----+--------------------------------------                   |
+#     ALL | x x x x x x x x x x x x x x x x x x x                   |
+#                                                                   |
+#                                                                   |
+#     ================ shared.git =======================           |
+#         | T A B C D E F G H I J K L M N O P Q R   X Y Z <objects/info/alternates>
+#     ----+----------------------------------------------
+#     Px1 |   s s s                                 x x x
+#     Px2*|         s s s                           ! ! !
+#     ----+----------------------------------------------
+#     ALL | s s s s s s s s s s s s s s s s s s s   x x x
+#
+#############################################################################
+test_expect_success 'one pack is redundant' '
+	(
+		cd "$shared_repo" &&
+		git pack-redundant --all --alt-odb >out &&
+		format_packfiles <out >actual &&
+		test_line_count = 1 actual
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#     ================ master.git ===============
+#         | T A B C D E F G H I J K L M N O P Q R  <----------------+
+#     ----+--------------------------------------                   |
+#     P1  | x x x x x x x                       x                   |
+#     P3  |             x     x x x x x                             |
+#     P5  |               x x           x x                         |
+#     P7  |                                 x x                     |
+#     ----+--------------------------------------                   |
+#     ALL | x x x x x x x x x x x x x x x x x x x                   |
+#                                                                   |
+#                                                                   |
+#     ================ shared.git =======================           |
+#         | T A B C D E F G H I J K L M N O P Q R   X Y Z <objects/info/alternates>
+#     ----+----------------------------------------------
+#     Px1*|   s s s                                 i i i
+#     Px2*|         s s s                           i i i
+#     ----+----------------------------------------------
+#     ALL | s s s s s s s s s s s s s s s s s s s   i i i
+#                                                  (ignored objects, marked with i)
+#
+#############################################################################
+test_expect_success 'set ignore objects and all two packs are redundant' '
+	(
+		cd "$shared_repo" &&
+		cat >expect <<-EOF &&
+			Px1:$Px1
+			Px2:$Px2
+			EOF
+		git pack-redundant --all --alt-odb >out <<-EOF &&
+			$X
+			$Y
+			$Z
+			EOF
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_done
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 2/6] pack-redundant: delay creation of unique_objects
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 " Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 1/6] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-02-01 16:21                 ` Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 3/6] pack-redundant: delete redundant code Jiang Xin
                                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Instead of initializing unique_objects in `add_pack()`, create it as a
copy of all_objects in `cmp_two_packs()`, after unwanted objects have
been removed from all_objects.

This saves memory (nothing is allocated for alt-odb packs), and
`llist_sorted_difference_inplace()` runs only once when removing ignored
objects and when removing alt-odb objects in `scan_alt_odb_packs()`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..f7dab0ec60 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -254,6 +254,11 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	struct llist_item *p1_hint = NULL, *p2_hint = NULL;
 	const unsigned int hashsz = the_hash_algo->rawsz;
 
+	if (!p1->unique_objects)
+		p1->unique_objects = llist_copy(p1->all_objects);
+	if (!p2->unique_objects)
+		p2->unique_objects = llist_copy(p2->all_objects);
+
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
 	p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
@@ -536,7 +541,7 @@ static void scan_alt_odb_packs(void)
 	while (alt) {
 		local = local_packs;
 		while (local) {
-			llist_sorted_difference_inplace(local->unique_objects,
+			llist_sorted_difference_inplace(local->all_objects,
 							alt->all_objects);
 			local = local->next;
 		}
@@ -567,8 +572,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
-	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = NULL;
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -646,7 +650,6 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 
 	load_all_objects();
 
-	cmp_local_packs();
 	if (alt_odb)
 		scan_alt_odb_packs();
 
@@ -663,10 +666,12 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	llist_sorted_difference_inplace(all_objects, ignore);
 	pl = local_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
+	cmp_local_packs();
+
 	minimize(&min);
 
 	if (verbose) {
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 3/6] pack-redundant: delete redundant code
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
                                   ` (2 preceding siblings ...)
  2019-02-01 16:21                 ` [PATCH v9 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
@ 2019-02-01 16:21                 ` Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
                                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

The objects in alt-odb are removed from `all_objects` twice: once in
`load_all_objects` and again in `scan_alt_odb_packs`. Drop the removal
from the latter function.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index f7dab0ec60..4a06f057dd 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -545,7 +545,6 @@ static void scan_alt_odb_packs(void)
 							alt->all_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
 		alt = alt->next;
 	}
 }
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 4/6] pack-redundant: new algorithm to find min packs
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
                                   ` (3 preceding siblings ...)
  2019-02-01 16:21                 ` [PATCH v9 3/6] pack-redundant: delete redundant code Jiang Xin
@ 2019-02-01 16:21                 ` Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 6/6] pack-redundant: consistent sort method Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the deep recursion in
`get_permutations` will exhaust all resources, and the
`git pack-redundant` process will be killed.

The following script creates a repository with many redundant packs;
running `git pack-redundant --all` in the resulting `test.git` repo
dies quickly.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To compute the `min` pack list, replace the permutation search in the
`minimize` function with a new algorithm, which resolves this issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the number of objects they still hold,
   most objects first, and add the first non_unique pack to the `min`
   list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Some test cases will fail on Mac OS X. Mark them as expected failures;
they will be resolved in a later commit.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c  | 194 +++++++++++++-------------------------
 t/t5323-pack-redundant.sh |  12 +--
 2 files changed, 73 insertions(+), 133 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 4a06f057dd..d6d9a66e46 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -290,78 +276,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
@@ -426,14 +340,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -451,49 +403,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -606,7 +546,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index d224ff3c50..897acaa365 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -218,7 +218,7 @@ test_expect_success 'create pack 4, 5' '
 #     ALL | x x x x x x x x x x x x x x x x x   x
 #
 #############################################################################
-test_expect_success 'one of pack-2/pack-3 is redundant' '
+test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
@@ -250,7 +250,7 @@ test_expect_success 'create pack 6, 7' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_success 'pack 2, 4, and 6 are redundant' '
+test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
@@ -285,7 +285,7 @@ test_expect_success 'create pack 8' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
+test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
@@ -309,7 +309,7 @@ test_expect_success 'clean loose objects' '
 	)
 '
 
-test_expect_success 'remove redundant packs and pass fsck' '
+test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
 	(
 		cd "$master_repo" &&
 		git pack-redundant --all | xargs rm &&
@@ -329,7 +329,7 @@ test_expect_success 'setup shared.git' '
 	)
 '
 
-test_expect_success 'no redundant packs without --alt-odb' '
+test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
 	(
 		cd "$shared_repo" &&
 		git pack-redundant --all >out &&
@@ -362,7 +362,7 @@ test_expect_success 'no redundant packs without --alt-odb' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
+test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
 	(
 		cd "$shared_repo" &&
 		cat >expect <<-EOF &&
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 5/6] pack-redundant: rename pack_list.all_objects
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
                                   ` (4 preceding siblings ...)
  2019-02-01 16:21                 ` [PATCH v9 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-02-01 16:21                 ` Jiang Xin
  2019-02-01 16:21                 ` [PATCH v9 6/6] pack-redundant: consistent sort method Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

The new algorithm uses `pack_list.all_objects` to track remaining
objects, so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d6d9a66e46..15cdf233c4 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -241,9 +241,9 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	const unsigned int hashsz = the_hash_algo->rawsz;
 
 	if (!p1->unique_objects)
-		p1->unique_objects = llist_copy(p1->all_objects);
+		p1->unique_objects = llist_copy(p1->remaining_objects);
 	if (!p2->unique_objects)
-		p2->unique_objects = llist_copy(p2->all_objects);
+		p2->unique_objects = llist_copy(p2->remaining_objects);
 
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
@@ -344,8 +344,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -355,7 +355,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -399,7 +399,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -417,20 +417,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -445,7 +445,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -456,7 +456,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -481,8 +481,8 @@ static void scan_alt_odb_packs(void)
 	while (alt) {
 		local = local_packs;
 		while (local) {
-			llist_sorted_difference_inplace(local->all_objects,
-							alt->all_objects);
+			llist_sorted_difference_inplace(local->remaining_objects,
+							alt->remaining_objects);
 			local = local->next;
 		}
 		alt = alt->next;
@@ -499,7 +499,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -508,7 +508,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	l.unique_objects = NULL;
@@ -605,7 +605,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	llist_sorted_difference_inplace(all_objects, ignore);
 	pl = local_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v9 6/6] pack-redundant: consistent sort method
  2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
                                   ` (5 preceding siblings ...)
  2019-02-01 16:21                 ` [PATCH v9 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-02-01 16:21                 ` Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-01 16:21 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao
  Cc: Jiang Xin, Jiang Xin, SZEDER Gábor

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 gives a different result on macOS.
This is because `cmp_pack_list_reverse` cannot give a consistent order
when two packs being sorted have the same number of remaining_objects.

Change the sorting function to break such ties by the total number of
objects in each pack (bigger pack first), which makes the result of
t5323 consistent.

The new algorithm to find redundant packs is a trade-off to save memory
resources; its result may differ from that of the old one, and may
sometimes not be the best result.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c  | 24 ++++++++++++++++--------
 t/t5323-pack-redundant.sh | 18 +++++++++---------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 15cdf233c4..29ff5e99cb 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -340,19 +341,25 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
+		/* have the same remaining_objects, big pack first */
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
+		/* sort by remaining objects, more objects first */
 		return 1;
-	else
+	} else {
 		return -1;
+	}
 }
 
 /* Sort pack_list, greater size of remaining_objects first */
@@ -370,7 +377,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -511,6 +518,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	l.unique_objects = NULL;
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 897acaa365..e642240d71 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -210,19 +210,19 @@ test_expect_success 'create pack 4, 5' '
 #         | T A B C D E F G H I J K L M N O P Q R
 #     ----+--------------------------------------
 #     P1  | x x x x x x x                       x
-#     P2* |     ! ! ! !   ! ! !
-#     P3  |             x     x x x x x
+#     P2  |     x x x x   x x x
+#     P3* |             !     ! ! ! ! !
 #     P4  |                     x x x x     x
 #     P5  |               x x           x x
 #     ----+--------------------------------------
 #     ALL | x x x x x x x x x x x x x x x x x   x
 #
 #############################################################################
-test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
+test_expect_success 'one of pack-2/pack-3 is redundant' '
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
-			P2:$P2
+			P3:$P3
 			EOF
 		git pack-redundant --all >out &&
 		format_packfiles <out >actual &&
@@ -250,7 +250,7 @@ test_expect_success 'create pack 6, 7' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
+test_expect_success 'pack 2, 4, and 6 are redundant' '
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
@@ -285,7 +285,7 @@ test_expect_success 'create pack 8' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
@@ -309,7 +309,7 @@ test_expect_success 'clean loose objects' '
 	)
 '
 
-test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
+test_expect_success 'remove redundant packs and pass fsck' '
 	(
 		cd "$master_repo" &&
 		git pack-redundant --all | xargs rm &&
@@ -329,7 +329,7 @@ test_expect_success 'setup shared.git' '
 	)
 '
 
-test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
+test_expect_success 'no redundant packs without --alt-odb' '
 	(
 		cd "$shared_repo" &&
 		git pack-redundant --all >out &&
@@ -362,7 +362,7 @@ test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
+test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
 	(
 		cd "$shared_repo" &&
 		cat >expect <<-EOF &&
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v9 1/6] t5323: test cases for git-pack-redundant
  2019-02-01 16:21                 ` [PATCH v9 1/6] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-02-01 19:42                   ` Eric Sunshine
  2019-02-01 21:03                     ` Junio C Hamano
  0 siblings, 1 reply; 83+ messages in thread
From: Eric Sunshine @ 2019-02-01 19:42 UTC (permalink / raw)
  To: Jiang Xin
  Cc: Junio C Hamano, Git List, Sun Chao, Jiang Xin, SZEDER Gábor

On Fri, Feb 1, 2019 at 11:22 AM Jiang Xin <worldhello.net@gmail.com> wrote:
> Add test cases for git pack-redundant to validate new algorithm for git
> pack-redundant.
>
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> ---
> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,510 @@
> +# Note: DO NOT run it in a subshell, otherwise the variables will not be set

Which variables won't be set? It's not clear what this restriction is about.

> +# Usage: create_commits_in <repo> A B C ...
> +create_commits_in () {
> +       repo="$1" &&
> +       parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) || parent=

Broken &&-chain. Instead, perhaps:

    if ! parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null)
    then
        parent=
    fi &&

or something simpler.

> +       T=$(git -C "$repo" write-tree) &&
> +       shift &&
> +       while test $# -gt 0
> +       do
> +               name=$1 &&
> +               test_tick &&
> +               if test -z "$parent"
> +               then
> +                       oid=$(echo $name | git -C "$repo" commit-tree $T)
> +               else
> +                       oid=$(echo $name | git -C "$repo" commit-tree -p $parent $T)
> +               fi &&
> +               eval $name=$oid &&
> +               parent=$oid &&
> +               shift ||
> +               return 1
> +       done

Broken &&-chain. Use:

    done &&

> +       git -C "$repo" update-ref refs/heads/master $oid
> +}
> +
> +# Note: DO NOT run it in a subshell, otherwise the variables will not be set
> +create_pack_1 () {
> +       P1=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF

Which variables? Note that you can capture output of a subshell into a
variable, if necessary.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v9 1/6] t5323: test cases for git-pack-redundant
  2019-02-01 19:42                   ` Eric Sunshine
@ 2019-02-01 21:03                     ` Junio C Hamano
  2019-02-01 21:49                       ` Eric Sunshine
  0 siblings, 1 reply; 83+ messages in thread
From: Junio C Hamano @ 2019-02-01 21:03 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Jiang Xin, Git List, Sun Chao, Jiang Xin, SZEDER Gábor

Eric Sunshine <sunshine@sunshineco.com> writes:

> On Fri, Feb 1, 2019 at 11:22 AM Jiang Xin <worldhello.net@gmail.com> wrote:
>> Add test cases for git pack-redundant to validate new algorithm for git
>> pack-redundant.
>>
>> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>> ---
>> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
>> @@ -0,0 +1,510 @@
>> +# Note: DO NOT run it in a subshell, otherwise the variables will not be set
>
> Which variables won't be set? It's not clear what this restriction is about.

>> +       git -C "$repo" update-ref refs/heads/master $oid
>> +}
>> +
>> +# Note: DO NOT run it in a subshell, otherwise the variables will not be set
>> +create_pack_1 () {
>> +       P1=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
>
> Which variables? Note that you can capture output of a subshell into a
> variable, if necessary.

These helper functions set a bunch of variables $P1, $P2, etc. as
well as variables whose name begin with P and followed by 40-hex.
The script wants to use them later when preparing expected output,
and with the most natural way to organize the code, that "later"
happens in the process that would have spawned a subshell to run
this function.

It would have been easier for you to grok if the note instead said
"this function sets two global shell variables" or something,
perhaps?  Such a variable would certainly not be visible if this
function is called inside a subshell to the main process.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* Re: [PATCH v9 1/6] t5323: test cases for git-pack-redundant
  2019-02-01 21:03                     ` Junio C Hamano
@ 2019-02-01 21:49                       ` Eric Sunshine
  2019-02-02 13:30                         ` [PATCH v10 0/6] pack-redundant: new algorithm to find min packs Jiang Xin
                                           ` (6 more replies)
  0 siblings, 7 replies; 83+ messages in thread
From: Eric Sunshine @ 2019-02-01 21:49 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jiang Xin, Git List, Sun Chao, Jiang Xin, SZEDER Gábor

On Fri, Feb 1, 2019 at 4:03 PM Junio C Hamano <gitster@pobox.com> wrote:
> Eric Sunshine <sunshine@sunshineco.com> writes:
> > On Fri, Feb 1, 2019 at 11:22 AM Jiang Xin <worldhello.net@gmail.com> wrote:
> >> +# Note: DO NOT run it in a subshell, otherwise the variables will not be set
> >
> > Which variables won't be set? It's not clear what this restriction is about.
>
> >> +# Note: DO NOT run it in a subshell, otherwise the variables will not be set
> >> +create_pack_1 () {
> >> +       P1=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
> >
> > Which variables? Note that you can capture output of a subshell into a
> > variable, if necessary.
>
> These helper functions set a bunch of variables $P1, $P2, etc. as
> well as variables whose name begin with P and followed by 40-hex.
> The script wants to use them later when preparing expected output,
> and with the most natural way to organize the code, that "later"
> happens in the process that would have spawned a subshell to run
> this function.
>
> It would have been easier for you to grok if the note instead said
> "this function sets two global shell variables" or something,
> perhaps?  Such a variable would certainly not be visible if this
> function is called inside a subshell to the main process.

Yes, better function comments would facilitate comprehension both for
the reviewer and those working on the code in the future. For
instance:

    # Create commit for each argument [...with blah properties...] and
    # assign [...] to shell variable of same name as argument.
    # NOTE: Avoid calling this function from a subshell since variable
    # assignments will disappear when subshell exits.

or something.

But, looking more closely at the patch, I'm wondering why the various
create_pack_#() functions are defined at all since they are each only
ever called from a single place.

    create_pack_4 () {
        ...
        eval P$P4=P4:$P4
    }
   ...
   test_expect_success 'create pack 4, 5' '
        create_pack_4 && create_pack_5
    '

I haven't been able to convince myself that this helps readability --
especially since the function definition is often far removed from the
single point of use -- over merely inlining the function body directly
in the sole test which calls it.

Anyhow, this is all pretty minor, not necessarily worth a re-roll.

^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 0/6] pack-redundant: new algorithm to find min packs
  2019-02-01 21:49                       ` Eric Sunshine
@ 2019-02-02 13:30                         ` Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 1/6] t5323: test cases for git-pack-redundant Jiang Xin
                                           ` (5 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Eric Sunshine, Sun Chao; +Cc: Jiang Xin, Sun Chao

Sun Chao (my former colleague at Huawei) found a bug in
git-pack-redundant.  If there are too many packs and many of them
overlap each other, running `git pack-redundant --all` will
exhaust all memory and the process will be killed by the kernel.

There is a script in the commit log of commit 3/6, which can be used to
create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it reproduces this issue.

## Changes since re-roll v9

Eric Sunshine <sunshine@sunshineco.com> 于2019年2月2日周六 上午3:43写道:
>
> On Fri, Feb 1, 2019 at 11:22 AM Jiang Xin <worldhello.net@gmail.com> wrote:
> > Add test cases for git pack-redundant to validate new algorithm for git
> > pack-redundant.
> >
> > Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> > ---
> > diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> > @@ -0,0 +1,510 @@
> > +# Note: DO NOT run it in a subshell, otherwise the variables will not be set
>
> Which variables won't be set? It's not clear what this restriction is about.
>
> > +# Usage: create_commits_in <repo> A B C ...
> > +create_commits_in () {
> > +       repo="$1" &&
> > +       parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) || parent=
>
> Broken &&-chain. Instead, perhaps:
>
>     if ! parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null)
>     then
>         parent=
>     fi &&
>
> or something simpler.

Fixed.

> > +       T=$(git -C "$repo" write-tree) &&
> > +       shift &&
> > +       while test $# -gt 0
> > +       do
> > +               name=$1 &&
> > +               test_tick &&
> > +               if test -z "$parent"
> > +               then
> > +                       oid=$(echo $name | git -C "$repo" commit-tree $T)
> > +               else
> > +                       oid=$(echo $name | git -C "$repo" commit-tree -p $parent $T)
> > +               fi &&
> > +               eval $name=$oid &&
> > +               parent=$oid &&
> > +               shift ||
> > +               return 1
> > +       done
>
> Broken &&-chain. Use:
>
>     done &&
>

Fixed, thanks.

> > It would have been easier for you to grok if the note instead said
> > "this function sets two global shell variables" or something,
> > perhaps?  Such a variable would certainly not be visible if this
> > function is called inside a subshell to the main process.
>
> Yes, better function comments would facilitate comprehension both for
> the reviewer and those working on the code in the future. For
> instance:
>
>     # Create commit for each argument [...with blah properties...] and
>     # assign [...] to shell variable of same name as argument.
>     # NOTE: Avoid calling this function from a subshell since variable
>     # assignments will disappear when subshell exits.

Polished the comments for the `create_commits_in` and `create_pack_in`
helper functions.

>     create_pack_4 () {
>         ...
>         eval P$P4=P4:$P4
>     }
>    ...
>    test_expect_success 'create pack 4, 5' '
>         create_pack_4 && create_pack_5
>     '
>
> I haven't been able to convince myself that this helps readability --
> especially since the function definition is often far removed from the
> single point of use -- over merely inlining the function body directly
> in the sole test which calls it.

Use a new helper function `create_pack_in` to create packs inside the
tests that use them.


## Range diff since v9:

1:  c8dbf8cef2 ! 1:  4719043603 t5323: test cases for git-pack-redundant
    @@ -43,8 +43,8 @@
     +    ALL | x x x x x x x x x x x x x x x x x x x
     +
     +Another repository `shared.git` has unique objects (X-Z), while other objects
    -+(marked with letter s) are shared through alt-odb (of `master.git`). The
    -+relationship between packs and objects is as follows:
    ++(marked with letter s) can be found in the shared alt-odb (of `master.git`).
    ++The relationship between packs and objects is as follows:
     +
     +        | T A B C D E F G H I J K L M N O P Q R   X Y Z
     +    ----+----------------------------------------------
    @@ -57,11 +57,19 @@
     +master_repo=master.git
     +shared_repo=shared.git
     +
    -+# Note: DO NOT run it in a subshell, otherwise the variables will not be set
    -+# Usage: create_commits_in <repo> A B C ...
    ++# Create commits in <repo> and assign each commit's oid to shell variables
    ++# given in the arguments (A, B, and C). E.g.:
    ++#
    ++#     create_commits_in <repo> A B C
    ++#
    ++# NOTE: Avoid calling this function from a subshell since variable
    ++# assignments will disappear when subshell exits.
     +create_commits_in () {
     +	repo="$1" &&
    -+	parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null) || parent=
    ++	if ! parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null)
    ++	then
    ++		parent=
    ++	fi &&
     +	T=$(git -C "$repo" write-tree) &&
     +	shift &&
     +	while test $# -gt 0
    @@ -78,101 +86,26 @@
     +		parent=$oid &&
     +		shift ||
     +		return 1
    -+	done
    ++	done &&
     +	git -C "$repo" update-ref refs/heads/master $oid
     +}
     +
    -+# Note: DO NOT run it in a subshell, otherwise the variables will not be set
    -+create_pack_1 () {
    -+	P1=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$T
    -+		$A
    -+		$B
    -+		$C
    -+		$D
    -+		$E
    -+		$F
    -+		$R
    -+		EOF
    -+	) &&
    -+	eval P$P1=P1:$P1
    -+}
    -+
    -+create_pack_2 () {
    -+	P2=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$B
    -+		$C
    -+		$D
    -+		$E
    -+		$G
    -+		$H
    -+		$I
    -+		EOF
    -+	) &&
    -+	eval P$P2=P2:$P2
    -+}
    -+
    -+create_pack_3 () {
    -+	P3=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$F
    -+		$I
    -+		$J
    -+		$K
    -+		$L
    -+		$M
    -+		EOF
    -+	) &&
    -+	eval P$P3=P3:$P3
    -+}
    -+
    -+create_pack_4 () {
    -+	P4=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$J
    -+		$K
    -+		$L
    -+		$M
    -+		$P
    -+		EOF
    -+	) &&
    -+	eval P$P4=P4:$P4
    -+}
    -+
    -+create_pack_5 () {
    -+	P5=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$G
    -+		$H
    -+		$N
    -+		$O
    -+		EOF
    -+	) &&
    -+	eval P$P5=P5:$P5
    -+}
    -+
    -+create_pack_6 () {
    -+	P6=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$N
    -+		$O
    -+		$Q
    -+		EOF
    -+	) &&
    -+	eval P$P6=P6:$P6
    -+}
    -+
    -+create_pack_7 () {
    -+	P7=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$P
    -+		$Q
    -+		EOF
    -+	) &&
    -+	eval P$P7=P7:$P7
    -+}
    -+
    -+create_pack_8 () {
    -+	P8=$(git -C "$master_repo/objects/pack" pack-objects -q pack <<-EOF
    -+		$A
    -+		EOF
    -+	) &&
    -+	eval P$P8=P8:$P8
    ++# Create pack in <repo> and assign pack id to variable given in the 2nd argument
    ++# (<name>). Commits in the pack will be read from stdin. E.g.:
    ++#
    ++#     create_pack_in <repo> <name> <<-EOF
    ++#         ...
    ++#         EOF
    ++#
    ++# NOTE: commits from stdin should be given using heredoc, not using pipe, and
    ++# avoid calling this function from a subshell since variable assignments will
    ++# disappear when subshell exits.
    ++create_pack_in () {
    ++	repo="$1" &&
    ++	name="$2" &&
    ++	pack=$(git -C "$repo/objects/pack" pack-objects -q pack) &&
    ++	eval $name=$pack &&
    ++	eval P$pack=$name:$pack
     +}
     +
     +format_packfiles () {
    @@ -209,8 +142,34 @@
     +#     ALL | x x x x x x x x x x x x x x         x
     +#
     +#############################################################################
    -+test_expect_success 'no redundant for pack 1, 2, 3' '
    -+	create_pack_1 && create_pack_2 && create_pack_3 &&
    ++test_expect_success 'master: no redundant for pack 1, 2, 3' '
    ++	create_pack_in "$master_repo" P1 <<-EOF &&
    ++		$T
    ++		$A
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$F
    ++		$R
    ++		EOF
    ++	create_pack_in "$master_repo" P2 <<-EOF &&
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$G
    ++		$H
    ++		$I
    ++		EOF
    ++	create_pack_in "$master_repo" P3 <<-EOF &&
    ++		$F
    ++		$I
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		EOF
     +	(
     +		cd "$master_repo" &&
     +		git pack-redundant --all >out &&
    @@ -218,10 +177,6 @@
     +	)
     +'
     +
    -+test_expect_success 'create pack 4, 5' '
    -+	create_pack_4 && create_pack_5
    -+'
    -+
     +#############################################################################
     +# Chart of packs and objects for this test case
     +#
    @@ -236,7 +191,20 @@
     +#     ALL | x x x x x x x x x x x x x x x x x   x
     +#
     +#############################################################################
    -+test_expect_success 'one of pack-2/pack-3 is redundant' '
    ++test_expect_success 'master: one of pack-2/pack-3 is redundant' '
    ++	create_pack_in "$master_repo" P4 <<-EOF &&
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		$P
    ++		EOF
    ++	create_pack_in "$master_repo" P5 <<-EOF &&
    ++		$G
    ++		$H
    ++		$N
    ++		$O
    ++		EOF
     +	(
     +		cd "$master_repo" &&
     +		cat >expect <<-EOF &&
    @@ -248,10 +216,6 @@
     +	)
     +'
     +
    -+test_expect_success 'create pack 6, 7' '
    -+	create_pack_6 && create_pack_7
    -+'
    -+
     +#############################################################################
     +# Chart of packs and objects for this test case
     +#
    @@ -268,7 +232,16 @@
     +#     ALL | x x x x x x x x x x x x x x x x x x x
     +#
     +#############################################################################
    -+test_expect_success 'pack 2, 4, and 6 are redundant' '
    ++test_expect_success 'master: pack 2, 4, and 6 are redundant' '
    ++	create_pack_in "$master_repo" P6 <<-EOF &&
    ++		$N
    ++		$O
    ++		$Q
    ++		EOF
    ++	create_pack_in "$master_repo" P7 <<-EOF &&
    ++		$P
    ++		$Q
    ++		EOF
     +	(
     +		cd "$master_repo" &&
     +		cat >expect <<-EOF &&
    @@ -282,10 +255,6 @@
     +	)
     +'
     +
    -+test_expect_success 'create pack 8' '
    -+	create_pack_8
    -+'
    -+
     +#############################################################################
     +# Chart of packs and objects for this test case
     +#
    @@ -303,7 +272,10 @@
     +#     ALL | x x x x x x x x x x x x x x x x x x x
     +#
     +#############################################################################
    -+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    ++test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
    ++	create_pack_in "$master_repo" P8 <<-EOF &&
    ++		$A
    ++		EOF
     +	(
     +		cd "$master_repo" &&
     +		cat >expect <<-EOF &&
    @@ -318,7 +290,7 @@
     +	)
     +'
     +
    -+test_expect_success 'clean loose objects' '
    ++test_expect_success 'master: clean loose objects' '
     +	(
     +		cd "$master_repo" &&
     +		git prune-packed &&
    @@ -327,7 +299,7 @@
     +	)
     +'
     +
    -+test_expect_success 'remove redundant packs and pass fsck' '
    ++test_expect_success 'master: remove redundant packs and pass fsck' '
     +	(
     +		cd "$master_repo" &&
     +		git pack-redundant --all | xargs rm &&
    @@ -347,7 +319,7 @@
     +	)
     +'
     +
    -+test_expect_success 'no redundant packs without --alt-odb' '
    ++test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
     +	(
     +		cd "$shared_repo" &&
     +		git pack-redundant --all >out &&
    @@ -380,7 +352,7 @@
     +#     ALL | x x x x x x x x x x x x x x x x x x x
     +#
     +#############################################################################
    -+test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    ++test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
     +	(
     +		cd "$shared_repo" &&
     +		cat >expect <<-EOF &&
    @@ -396,7 +368,7 @@
     +	)
     +'
     +
    -+test_expect_success 'remove redundant packs by alt-odb, no packs left' '
    ++test_expect_success 'shared: remove redundant packs, no packs left' '
     +	(
     +		cd "$shared_repo" &&
     +		cat >expect <<-EOF &&
    @@ -409,10 +381,9 @@
     +	)
     +'
     +
    -+# Note: DO NOT run function `create_pack_*` in sub shell, or variables are not set
    -+create_pack_x1_in () {
    -+	repo="$1" &&
    -+	Px1=$(git -C "$repo/objects/pack" pack-objects -q pack <<-EOF
    ++test_expect_success 'shared: create new objects and packs' '
    ++	create_commits_in "$shared_repo" X Y Z &&
    ++	create_pack_in "$shared_repo" Px1 <<-EOF &&
     +		$X
     +		$Y
     +		$Z
    @@ -420,13 +391,7 @@
     +		$B
     +		$C
     +		EOF
    -+	) &&
    -+	eval P${Px1}=Px1:${Px1}
    -+}
    -+
    -+create_pack_x2_in () {
    -+	repo="$1" &&
    -+	Px2=$(git -C "$repo/objects/pack" pack-objects -q pack <<-EOF
    ++	create_pack_in "$shared_repo" Px2 <<-EOF
     +		$X
     +		$Y
     +		$Z
    @@ -434,17 +399,9 @@
     +		$E
     +		$F
     +		EOF
    -+	) &&
    -+	eval P${Px2}=Px2:${Px2}
    -+}
    -+
    -+test_expect_success 'create new objects and packs in shared.git' '
    -+	create_commits_in "$shared_repo" X Y Z &&
    -+	create_pack_x1_in "$shared_repo" &&
    -+	create_pack_x2_in "$shared_repo"
     +'
     +
    -+test_expect_success 'no redundant without --alt-odb' '
    ++test_expect_success 'shared: no redundant without --alt-odb' '
     +	(
     +		cd "$shared_repo" &&
     +		git pack-redundant --all >out &&
    @@ -475,7 +432,7 @@
     +#     ALL | s s s s s s s s s s s s s s s s s s s   x x x
     +#
     +#############################################################################
    -+test_expect_success 'one pack is redundant' '
    ++test_expect_success 'shared: one pack is redundant with --alt-odb' '
     +	(
     +		cd "$shared_repo" &&
     +		git pack-redundant --all --alt-odb >out &&
    @@ -508,7 +465,7 @@
     +#                                                  (ignored objects, marked with i)
     +#
     +#############################################################################
    -+test_expect_success 'set ignore objects and all two packs are redundant' '
    ++test_expect_success 'shared: ignore unique objects and all two packs are redundant' '
     +	(
     +		cd "$shared_repo" &&
     +		cat >expect <<-EOF &&
2:  a6300516d7 = 2:  4feb1eaa40 pack-redundant: delay creation of unique_objects
3:  fb71973df5 = 3:  875367d7b4 pack-redundant: delete redundant code
4:  9963d1c49f ! 4:  50cb2854f1 pack-redundant: new algorithm to find min packs
    @@ -331,35 +331,35 @@
      #     ALL | x x x x x x x x x x x x x x x x x   x
      #
      #############################################################################
    --test_expect_success 'one of pack-2/pack-3 is redundant' '
    -+test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
    - 	(
    - 		cd "$master_repo" &&
    - 		cat >expect <<-EOF &&
    +-test_expect_success 'master: one of pack-2/pack-3 is redundant' '
    ++test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' '
    + 	create_pack_in "$master_repo" P4 <<-EOF &&
    + 		$J
    + 		$K
     @@
      #     ALL | x x x x x x x x x x x x x x x x x x x
      #
      #############################################################################
    --test_expect_success 'pack 2, 4, and 6 are redundant' '
    -+test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
    - 	(
    - 		cd "$master_repo" &&
    - 		cat >expect <<-EOF &&
    +-test_expect_success 'master: pack 2, 4, and 6 are redundant' '
    ++test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' '
    + 	create_pack_in "$master_repo" P6 <<-EOF &&
    + 		$N
    + 		$O
     @@
      #     ALL | x x x x x x x x x x x x x x x x x x x
      #
      #############################################################################
    --test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    -+test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
    - 	(
    - 		cd "$master_repo" &&
    - 		cat >expect <<-EOF &&
    +-test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
    ++test_expect_failure 'master: pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
    + 	create_pack_in "$master_repo" P8 <<-EOF &&
    + 		$A
    + 		EOF
     @@
      	)
      '
      
    --test_expect_success 'remove redundant packs and pass fsck' '
    -+test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
    +-test_expect_success 'master: remove redundant packs and pass fsck' '
    ++test_expect_failure 'master: remove redundant packs and pass fsck (failed on Mac)' '
      	(
      		cd "$master_repo" &&
      		git pack-redundant --all | xargs rm &&
    @@ -367,8 +367,8 @@
      	)
      '
      
    --test_expect_success 'no redundant packs without --alt-odb' '
    -+test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
    +-test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
    ++test_expect_failure 'shared: all packs are redundant, but no output without --alt-odb (failed on Mac)' '
      	(
      		cd "$shared_repo" &&
      		git pack-redundant --all >out &&
    @@ -376,8 +376,8 @@
      #     ALL | x x x x x x x x x x x x x x x x x x x
      #
      #############################################################################
    --test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    -+test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
    +-test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
    ++test_expect_failure 'shared: show redundant packs in stderr for verbose mode (failed on Mac)' '
      	(
      		cd "$shared_repo" &&
      		cat >expect <<-EOF &&
5:  b8f80ad454 = 5:  4af03876d4 pack-redundant: rename pack_list.all_objects
6:  8a12ad699e ! 6:  89ed4fb2a5 pack-redundant: consistent sort method
    @@ -96,8 +96,12 @@
      #     ALL | x x x x x x x x x x x x x x x x x   x
      #
      #############################################################################
    --test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
    -+test_expect_success 'one of pack-2/pack-3 is redundant' '
    +-test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' '
    ++test_expect_success 'master: one of pack-2/pack-3 is redundant' '
    + 	create_pack_in "$master_repo" P4 <<-EOF &&
    + 		$J
    + 		$K
    +@@
      	(
      		cd "$master_repo" &&
      		cat >expect <<-EOF &&
    @@ -110,26 +114,26 @@
      #     ALL | x x x x x x x x x x x x x x x x x x x
      #
      #############################################################################
    --test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
    -+test_expect_success 'pack 2, 4, and 6 are redundant' '
    - 	(
    - 		cd "$master_repo" &&
    - 		cat >expect <<-EOF &&
    +-test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' '
    ++test_expect_success 'master: pack 2, 4, and 6 are redundant' '
    + 	create_pack_in "$master_repo" P6 <<-EOF &&
    + 		$N
    + 		$O
     @@
      #     ALL | x x x x x x x x x x x x x x x x x x x
      #
      #############################################################################
    --test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
    -+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    - 	(
    - 		cd "$master_repo" &&
    - 		cat >expect <<-EOF &&
    +-test_expect_failure 'master: pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
    ++test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
    + 	create_pack_in "$master_repo" P8 <<-EOF &&
    + 		$A
    + 		EOF
     @@
      	)
      '
      
    --test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
    -+test_expect_success 'remove redundant packs and pass fsck' '
    +-test_expect_failure 'master: remove redundant packs and pass fsck (failed on Mac)' '
    ++test_expect_success 'master: remove redundant packs and pass fsck' '
      	(
      		cd "$master_repo" &&
      		git pack-redundant --all | xargs rm &&
    @@ -137,8 +141,8 @@
      	)
      '
      
    --test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
    -+test_expect_success 'no redundant packs without --alt-odb' '
    +-test_expect_failure 'shared: all packs are redundant, but no output without --alt-odb (failed on Mac)' '
    ++test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
      	(
      		cd "$shared_repo" &&
      		git pack-redundant --all >out &&
    @@ -146,8 +150,8 @@
      #     ALL | x x x x x x x x x x x x x x x x x x x
      #
      #############################################################################
    --test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
    -+test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    +-test_expect_failure 'shared: show redundant packs in stderr for verbose mode (failed on Mac)' '
    ++test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
      	(
      		cd "$shared_repo" &&
      		cat >expect <<-EOF &&

--

Jiang Xin (4):
  t5323: test cases for git-pack-redundant
  pack-redundant: delay creation of unique_objects
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: delete redundant code
  pack-redundant: new algorithm to find min packs

 builtin/pack-redundant.c  | 232 ++++++++-----------
 t/t5323-pack-redundant.sh | 467 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 559 insertions(+), 140 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 1/6] t5323: test cases for git-pack-redundant
  2019-02-01 21:49                       ` Eric Sunshine
  2019-02-02 13:30                         ` [PATCH v10 0/6] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-02-02 13:30                         ` Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
                                           ` (4 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao
  Cc: Jiang Xin, Jiang Xin, Sun Chao, SZEDER Gábor, Eric Sunshine

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate its new algorithm.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Reviewed-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5323-pack-redundant.sh | 467 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 467 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..f18db06d91
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,467 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='Test git pack-redundant
+
+In order to test git-pack-redundant, we will create a number of objects and
+packs in the repository `master.git`. The relationship between packs (P1-P8)
+and objects (T, A-R) is showed in the following chart. Objects of a pack will
+be marked with letter x, while objects of redundant packs will be marked with
+exclamation point, and redundant pack itself will be marked with asterisk.
+
+        | T A B C D E F G H I J K L M N O P Q R
+    ----+--------------------------------------
+    P1  | x x x x x x x                       x
+    P2* |     ! ! ! !   ! ! !
+    P3  |             x     x x x x x
+    P4* |                     ! ! ! !     !
+    P5  |               x x           x x
+    P6* |                             ! !   !
+    P7  |                                 x x
+    P8* |   !
+    ----+--------------------------------------
+    ALL | x x x x x x x x x x x x x x x x x x x
+
+Another repository `shared.git` has unique objects (X-Z), while other objects
+(marked with letter s) are shared through alt-odb (of `master.git`). The
+relationship between packs and objects is as follows:
+
+        | T A B C D E F G H I J K L M N O P Q R   X Y Z
+    ----+----------------------------------------------
+    Px1 |   s s s                                 x x x
+    Px2 |         s s s                           x x x
+'
+
+. ./test-lib.sh
+
+master_repo=master.git
+shared_repo=shared.git
+
+# Create commits in <repo> and assign each commit's oid to shell variables
+# given in the arguments (A, B, and C). E.g.:
+#
+#     create_commits_in <repo> A B C
+#
+# NOTE: Avoid calling this function from a subshell since variable
+# assignments will disappear when subshell exits.
+create_commits_in () {
+	repo="$1" &&
+	if ! parent=$(git -C "$repo" rev-parse HEAD^{} 2>/dev/null)
+	then
+		parent=
+	fi &&
+	T=$(git -C "$repo" write-tree) &&
+	shift &&
+	while test $# -gt 0
+	do
+		name=$1 &&
+		test_tick &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git -C "$repo" commit-tree $T)
+		else
+			oid=$(echo $name | git -C "$repo" commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid &&
+		shift ||
+		return 1
+	done &&
+	git -C "$repo" update-ref refs/heads/master $oid
+}
+
+# Create pack in <repo> and assign pack id to variable given in the 2nd argument
+# (<name>). Commits in the pack will be read from stdin. E.g.:
+#
+#     create_pack_in <repo> <name> <<-EOF
+#         ...
+#         EOF
+#
+# NOTE: commits from stdin should be given using heredoc, not using pipe, and
+# avoid calling this function from a subshell since variable assignments will
+# disappear when subshell exits.
+create_pack_in () {
+	repo="$1" &&
+	name="$2" &&
+	pack=$(git -C "$repo/objects/pack" pack-objects -q pack) &&
+	eval $name=$pack &&
+	eval P$pack=$name:$pack
+}
+
+format_packfiles () {
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" |
+	sort -u |
+	while read p
+	do
+		if test -z "$(eval echo \${P$p})"
+		then
+			echo $p
+		else
+			eval echo "\${P$p}"
+		fi
+	done |
+	sort
+}
+
+test_expect_success 'setup master repo' '
+	git init --bare "$master_repo" &&
+	create_commits_in "$master_repo" A B C D E F G H I J K L M N O P Q R
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2  |     x x x x   x x x
+#     P3  |             x     x x x x x
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x         x
+#
+#############################################################################
+test_expect_success 'master: no redundant for pack 1, 2, 3' '
+	create_pack_in "$master_repo" P1 <<-EOF &&
+		$T
+		$A
+		$B
+		$C
+		$D
+		$E
+		$F
+		$R
+		EOF
+	create_pack_in "$master_repo" P2 <<-EOF &&
+		$B
+		$C
+		$D
+		$E
+		$G
+		$H
+		$I
+		EOF
+	create_pack_in "$master_repo" P3 <<-EOF &&
+		$F
+		$I
+		$J
+		$K
+		$L
+		$M
+		EOF
+	(
+		cd "$master_repo" &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2* |     ! ! ! !   ! ! !
+#     P3  |             x     x x x x x
+#     P4  |                     x x x x     x
+#     P5  |               x x           x x
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x   x
+#
+#############################################################################
+test_expect_success 'master: one of pack-2/pack-3 is redundant' '
+	create_pack_in "$master_repo" P4 <<-EOF &&
+		$J
+		$K
+		$L
+		$M
+		$P
+		EOF
+	create_pack_in "$master_repo" P5 <<-EOF &&
+		$G
+		$H
+		$N
+		$O
+		EOF
+	(
+		cd "$master_repo" &&
+		cat >expect <<-EOF &&
+			P2:$P2
+			EOF
+		git pack-redundant --all >out &&
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2* |     ! ! ! !   ! ! !
+#     P3  |             x     x x x x x
+#     P4* |                     ! ! ! !     !
+#     P5  |               x x           x x
+#     P6* |                             ! !   !
+#     P7  |                                 x x
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x x x
+#
+#############################################################################
+test_expect_success 'master: pack 2, 4, and 6 are redundant' '
+	create_pack_in "$master_repo" P6 <<-EOF &&
+		$N
+		$O
+		$Q
+		EOF
+	create_pack_in "$master_repo" P7 <<-EOF &&
+		$P
+		$Q
+		EOF
+	(
+		cd "$master_repo" &&
+		cat >expect <<-EOF &&
+			P2:$P2
+			P4:$P4
+			P6:$P6
+			EOF
+		git pack-redundant --all >out &&
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#         | T A B C D E F G H I J K L M N O P Q R
+#     ----+--------------------------------------
+#     P1  | x x x x x x x                       x
+#     P2* |     ! ! ! !   ! ! !
+#     P3  |             x     x x x x x
+#     P4* |                     ! ! ! !     !
+#     P5  |               x x           x x
+#     P6* |                             ! !   !
+#     P7  |                                 x x
+#     P8* |   !
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x x x
+#
+#############################################################################
+test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
+	create_pack_in "$master_repo" P8 <<-EOF &&
+		$A
+		EOF
+	(
+		cd "$master_repo" &&
+		cat >expect <<-EOF &&
+			P2:$P2
+			P4:$P4
+			P6:$P6
+			P8:$P8
+			EOF
+		git pack-redundant --all >out &&
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'master: clean loose objects' '
+	(
+		cd "$master_repo" &&
+		git prune-packed &&
+		find objects -type f | sed -e "/objects\/pack\//d" >out &&
+		test_must_be_empty out
+	)
+'
+
+test_expect_success 'master: remove redundant packs and pass fsck' '
+	(
+		cd "$master_repo" &&
+		git pack-redundant --all | xargs rm &&
+		git fsck &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+# The following test cases will execute inside `shared.git`, instead of
+# inside `master.git`.
+test_expect_success 'setup shared.git' '
+	git clone --mirror "$master_repo" "$shared_repo" &&
+	(
+		cd "$shared_repo" &&
+		printf "../../$master_repo/objects\n" >objects/info/alternates
+	)
+'
+
+test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
+	(
+		cd "$shared_repo" &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#     ================ master.git ===============
+#         | T A B C D E F G H I J K L M N O P Q R  <----------+
+#     ----+--------------------------------------             |
+#     P1  | x x x x x x x                       x             |
+#     P3  |             x     x x x x x                       |
+#     P5  |               x x           x x                   |
+#     P7  |                                 x x               |
+#     ----+--------------------------------------             |
+#     ALL | x x x x x x x x x x x x x x x x x x x             |
+#                                                             |
+#                                                             |
+#     ================ shared.git ===============             |
+#         | T A B C D E F G H I J K L M N O P Q R  <objects/info/alternates>
+#     ----+--------------------------------------
+#     P1* | s s s s s s s                       s
+#     P3* |             s     s s s s s
+#     P5* |               s s           s s
+#     P7* |                                 s s
+#     ----+--------------------------------------
+#     ALL | x x x x x x x x x x x x x x x x x x x
+#
+#############################################################################
+test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
+	(
+		cd "$shared_repo" &&
+		cat >expect <<-EOF &&
+			P1:$P1
+			P3:$P3
+			P5:$P5
+			P7:$P7
+			EOF
+		git pack-redundant --all --verbose >out 2>out.err &&
+		test_must_be_empty out &&
+		grep "pack$" out.err | format_packfiles >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'shared: remove redundant packs, no packs left' '
+	(
+		cd "$shared_repo" &&
+		cat >expect <<-EOF &&
+			fatal: Zero packs found!
+			EOF
+		git pack-redundant --all --alt-odb | xargs rm &&
+		git fsck &&
+		test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'shared: create new objects and packs' '
+	create_commits_in "$shared_repo" X Y Z &&
+	create_pack_in "$shared_repo" Px1 <<-EOF &&
+		$X
+		$Y
+		$Z
+		$A
+		$B
+		$C
+		EOF
+	create_pack_in "$shared_repo" Px2 <<-EOF
+		$X
+		$Y
+		$Z
+		$D
+		$E
+		$F
+		EOF
+'
+
+test_expect_success 'shared: no redundant without --alt-odb' '
+	(
+		cd "$shared_repo" &&
+		git pack-redundant --all >out &&
+		test_must_be_empty out
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#     ================ master.git ===============
+#         | T A B C D E F G H I J K L M N O P Q R  <----------------+
+#     ----+--------------------------------------                   |
+#     P1  | x x x x x x x                       x                   |
+#     P3  |             x     x x x x x                             |
+#     P5  |               x x           x x                         |
+#     P7  |                                 x x                     |
+#     ----+--------------------------------------                   |
+#     ALL | x x x x x x x x x x x x x x x x x x x                   |
+#                                                                   |
+#                                                                   |
+#     ================ shared.git =======================           |
+#         | T A B C D E F G H I J K L M N O P Q R   X Y Z <objects/info/alternates>
+#     ----+----------------------------------------------
+#     Px1 |   s s s                                 x x x
+#     Px2*|         s s s                           ! ! !
+#     ----+----------------------------------------------
+#     ALL | s s s s s s s s s s s s s s s s s s s   x x x
+#
+#############################################################################
+test_expect_success 'shared: one pack is redundant with --alt-odb' '
+	(
+		cd "$shared_repo" &&
+		git pack-redundant --all --alt-odb >out &&
+		format_packfiles <out >actual &&
+		test_line_count = 1 actual
+	)
+'
+
+#############################################################################
+# Chart of packs and objects for this test case
+#
+#     ================ master.git ===============
+#         | T A B C D E F G H I J K L M N O P Q R  <----------------+
+#     ----+--------------------------------------                   |
+#     P1  | x x x x x x x                       x                   |
+#     P3  |             x     x x x x x                             |
+#     P5  |               x x           x x                         |
+#     P7  |                                 x x                     |
+#     ----+--------------------------------------                   |
+#     ALL | x x x x x x x x x x x x x x x x x x x                   |
+#                                                                   |
+#                                                                   |
+#     ================ shared.git =======================           |
+#         | T A B C D E F G H I J K L M N O P Q R   X Y Z <objects/info/alternates>
+#     ----+----------------------------------------------
+#     Px1*|   s s s                                 i i i
+#     Px2*|         s s s                           i i i
+#     ----+----------------------------------------------
+#     ALL | s s s s s s s s s s s s s s s s s s s   i i i
+#                                                  (ignored objects, marked with i)
+#
+#############################################################################
+test_expect_success 'shared: ignore unique objects and all two packs are redundant' '
+	(
+		cd "$shared_repo" &&
+		cat >expect <<-EOF &&
+			Px1:$Px1
+			Px2:$Px2
+			EOF
+		git pack-redundant --all --alt-odb >out <<-EOF &&
+			$X
+			$Y
+			$Z
+			EOF
+		format_packfiles <out >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_done
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 2/6] pack-redundant: delay creation of unique_objects
  2019-02-01 21:49                       ` Eric Sunshine
  2019-02-02 13:30                         ` [PATCH v10 0/6] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 1/6] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-02-02 13:30                         ` Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 3/6] pack-redundant: delete redundant code Jiang Xin
                                           ` (3 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin, Sun Chao

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Instead of initializing unique_objects in `add_pack()`, copy it from
all_objects in `cmp_two_packs()`, after unwanted objects have been
removed from all_objects.

This saves memory (no unique_objects list is allocated for alt-odb
packs), and runs `llist_sorted_difference_inplace()` only once when
removing ignored objects and when removing objects in alt-odb in
`scan_alt_odb_packs()`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..f7dab0ec60 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -254,6 +254,11 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	struct llist_item *p1_hint = NULL, *p2_hint = NULL;
 	const unsigned int hashsz = the_hash_algo->rawsz;
 
+	if (!p1->unique_objects)
+		p1->unique_objects = llist_copy(p1->all_objects);
+	if (!p2->unique_objects)
+		p2->unique_objects = llist_copy(p2->all_objects);
+
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
 	p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
@@ -536,7 +541,7 @@ static void scan_alt_odb_packs(void)
 	while (alt) {
 		local = local_packs;
 		while (local) {
-			llist_sorted_difference_inplace(local->unique_objects,
+			llist_sorted_difference_inplace(local->all_objects,
 							alt->all_objects);
 			local = local->next;
 		}
@@ -567,8 +572,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
-	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = NULL;
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -646,7 +650,6 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 
 	load_all_objects();
 
-	cmp_local_packs();
 	if (alt_odb)
 		scan_alt_odb_packs();
 
@@ -663,10 +666,12 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	llist_sorted_difference_inplace(all_objects, ignore);
 	pl = local_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
+	cmp_local_packs();
+
 	minimize(&min);
 
 	if (verbose) {
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 3/6] pack-redundant: delete redundant code
  2019-02-01 21:49                       ` Eric Sunshine
                                           ` (2 preceding siblings ...)
  2019-02-02 13:30                         ` [PATCH v10 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
@ 2019-02-02 13:30                         ` Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
                                           ` (2 subsequent siblings)
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

The objects in alt-odb are removed from `all_objects` twice, in both
`load_all_objects` and `scan_alt_odb_packs`; remove the duplicate removal
from the latter function.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index f7dab0ec60..4a06f057dd 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -545,7 +545,6 @@ static void scan_alt_odb_packs(void)
 							alt->all_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
 		alt = alt->next;
 	}
 }
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 4/6] pack-redundant: new algorithm to find min packs
  2019-02-01 21:49                       ` Eric Sunshine
                                           ` (3 preceding siblings ...)
  2019-02-02 13:30                         ` [PATCH v10 3/6] pack-redundant: delete redundant code Jiang Xin
@ 2019-02-02 13:30                         ` Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 6/6] pack-redundant: consistent sort method Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the deep recursion
in `get_permutations` will exhaust all available resources, and the
`git pack-redundant` process will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_unique packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by their number of remaining objects, most
   objects first, and add the first non_unique pack to the `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Some test cases will fail on Mac OS X.  Mark them as expected failures;
they will be resolved in a later commit.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c  | 194 +++++++++++++-------------------------
 t/t5323-pack-redundant.sh |  12 +--
 2 files changed, 73 insertions(+), 133 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 4a06f057dd..d6d9a66e46 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -290,78 +276,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
@@ -426,14 +340,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -451,49 +403,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -606,7 +546,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index f18db06d91..3e62e8663f 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -173,7 +173,7 @@ test_expect_success 'master: no redundant for pack 1, 2, 3' '
 #     ALL | x x x x x x x x x x x x x x x x x   x
 #
 #############################################################################
-test_expect_success 'master: one of pack-2/pack-3 is redundant' '
+test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' '
 	create_pack_in "$master_repo" P4 <<-EOF &&
 		$J
 		$K
@@ -214,7 +214,7 @@ test_expect_success 'master: one of pack-2/pack-3 is redundant' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_success 'master: pack 2, 4, and 6 are redundant' '
+test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' '
 	create_pack_in "$master_repo" P6 <<-EOF &&
 		$N
 		$O
@@ -254,7 +254,7 @@ test_expect_success 'master: pack 2, 4, and 6 are redundant' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
+test_expect_failure 'master: pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
 	create_pack_in "$master_repo" P8 <<-EOF &&
 		$A
 		EOF
@@ -281,7 +281,7 @@ test_expect_success 'master: clean loose objects' '
 	)
 '
 
-test_expect_success 'master: remove redundant packs and pass fsck' '
+test_expect_failure 'master: remove redundant packs and pass fsck (failed on Mac)' '
 	(
 		cd "$master_repo" &&
 		git pack-redundant --all | xargs rm &&
@@ -301,7 +301,7 @@ test_expect_success 'setup shared.git' '
 	)
 '
 
-test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
+test_expect_failure 'shared: all packs are redundant, but no output without --alt-odb (failed on Mac)' '
 	(
 		cd "$shared_repo" &&
 		git pack-redundant --all >out &&
@@ -334,7 +334,7 @@ test_expect_success 'shared: all packs are redundant, but no output without --al
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
+test_expect_failure 'shared: show redundant packs in stderr for verbose mode (failed on Mac)' '
 	(
 		cd "$shared_repo" &&
 		cat >expect <<-EOF &&
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 5/6] pack-redundant: rename pack_list.all_objects
  2019-02-01 21:49                       ` Eric Sunshine
                                           ` (4 preceding siblings ...)
  2019-02-02 13:30                         ` [PATCH v10 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-02-02 13:30                         ` Jiang Xin
  2019-02-02 13:30                         ` [PATCH v10 6/6] pack-redundant: consistent sort method Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao; +Cc: Jiang Xin, Jiang Xin, Sun Chao

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

The new algorithm uses `pack_list.all_objects` to track remaining objects,
so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d6d9a66e46..15cdf233c4 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -241,9 +241,9 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	const unsigned int hashsz = the_hash_algo->rawsz;
 
 	if (!p1->unique_objects)
-		p1->unique_objects = llist_copy(p1->all_objects);
+		p1->unique_objects = llist_copy(p1->remaining_objects);
 	if (!p2->unique_objects)
-		p2->unique_objects = llist_copy(p2->all_objects);
+		p2->unique_objects = llist_copy(p2->remaining_objects);
 
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
@@ -344,8 +344,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -355,7 +355,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -399,7 +399,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -417,20 +417,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -445,7 +445,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -456,7 +456,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -481,8 +481,8 @@ static void scan_alt_odb_packs(void)
 	while (alt) {
 		local = local_packs;
 		while (local) {
-			llist_sorted_difference_inplace(local->all_objects,
-							alt->all_objects);
+			llist_sorted_difference_inplace(local->remaining_objects,
+							alt->remaining_objects);
 			local = local->next;
 		}
 		alt = alt->next;
@@ -499,7 +499,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -508,7 +508,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	l.unique_objects = NULL;
@@ -605,7 +605,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	llist_sorted_difference_inplace(all_objects, ignore);
 	pl = local_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

* [PATCH v10 6/6] pack-redundant: consistent sort method
  2019-02-01 21:49                       ` Eric Sunshine
                                           ` (5 preceding siblings ...)
  2019-02-02 13:30                         ` [PATCH v10 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-02-02 13:30                         ` Jiang Xin
  6 siblings, 0 replies; 83+ messages in thread
From: Jiang Xin @ 2019-02-02 13:30 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Sun Chao
  Cc: Jiang Xin, Jiang Xin, Sun Chao, SZEDER Gábor

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 produces different results on macOS.
This is because `cmp_pack_list_reverse` does not impose a deterministic
order when two packs being sorted have the same number of remaining_objects.

Break ties in the sorting function by the total pack size so that the
results of t5323 are consistent across platforms.

The new algorithm to find redundant packs is a trade-off to save memory
resources; its result may differ from that of the old one, and may
sometimes not be optimal.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c  | 24 ++++++++++++++++--------
 t/t5323-pack-redundant.sh | 18 +++++++++---------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 15cdf233c4..29ff5e99cb 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -340,19 +341,25 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
+		/* have the same remaining_objects, big pack first */
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
+		/* sort by remaining objects, more objects first */
 		return 1;
-	else
+	} else {
 		return -1;
+	}
 }
 
 /* Sort pack_list, greater size of remaining_objects first */
@@ -370,7 +377,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -511,6 +518,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	l.unique_objects = NULL;
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 3e62e8663f..384b244314 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -165,15 +165,15 @@ test_expect_success 'master: no redundant for pack 1, 2, 3' '
 #         | T A B C D E F G H I J K L M N O P Q R
 #     ----+--------------------------------------
 #     P1  | x x x x x x x                       x
-#     P2* |     ! ! ! !   ! ! !
-#     P3  |             x     x x x x x
+#     P2  |     x x x x   x x x
+#     P3* |             !     ! ! ! ! !
 #     P4  |                     x x x x     x
 #     P5  |               x x           x x
 #     ----+--------------------------------------
 #     ALL | x x x x x x x x x x x x x x x x x   x
 #
 #############################################################################
-test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)' '
+test_expect_success 'master: one of pack-2/pack-3 is redundant' '
 	create_pack_in "$master_repo" P4 <<-EOF &&
 		$J
 		$K
@@ -190,7 +190,7 @@ test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)'
 	(
 		cd "$master_repo" &&
 		cat >expect <<-EOF &&
-			P2:$P2
+			P3:$P3
 			EOF
 		git pack-redundant --all >out &&
 		format_packfiles <out >actual &&
@@ -214,7 +214,7 @@ test_expect_failure 'master: one of pack-2/pack-3 is redundant (failed on Mac)'
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' '
+test_expect_success 'master: pack 2, 4, and 6 are redundant' '
 	create_pack_in "$master_repo" P6 <<-EOF &&
 		$N
 		$O
@@ -254,7 +254,7 @@ test_expect_failure 'master: pack 2, 4, and 6 are redundant (failed on Mac)' '
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_failure 'master: pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
+test_expect_success 'master: pack-8 (subset of pack-1) is also redundant' '
 	create_pack_in "$master_repo" P8 <<-EOF &&
 		$A
 		EOF
@@ -281,7 +281,7 @@ test_expect_success 'master: clean loose objects' '
 	)
 '
 
-test_expect_failure 'master: remove redundant packs and pass fsck (failed on Mac)' '
+test_expect_success 'master: remove redundant packs and pass fsck' '
 	(
 		cd "$master_repo" &&
 		git pack-redundant --all | xargs rm &&
@@ -301,7 +301,7 @@ test_expect_success 'setup shared.git' '
 	)
 '
 
-test_expect_failure 'shared: all packs are redundant, but no output without --alt-odb (failed on Mac)' '
+test_expect_success 'shared: all packs are redundant, but no output without --alt-odb' '
 	(
 		cd "$shared_repo" &&
 		git pack-redundant --all >out &&
@@ -334,7 +334,7 @@ test_expect_failure 'shared: all packs are redundant, but no output without --al
 #     ALL | x x x x x x x x x x x x x x x x x x x
 #
 #############################################################################
-test_expect_failure 'shared: show redundant packs in stderr for verbose mode (failed on Mac)' '
+test_expect_success 'shared: show redundant packs in stderr for verbose mode' '
 	(
 		cd "$shared_repo" &&
 		cat >expect <<-EOF &&
-- 
2.20.1.103.ged0fc2ca7b


^ permalink raw reply	[flat|nested] 83+ messages in thread

end of thread, back to index

Thread overview: 83+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-18  9:58 [PATCH 1/2] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-09 12:56       ` SZEDER Gábor
2019-01-09 16:47         ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 " Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-02-01 19:42                   ` Eric Sunshine
2019-02-01 21:03                     ` Junio C Hamano
2019-02-01 21:49                       ` Eric Sunshine
2019-02-02 13:30                         ` [PATCH v10 0/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 3/6] pack-redundant: delete redundant code Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 6/6] pack-redundant: consistent sort method Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 3/6] pack-redundant: delete redundant code Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 6/6] pack-redundant: consistent sort method Jiang Xin
2019-01-30 11:47               ` [PATCH v7 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-31 21:44                 ` Junio C Hamano
2019-02-01  5:44                   ` Jiang Xin
2019-02-01  6:11                     ` Eric Sunshine
2019-02-01  7:23                       ` Jiang Xin
2019-02-01  7:25                         ` Jiang Xin
2019-02-01  9:51                       ` Jiang Xin
2019-01-30 11:47               ` [PATCH v7 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-01-30 11:47               ` [PATCH v7 3/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-31 19:30                 ` Junio C Hamano
2019-02-01  9:55                   ` Jiang Xin
2019-01-30 11:47               ` [PATCH v7 4/6] pack-redundant: remove unused functions Jiang Xin
2019-01-30 15:03                 ` [PATCH v8 1/1] pack-redundant: delete redundant code 16657101987
2019-01-30 11:47               ` [PATCH v7 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-30 11:47               ` [PATCH v7 6/6] pack-redundant: consistent sort method Jiang Xin
2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10 21:11             ` Junio C Hamano
2019-01-11  1:59               ` Jiang Xin
2019-01-11 18:00                 ` Junio C Hamano
2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-11  1:19             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 20:05             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10  7:11           ` Johannes Sixt
2019-01-10 11:57           ` SZEDER Gábor
2019-01-10 12:25             ` Torsten Bögershausen
2019-01-10 17:36             ` Junio C Hamano
2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
2019-01-15 21:09               ` Eric Sunshine
2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
2019-01-22 19:47               ` Junio C Hamano
2019-01-22 20:00                 ` Torsten Bögershausen
2019-01-22 21:15                   ` Eric Sunshine
2019-01-23  6:35                     ` Torsten Bögershausen
2019-01-23 17:54                       ` Junio C Hamano
2019-01-25 19:12                         ` Torsten Bögershausen
2019-01-27 22:34                           ` Junio C Hamano
2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
2019-01-08 19:30         ` Junio C Hamano
2019-01-09  0:29           ` 16657101987
2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox