git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
* [PATCH 1/2] pack-redundant: new algorithm to find min packs
@ 2018-12-18  9:58 Jiang Xin
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
  0 siblings, 1 reply; 43+ messages in thread
From: Jiang Xin @ 2018-12-18  9:58 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Lukas Sandström, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_uniqe packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 116 ++++++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 43 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..19dcf74750 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static void minimize(struct pack_list **min)
+static int cmp_pack_list_reverse(const void *a, const void *b)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
+static void minimize(struct pack_list **min, struct llist *ignore)
+{
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,41 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove all the ignored objects and unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while ((pl = non_unique)) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		while ((pl = pl->next)) {
+			if (pl->all_objects->size == 0)
+				break;
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		}
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +633,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -667,7 +697,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 		pl = pl->next;
 	}
 
-	minimize(&min);
+	minimize(&min, ignore);
 
 	if (verbose) {
 		fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
-- 
2.20.0.2.g660e9286fc


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH 2/2] pack-redundant: remove unused functions
  2018-12-18  9:58 [PATCH 1/2] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2018-12-18  9:58 ` Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
                     ` (3 more replies)
  0 siblings, 4 replies; 43+ messages in thread
From: Jiang Xin @ 2018-12-18  9:58 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Lukas Sandström, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 81 ----------------------------------------
 1 file changed, 81 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 19dcf74750..d0ff2377f3 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -63,15 +63,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -285,78 +276,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.0.2.g660e9286fc


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2 0/3] pack-redundant: new algorithm to find min packs
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
                       ` (3 more replies)
  2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
                     ` (2 subsequent siblings)
  3 siblings, 4 replies; 43+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Jiang Xin, Sun Chao

Sun Chao is my former colleague at Huawei. He finds a bug of git-pack-redundant.

When I was in Huawei, I develop a program to manage fork tree of repositories,
using alternate repo for forks to save disk spaces. 

Sun Chao finds if there are too many packs and many of them overlap each
other, running `git pack-redundant --all` will exhaust all memories and the
process will be killed by kernel.

There is a script in commit log of commit 2/3, which can be used to create a
repository with lots of redundant packs. Running `git pack-redundant
--all` in it can reproduce this issue.

Updates of reroll v2:

* Add test cases in t5322.
* Fix a bug in patch 2/3.

--

Jiang Xin (1):
  t5322: test cases for git-pack-redundant

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 181 ++++++++++++++------------------------
 t/t5322-pack-redundant.sh |  69 +++++++++++++++
 2 files changed, 137 insertions(+), 113 deletions(-)
 create mode 100755 t/t5322-pack-redundant.sh

-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2 1/3] t5322: test cases for git-pack-redundant
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5322-pack-redundant.sh | 69 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100755 t/t5322-pack-redundant.sh

diff --git a/t/t5322-pack-redundant.sh b/t/t5322-pack-redundant.sh
new file mode 100755
index 0000000000..4add9c2bb1
--- /dev/null
+++ b/t/t5322-pack-redundant.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	set -e
+	parent=
+	for name in A B C D E F G H I J K L M
+	do
+		test_tick
+		T=$(git write-tree)
+		if test -z "$parent"
+		then
+			sha1=$(echo $name | git commit-tree $T)
+		else
+			sha1=$(echo $name | git commit-tree -p $parent $T)
+		fi
+		eval $name=$sha1
+		parent=$sha1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_redundant_packs()
+{
+	set -e
+	cd .git/objects/pack
+	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
+	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
+	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
+	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
+	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
+	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
+	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
+	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
+	cd -
+}
+
+# Create commits and packs
+create_commits
+create_redundant_packs
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
+'
+
+printf "$P1\n$P4\n$P5\n$P6\n" | sort >expected
+
+test_expect_success 'git pack-redundant --all' '
+	git pack-redundant --all | \
+		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
+		sort -u >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	test $(git pack-redundant --all | wc -l) -eq 0
+'
+
+test_done
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2 2/3] pack-redundant: new algorithm to find min packs
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_uniqe packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 109 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2 3/3] pack-redundant: remove unused functions
  2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
                     ` (2 preceding siblings ...)
  2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2018-12-19 12:14   ` Jiang Xin
  3 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2018-12-19 12:14 UTC (permalink / raw)
  To: Git List; +Cc: Sun Chao, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 72 ----------------------------------------
 1 file changed, 72 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..9630117c90 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -285,78 +285,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v3 0/3] pack-redundant: new algorithm to find min packs
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-02  4:34     ` " Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

Sun Chao (my former colleague at Huawei) found a bug of
git-pack-redundant.  If there are too many packs and many of them overlap
each other, running `git pack-redundant --all` will exhaust all memories
and the process will be killed by kernel.

There is a script in commit log of commit 2/3, which can be used to
create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it can reproduce this issue.

Updates of reroll v3:

* Rename test case file from t5322 to t5323, for I see t5322 exist in
  commit 404dead121: "pack-objects: add --sparse option".

Jiang Xin (1):
  t5323: test cases for git-pack-redundant

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 181 +++++++++++++++++-----------------------------
 t/t5323-pack-redundant.sh |  84 +++++++++++++++++++++
 2 files changed, 152 insertions(+), 113 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
@ 2019-01-02  4:34     ` Jiang Xin
  2019-01-09 12:56       ` SZEDER Gábor
  2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 1 reply; 43+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5323-pack-redundant.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..ef6076f065
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	set -e
+	parent=
+	for name in A B C D E F G H I J K L M
+	do
+		test_tick
+		T=$(git write-tree)
+		if test -z "$parent"
+		then
+			sha1=$(echo $name | git commit-tree $T)
+		else
+			sha1=$(echo $name | git commit-tree -p $parent $T)
+		fi
+		eval $name=$sha1
+		parent=$sha1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_redundant_packs()
+{
+	set -e
+	cd .git/objects/pack
+	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
+	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
+	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
+	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
+	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
+	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
+	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
+	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
+	cd -
+	eval P$P1=P1:$P1
+	eval P$P2=P2:$P2
+	eval P$P3=P3:$P3
+	eval P$P4=P4:$P4
+	eval P$P5=P5:$P5
+	eval P$P6=P6:$P6
+	eval P$P7=P7:$P7
+	eval P$P8=P8:$P8
+}
+
+# Create commits and packs
+create_commits
+create_redundant_packs
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
+'
+
+cat >expected <<EOF
+P1:$P1
+P4:$P4
+P5:$P5
+P6:$P6
+EOF
+
+test_expect_success 'git pack-redundant --all' '
+	git pack-redundant --all | \
+		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort > actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	test $(git pack-redundant --all | wc -l) -eq 0
+'
+
+test_done
-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v3 2/3] pack-redundant: new algorithm to find min packs
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-02  4:34     ` Jiang Xin
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  3 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_uniqe packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 109 +++++++++++++++++++++++++++++------------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v3 3/3] pack-redundant: remove unused functions
  2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
                       ` (2 preceding siblings ...)
  2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-02  4:34     ` Jiang Xin
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
                         ` (2 more replies)
  3 siblings, 3 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-02  4:34 UTC (permalink / raw)
  To: Sun Chao, Git List, Junio C Hamano; +Cc: Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
---
 builtin/pack-redundant.c | 72 ------------------------------------------------
 1 file changed, 72 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..9630117c90 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -285,78 +285,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.14.5.agit.2


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
@ 2019-01-08 16:40       ` " 16657101987
  2019-01-08 19:30         ` Junio C Hamano
  2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
  2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
  2 siblings, 1 reply; 43+ messages in thread
From: 16657101987 @ 2019-01-08 16:40 UTC (permalink / raw)
  To: worldhello.net, git; +Cc: gitster, sunchao9

From: Sun Chao <sunchao9@huawei.com>

I'm particularly grateful to Junio and JiangXin for fixing the patches,
and I noticed Junio send a new commit to remove more unused codes and
suggest to SQUASH it.

So I create this new version of patches to do this work, I also have
checked the left codes and remove a unused struct based on Junio's
last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.

--

Sun Chao (1):
  pack-redundant: remove unused functions

 builtin/pack-redundant.c | 86 ------------------------------------------------
 1 file changed, 86 deletions(-)

-- 
2.8.1



^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v4 1/1] pack-redundant: remove unused functions
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
@ 2019-01-08 16:43       ` " 16657101987
  2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
  2 siblings, 0 replies; 43+ messages in thread
From: 16657101987 @ 2019-01-08 16:43 UTC (permalink / raw)
  To: worldhello.net, git; +Cc: gitster, sunchao9

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ------------------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7..eac2350 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -285,78 +271,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.8.1



^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
  2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
@ 2019-01-08 16:45       ` " 16657101987
  2 siblings, 0 replies; 43+ messages in thread
From: 16657101987 @ 2019-01-08 16:45 UTC (permalink / raw)
  To: worldhello.net, git; +Cc: gitster, sunchao9

From: Sun Chao <sunchao9@huawei.com>

I'm particularly grateful to Junio and JiangXin for fixing the patches,
and I noticed Junio send a new commit to remove more unused codes and
suggest to SQUASH it.

So I create this new version of patches to do this work, I also have
checked the left codes and remove a unused struct based on Junio's
last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.

--

Sun Chao (1):
  pack-redundant: remove unused functions

 builtin/pack-redundant.c | 86 ------------------------------------------------
 1 file changed, 86 deletions(-)

-- 
2.8.1



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
@ 2019-01-08 19:30         ` Junio C Hamano
  2019-01-09  0:29           ` 16657101987
  0 siblings, 1 reply; 43+ messages in thread
From: Junio C Hamano @ 2019-01-08 19:30 UTC (permalink / raw)
  To: 16657101987; +Cc: worldhello.net, git, sunchao9

16657101987@163.com writes:

> From: Sun Chao <sunchao9@huawei.com>
>
> I'm particularly grateful to Junio and JiangXin for fixing the patches,
> and I noticed Junio send a new commit to remove more unused codes and
> suggest to SQUASH it.
>
> So I create this new version of patches to do this work, I also have
> checked the left codes and remove a unused struct based on Junio's
> last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.
>
> --
>
> Sun Chao (1):
>   pack-redundant: remove unused functions

Is this meant to replace [v3 3/3]?


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v4 0/1] pack-redundant: remove unused functions
  2019-01-08 19:30         ` Junio C Hamano
@ 2019-01-09  0:29           ` 16657101987
  0 siblings, 0 replies; 43+ messages in thread
From: 16657101987 @ 2019-01-09  0:29 UTC (permalink / raw)
  To: gitster; +Cc: 16657101987, git, sunchao9, worldhello.net

> 16657101987@163.com writes:
> 
>> From: Sun Chao <sunchao9@huawei.com>
>>
>> I'm particularly grateful to Junio and JiangXin for fixing the patches,
>> and I noticed Junio send a new commit to remove more unused codes and
>> suggest to SQUASH it.
>>
>> So I create this new version of patches to do this work, I also have
>> checked the left codes and remove a unused struct based on Junio's
>> last commit of `https://github.com/gitster/git/commits/sc/pack-redundant`.
>>
>> --
>>
>> Sun Chao (1):
>>   pack-redundant: remove unused functions
> 
> Is this meant to replace [v3 3/3]?

I'm Sun Chao and because my huawei email account can't send
email outside from company, so I used 163 email account to
send new path at home. I'm sorry for not explaining that.

Yes, this is meant to replace [v3 3/3], and I have noticed the
patch is applied, Thanks very much.


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-09 12:56       ` SZEDER Gábor
  2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
  0 siblings, 2 replies; 43+ messages in thread
From: SZEDER Gábor @ 2019-01-09 12:56 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> 
> Add test cases for git pack-redundant to validate new algorithm for git
> pack-redundant.
> 
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> ---
>  t/t5323-pack-redundant.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 84 insertions(+)
>  create mode 100755 t/t5323-pack-redundant.sh
> 
> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> new file mode 100755
> index 0000000000..ef6076f065
> --- /dev/null
> +++ b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,84 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2018 Jiang Xin
> +#
> +
> +test_description='git redundant test'

s/redundant/pack-redundant/ ?

> +
> +. ./test-lib.sh
> +
> +create_commits()
> +{
> +	set -e
> +	parent=
> +	for name in A B C D E F G H I J K L M
> +	do
> +		test_tick
> +		T=$(git write-tree)
> +		if test -z "$parent"
> +		then
> +			sha1=$(echo $name | git commit-tree $T)

There is a considerable effort going on to switch from SHA-1 to a
different hash function, so please don't add any new $sha1 variable;
call it $oid or $commit instead.

> +		else
> +			sha1=$(echo $name | git commit-tree -p $parent $T)
> +		fi
> +		eval $name=$sha1
> +		parent=$sha1
> +	done
> +	git update-ref refs/heads/master $M
> +}
> +
> +create_redundant_packs()
> +{
> +	set -e
> +	cd .git/objects/pack
> +	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
> +	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
> +	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
> +	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
> +	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
> +	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
> +	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
> +	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
> +	cd -
> +	eval P$P1=P1:$P1
> +	eval P$P2=P2:$P2
> +	eval P$P3=P3:$P3
> +	eval P$P4=P4:$P4
> +	eval P$P5=P5:$P5
> +	eval P$P6=P6:$P6
> +	eval P$P7=P7:$P7
> +	eval P$P8=P8:$P8
> +}
> +
> +# Create commits and packs
> +create_commits
> +create_redundant_packs

Please perform all setup tasks in a test_expect_success block, so we
get verbose and trace output about what's going on.

Don't use 'set -e', use an &&-chain instead.  To fail the test if a
command in the for loop were to fail you could do something like this:

  for ....
  do
    do-this &&
    do-that ||
    return 1
  done

> +
> +test_expect_success 'clear loose objects' '
> +	git prune-packed &&
> +	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0

Use something like

  find .git/objects -type f | grep -v pack >out &&
  test_must_be_empty out

instead, so we get an informative error message on failure.

> +'
> +
> +cat >expected <<EOF
> +P1:$P1
> +P4:$P4
> +P5:$P5
> +P6:$P6
> +EOF
> +
> +test_expect_success 'git pack-redundant --all' '
> +	git pack-redundant --all | \

Don't run a git command (especially the particular command the test
script focuses on) upstream of a pipe, because it hides the command's
exit code.  Use an intermediate file instead.

> +		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \

This sed command doesn't seem to work on macOS (on Travis CI), and
causes the test to fail with:

  ++git pack-redundant --all
  ++sed -e 's#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g'
  ++sort -u
  ++read p
  ++sort
  ++eval echo '${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}'
  ./test-lib.sh: line 697: ${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}: bad substitution
  ++test_cmp expected actual
  ++diff -u expected actual
  --- expected    2019-01-09 01:53:45.000000000 +0000
  +++ actual      2019-01-09 01:53:45.000000000 +0000
  @@ -1,4 +0,0 @@
  -P1:24ee080366509364d04a138cd4e168dc4ff33354
  -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
  -P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
  -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
  error: last command exited with $?=1
  not ok 2 - git pack-redundant --all

I'm not sure what's wrong with it, though.

Minor nit: 'git pack-redundant' prints one filename per line, so the
'g' at the end of the 's###g' is not necessary.

> +		sort -u | \
> +		while read p; do eval echo "\${P$p}"; done | \
> +		sort > actual && \

Style nit: no space between redirection operator and filename

> +	test_cmp expected actual
> +'
> +
> +test_expect_success 'remove redundant packs' '
> +	git pack-redundant --all | xargs rm &&
> +	git fsck &&
> +	test $(git pack-redundant --all | wc -l) -eq 0
> +'
> +
> +test_done
> -- 
> 2.14.5.agit.2
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-09 12:56       ` SZEDER Gábor
@ 2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                             ` (5 more replies)
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
  1 sibling, 6 replies; 43+ messages in thread
From: SZEDER Gábor @ 2019-01-09 16:47 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On Wed, Jan 09, 2019 at 01:56:28PM +0100, SZEDER Gábor wrote:
> On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> > +cat >expected <<EOF
> > +P1:$P1
> > +P4:$P4
> > +P5:$P5
> > +P6:$P6
> > +EOF

Creating the expected results could be moved into the
test_expect_success block as well.

> > +
> > +test_expect_success 'git pack-redundant --all' '
> > +	git pack-redundant --all | \
> 
> Don't run a git command (especially the particular command the test
> script focuses on) upstream of a pipe, because it hides the command's
> exit code.  Use an intermediate file instead.
> 
> > +		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
> 
> This sed command doesn't seem to work on macOS (on Travis CI), and
> causes the test to fail with:
> 
>   ++git pack-redundant --all
>   ++sed -e 's#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g'
>   ++sort -u
>   ++read p
>   ++sort
>   ++eval echo '${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}'
>   ./test-lib.sh: line 697: ${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}: bad substitution
>   ++test_cmp expected actual
>   ++diff -u expected actual
>   --- expected    2019-01-09 01:53:45.000000000 +0000
>   +++ actual      2019-01-09 01:53:45.000000000 +0000
>   @@ -1,4 +0,0 @@
>   -P1:24ee080366509364d04a138cd4e168dc4ff33354
>   -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
>   -P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
>   -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
>   error: last command exited with $?=1
>   not ok 2 - git pack-redundant --all
> 
> I'm not sure what's wrong with it, though.

So, it appears that 'sed' in macOS doesn't understand the
'\(idx\|pack\)' part of that regex.  Turning that command into

  sed -e "s#^.git/objects/pack/pack-\($OID_REGEX\)\..*#\1#" out | \

makes it work even on macOS, but note that those 40 hexdigits are not
actual OIDs but file content checksums, so using $OID_REGEX is not the
right thing to do here (though I'm not sure what is supposed to be
used instead, as $_x40 hardcodes the number of hexdigits).

Alas, the test as a whole still fails with the following on macOS:

  ++diff -u expected actual
  --- expected    2019-01-09 15:54:49.000000000 +0000
  +++ actual      2019-01-09 15:54:49.000000000 +0000
  @@ -1,4 +1,4 @@
   P1:24ee080366509364d04a138cd4e168dc4ff33354
  -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
  +P3:0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e
   P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
  -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
  +P7:4ecc1eb138516a26654cd4e3570b322c0820f170
  error: last command exited with $?=1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-09 12:56       ` SZEDER Gábor
  2019-01-09 16:47         ` SZEDER Gábor
@ 2019-01-10  3:28         ` Jiang Xin
  2019-01-10  7:11           ` Johannes Sixt
  2019-01-10 11:57           ` SZEDER Gábor
  1 sibling, 2 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-10  3:28 UTC (permalink / raw)
  To: SZEDER Gábor; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>
> On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> > +test_description='git redundant test'
>
> s/redundant/pack-redundant/ ?

Yes, will correct it.

> > +
> > +. ./test-lib.sh
> > +
> > +create_commits()
> > +{
> > +     set -e
> > +     parent=
> > +     for name in A B C D E F G H I J K L M
> > +     do
> > +             test_tick
> > +             T=$(git write-tree)
> > +             if test -z "$parent"
> > +             then
> > +                     sha1=$(echo $name | git commit-tree $T)
>
> There is a considerable effort going on to switch from SHA-1 to a
> different hash function, so please don't add any new $sha1 variable;
> call it $oid or $commit instead.
>

Will do.

> > +
> > +# Create commits and packs
> > +create_commits
> > +create_redundant_packs
>
> Please perform all setup tasks in a test_expect_success block, so we
> get verbose and trace output about what's going on.

Will do like this:

    test_expect_success 'setup' '
            create_commits  &&
            create_redundant_packs
    '

> Don't use 'set -e', use an &&-chain instead.  To fail the test if a
> command in the for loop were to fail you could do something like this:
>
>   for ....
>   do
>     do-this &&
>     do-that ||
>     return 1
>   done

Will do.

> > +
> > +test_expect_success 'clear loose objects' '
> > +     git prune-packed &&
> > +     test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
>
> Use something like
>
>   find .git/objects -type f | grep -v pack >out &&
>   test_must_be_empty out
>
> instead, so we get an informative error message on failure.

if `grep -v pack` return empty output, it will return error, so
I will use `sed -e "/objects\/pack\//d" >out` instead.

>
> > +'
> > +
> > +cat >expected <<EOF
> > +P1:$P1
> > +P4:$P4
> > +P5:$P5
> > +P6:$P6
> > +EOF
> > +
> > +test_expect_success 'git pack-redundant --all' '
> > +     git pack-redundant --all | \
>
> Don't run a git command (especially the particular command the test
> script focuses on) upstream of a pipe, because it hides the command's
> exit code.  Use an intermediate file instead.
>
> > +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
>
> This sed command doesn't seem to work on macOS (on Travis CI), and
> causes the test to fail with:
>

It works if rewrite as follows:

    git pack-redundant --all >out &&
    sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \

Without `-E`, MasOS has to write two seperate sed commands, such as:

    git pack-redundant --all >out &&
    sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
    sed -e "s#.*/pack-\(.*\)\.pack#\1#"

Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
unti 4.3), released on May 11 2009.  I prefer the `-E` version.

>
> Minor nit: 'git pack-redundant' prints one filename per line, so the
> 'g' at the end of the 's###g' is not necessary.
>
> > +             sort -u | \
> > +             while read p; do eval echo "\${P$p}"; done | \
> > +             sort > actual && \
>
> Style nit: no space between redirection operator and filename

Will do.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-10  7:11           ` Johannes Sixt
  2019-01-10 11:57           ` SZEDER Gábor
  1 sibling, 0 replies; 43+ messages in thread
From: Johannes Sixt @ 2019-01-10  7:11 UTC (permalink / raw)
  To: Jiang Xin
  Cc: SZEDER Gábor, Sun Chao, Git List, Junio C Hamano, Jiang Xin

Am 10.01.19 um 04:28 schrieb Jiang Xin:
> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>> Use something like
>>
>>    find .git/objects -type f | grep -v pack >out &&
>>    test_must_be_empty out
>>
>> instead, so we get an informative error message on failure.
> 
> if `grep -v pack` return empty output, it will return error, so
> I will use `sed -e "/objects\/pack\//d" >out` instead.

So, you could even write this as

	find .git/objects -type f >out &&
	! grep -v pack out	# must be empty
or
	! find .git/objects -type f | grep -v pack

if you want to be terse.

-- Hannes

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
  2019-01-10  7:11           ` Johannes Sixt
@ 2019-01-10 11:57           ` SZEDER Gábor
  2019-01-10 12:25             ` Torsten Bögershausen
                               ` (3 more replies)
  1 sibling, 4 replies; 43+ messages in thread
From: SZEDER Gábor @ 2019-01-10 11:57 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On Thu, Jan 10, 2019 at 11:28:34AM +0800, Jiang Xin wrote:
> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
> > > +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
> >
> > This sed command doesn't seem to work on macOS (on Travis CI), and
> > causes the test to fail with:
> >
> 
> It works if rewrite as follows:
> 
>     git pack-redundant --all >out &&
>     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
> 
> Without `-E`, MasOS has to write two seperate sed commands, such as:
> 
>     git pack-redundant --all >out &&
>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"
> 
> Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
> unti 4.3), released on May 11 2009.  I prefer the `-E` version.

Is 'sed -E' portable enough, e.g. to the various BSDs, Solaris, and
whatnot?  I don't know, but POSIX doesn't mention it, there is not a
single instance of it in our current codebase, and it appears that
we've never used it before, either.  OTOH,
't/check-non-portable-shell.pl' doesn't catch it as non-portable
construct...



^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v5 0/5] pack-redundant: new algorithm to find min packs
  2019-01-09 16:47         ` SZEDER Gábor
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
                               ` (5 more replies)
  2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
                             ` (4 subsequent siblings)
  5 siblings, 6 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

> Sun Chao (my former colleague at Huawei) found a bug of
> git-pack-redundant.  If there are too many packs and many of them
> overlap
> each other, running `git pack-redundant --all` will exhaust all memories
> and the process will be killed by kernel.
> 
> There is a script in commit log of commit 2/3, which can be used to
> create a repository with lots of redundant packs. Running `git
> pack-redundant --all` in it can reproduce this issue.

SZEDER reported that t5233 won't pass for MacOS. See solution in patch
4/5.

Changes since reroll v4:

* Rewrite t5323, add more test cases.
* Add two new patches, one for refactor, and another changed sorting
  method and fixed t5323 for the new algorithm.

Range diff with sc/pack-redundant feature branch:

    1:  702267a888 < -:  ---------- t5323: test cases for git-pack-redundant
    -:  ---------- > 1:  40fea5d67f t5323: test cases for git-pack-redundant
    2:  c4b133d858 = 2:  50cd5a5b47 pack-redundant: new algorithm to find min packs
    -:  ---------- > 3:  6338c6fad4 pack-redundant: rename pack_list.all_objects
    -:  ---------- > 4:  734f4d8a8b pack-redundant: consistent sort method
    3:  2351d7e8b5 ! 5:  b7ccdea1ad pack-redundant: remove unused functions
        @@ -13,7 +13,7 @@
          --- a/builtin/pack-redundant.c
          +++ b/builtin/pack-redundant.c
         @@
        - 	struct llist *all_objects;
        + 	size_t all_objects_size;
          } *local_packs = NULL, *altodb_packs = NULL;
          
         -struct pll {
        @@ -105,7 +105,7 @@
         -	diff = llist_copy(list);
         -
         -	while (pl) {
        --		llist_sorted_difference_inplace(diff, pl->all_objects);
        +-		llist_sorted_difference_inplace(diff, pl->remaining_objects);
         -		if (diff->size == 0) { /* we're done */
         -			llist_free(diff);
         -			return 1;


Jiang Xin (3):
  t5323: test cases for git-pack-redundant
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 221 +++++++++++++++-----------------------
 t/t5323-pack-redundant.sh | 157 +++++++++++++++++++++++++++
 2 files changed, 242 insertions(+), 136 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh


-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-10 21:11             ` Junio C Hamano
  2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
                             ` (3 subsequent siblings)
  5 siblings, 1 reply; 43+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
---
 t/t5323-pack-redundant.sh | 157 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..7410426dee
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,157 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git pack-redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	parent=
+	for name in A B C D E F G H I J K L M N O P Q R
+	do
+		test_tick &&
+		T=$(git write-tree) &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git commit-tree $T)
+		else
+			oid=$(echo $name | git commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid ||
+		return 1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_pack_1()
+{
+	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P1=P1:$P1
+}
+
+create_pack_2()
+{
+	P2=$(cd .git/objects/pack; printf "$B\n$C\n$D\n$E\n$G\n$H\n$I\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P2=P2:$P2
+}
+
+create_pack_3()
+{
+	P3=$(cd .git/objects/pack; printf "$F\n$I\n$J\n$K\n$L\n$M\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P3=P3:$P3
+}
+
+create_pack_4()
+{
+	P4=$(cd .git/objects/pack; printf "$J\n$K\n$L\n$M\n$P\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P4=P4:$P4
+}
+
+create_pack_5()
+{
+	P5=$(cd .git/objects/pack; printf "$G\n$H\n$N\n$O\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P5=P5:$P5
+}
+
+create_pack_6()
+{
+	P6=$(cd .git/objects/pack; printf "$N\n$O\n$Q\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P6=P6:$P6
+}
+
+create_pack_7()
+{
+	P7=$(cd .git/objects/pack; printf "$P\n$Q\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P7=P7:$P7
+}
+
+create_pack_8()
+{
+	P8=$(cd .git/objects/pack; printf "$A\n" | git pack-objects pack 2>/dev/null) &&
+	eval P$P8=P8:$P8
+}
+
+test_expect_success 'setup' '
+	create_commits
+'
+
+test_expect_success 'no redundant packs' '
+	create_pack_1 && create_pack_2 && create_pack_3 &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'create pack 4, 5' '
+	create_pack_4 && create_pack_5
+'
+
+cat >expected <<EOF
+P2:$P2
+EOF
+
+test_expect_success 'one of pack-2/pack-3 is redundant' '
+	git pack-redundant --all >out &&
+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort >actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 6, 7' '
+	create_pack_6 && create_pack_7
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+EOF
+
+test_expect_success 'pack 2, 4, and 6 are redundant' '
+	git pack-redundant --all >out &&
+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort >actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 8' '
+	create_pack_8
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+P8:$P8
+EOF
+
+test_expect_success 'pack-8, subset of pack-1, is also redundant' '
+	git pack-redundant --all >out &&
+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort >actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_done
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v5 2/5] pack-redundant: new algorithm to find min packs
  2019-01-09 16:47         ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-11  1:19             ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
                             ` (2 subsequent siblings)
  5 siblings, 1 reply; 43+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Johannes Sixt, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_uniqe packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 109 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects
  2019-01-09 16:47         ` SZEDER Gábor
                             ` (2 preceding siblings ...)
  2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
  2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

New algorithm uses `pack_list.all_objects` to track remaining objects,
so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..56591d283f 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 struct pll {
@@ -346,7 +346,7 @@ static int is_superset(struct pack_list *pl, struct llist *list)
 	diff = llist_copy(list);
 
 	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
+		llist_sorted_difference_inplace(diff, pl->remaining_objects);
 		if (diff->size == 0) { /* we're done */
 			llist_free(diff);
 			return 1;
@@ -425,8 +425,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -436,7 +436,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -480,7 +480,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -498,20 +498,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -526,7 +526,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -537,7 +537,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -563,10 +563,10 @@ static void scan_alt_odb_packs(void)
 		local = local_packs;
 		while (local) {
 			llist_sorted_difference_inplace(local->unique_objects,
-							alt->all_objects);
+							alt->remaining_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
 		alt = alt->next;
 	}
 }
@@ -581,7 +581,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -590,11 +590,11 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v5 4/5] pack-redundant: consistent sort method
  2019-01-09 16:47         ` SZEDER Gábor
                             ` (3 preceding siblings ...)
  2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  2019-01-10 20:05             ` SZEDER Gábor
  2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
  5 siblings, 1 reply; 43+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Jiang Xin, Johannes Sixt

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 has different test result on MacOS.
This is because `cmp_pack_list_reverse` cannot give identical result
when two pack being sorted has the same size of remaining_objects.

Changes to the sorting function will make consistent test result for
t5323.

The new algorithm to find redundant packs is a trade-off to save memory
resources, and the result of it may be different with old one, and may
be not the best result sometimes.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c  | 22 +++++++++++++++-------
 t/t5323-pack-redundant.sh |  2 +-
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 56591d283f..e9d2586e2e 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 struct pll {
@@ -421,16 +422,22 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	/* if have the same remaining_objects, big pack first */
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+
+	/* sort according to remaining objects, more remaining objects first */
+	if (pl_a->remaining_objects->size < pl_b->remaining_objects->size)
 		return 1;
 	else
 		return -1;
@@ -451,7 +458,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -593,6 +600,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	/* this list will be pruned in cmp_two_packs later */
 	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 7410426dee..2a09ff1bfb 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -90,7 +90,7 @@ test_expect_success 'create pack 4, 5' '
 '
 
 cat >expected <<EOF
-P2:$P2
+P3:$P3
 EOF
 
 test_expect_success 'one of pack-2/pack-3 is redundant' '
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v5 5/5] pack-redundant: remove unused functions
  2019-01-09 16:47         ` SZEDER Gábor
                             ` (4 preceding siblings ...)
  2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
@ 2019-01-10 12:01           ` Jiang Xin
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-10 12:01 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor, Sun Chao
  Cc: Johannes Sixt, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ----------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index e9d2586e2e..dd71fdd435 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -36,11 +36,6 @@ static struct pack_list {
 	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -64,15 +59,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -286,78 +272,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->remaining_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.1.101.gc01fadde4e


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10 11:57           ` SZEDER Gábor
@ 2019-01-10 12:25             ` Torsten Bögershausen
  2019-01-10 17:36             ` Junio C Hamano
                               ` (2 subsequent siblings)
  3 siblings, 0 replies; 43+ messages in thread
From: Torsten Bögershausen @ 2019-01-10 12:25 UTC (permalink / raw)
  To: SZEDER Gábor, Jiang Xin
  Cc: Sun Chao, Git List, Junio C Hamano, Jiang Xin

On 10.01.19 12:57, SZEDER Gábor wrote:
> On Thu, Jan 10, 2019 at 11:28:34AM +0800, Jiang Xin wrote:
>> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>>>> +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
>>>
>>> This sed command doesn't seem to work on macOS (on Travis CI), and
>>> causes the test to fail with:
>>>
>>
>> It works if rewrite as follows:
>>
>>     git pack-redundant --all >out &&
>>     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
>>
>> Without `-E`, MasOS has to write two seperate sed commands, such as:
>>
>>     git pack-redundant --all >out &&
>>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"
>>
>> Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
>> unti 4.3), released on May 11 2009.  I prefer the `-E` version.
> 
> Is 'sed -E' portable enough, e.g. to the various BSDs, Solaris, and
> whatnot?  I don't know, but POSIX doesn't mention it, there is not a
> single instance of it in our current codebase, and it appears that
> we've never used it before, either.  OTOH,

If we can use "two seperate sed commands" i would (really) prefer to so,
to avoid "sed -E".
My conclusion is that it is not portable enough.
> 't/check-non-portable-shell.pl' doesn't catch it as non-portable
> construct...

Good point.
Actually that script only checks "known non-portable" options.
Every time somebody finds a non-portable option, we update it.
A growing blacklist, so to say.
May be we should have a white list instead.




^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v3 1/3] t5323: test cases for git-pack-redundant
  2019-01-10 11:57           ` SZEDER Gábor
  2019-01-10 12:25             ` Torsten Bögershausen
@ 2019-01-10 17:36             ` Junio C Hamano
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
  2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
  3 siblings, 0 replies; 43+ messages in thread
From: Junio C Hamano @ 2019-01-10 17:36 UTC (permalink / raw)
  To: SZEDER Gábor; +Cc: Jiang Xin, Sun Chao, Git List, Jiang Xin

SZEDER Gábor <szeder.dev@gmail.com> writes:

>> Without `-E`, MasOS has to write two seperate sed commands, such as:
>> 
>>     git pack-redundant --all >out &&
>>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"

Two commands, perhaps, but does it have to be two separate sed
processes piped together?  Why won't something like this work?

	sed -e 's|.*/pack-\([0-9a-f]*\)\.idx$|\1' \
	-e 's|.*/pack-\([0-9a-f]*\)\.pack$|\1'


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v5 4/5] pack-redundant: consistent sort method
  2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
@ 2019-01-10 20:05             ` SZEDER Gábor
  0 siblings, 0 replies; 43+ messages in thread
From: SZEDER Gábor @ 2019-01-10 20:05 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Junio C Hamano, Git List, Sun Chao, Jiang Xin, Johannes Sixt

On Thu, Jan 10, 2019 at 08:01:41PM +0800, Jiang Xin wrote:
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index 56591d283f..e9d2586e2e 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c

> @@ -421,16 +422,22 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
>  	return ret;
>  }
>  
> -static int cmp_pack_list_reverse(const void *a, const void *b)
> +static int cmp_remaining_objects(const void *a, const void *b)
>  {
>  	struct pack_list *pl_a = *((struct pack_list **)a);
>  	struct pack_list *pl_b = *((struct pack_list **)b);
> -	size_t sz_a = pl_a->remaining_objects->size;
> -	size_t sz_b = pl_b->remaining_objects->size;
>  
> -	if (sz_a == sz_b)
> -		return 0;
> -	else if (sz_a < sz_b)
> +	/* if have the same remaining_objects, big pack first */
> +	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
> +		if (pl_a->all_objects_size == pl_b->all_objects_size)
> +			return 0;
> +		else if (pl_a->all_objects_size < pl_b->all_objects_size)
> +			return 1;
> +		else
> +			return -1;

My compiler complains about the above nested if statements:

  builtin/pack-redundant.c: In function ‘cmp_remaining_objects’:
  builtin/pack-redundant.c:345:5: error: suggest explicit braces to avoid ambiguous ‘else’ [-Werror=parentheses]
    if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
       ^
  cc1: all warnings being treated as errors
  Makefile:2302: recipe for target 'builtin/pack-redundant.o' failed

After adding a pair of {} to the outer if statement
't5323-pack-redundant.sh' passed successfully even on macOS (on Travis
CI).


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-10 21:11             ` Junio C Hamano
  2019-01-11  1:59               ` Jiang Xin
  0 siblings, 1 reply; 43+ messages in thread
From: Junio C Hamano @ 2019-01-10 21:11 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin, Johannes Sixt

Jiang Xin <worldhello.net@gmail.com> writes:

> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>
> Add test cases for git pack-redundant to validate new algorithm for git
> pack-redundant.
>
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
> ---
>  t/t5323-pack-redundant.sh | 157 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 157 insertions(+)
>  create mode 100755 t/t5323-pack-redundant.sh
>
> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> new file mode 100755
> index 0000000000..7410426dee
> --- /dev/null
> +++ b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,157 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2018 Jiang Xin
> +#
> +
> +test_description='git pack-redundant test'
> +
> +. ./test-lib.sh
> +
> +create_commits()
> +{

Style (see Documentation/CodingGuidelines).

> +	parent=
> +	for name in A B C D E F G H I J K L M N O P Q R
> +	do
> +		test_tick &&
> +		T=$(git write-tree) &&
> +		if test -z "$parent"
> +		then
> +			oid=$(echo $name | git commit-tree $T)
> +		else
> +			oid=$(echo $name | git commit-tree -p $parent $T)
> +		fi &&
> +		eval $name=$oid &&
> +		parent=$oid ||
> +		return 1
> +	done
> +	git update-ref refs/heads/master $M
> +}
> +
> +create_pack_1()
> +{
> +	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&

Yikes.  Can't "git pack-objects" get the input directly without
overlong printf, something along the lines of...

	P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
		$A
		$B
		$C
		...
		$R
		EOF
	)

> +	eval P$P1=P1:$P1
> +}
> ...
> +test_expect_success 'setup' '
> +	create_commits
> +'
> +
> +test_expect_success 'no redundant packs' '
> +	create_pack_1 && create_pack_2 && create_pack_3 &&
> +	git pack-redundant --all >out &&
> +	test_must_be_empty out
> +'
> +
> +test_expect_success 'create pack 4, 5' '
> +	create_pack_4 && create_pack_5
> +'
> +
> +cat >expected <<EOF
> +P2:$P2
> +EOF

Move this to the next "expect success" block?

> +test_expect_success 'one of pack-2/pack-3 is redundant' '
> +	git pack-redundant --all >out &&
> +	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \

How portable is "sed -E" (it is not even in POSIX.1)?  Wouldn't it
be easier to work with to have two "-e" fed to a single sed
invocation instead?

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v5 2/5] pack-redundant: new algorithm to find min packs
  2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-11  1:19             ` SZEDER Gábor
  0 siblings, 0 replies; 43+ messages in thread
From: SZEDER Gábor @ 2019-01-11  1:19 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Junio C Hamano, Git List, Sun Chao, Johannes Sixt

On Thu, Jan 10, 2019 at 08:01:39PM +0800, Jiang Xin wrote:
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index cf9a9aabd4..3655cc7dc6 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c

> @@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
>  		pl = pl->next;
>  	}
>  
> +	*min = unique;
> +
>  	/* return if there are no objects missing from the unique set */
>  	if (missing->size == 0) {
> -		*min = unique;
>  		free(missing);
>  		return;
>  	}
>  
> -	/* find the permutations which contain all missing objects */
> -	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
> -		perm_all = perm = get_permutations(non_unique, n);
> -		while (perm) {
> -			if (is_superset(perm->pl, missing)) {
> -				new_perm = xmalloc(sizeof(struct pll));
> -				memcpy(new_perm, perm, sizeof(struct pll));
> -				new_perm->next = perm_ok;
> -				perm_ok = new_perm;
> -			}
> -			perm = perm->next;
> -		}
> -		if (perm_ok)
> -			break;
> -		pll_free(perm_all);
> -	}

Please make sure that all commits in the patch series can be build
cleanly without any warnings (with '-Werror' or preferably with 'make
DEVELOPER=1') and pass the test suite.  This is important, because
unbuildable commits will cause trouble later on, when e.g. 'git
bisect' happens to pick such a commit.

In this case, the removal of the above loop removes all callsites of
the static functions get_permutations(), is_superset(), and
pll_free(), resulting the following compiler error:

  builtin/pack-redundant.c: At top level:
  builtin/pack-redundant.c:289:13: error: ‘pll_free’ defined but not used [-Werror=unused-function]
   static void pll_free(struct pll *l)
               ^
  builtin/pack-redundant.c:309:21: error: ‘get_permutations’ defined but not used [-Werror=unused-function]
   static struct pll * get_permutations(struct pack_list *list, int n)
                       ^
  builtin/pack-redundant.c:343:12: error: ‘is_superset’ defined but not used [-Werror=unused-function]
   static int is_superset(struct pack_list *pl, struct llist *list)
              ^

I see that the last patch in this series removes those three
unused functions, but that patch should be squashed into this one to
keep Git buildable with '-Werror' or DEVELOPER=1.

Furthermore, after building this patch (without '-Werror'), several
tests in 't5323-pack-redundant.sh' fail.  To avoid the test failure I
think the fourth patch ensuring a consistent sort order should be
squashed in as well.



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-10 21:11             ` Junio C Hamano
@ 2019-01-11  1:59               ` Jiang Xin
  2019-01-11 18:00                 ` Junio C Hamano
  0 siblings, 1 reply; 43+ messages in thread
From: Jiang Xin @ 2019-01-11  1:59 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin, Johannes Sixt

Junio C Hamano <gitster@pobox.com> 于2019年1月11日周五 上午5:11写道:
>
> Jiang Xin <worldhello.net@gmail.com> writes:
>
> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> > +create_commits()
> > +{
>
> Style (see Documentation/CodingGuidelines).

OK, parenthese after function name.
>
> > +create_pack_1()
> > +{
> > +     P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
>
> Yikes.  Can't "git pack-objects" get the input directly without
> overlong printf, something along the lines of...
>
>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
>                 $A
>                 $B
>                 $C
>                 ...
>                 $R
>                 EOF
>         )

Find that no space before <OID>,  because git-pack-objects not allow that,
and mached parentheses should in the same line.
So Will write like this:

    create_pack_1() {
            P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
    $T
    $A
    $B
    $R
    EOF
            eval P$P1=P1:$P1
    }

> > +test_expect_success 'no redundant packs' '
> > +     create_pack_1 && create_pack_2 && create_pack_3 &&
> > +     git pack-redundant --all >out &&
> > +     test_must_be_empty out
> > +'
> > +
> > +test_expect_success 'create pack 4, 5' '
> > +     create_pack_4 && create_pack_5
> > +'
> > +
> > +cat >expected <<EOF
> > +P2:$P2
> > +EOF
>
> Move this to the next "expect success" block?

$P4 and $P5 are defined after calling `create_pack_4` and `create_pack_5`,
so create pack functions should be called before write `expected` file,
if puts $P4 and/or $P5 in the expected file.

For this case, $P4 and $P5 not in expected file, we can move
create_pack_4 and 5 to the following test_expect_success block,
but the new algorithm may change the expected file.
>
> > +test_expect_success 'one of pack-2/pack-3 is redundant' '
> > +     git pack-redundant --all >out &&
> > +     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
>
> How portable is "sed -E" (it is not even in POSIX.1)?  Wouldn't it
> be easier to work with to have two "-e" fed to a single sed
> invocation instead?

will fix using two '-e' commands.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v5 1/5] t5323: test cases for git-pack-redundant
  2019-01-11  1:59               ` Jiang Xin
@ 2019-01-11 18:00                 ` Junio C Hamano
  0 siblings, 0 replies; 43+ messages in thread
From: Junio C Hamano @ 2019-01-11 18:00 UTC (permalink / raw)
  To: Jiang Xin; +Cc: Git List, SZEDER Gábor, Sun Chao, Jiang Xin, Johannes Sixt

Jiang Xin <worldhello.net@gmail.com> writes:

> Junio C Hamano <gitster@pobox.com> 于2019年1月11日周五 上午5:11写道:
>>
>> Jiang Xin <worldhello.net@gmail.com> writes:
>>
>> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>> > +create_commits()
>> > +{
>>
>> Style (see Documentation/CodingGuidelines).
>
> OK, parenthese after function name.
>>
>> > +create_pack_1()
>> > +{
>> > +     P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
>>
>> Yikes.  Can't "git pack-objects" get the input directly without
>> overlong printf, something along the lines of...
>>
>>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
>>                 $A
>>                 $B
>>                 $C
>>                 ...
>>                 $R
>>                 EOF
>>         )
>
> Find that no space before <OID>,  because git-pack-objects not allow that,
> and mached parentheses should in the same line.
> So Will write like this:
>
>     create_pack_1() {
>             P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
>     $T

Isn't the whole point of <<-EOF (notice the leading dash) to allow
us to indent the here-doc with horizontal tab?


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v6 0/5] pack-redundant: new algorithm to find min packs
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-12  9:17             ` " Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
                               ` (4 subsequent siblings)
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor; +Cc: Jiang Xin, Sun Chao

> Sun Chao (my former colleague at Huawei) found a bug of
> git-pack-redundant.  If there are too many packs and many of them
> overlap each other, running `git pack-redundant --all` will
> exhaust all memories and the process will be killed by kernel.
> 
> There is a script in commit log of commit 2/5, which can be used to
> create a repository with lots of redundant packs. Running `git
> pack-redundant --all` in it can reproduce this issue.


Junio C Hamano <gitster@pobox.com> 于2019年1月12日周六 上午2:00写道:
> >> Yikes.  Can't "git pack-objects" get the input directly without
> >> overlong printf, something along the lines of...
> >>
> >>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
> >>                 $A
> >>                 $B
> >>                 $C
> >>                 ...
> >>                 $R
> >>                 EOF
> >>         )
> >
> > Find that no space before <OID>,  because git-pack-objects not allow that,
> > and mached parentheses should in the same line.
> > So Will write like this:
> >
> >     create_pack_1() {
> >             P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
> >     $T
>
> Isn't the whole point of <<-EOF (notice the leading dash) to allow
> us to indent the here-doc with horizontal tab?

The reason that indents are not stripped even with `<<-EOF` is I mixed
tabs and spaces to make a better align.

If put the heredoc outside the parentheses, it will failed on MacOS, so
use the syntax Junio previously suggested.


SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月11日周五 上午9:19写道:
> I see that the last patch in this series removes those three
> unused functions, but that patch should be squashed into this one to
> keep Git buildable with '-Werror' or DEVELOPER=1.
>
> Furthermore, after building this patch (without '-Werror'), several
> tests in 't5323-pack-redundant.sh' fail.  To avoid the test failure I
> think the fourth patch ensuring a consistent sort order should be
> squashed in as well.
Patch 3/5 to 5/5 can be squashed to patch 2/5.


## Changes since reroll v5


1:  40fea5d67f ! 1:  7e4e703083 t5323: test cases for git-pack-redundant
    @@ -22,8 +22,7 @@
     +
     +. ./test-lib.sh
     +
    -+create_commits()
    -+{
    ++create_commits() {
     +	parent=
     +	for name in A B C D E F G H I J K L M N O P Q R
     +	do
    @@ -39,54 +38,98 @@
     +		parent=$oid ||
     +		return 1
     +	done
    -+	git update-ref refs/heads/master $M
    ++	git update-ref refs/heads/master $R
     +}
     +
    -+create_pack_1()
    -+{
    -+	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_1() {
    ++	P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$T
    ++		$A
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$F
    ++		$R
    ++		EOF
    ++	) &&
     +	eval P$P1=P1:$P1
     +}
     +
    -+create_pack_2()
    -+{
    -+	P2=$(cd .git/objects/pack; printf "$B\n$C\n$D\n$E\n$G\n$H\n$I\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_2() {
    ++	P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$G
    ++		$H
    ++		$I
    ++		EOF
    ++	) &&
     +	eval P$P2=P2:$P2
     +}
     +
    -+create_pack_3()
    -+{
    -+	P3=$(cd .git/objects/pack; printf "$F\n$I\n$J\n$K\n$L\n$M\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_3() {
    ++	P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$F
    ++		$I
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		EOF
    ++	) &&
     +	eval P$P3=P3:$P3
     +}
     +
    -+create_pack_4()
    -+{
    -+	P4=$(cd .git/objects/pack; printf "$J\n$K\n$L\n$M\n$P\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_4() {
    ++	P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		$P
    ++		EOF
    ++	) &&
     +	eval P$P4=P4:$P4
     +}
     +
    -+create_pack_5()
    -+{
    -+	P5=$(cd .git/objects/pack; printf "$G\n$H\n$N\n$O\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_5() {
    ++	P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$G
    ++		$H
    ++		$N
    ++		$O
    ++		EOF
    ++	) &&
     +	eval P$P5=P5:$P5
     +}
     +
    -+create_pack_6()
    -+{
    -+	P6=$(cd .git/objects/pack; printf "$N\n$O\n$Q\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_6() {
    ++	P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$N
    ++		$O
    ++		$Q
    ++		EOF
    ++	) &&
     +	eval P$P6=P6:$P6
     +}
     +
    -+create_pack_7()
    -+{
    -+	P7=$(cd .git/objects/pack; printf "$P\n$Q\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_7() {
    ++	P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$P
    ++		$Q
    ++		EOF
    ++	) &&
     +	eval P$P7=P7:$P7
     +}
     +
    -+create_pack_8()
    -+{
    -+	P8=$(cd .git/objects/pack; printf "$A\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_8() {
    ++	P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$A
    ++		EOF
    ++	) &&
     +	eval P$P8=P8:$P8
     +}
     +
    @@ -110,10 +153,12 @@
     +
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -121,6 +166,7 @@
     +	create_pack_6 && create_pack_7
     +'
     +
    ++# Only after calling create_pack_6, we can use $P6 variable.
     +cat >expected <<EOF
     +P2:$P2
     +P4:$P4
    @@ -129,10 +175,12 @@
     +
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -147,24 +195,26 @@
     +P8:$P8
     +EOF
     +
    -+test_expect_success 'pack-8, subset of pack-1, is also redundant' '
    ++test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    -+test_expect_success 'clear loose objects' '
    ++test_expect_success 'clean loose objects' '
     +	git prune-packed &&
     +	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
     +	test_must_be_empty out
     +'
     +
    -+test_expect_success 'remove redundant packs' '
    ++test_expect_success 'remove redundant packs and pass fsck' '
     +	git pack-redundant --all | xargs rm &&
    -+	git fsck &&
    ++	git fsck --no-progress &&
     +	git pack-redundant --all >out &&
     +	test_must_be_empty out
     +'
2:  50cd5a5b47 ! 2:  51a9c2d8a5 pack-redundant: new algorithm to find min packs
    @@ -67,7 +67,7 @@
         Original PR and discussions: https://github.com/jiangxin/git/pull/25
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    -    Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
    +    Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
         Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
5:  b7ccdea1ad ! 3:  c5eb21c23c pack-redundant: remove unused functions
    @@ -6,14 +6,14 @@
         `pll_free`, etc.
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    -    Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
    +    Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
         Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
      --- a/builtin/pack-redundant.c
      +++ b/builtin/pack-redundant.c
     @@
    - 	size_t all_objects_size;
    + 	struct llist *all_objects;
      } *local_packs = NULL, *altodb_packs = NULL;
      
     -struct pll {
    @@ -105,7 +105,7 @@
     -	diff = llist_copy(list);
     -
     -	while (pl) {
    --		llist_sorted_difference_inplace(diff, pl->remaining_objects);
    +-		llist_sorted_difference_inplace(diff, pl->all_objects);
     -		if (diff->size == 0) { /* we're done */
     -			llist_free(diff);
     -			return 1;
3:  6338c6fad4 ! 4:  1acdd0af1e pack-redundant: rename pack_list.all_objects
    @@ -18,16 +18,7 @@
     +	struct llist *remaining_objects;
      } *local_packs = NULL, *altodb_packs = NULL;
      
    - struct pll {
    -@@
    - 	diff = llist_copy(list);
    - 
    - 	while (pl) {
    --		llist_sorted_difference_inplace(diff, pl->all_objects);
    -+		llist_sorted_difference_inplace(diff, pl->remaining_objects);
    - 		if (diff->size == 0) { /* we're done */
    - 			llist_free(diff);
    - 			return 1;
    + static struct llist_item *free_nodes;
     @@
      {
      	struct pack_list *pl_a = *((struct pack_list **)a);
4:  734f4d8a8b ! 5:  306d515cda pack-redundant: consistent sort method
    @@ -26,7 +26,7 @@
     +	size_t all_objects_size;
      } *local_packs = NULL, *altodb_packs = NULL;
      
    - struct pll {
    + static struct llist_item *free_nodes;
     @@
      	return ret;
      }
    @@ -42,20 +42,24 @@
     -	if (sz_a == sz_b)
     -		return 0;
     -	else if (sz_a < sz_b)
    -+	/* if have the same remaining_objects, big pack first */
    -+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
    ++	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
    ++		/* have the same remaining_objects, big pack first */
     +		if (pl_a->all_objects_size == pl_b->all_objects_size)
     +			return 0;
     +		else if (pl_a->all_objects_size < pl_b->all_objects_size)
     +			return 1;
     +		else
     +			return -1;
    -+
    -+	/* sort according to remaining objects, more remaining objects first */
    -+	if (pl_a->remaining_objects->size < pl_b->remaining_objects->size)
    ++	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
    ++		/* sort by remaining objects, more objects first */
      		return 1;
    - 	else
    +-	else
    ++	} else {
      		return -1;
    ++	}
    + }
    + 
    + /* Sort pack_list, greater size of remaining_objects first */
     @@
      	for (n = 0, p = *pl; p; p = p->next)
      		ary[n++] = p;

## This reroll has the following commits:

Jiang Xin (3):
  t5323: test cases for git-pack-redundant
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 221 +++++++++++++++-----------------------
 t/t5323-pack-redundant.sh | 207 +++++++++++++++++++++++++++++++++++
 2 files changed, 292 insertions(+), 136 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v6 1/5] t5323: test cases for git-pack-redundant
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Reviewed-by: SZEDER Gábor <szeder.dev@gmail.com>
---
 t/t5323-pack-redundant.sh | 207 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..407838f0e8
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,207 @@
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git pack-redundant test'
+
+. ./test-lib.sh
+
+create_commits() {
+	parent=
+	for name in A B C D E F G H I J K L M N O P Q R
+	do
+		test_tick &&
+		T=$(git write-tree) &&
+		if test -z "$parent"
+		then
+			oid=$(echo $name | git commit-tree $T)
+		else
+			oid=$(echo $name | git commit-tree -p $parent $T)
+		fi &&
+		eval $name=$oid &&
+		parent=$oid ||
+		return 1
+	done
+	git update-ref refs/heads/master $R
+}
+
+create_pack_1() {
+	P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$T
+		$A
+		$B
+		$C
+		$D
+		$E
+		$F
+		$R
+		EOF
+	) &&
+	eval P$P1=P1:$P1
+}
+
+create_pack_2() {
+	P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$B
+		$C
+		$D
+		$E
+		$G
+		$H
+		$I
+		EOF
+	) &&
+	eval P$P2=P2:$P2
+}
+
+create_pack_3() {
+	P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$F
+		$I
+		$J
+		$K
+		$L
+		$M
+		EOF
+	) &&
+	eval P$P3=P3:$P3
+}
+
+create_pack_4() {
+	P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$J
+		$K
+		$L
+		$M
+		$P
+		EOF
+	) &&
+	eval P$P4=P4:$P4
+}
+
+create_pack_5() {
+	P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$G
+		$H
+		$N
+		$O
+		EOF
+	) &&
+	eval P$P5=P5:$P5
+}
+
+create_pack_6() {
+	P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$N
+		$O
+		$Q
+		EOF
+	) &&
+	eval P$P6=P6:$P6
+}
+
+create_pack_7() {
+	P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$P
+		$Q
+		EOF
+	) &&
+	eval P$P7=P7:$P7
+}
+
+create_pack_8() {
+	P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
+		$A
+		EOF
+	) &&
+	eval P$P8=P8:$P8
+}
+
+test_expect_success 'setup' '
+	create_commits
+'
+
+test_expect_success 'no redundant packs' '
+	create_pack_1 && create_pack_2 && create_pack_3 &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'create pack 4, 5' '
+	create_pack_4 && create_pack_5
+'
+
+cat >expected <<EOF
+P2:$P2
+EOF
+
+test_expect_success 'one of pack-2/pack-3 is redundant' '
+	git pack-redundant --all >out &&
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
+		sort -u |
+		while read p; do eval echo "\${P$p}"; done |
+		sort >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 6, 7' '
+	create_pack_6 && create_pack_7
+'
+
+# Only after calling create_pack_6, we can use $P6 variable.
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+EOF
+
+test_expect_success 'pack 2, 4, and 6 are redundant' '
+	git pack-redundant --all >out &&
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
+		sort -u |
+		while read p; do eval echo "\${P$p}"; done |
+		sort >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'create pack 8' '
+	create_pack_8
+'
+
+cat >expected <<EOF
+P2:$P2
+P4:$P4
+P6:$P6
+P8:$P8
+EOF
+
+test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
+	git pack-redundant --all >out &&
+	sed \
+		-e "s#.*/pack-\(.*\)\.idx#\1#" \
+		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
+		sort -u |
+		while read p; do eval echo "\${P$p}"; done |
+		sort >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'clean loose objects' '
+	git prune-packed &&
+	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
+	test_must_be_empty out
+'
+
+test_expect_success 'remove redundant packs and pass fsck' '
+	git pack-redundant --all | xargs rm &&
+	git fsck --no-progress &&
+	git pack-redundant --all >out &&
+	test_must_be_empty out
+'
+
+test_done
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v6 2/5] pack-redundant: new algorithm to find min packs
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
                               ` (2 subsequent siblings)
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Sun Chao, Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

When calling `git pack-redundant --all`, if there are too many local
packs and too many redundant objects within them, the too deep iteration
of `get_permutations` will exhaust all the resources, and the process of
`git pack-redundant` will be killed.

The following script could create a repository with too many redundant
packs, and running `git pack-redundant --all` in the `test.git` repo
will die soon.

    #!/bin/sh

    repo="$(pwd)/test.git"
    work="$(pwd)/test"
    i=1
    max=199

    if test -d "$repo" || test -d "$work"; then
    	echo >&2 "ERROR: '$repo' or '$work' already exist"
    	exit 1
    fi

    git init -q --bare "$repo"
    git --git-dir="$repo" config gc.auto 0
    git --git-dir="$repo" config transfer.unpackLimit 0
    git clone -q "$repo" "$work" 2>/dev/null

    while :; do
        cd "$work"
        echo "loop $i: $(date +%s)" >$i
        git add $i
        git commit -q -sm "loop $i"
        git push -q origin HEAD:master
        printf "\rCreate pack %4d/%d\t" $i $max
        if test $i -ge $max; then break; fi

        cd "$repo"
        git repack -q
        if test $(($i % 2)) -eq 0; then
            git repack -aq
            pack=$(ls -t $repo/objects/pack/*.pack | head -1)
            touch "${pack%.pack}.keep"
        fi
        i=$((i+1))
    done
    printf "\ndone\n"

To get the `min` unique pack list, we can replace the iteration in
`minimize` function with a new algorithm, and this could solve this
issue:

1. Get the unique and non_uniqe packs, add the unique packs to the
   `min` list.

2. Remove the objects of unique packs from non_unique packs, then each
   object left in the non_unique packs will have at least two copies.

3. Sort the non_unique packs by the objects' size, more objects first,
   and add the first non_unique pack to `min` list.

4. Drop the duplicated objects from other packs in the ordered
   non_unique pack list, and repeat step 3.

Original PR and discussions: https://github.com/jiangxin/git/pull/25

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 109 ++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 41 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index cf9a9aabd4..3655cc7dc6 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -421,14 +421,52 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
+static int cmp_pack_list_reverse(const void *a, const void *b)
+{
+	struct pack_list *pl_a = *((struct pack_list **)a);
+	struct pack_list *pl_b = *((struct pack_list **)b);
+	size_t sz_a = pl_a->all_objects->size;
+	size_t sz_b = pl_b->all_objects->size;
+
+	if (sz_a == sz_b)
+		return 0;
+	else if (sz_a < sz_b)
+		return 1;
+	else
+		return -1;
+}
+
+/* Sort pack_list, greater size of all_objects first */
+static void sort_pack_list(struct pack_list **pl)
+{
+	struct pack_list **ary, *p;
+	int i;
+	size_t n = pack_list_size(*pl);
+
+	if (n < 2)
+		return;
+
+	/* prepare an array of packed_list for easier sorting */
+	ary = xcalloc(n, sizeof(struct pack_list *));
+	for (n = 0, p = *pl; p; p = p->next)
+		ary[n++] = p;
+
+	QSORT(ary, n, cmp_pack_list_reverse);
+
+	/* link them back again */
+	for (i = 0; i < n - 1; i++)
+		ary[i]->next = ary[i + 1];
+	ary[n - 1]->next = NULL;
+	*pl = ary[0];
+
+	free(ary);
+}
+
+
 static void minimize(struct pack_list **min)
 {
-	struct pack_list *pl, *unique = NULL,
-		*non_unique = NULL, *min_perm = NULL;
-	struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
-	struct llist *missing;
-	off_t min_perm_size = 0, perm_size;
-	int n;
+	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
+	struct llist *missing, *unique_pack_objects;
 
 	pl = local_packs;
 	while (pl) {
@@ -446,49 +484,37 @@ static void minimize(struct pack_list **min)
 		pl = pl->next;
 	}
 
+	*min = unique;
+
 	/* return if there are no objects missing from the unique set */
 	if (missing->size == 0) {
-		*min = unique;
 		free(missing);
 		return;
 	}
 
-	/* find the permutations which contain all missing objects */
-	for (n = 1; n <= pack_list_size(non_unique) && !perm_ok; n++) {
-		perm_all = perm = get_permutations(non_unique, n);
-		while (perm) {
-			if (is_superset(perm->pl, missing)) {
-				new_perm = xmalloc(sizeof(struct pll));
-				memcpy(new_perm, perm, sizeof(struct pll));
-				new_perm->next = perm_ok;
-				perm_ok = new_perm;
-			}
-			perm = perm->next;
-		}
-		if (perm_ok)
-			break;
-		pll_free(perm_all);
-	}
-	if (perm_ok == NULL)
-		die("Internal error: No complete sets found!");
-
-	/* find the permutation with the smallest size */
-	perm = perm_ok;
-	while (perm) {
-		perm_size = pack_set_bytecount(perm->pl);
-		if (!min_perm_size || min_perm_size > perm_size) {
-			min_perm_size = perm_size;
-			min_perm = perm->pl;
-		}
-		perm = perm->next;
-	}
-	*min = min_perm;
-	/* add the unique packs to the list */
-	pl = unique;
+	unique_pack_objects = llist_copy(all_objects);
+	llist_sorted_difference_inplace(unique_pack_objects, missing);
+
+	/* remove unique pack objects from the non_unique packs */
+	pl = non_unique;
 	while (pl) {
-		pack_list_insert(min, pl);
+		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
 		pl = pl->next;
 	}
+
+	while (non_unique) {
+		/* sort the non_unique packs, greater size of all_objects first */
+		sort_pack_list(&non_unique);
+		if (non_unique->all_objects->size == 0)
+			break;
+
+		pack_list_insert(min, non_unique);
+
+		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+
+		non_unique = non_unique->next;
+	}
 }
 
 static void load_all_objects(void)
@@ -603,7 +629,7 @@ static void load_all(void)
 int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 {
 	int i;
-	struct pack_list *min, *red, *pl;
+	struct pack_list *min = NULL, *red, *pl;
 	struct llist *ignore;
 	struct object_id *oid;
 	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
@@ -664,6 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
+		llist_sorted_difference_inplace(pl->all_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v6 3/5] pack-redundant: remove unused functions
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (2 preceding siblings ...)
  2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Sun Chao, Jiang Xin, Jiang Xin

From: Sun Chao <sunchao9@huawei.com>

Remove unused functions to find `min` packs, such as `get_permutations`,
`pll_free`, etc.

Signed-off-by: Sun Chao <sunchao9@huawei.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-redundant.c | 86 ----------------------------------------
 1 file changed, 86 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 3655cc7dc6..eac23500ee 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -35,11 +35,6 @@ static struct pack_list {
 	struct llist *all_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
-struct pll {
-	struct pll *next;
-	struct pack_list *pl;
-};
-
 static struct llist_item *free_nodes;
 
 static inline void llist_item_put(struct llist_item *item)
@@ -63,15 +58,6 @@ static inline struct llist_item *llist_item_get(void)
 	return new_item;
 }
 
-static void llist_free(struct llist *list)
-{
-	while ((list->back = list->front)) {
-		list->front = list->front->next;
-		llist_item_put(list->back);
-	}
-	free(list);
-}
-
 static inline void llist_init(struct llist **list)
 {
 	*list = xmalloc(sizeof(struct llist));
@@ -285,78 +271,6 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-static void pll_free(struct pll *l)
-{
-	struct pll *old;
-	struct pack_list *opl;
-
-	while (l) {
-		old = l;
-		while (l->pl) {
-			opl = l->pl;
-			l->pl = opl->next;
-			free(opl);
-		}
-		l = l->next;
-		free(old);
-	}
-}
-
-/* all the permutations have to be free()d at the same time,
- * since they refer to each other
- */
-static struct pll * get_permutations(struct pack_list *list, int n)
-{
-	struct pll *subset, *ret = NULL, *new_pll = NULL;
-
-	if (list == NULL || pack_list_size(list) < n || n == 0)
-		return NULL;
-
-	if (n == 1) {
-		while (list) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = NULL;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			list = list->next;
-		}
-		return ret;
-	}
-
-	while (list->next) {
-		subset = get_permutations(list->next, n - 1);
-		while (subset) {
-			new_pll = xmalloc(sizeof(*new_pll));
-			new_pll->pl = subset->pl;
-			pack_list_insert(&new_pll->pl, list);
-			new_pll->next = ret;
-			ret = new_pll;
-			subset = subset->next;
-		}
-		list = list->next;
-	}
-	return ret;
-}
-
-static int is_superset(struct pack_list *pl, struct llist *list)
-{
-	struct llist *diff;
-
-	diff = llist_copy(list);
-
-	while (pl) {
-		llist_sorted_difference_inplace(diff, pl->all_objects);
-		if (diff->size == 0) { /* we're done */
-			llist_free(diff);
-			return 1;
-		}
-		pl = pl->next;
-	}
-	llist_free(diff);
-	return 0;
-}
-
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (3 preceding siblings ...)
  2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

New algorithm uses `pack_list.all_objects` to track remaining objects,
so rename it to `pack_list.remaining_objects`.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index eac23500ee..64eec3e297 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -32,7 +32,7 @@ static struct pack_list {
 	struct pack_list *next;
 	struct packed_git *pack;
 	struct llist *unique_objects;
-	struct llist *all_objects;
+	struct llist *remaining_objects;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -339,8 +339,8 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->all_objects->size;
-	size_t sz_b = pl_b->all_objects->size;
+	size_t sz_a = pl_a->remaining_objects->size;
+	size_t sz_b = pl_b->remaining_objects->size;
 
 	if (sz_a == sz_b)
 		return 0;
@@ -350,7 +350,7 @@ static int cmp_pack_list_reverse(const void *a, const void *b)
 		return -1;
 }
 
-/* Sort pack_list, greater size of all_objects first */
+/* Sort pack_list, greater size of remaining_objects first */
 static void sort_pack_list(struct pack_list **pl)
 {
 	struct pack_list **ary, *p;
@@ -394,7 +394,7 @@ static void minimize(struct pack_list **min)
 	missing = llist_copy(all_objects);
 	pl = unique;
 	while (pl) {
-		llist_sorted_difference_inplace(missing, pl->all_objects);
+		llist_sorted_difference_inplace(missing, pl->remaining_objects);
 		pl = pl->next;
 	}
 
@@ -412,20 +412,20 @@ static void minimize(struct pack_list **min)
 	/* remove unique pack objects from the non_unique packs */
 	pl = non_unique;
 	while (pl) {
-		llist_sorted_difference_inplace(pl->all_objects, unique_pack_objects);
+		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
 		pl = pl->next;
 	}
 
 	while (non_unique) {
-		/* sort the non_unique packs, greater size of all_objects first */
+		/* sort the non_unique packs, greater size of remaining_objects first */
 		sort_pack_list(&non_unique);
-		if (non_unique->all_objects->size == 0)
+		if (non_unique->remaining_objects->size == 0)
 			break;
 
 		pack_list_insert(min, non_unique);
 
-		for (pl = non_unique->next; pl && pl->all_objects->size > 0;  pl = pl->next)
-			llist_sorted_difference_inplace(pl->all_objects, non_unique->all_objects);
+		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0;  pl = pl->next)
+			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
 
 		non_unique = non_unique->next;
 	}
@@ -440,7 +440,7 @@ static void load_all_objects(void)
 
 	while (pl) {
 		hint = NULL;
-		l = pl->all_objects->front;
+		l = pl->remaining_objects->front;
 		while (l) {
 			hint = llist_insert_sorted_unique(all_objects,
 							  l->oid, hint);
@@ -451,7 +451,7 @@ static void load_all_objects(void)
 	/* remove objects present in remote packs */
 	pl = altodb_packs;
 	while (pl) {
-		llist_sorted_difference_inplace(all_objects, pl->all_objects);
+		llist_sorted_difference_inplace(all_objects, pl->remaining_objects);
 		pl = pl->next;
 	}
 }
@@ -477,10 +477,10 @@ static void scan_alt_odb_packs(void)
 		local = local_packs;
 		while (local) {
 			llist_sorted_difference_inplace(local->unique_objects,
-							alt->all_objects);
+							alt->remaining_objects);
 			local = local->next;
 		}
-		llist_sorted_difference_inplace(all_objects, alt->all_objects);
+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
 		alt = alt->next;
 	}
 }
@@ -495,7 +495,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		return NULL;
 
 	l.pack = p;
-	llist_init(&l.all_objects);
+	llist_init(&l.remaining_objects);
 
 	if (open_pack_index(p))
 		return NULL;
@@ -504,11 +504,11 @@ static struct pack_list * add_pack(struct packed_git *p)
 	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
 	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
 	while (off < p->num_objects * step) {
-		llist_insert_back(l.all_objects, (const struct object_id *)(base + off));
+		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
 	/* this list will be pruned in cmp_two_packs later */
-	l.unique_objects = llist_copy(l.all_objects);
+	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
 		return pack_list_insert(&local_packs, &l);
 	else
@@ -604,7 +604,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
 	pl = local_packs;
 	while (pl) {
 		llist_sorted_difference_inplace(pl->unique_objects, ignore);
-		llist_sorted_difference_inplace(pl->all_objects, ignore);
+		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
 		pl = pl->next;
 	}
 
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v6 5/5] pack-redundant: consistent sort method
  2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
                               ` (4 preceding siblings ...)
  2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
@ 2019-01-12  9:17             ` Jiang Xin
  5 siblings, 0 replies; 43+ messages in thread
From: Jiang Xin @ 2019-01-12  9:17 UTC (permalink / raw)
  To: Junio C Hamano, Git List, SZEDER Gábor
  Cc: Jiang Xin, Sun Chao, Jiang Xin

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

SZEDER reported that test case t5323 has different test result on MacOS.
This is because `cmp_pack_list_reverse` cannot give identical result
when two pack being sorted has the same size of remaining_objects.

Changes to the sorting function will make consistent test result for
t5323.

The new algorithm to find redundant packs is a trade-off to save memory
resources, and the result of it may be different with old one, and may
be not the best result sometimes.  Update t5323 for the new algorithm.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/pack-redundant.c  | 24 ++++++++++++++++--------
 t/t5323-pack-redundant.sh |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 64eec3e297..4448e58a10 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -33,6 +33,7 @@ static struct pack_list {
 	struct packed_git *pack;
 	struct llist *unique_objects;
 	struct llist *remaining_objects;
+	size_t all_objects_size;
 } *local_packs = NULL, *altodb_packs = NULL;
 
 static struct llist_item *free_nodes;
@@ -335,19 +336,25 @@ static inline off_t pack_set_bytecount(struct pack_list *pl)
 	return ret;
 }
 
-static int cmp_pack_list_reverse(const void *a, const void *b)
+static int cmp_remaining_objects(const void *a, const void *b)
 {
 	struct pack_list *pl_a = *((struct pack_list **)a);
 	struct pack_list *pl_b = *((struct pack_list **)b);
-	size_t sz_a = pl_a->remaining_objects->size;
-	size_t sz_b = pl_b->remaining_objects->size;
 
-	if (sz_a == sz_b)
-		return 0;
-	else if (sz_a < sz_b)
+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
+		/* have the same remaining_objects, big pack first */
+		if (pl_a->all_objects_size == pl_b->all_objects_size)
+			return 0;
+		else if (pl_a->all_objects_size < pl_b->all_objects_size)
+			return 1;
+		else
+			return -1;
+	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
+		/* sort by remaining objects, more objects first */
 		return 1;
-	else
+	} else {
 		return -1;
+	}
 }
 
 /* Sort pack_list, greater size of remaining_objects first */
@@ -365,7 +372,7 @@ static void sort_pack_list(struct pack_list **pl)
 	for (n = 0, p = *pl; p; p = p->next)
 		ary[n++] = p;
 
-	QSORT(ary, n, cmp_pack_list_reverse);
+	QSORT(ary, n, cmp_remaining_objects);
 
 	/* link them back again */
 	for (i = 0; i < n - 1; i++)
@@ -507,6 +514,7 @@ static struct pack_list * add_pack(struct packed_git *p)
 		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
 		off += step;
 	}
+	l.all_objects_size = l.remaining_objects->size;
 	/* this list will be pruned in cmp_two_packs later */
 	l.unique_objects = llist_copy(l.remaining_objects);
 	if (p->pack_local)
diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
index 407838f0e8..663328ab30 100755
--- a/t/t5323-pack-redundant.sh
+++ b/t/t5323-pack-redundant.sh
@@ -133,7 +133,7 @@ test_expect_success 'create pack 4, 5' '
 '
 
 cat >expected <<EOF
-P2:$P2
+P3:$P3
 EOF
 
 test_expect_success 'one of pack-2/pack-3 is redundant' '
-- 
2.20.0.3.gc45e608566


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable
  2019-01-10 11:57           ` SZEDER Gábor
  2019-01-10 12:25             ` Torsten Bögershausen
  2019-01-10 17:36             ` Junio C Hamano
@ 2019-01-15 20:30             ` tboegi
  2019-01-15 21:09               ` Eric Sunshine
  2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
  2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
  3 siblings, 2 replies; 43+ messages in thread
From: tboegi @ 2019-01-15 20:30 UTC (permalink / raw)
  To: git, szeder.dev, zhiyou.jx; +Cc: Torsten Bögershausen

From: Torsten Bögershausen <tboegi@web.de>

From `man sed` (on a Mac OS X box):
The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
on other operating systems.

From `man sed` on a Linux box:
REGULAR EXPRESSIONS
       POSIX.2 BREs should be supported, but they aren't completely because of
       performance problems.  The \n sequence in a regular expression matches
       the newline character,  and  similarly  for \a, \t, and other sequences.
       The -E option switches to using extended regular expressions instead;
       the -E option has been supported for years by GNU sed, and is now
       included in POSIX.

Well, there are still a lot of systems out there, which don't support it.

Beside that, see IEEE Std 1003.1TM-2017
http://pubs.opengroup.org/onlinepubs/9699919799/
does not mention -E either.

To be on the safe side, don't allow it.

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---

I am somewhat unsure if we should disable all options except -e -f -n
instead ?
/\bsed\s+-[^efn]/ and err 'Not portable option with sed. Only -n -e -f are portable';

That would cause a false positive in t9001 here:
"--cc-cmd=./cccmd-sed --suppress-cc=self"

which could either be fixed by an anchor:
/^\s*sed\s+-[^efn]/

Or by allowing '--' like this:
/\bsed\s+-[^-efn]/

Any thoughts, please ?

t/check-non-portable-shell.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
index b45bdac688..96b6afdeb8 100755
--- a/t/check-non-portable-shell.pl
+++ b/t/check-non-portable-shell.pl
@@ -35,7 +35,7 @@ sub err {
 		chomp;
 	}

-	/\bsed\s+-i/ and err 'sed -i is not portable';
+	/\bsed\s+-[Eail]/ and err 'Not portable option with sed. Only -e -f -n are portable';
 	/\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
 	/^\s*declare\s+/ and err 'arrays/declare not portable';
 	/^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';
--
2.20.1.2.gb21ebb671


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
@ 2019-01-15 21:09               ` Eric Sunshine
  2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 43+ messages in thread
From: Eric Sunshine @ 2019-01-15 21:09 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Git List, SZEDER Gábor, zhiyou.jx

On Tue, Jan 15, 2019 at 3:31 PM <tboegi@web.de> wrote:
> From `man sed` (on a Mac OS X box):
> The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
> on other operating systems.
> [...]
> To be on the safe side, don't allow it.
>
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>
> ---
> diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
> @@ -35,7 +35,7 @@ sub err {
> -       /\bsed\s+-i/ and err 'sed -i is not portable';
> +       /\bsed\s+-[Eail]/ and err 'Not portable option with sed. Only -e -f -n are portable';
>         /\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
>         /^\s*declare\s+/ and err 'arrays/declare not portable';
>         /^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';

Please update the new message to be more consistent with existing
surrounding error messages. For instance:

    err 'sed -i/-a/-l/-E not portable (use only -e/-f/-n)'

or something. Thanks.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
  2019-01-15 21:09               ` Eric Sunshine
@ 2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 43+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-16 11:24 UTC (permalink / raw)
  To: tboegi; +Cc: git, szeder.dev, zhiyou.jx


On Tue, Jan 15 2019, tboegi@web.de wrote:

> From: Torsten Bögershausen <tboegi@web.de>
>
> From `man sed` (on a Mac OS X box):
> The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
> on other operating systems.
>
> From `man sed` on a Linux box:
> REGULAR EXPRESSIONS
>        POSIX.2 BREs should be supported, but they aren't completely because of
>        performance problems.  The \n sequence in a regular expression matches
>        the newline character,  and  similarly  for \a, \t, and other sequences.
>        The -E option switches to using extended regular expressions instead;
>        the -E option has been supported for years by GNU sed, and is now
>        included in POSIX.
>
> Well, there are still a lot of systems out there, which don't support it.
>
> Beside that, see IEEE Std 1003.1TM-2017
> http://pubs.opengroup.org/onlinepubs/9699919799/
> does not mention -E either.
>
> To be on the safe side, don't allow it.
>
> Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
> Signed-off-by: Torsten Bögershausen <tboegi@web.de>
> ---
>
> I am somewhat unsure if we should disable all options except -e -f -n
> instead ?
> /\bsed\s+-[^efn]/ and err 'Not portable option with sed. Only -n -e -f are portable';
>
> That would cause a false positive in t9001 here:
> "--cc-cmd=./cccmd-sed --suppress-cc=self"
>
> which could either be fixed by an anchor:
> /^\s*sed\s+-[^efn]/
>
> Or by allowing '--' like this:
> /\bsed\s+-[^-efn]/
>
> Any thoughts, please ?
>
> t/check-non-portable-shell.pl | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
> index b45bdac688..96b6afdeb8 100755
> --- a/t/check-non-portable-shell.pl
> +++ b/t/check-non-portable-shell.pl
> @@ -35,7 +35,7 @@ sub err {
>  		chomp;
>  	}
>
> -	/\bsed\s+-i/ and err 'sed -i is not portable';
> +	/\bsed\s+-[Eail]/ and err 'Not portable option with sed. Only -e -f -n are portable';
>  	/\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
>  	/^\s*declare\s+/ and err 'arrays/declare not portable';
>  	/^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';

I'd just go for your /\bsed\s+-[^-efn]/ suggestion. Just a note if we do
go for the whitelist: According to GNU sed's manpage -E is also known as
-r, so /\bsed\s+-[Erail]/ would be better.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file]
  2019-01-10 11:57           ` SZEDER Gábor
                               ` (2 preceding siblings ...)
  2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
@ 2019-01-20  7:53             ` tboegi
  3 siblings, 0 replies; 43+ messages in thread
From: tboegi @ 2019-01-20  7:53 UTC (permalink / raw)
  To: git, szeder.dev, zhiyou.jx, sunshine, avarab; +Cc: Torsten Bögershausen

From: Torsten Bögershausen <tboegi@web.de>

From `man sed` (on a Mac OS X box):
The -E, -a and -i options are non-standard FreeBSD extensions and may not be available
on other operating systems.

From `man sed` on a Linux box:
REGULAR EXPRESSIONS
       POSIX.2 BREs should be supported, but they aren't completely because of
       performance problems.  The \n sequence in a regular expression matches the newline
       character,  and  similarly  for \a, \t, and other sequences.
       The -E option switches to using extended regular expressions instead; the -E option
       has been supported for years by GNU sed, and is now included in POSIX.

Well, there are still a lot of systems out there, which don't support it.
Beside that, IEEE Std 1003.1TM-2017, see
http://pubs.opengroup.org/onlinepubs/9699919799/
does not mention -E either.

To be on the safe side, don't allow -E (or -r, which is GNU).
Change check-non-portable-shell.pl to only accept the portable options:
sed [-n] [-e command] [-f command_file]

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
 t/check-non-portable-shell.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
index b45bdac688..6c798608a9 100755
--- a/t/check-non-portable-shell.pl
+++ b/t/check-non-portable-shell.pl
@@ -35,7 +35,7 @@ sub err {
 		chomp;
 	}

-	/\bsed\s+-i/ and err 'sed -i is not portable';
+	/\bsed\s+-[^efn]\s+/ and err 'Not portable option with sed (use only [-n] [-e command] [-f command_file])';
 	/\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)';
 	/^\s*declare\s+/ and err 'arrays/declare not portable';
 	/^\s*[^#]\s*which\s/ and err 'which is not portable (use type)';
--
2.20.1.2.gb21ebb671


^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, back to index

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-18  9:58 [PATCH 1/2] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-09 12:56       ` SZEDER Gábor
2019-01-09 16:47         ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10 21:11             ` Junio C Hamano
2019-01-11  1:59               ` Jiang Xin
2019-01-11 18:00                 ` Junio C Hamano
2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-11  1:19             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 20:05             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10  7:11           ` Johannes Sixt
2019-01-10 11:57           ` SZEDER Gábor
2019-01-10 12:25             ` Torsten Bögershausen
2019-01-10 17:36             ` Junio C Hamano
2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
2019-01-15 21:09               ` Eric Sunshine
2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
2019-01-08 19:30         ` Junio C Hamano
2019-01-09  0:29           ` 16657101987
2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox