From: "René Scharfe" <l.s.r@web.de>
To: Git List <git@vger.kernel.org>
Subject: [PATCH] commit-reach: avoid commit_list_insert_by_date()
Date: Fri, 24 Oct 2025 18:47:10 +0200 [thread overview]
Message-ID: <87a00cb8-8faf-48ec-91aa-009e6e906363@web.de> (raw)
Building a list using commit_list_insert_by_date() has quadratic worst
case complexity. Avoid it by just appending in the loop and sorting at
the end.
The number of merge bases is usually small, so don't expect speedups in
normal repositories. It has no limit, though. The added perf test
shows a nice improvement when dealing with 16384 merge bases:
Test v2.51.1 HEAD
-----------------------------------------------------------------
6010.2: git merge-base 0.55(0.54+0.00) 0.03(0.02+0.00) -94.5%
Signed-off-by: René Scharfe <l.s.r@web.de>
---
Patch formatted with --function-context for easier review, particularly
of paint_down_to_common().
commit-reach.c | 14 +++--
t/perf/p6010-merge-base.sh | 101 +++++++++++++++++++++++++++++++++++++
2 files changed, 110 insertions(+), 5 deletions(-)
create mode 100755 t/perf/p6010-merge-base.sh
diff --git a/commit-reach.c b/commit-reach.c
index a339e41aa4..cc18c86d3b 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -52,127 +52,130 @@ static int queue_has_nonstale(struct prio_queue *queue)
/* all input commits in one and twos[] must have been parsed! */
static int paint_down_to_common(struct repository *r,
struct commit *one, int n,
struct commit **twos,
timestamp_t min_generation,
int ignore_missing_commits,
struct commit_list **result)
{
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
int i;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
+ struct commit_list **tail = result;
if (!min_generation && !corrected_commit_dates_enabled(r))
queue.compare = compare_commits_by_commit_date;
one->object.flags |= PARENT1;
if (!n) {
commit_list_append(one, result);
return 0;
}
prio_queue_put(&queue, one);
for (i = 0; i < n; i++) {
twos[i]->object.flags |= PARENT2;
prio_queue_put(&queue, twos[i]);
}
while (queue_has_nonstale(&queue)) {
struct commit *commit = prio_queue_get(&queue);
struct commit_list *parents;
int flags;
timestamp_t generation = commit_graph_generation(commit);
if (min_generation && generation > last_gen)
BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
generation, last_gen,
oid_to_hex(&commit->object.oid));
last_gen = generation;
if (generation < min_generation)
break;
flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
commit->object.flags |= RESULT;
- commit_list_insert_by_date(commit, result);
+ tail = commit_list_append(commit, tail);
}
/* Mark parents of a found merge stale */
flags |= STALE;
}
parents = commit->parents;
while (parents) {
struct commit *p = parents->item;
parents = parents->next;
if ((p->object.flags & flags) == flags)
continue;
if (repo_parse_commit(r, p)) {
clear_prio_queue(&queue);
free_commit_list(*result);
*result = NULL;
/*
* At this stage, we know that the commit is
* missing: `repo_parse_commit()` uses
* `OBJECT_INFO_DIE_IF_CORRUPT` and therefore
* corrupt commits would already have been
* dispatched with a `die()`.
*/
if (ignore_missing_commits)
return 0;
return error(_("could not parse commit %s"),
oid_to_hex(&p->object.oid));
}
p->object.flags |= flags;
prio_queue_put(&queue, p);
}
}
clear_prio_queue(&queue);
+ commit_list_sort_by_date(result);
return 0;
}
static int merge_bases_many(struct repository *r,
struct commit *one, int n,
struct commit **twos,
struct commit_list **result)
{
- struct commit_list *list = NULL;
+ struct commit_list *list = NULL, **tail = result;
int i;
for (i = 0; i < n; i++) {
if (one == twos[i]) {
/*
* We do not mark this even with RESULT so we do not
* have to clean it up.
*/
*result = commit_list_insert(one, result);
return 0;
}
}
if (!one)
return 0;
if (repo_parse_commit(r, one))
return error(_("could not parse commit %s"),
oid_to_hex(&one->object.oid));
for (i = 0; i < n; i++) {
if (!twos[i])
return 0;
if (repo_parse_commit(r, twos[i]))
return error(_("could not parse commit %s"),
oid_to_hex(&twos[i]->object.oid));
}
if (paint_down_to_common(r, one, n, twos, 0, 0, &list)) {
free_commit_list(list);
return -1;
}
while (list) {
struct commit *commit = pop_commit(&list);
if (!(commit->object.flags & STALE))
- commit_list_insert_by_date(commit, result);
+ tail = commit_list_append(commit, tail);
}
+ commit_list_sort_by_date(result);
return 0;
}
@@ -421,47 +424,48 @@ static int remove_redundant(struct repository *r, struct commit **array,
static int get_merge_bases_many_0(struct repository *r,
struct commit *one,
size_t n,
struct commit **twos,
int cleanup,
struct commit_list **result)
{
- struct commit_list *list;
+ struct commit_list *list, **tail = result;
struct commit **rslt;
size_t cnt, i;
int ret;
if (merge_bases_many(r, one, n, twos, result) < 0)
return -1;
for (i = 0; i < n; i++) {
if (one == twos[i])
return 0;
}
if (!*result || !(*result)->next) {
if (cleanup) {
clear_commit_marks(one, all_flags);
clear_commit_marks_many(n, twos, all_flags);
}
return 0;
}
/* There are more than one */
cnt = commit_list_count(*result);
CALLOC_ARRAY(rslt, cnt);
for (list = *result, i = 0; list; list = list->next)
rslt[i++] = list->item;
free_commit_list(*result);
*result = NULL;
clear_commit_marks(one, all_flags);
clear_commit_marks_many(n, twos, all_flags);
ret = remove_redundant(r, rslt, cnt, &cnt);
if (ret < 0) {
free(rslt);
return -1;
}
for (i = 0; i < cnt; i++)
- commit_list_insert_by_date(rslt[i], result);
+ tail = commit_list_append(rslt[i], tail);
+ commit_list_sort_by_date(result);
free(rslt);
return 0;
}
diff --git a/t/perf/p6010-merge-base.sh b/t/perf/p6010-merge-base.sh
new file mode 100755
index 0000000000..54f52fa23e
--- /dev/null
+++ b/t/perf/p6010-merge-base.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test git merge-base'
+
+. ./perf-lib.sh
+
+test_perf_fresh_repo
+
+#
+# Creates lots of merges to make history traversal costly. In
+# particular it creates 2^($max_level-1)-1 2-way merges on top of
+# 2^($max_level-1) root commits. E.g., the commit history looks like
+# this for a $max_level of 3:
+#
+# _1_
+# / \
+# 2 3
+# / \ / \
+# 4 5 6 7
+#
+# The numbers are the fast-import marks, which also are the commit
+# messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges,
+# 4-7 are the root commits.
+#
+build_history () {
+ local max_level="$1" &&
+ local level="${2:-1}" &&
+ local mark="${3:-1}" &&
+ if test $level -eq $max_level
+ then
+ echo "reset refs/heads/master" &&
+ echo "from $ZERO_OID" &&
+ echo "commit refs/heads/master" &&
+ echo "mark :$mark" &&
+ echo "committer C <c@example.com> 1234567890 +0000" &&
+ echo "data <<EOF" &&
+ echo "$mark" &&
+ echo "EOF"
+ else
+ local level1=$((level+1)) &&
+ local mark1=$((2*mark)) &&
+ local mark2=$((2*mark+1)) &&
+ build_history $max_level $level1 $mark1 &&
+ build_history $max_level $level1 $mark2 &&
+ echo "commit refs/heads/master" &&
+ echo "mark :$mark" &&
+ echo "committer C <c@example.com> 1234567890 +0000" &&
+ echo "data <<EOF" &&
+ echo "$mark" &&
+ echo "EOF" &&
+ echo "from :$mark1" &&
+ echo "merge :$mark2"
+ fi
+}
+
+#
+# Creates a new merge history in the same shape as build_history does,
+# while reusing the same root commits. This way the two top commits
+# have 2^($max_level-1) merge bases between them.
+#
+build_history2 () {
+ local max_level="$1" &&
+ local level="${2:-1}" &&
+ local mark="${3:-1}" &&
+ if test $level -lt $max_level
+ then
+ local level1=$((level+1)) &&
+ local mark1=$((2*mark)) &&
+ local mark2=$((2*mark+1)) &&
+ build_history2 $max_level $level1 $mark1 &&
+ build_history2 $max_level $level1 $mark2 &&
+ echo "commit refs/heads/master" &&
+ echo "mark :$mark" &&
+ echo "committer C <c@example.com> 1234567890 +0000" &&
+ echo "data <<EOF" &&
+ echo "$mark II" &&
+ echo "EOF" &&
+ echo "from :$mark1" &&
+ echo "merge :$mark2"
+ fi
+}
+
+test_expect_success 'setup' '
+ max_level=15 &&
+ build_history $max_level | git fast-import --export-marks=marks &&
+ git tag one &&
+ build_history2 $max_level | git fast-import --import-marks=marks --force &&
+ git tag two &&
+ git gc &&
+ git log --format=%H --no-merges >expect
+'
+
+test_perf 'git merge-base' '
+ git merge-base --all one two >actual
+'
+
+test_expect_success 'verify result' '
+ test_cmp expect actual
+'
+
+test_done
--
2.51.1
reply other threads:[~2025-10-24 16:47 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87a00cb8-8faf-48ec-91aa-009e6e906363@web.de \
--to=l.s.r@web.de \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).