git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 0/3] Fix built-in rebase perf regression
Date: Mon, 12 Nov 2018 03:44:28 -0800 (PST)	[thread overview]
Message-ID: <pull.72.v2.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.72.git.gitgitgadget@gmail.com>

In our tests with large repositories, we noticed a serious regression of the
performance of git rebase when using the built-in vs the shell script
version. It boils down to an incorrect conversion of a git checkout -q:
instead of using a twoway_merge as git checkout does, we used a oneway_merge 
as git reset does. The latter, however, calls lstat() on all files listed in
the index, while the former essentially looks only at the files that are
different between the given two revisions.

Let's reinstate the original behavior by introducing a flag to the 
reset_head() function to indicate whether we want to emulate reset --hard 
(in which case we use the oneway_merge, otherwise we use twoway_merge).

Johannes Schindelin (3):
  rebase: consolidate clean-up code before leaving reset_head()
  rebase: prepare reset_head() for more flags
  built-in rebase: reinstate `checkout -q` behavior where appropriate

 builtin/rebase.c | 79 ++++++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 33 deletions(-)


base-commit: 8858448bb49332d353febc078ce4a3abcc962efe
Published-As: https://github.com/gitgitgadget/git/releases/tags/pr-72%2Fdscho%2Fbuiltin-rebase-perf-regression-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-72/dscho/builtin-rebase-perf-regression-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/72

Range-diff vs v1:

 1:  64597fe827 ! 1:  28e24d98ab rebase: consolidate clean-up code before leaving reset_head()
     @@ -11,6 +11,33 @@
      --- a/builtin/rebase.c
      +++ b/builtin/rebase.c
      @@
     + 	if (switch_to_branch && !starts_with(switch_to_branch, "refs/"))
     + 		BUG("Not a fully qualified branch: '%s'", switch_to_branch);
     + 
     +-	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0)
     +-		return -1;
     ++	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0) {
     ++		ret = -1;
     ++		goto leave_reset_head;
     ++	}
     + 
     + 	if (!oid) {
     + 		if (get_oid("HEAD", &head_oid)) {
     +-			rollback_lock_file(&lock);
     +-			return error(_("could not determine HEAD revision"));
     ++			ret = error(_("could not determine HEAD revision"));
     ++			goto leave_reset_head;
     + 		}
     + 		oid = &head_oid;
     + 	}
     +@@
     + 		unpack_tree_opts.reset = 1;
     + 
     + 	if (read_index_unmerged(the_repository->index) < 0) {
     +-		rollback_lock_file(&lock);
     +-		return error(_("could not read index"));
     ++		ret = error(_("could not read index"));
     ++		goto leave_reset_head;
       	}
       
       	if (!fill_tree_descriptor(&desc, oid)) {
     @@ -31,15 +58,17 @@
       	}
       
       	tree = parse_tree_indirect(oid);
     -@@
     + 	prime_cache_tree(the_repository->index, tree);
       
     - 	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0)
     +-	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0)
     ++	if (write_locked_index(the_repository->index, &lock, COMMIT_LOCK) < 0) {
       		ret = error(_("could not write index"));
      -	free((void *)desc.buffer);
     - 
     - 	if (ret)
     +-
     +-	if (ret)
      -		return ret;
      +		goto leave_reset_head;
     ++	}
       
       	reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT);
       	strbuf_addf(&msg, "%s: ", reflog_action ? reflog_action : "rebase");
 -:  ---------- > 2:  db963b2094 rebase: prepare reset_head() for more flags
 2:  070092b430 ! 3:  a7360b856f built-in rebase: reinstate `checkout -q` behavior where appropriate
     @@ -20,15 +20,18 @@
      @@
       #define GIT_REFLOG_ACTION_ENVIRONMENT "GIT_REFLOG_ACTION"
       
     + #define RESET_HEAD_DETACH (1<<0)
     ++#define RESET_HEAD_HARD (1<<1)
     + 
       static int reset_head(struct object_id *oid, const char *action,
     --		      const char *switch_to_branch, int detach_head,
     -+		      const char *switch_to_branch,
     -+		      int detach_head, int reset_hard,
     + 		      const char *switch_to_branch, unsigned flags,
       		      const char *reflog_orig_head, const char *reflog_head)
       {
     + 	unsigned detach_head = flags & RESET_HEAD_DETACH;
     ++	unsigned reset_hard = flags & RESET_HEAD_HARD;
       	struct object_id head_oid;
      -	struct tree_desc desc;
     -+	struct tree_desc desc[2];
     ++	struct tree_desc desc[2] = { { NULL }, { NULL } };
       	struct lock_file lock = LOCK_INIT;
       	struct unpack_trees_options unpack_tree_opts;
       	struct tree *tree;
     @@ -42,18 +45,18 @@
       	if (switch_to_branch && !starts_with(switch_to_branch, "refs/"))
       		BUG("Not a fully qualified branch: '%s'", switch_to_branch);
      @@
     - 	if (hold_locked_index(&lock, LOCK_REPORT_ON_ERROR) < 0)
     - 		return -1;
     + 		goto leave_reset_head;
     + 	}
       
      -	if (!oid) {
      -		if (get_oid("HEAD", &head_oid)) {
     --			rollback_lock_file(&lock);
     --			return error(_("could not determine HEAD revision"));
     +-			ret = error(_("could not determine HEAD revision"));
     +-			goto leave_reset_head;
      -		}
      -		oid = &head_oid;
     -+	if (get_oid("HEAD", &head_oid)) {
     -+		rollback_lock_file(&lock);
     -+		return error(_("could not determine HEAD revision"));
     ++	if ((!oid || !reset_hard) && get_oid("HEAD", &head_oid)) {
     ++		ret = error(_("could not determine HEAD revision"));
     ++		goto leave_reset_head;
       	}
       
      +	if (!oid)
     @@ -70,7 +73,7 @@
       	unpack_tree_opts.merge = 1;
       	if (!detach_head)
      @@
     - 		return error(_("could not read index"));
     + 		goto leave_reset_head;
       	}
       
      -	if (!fill_tree_descriptor(&desc, oid)) {
     @@ -104,7 +107,8 @@
       		string_list_clear(&merge_rr, 1);
       
      -		if (reset_head(NULL, "reset", NULL, 0, NULL, NULL) < 0)
     -+		if (reset_head(NULL, "reset", NULL, 0, 1, NULL, NULL) < 0)
     ++		if (reset_head(NULL, "reset", NULL, RESET_HEAD_HARD,
     ++			       NULL, NULL) < 0)
       			die(_("could not discard worktree changes"));
       		if (read_basic_state(&options))
       			exit(1);
     @@ -113,7 +117,8 @@
       			exit(1);
       		if (reset_head(&options.orig_head, "reset",
      -			       options.head_name, 0, NULL, NULL) < 0)
     -+			       options.head_name, 0, 1, NULL, NULL) < 0)
     ++			       options.head_name, RESET_HEAD_HARD,
     ++			       NULL, NULL) < 0)
       			die(_("could not move back to %s"),
       			    oid_to_hex(&options.orig_head));
       		ret = finish_rebase(&options);
     @@ -122,34 +127,7 @@
       			printf(_("Created autostash: %s\n"), buf.buf);
       			if (reset_head(&head->object.oid, "reset --hard",
      -				       NULL, 0, NULL, NULL) < 0)
     -+				       NULL, 0, 1, NULL, NULL) < 0)
     ++				       NULL, RESET_HEAD_HARD, NULL, NULL) < 0)
       				die(_("could not reset --hard"));
       			printf(_("HEAD is now at %s"),
       			       find_unique_abbrev(&head->object.oid,
     -@@
     - 				strbuf_addf(&buf, "rebase: checkout %s",
     - 					    options.switch_to);
     - 				if (reset_head(&oid, "checkout",
     --					       options.head_name, 0,
     -+					       options.head_name, 0, 0,
     - 					       NULL, NULL) < 0) {
     - 					ret = !!error(_("could not switch to "
     - 							"%s"),
     -@@
     - 			 "it...\n"));
     - 
     - 	strbuf_addf(&msg, "rebase: checkout %s", options.onto_name);
     --	if (reset_head(&options.onto->object.oid, "checkout", NULL, 1,
     -+	if (reset_head(&options.onto->object.oid, "checkout", NULL, 1, 0,
     - 	    NULL, msg.buf))
     - 		die(_("Could not detach HEAD"));
     - 	strbuf_release(&msg);
     -@@
     - 		strbuf_addf(&msg, "rebase finished: %s onto %s",
     - 			options.head_name ? options.head_name : "detached HEAD",
     - 			oid_to_hex(&options.onto->object.oid));
     --		reset_head(NULL, "Fast-forwarded", options.head_name, 0,
     -+		reset_head(NULL, "Fast-forwarded", options.head_name, 0, 0,
     - 			   "HEAD", msg.buf);
     - 		strbuf_release(&msg);
     - 		ret = !!finish_rebase(&options);

-- 
gitgitgadget

  parent reply	other threads:[~2018-11-12 11:44 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-09  9:34 [PATCH 0/2] Fix built-in rebase perf regression Johannes Schindelin via GitGitGadget
2018-11-09  9:34 ` [PATCH 1/2] rebase: consolidate clean-up code before leaving reset_head() Johannes Schindelin via GitGitGadget
2018-11-09 10:03   ` Jeff King
2018-11-09 17:13     ` Johannes Schindelin
2018-11-09  9:34 ` [PATCH 2/2] built-in rebase: reinstate `checkout -q` behavior where appropriate Johannes Schindelin via GitGitGadget
2018-11-09 10:11   ` Jeff King
2018-11-09 17:21     ` Johannes Schindelin
2018-11-09 19:59       ` Jeff King
2018-11-12 11:11         ` Johannes Schindelin
2018-11-12  4:04       ` Junio C Hamano
2018-11-12 11:42         ` Johannes Schindelin
2018-11-13  1:49           ` Junio C Hamano
2018-11-13  6:07             ` Junio C Hamano
2018-11-12 11:44 ` Johannes Schindelin via GitGitGadget [this message]
2018-11-12 11:44   ` [PATCH v2 1/3] rebase: consolidate clean-up code before leaving reset_head() Johannes Schindelin via GitGitGadget
2018-11-12 11:44   ` [PATCH v2 2/3] rebase: prepare reset_head() for more flags Johannes Schindelin via GitGitGadget
2018-11-12 11:44   ` [PATCH v2 3/3] built-in rebase: reinstate `checkout -q` behavior where appropriate Johannes Schindelin via GitGitGadget
2018-11-12 14:26   ` [PATCH v2 0/3] Fix built-in rebase perf regression Johannes Schindelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.72.v2.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).