git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Linus Torvalds <torvalds@osdl.org>
To: Junio C Hamano <junkio@cox.net>, Git Mailing List <git@vger.kernel.org>
Subject: git-rev-list: add "--dense" flag
Date: Fri, 21 Oct 2005 16:40:54 -0700 (PDT)	[thread overview]
Message-ID: <Pine.LNX.4.64.0510211631400.10477@g5.osdl.org> (raw)


This is what the recent git-rev-list changes have all been gearing up for.

When we use a path filter to git-rev-list, the new "--dense" flag asks
git-rev-list to compress the history so that it _only_ contains commits
that change files in the path filter.  It also rewrites the parent
information so that tools like "gitk" will see the result as a dense
history tree.

For example, on the current kernel archive:

	[torvalds@g5 linux]$ git-rev-list HEAD | wc -l
	9904
	[torvalds@g5 linux]$ git-rev-list HEAD -- kernel | wc -l
	5442
	[torvalds@g5 linux]$ git-rev-list --dense HEAD -- kernel | wc -l
	356

which shows that while we have almost ten thousand commits, we can prune
down the work to slightly more than half by only following the merges
that are interesting. But further, we can then compress the history to
just 356 entries that actually make changes to the kernel subdirectory.

To see this in action, try something like

	gitk --dense -- gitk

to see just the history that affects gitk.  Or, to show that true
parallell development still remains parallell, do

	gitk --dense -- daemon.c

which shows some parallell commits in the current git tree.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---

I'm really happy with how this turned out. It's a bit expensive to run on 
big archives, but I _really_ think it's quite spectacular. And likely very 
useful indeed.

For example, say you love gitk, but only care about networking changes. 
Easy enough, just do

	gitk --dense -- net/ include/net/

and off you go. It's not free (we do a _lot_ of tree comparisons), but 
dammit, it's fast enough that it's very very useful.  The tree comparisons 
are done very efficiently.

This is _way_ more powerful than annotate. Interested in just a single 
file? Just do

	gitk --dense -- kernel/exit.c

and it will show the 17 or so commits that change kernel/exit.c with the 
right history (it turns out that there is no parallell development at all 
in that file, so in this case it will linearize history entirely).

Damn, I'm good. 

diff --git a/rev-list.c b/rev-list.c
index b5dbb9f..5f125fd 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -11,6 +11,7 @@
 #define INTERESTING	(1u << 1)
 #define COUNTED		(1u << 2)
 #define SHOWN		(1u << 3)
+#define TREECHANGE	(1u << 4)
 
 static const char rev_list_usage[] =
 	"git-rev-list [OPTION] commit-id <commit-id>\n"
@@ -27,6 +28,7 @@ static const char rev_list_usage[] =
 		      "  --merge-order [ --show-breaks ]\n"
 		      "  --topo-order";
 
+static int dense = 0;
 static int unpacked = 0;
 static int bisect_list = 0;
 static int tag_objects = 0;
@@ -79,6 +81,26 @@ static void show_commit(struct commit *c
 	fflush(stdout);
 }
 
+static void rewrite_one(struct commit **pp)
+{
+	for (;;) {
+		struct commit *p = *pp;
+		if (p->object.flags & (TREECHANGE | UNINTERESTING))
+			return;
+		/* Only single-parent commits don't have TREECHANGE */
+		*pp = p->parents->item;
+	}
+}
+
+static void rewrite_parents(struct commit *commit)
+{
+	struct commit_list *parent = commit->parents;
+	while (parent) {
+		rewrite_one(&parent->item);
+		parent = parent->next;
+	}
+}
+
 static int filter_commit(struct commit * commit)
 {
 	if (stop_traversal && (commit->object.flags & BOUNDARY))
@@ -95,6 +117,11 @@ static int filter_commit(struct commit *
 		return STOP;
 	if (no_merges && (commit->parents && commit->parents->next))
 		return CONTINUE;
+	if (paths && dense) {
+		if (!(commit->object.flags & TREECHANGE))
+			return CONTINUE;
+		rewrite_parents(commit);
+	}
 	return DO;
 }
 
@@ -404,6 +431,14 @@ static struct diff_options diff_opt = {
 	.change = file_change,
 };
 
+static int same_tree(struct tree *t1, struct tree *t2)
+{
+	is_different = 0;
+	if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
+		return 0;
+	return !is_different;
+}
+
 static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent)
 {
 	if (!commit->tree)
@@ -415,11 +450,7 @@ static struct commit *try_to_simplify_me
 		parse_commit(p);
 		if (!p->tree)
 			continue;
-		is_different = 0;
-		if (diff_tree_sha1(commit->tree->object.sha1,
-				   p->tree->object.sha1, "", &diff_opt) < 0)
-			continue;
-		if (!is_different)
+		if (same_tree(commit->tree, p->tree))
 			return p;
 	}
 	return NULL;
@@ -485,6 +516,27 @@ static void add_parents_to_list(struct c
 	}
 }
 
+static void compress_list(struct commit_list *list)
+{
+	while (list) {
+		struct commit *commit = list->item;
+		struct commit_list *parent = commit->parents;
+		list = list->next;
+
+		/*
+		 * Exactly one parent? Check if it leaves the tree
+		 * unchanged
+		 */
+		if (parent && !parent->next) {
+			struct tree *t1 = commit->tree;
+			struct tree *t2 = parent->item->tree;
+			if (!t1 || !t2 || same_tree(t1, t2))
+				continue;
+		}
+		commit->object.flags |= TREECHANGE;
+	}
+}
+
 static struct commit_list *limit_list(struct commit_list *list)
 {
 	struct commit_list *newlist = NULL;
@@ -514,6 +566,8 @@ static struct commit_list *limit_list(st
 	}
 	if (tree_objects)
 		mark_edges_uninteresting(newlist);
+	if (paths && dense)
+		compress_list(newlist);
 	if (bisect_list)
 		newlist = find_bisection(newlist);
 	return newlist;
@@ -700,6 +754,10 @@ int main(int argc, const char **argv)
 		        limited = 1;
 			continue;
 		}
+		if (!strcmp(arg, "--dense")) {
+			dense = 1;
+			continue;
+		}
 		if (!strcmp(arg, "--")) {
 			paths = get_pathspec(prefix, argv + i + 1);
 			if (paths) {

             reply	other threads:[~2005-10-21 23:41 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-10-21 23:40 Linus Torvalds [this message]
2005-10-21 23:47 ` git-rev-list: add "--dense" flag Linus Torvalds
2005-10-21 23:55 ` Linus Torvalds
2005-10-22  0:37 ` Petr Baudis
2005-10-22  0:47   ` Handling renames Petr Baudis
2005-10-22  1:28     ` Linus Torvalds
2005-10-22  1:51       ` Petr Baudis
2005-10-22  2:10         ` Junio C Hamano
2005-10-22  2:49           ` Petr Baudis
2005-10-22  3:23         ` Linus Torvalds
2005-10-22  1:26   ` git-rev-list: add "--dense" flag Linus Torvalds
2005-10-22  2:56     ` Petr Baudis
2005-10-22  3:30       ` Linus Torvalds
2005-10-22  3:55     ` Junio C Hamano
2005-10-22 20:37       ` Linus Torvalds
2005-10-25 18:07 ` Jonas Fonseca
2005-10-25 18:23   ` Linus Torvalds
2005-10-25 18:40     ` Jonas Fonseca
2005-10-25 18:52       ` Linus Torvalds
2005-10-25 18:50     ` Linus Torvalds
  -- strict thread matches above, loose matches on Subject: below --
2005-10-23 19:30 Marco Costalba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.64.0510211631400.10477@g5.osdl.org \
    --to=torvalds@osdl.org \
    --cc=git@vger.kernel.org \
    --cc=junkio@cox.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).