git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [PATCH] blame: add the ability to ignore commits
@ 2019-01-07 21:30 Barret Rhoden
  2019-01-07 23:13 ` Junio C Hamano
                   ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-07 21:30 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller, Jeff Smith, Junio C Hamano, Jeff King

Commits that make formatting changes or renames are often not
interesting when blaming a file.  This commit, similar to
git-hyper-blame, allows one to specify certain revisions to ignore during
the blame process.

To ignore a revision, put its committish in a file specified by
--ignore-file=<file> or use -i <rev>, which can be repeated.  The file
.git-blame-ignore-revs is checked by default.

It's useful to be alerted to the presence of an ignored commit in the
history of a line.  Those lines will be marked with '*' in the
non-porcelain output.  The '*' is attached to the line number to keep
from breaking tools that rely on the whitespace between columns.

A blame_entry attributed to an ignored commit will get passed to its
parent.  If an ignored commit changed a line, an ancestor that changed
the line will get blamed.  However, if an ignored commit added lines, a
commit changing a nearby line may get blamed.  If no commit is found,
the original commit for the file will get blamed.

Signed-off-by: Barret Rhoden <brho@google.com>
---
 Documentation/git-blame.txt | 15 +++++++++
 blame.c                     | 38 +++++++++++++++++----
 blame.h                     |  3 ++
 builtin/blame.c             | 66 +++++++++++++++++++++++++++++++++++--
 4 files changed, 112 insertions(+), 10 deletions(-)

diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt
index 16323eb80e31..e41375374892 100644
--- a/Documentation/git-blame.txt
+++ b/Documentation/git-blame.txt
@@ -10,6 +10,7 @@ SYNOPSIS
 [verse]
 'git blame' [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-e] [-p] [-w] [--incremental]
 	    [-L <range>] [-S <revs-file>] [-M] [-C] [-C] [-C] [--since=<date>]
+	    [-i <rev>] [--no-default-ignores] [--ignore-file=<file>]
 	    [--progress] [--abbrev=<n>] [<rev> | --contents <file> | --reverse <rev>..<rev>]
 	    [--] <file>
 
@@ -84,6 +85,20 @@ include::blame-options.txt[]
 	Ignore whitespace when comparing the parent's version and
 	the child's to find where the lines came from.
 
+-i <rev>::
+	Ignore revision when assigning blame.  Lines that were changed by an
+	ignored commit will be marked with a `*` in the blame output.  Lines
+	that were added by an ignored commit may be attributed commits making
+	nearby changes or to the first commit touching the file.
+
+--no-default-ignores::
+	Do not automatically ignore revisions in the file
+	`.git-blame-ignore-revs`.
+
+--ignore-file=<file>::
+	Ignore revisions listed in `file`, one revision per line.  Whitespace
+	and comments beginning with `#` are ignored.
+
 --abbrev=<n>::
 	Instead of using the default 7+1 hexadecimal digits as the
 	abbreviated object name, use <n>+1 digits. Note that 1 column
diff --git a/blame.c b/blame.c
index d84c93778080..9e338cfa83e3 100644
--- a/blame.c
+++ b/blame.c
@@ -474,7 +474,8 @@ void blame_coalesce(struct blame_scoreboard *sb)
 
 	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 		if (ent->suspect == next->suspect &&
-		    ent->s_lno + ent->num_lines == next->s_lno) {
+		    ent->s_lno + ent->num_lines == next->s_lno &&
+		    ent->ignored == next->ignored) {
 			ent->num_lines += next->num_lines;
 			ent->next = next->next;
 			blame_origin_decref(next->suspect);
@@ -726,6 +727,8 @@ static void split_overlap(struct blame_entry *split,
 	int chunk_end_lno;
 	memset(split, 0, sizeof(struct blame_entry [3]));
 
+	split[0].ignored = split[1].ignored = split[2].ignored = e->ignored;
+
 	if (e->s_lno < tlno) {
 		/* there is a pre-chunk part not blamed on parent */
 		split[0].suspect = blame_origin_incref(e->suspect);
@@ -845,10 +848,10 @@ static struct blame_entry *reverse_blame(struct blame_entry *head,
  */
 static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			int tlno, int offset, int same,
-			struct blame_origin *parent)
+			struct blame_origin *parent, int ignore_diffs)
 {
 	struct blame_entry *e = **srcq;
-	struct blame_entry *samep = NULL, *diffp = NULL;
+	struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL;
 
 	while (e && e->s_lno < tlno) {
 		struct blame_entry *next = e->next;
@@ -862,6 +865,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			int len = tlno - e->s_lno;
 			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
 			n->suspect = e->suspect;
+			n->ignored = e->ignored;
 			n->lno = e->lno + len;
 			n->s_lno = e->s_lno + len;
 			n->num_lines = e->num_lines - len;
@@ -916,6 +920,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			int len = same - e->s_lno;
 			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
 			n->suspect = blame_origin_incref(e->suspect);
+			n->ignored = e->ignored;
 			n->lno = e->lno + len;
 			n->s_lno = e->s_lno + len;
 			n->num_lines = e->num_lines - len;
@@ -925,10 +930,24 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			n->next = samep;
 			samep = n;
 		}
-		e->next = diffp;
-		diffp = e;
+		if (ignore_diffs) {
+			/* These go to the parent, like the ones before tlno. */
+			blame_origin_decref(e->suspect);
+			e->suspect = blame_origin_incref(parent);
+			e->s_lno += offset;
+			e->ignored = 1;
+			e->next = ignoredp;
+			ignoredp = e;
+		} else {
+			e->next = diffp;
+			diffp = e;
+		}
 		e = next;
 	}
+	if (ignoredp) {
+		**dstq = reverse_blame(ignoredp, **dstq);
+		*dstq = &ignoredp->next;
+	}
 	**srcq = reverse_blame(diffp, reverse_blame(samep, e));
 	/* Move across elements that are in the unblamable portion */
 	if (diffp)
@@ -938,6 +957,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 struct blame_chunk_cb_data {
 	struct blame_origin *parent;
 	long offset;
+	int ignore_diffs;
 	struct blame_entry **dstq;
 	struct blame_entry **srcq;
 };
@@ -950,7 +970,7 @@ static int blame_chunk_cb(long start_a, long count_a,
 	if (start_a - start_b != d->offset)
 		die("internal error in blame::blame_chunk_cb");
 	blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
-		    start_b + count_b, d->parent);
+		    start_b + count_b, d->parent, d->ignore_diffs);
 	d->offset = start_a + count_a - (start_b + count_b);
 	return 0;
 }
@@ -973,18 +993,22 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 
 	d.parent = parent;
 	d.offset = 0;
+	d.ignore_diffs = 0;
 	d.dstq = &newdest; d.srcq = &target->suspects;
 
 	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
 	fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob);
 	sb->num_get_patch++;
 
+	if (oidset_contains(&sb->ignores, &target->commit->object.oid))
+		d.ignore_diffs = 1;
+
 	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
 		die("unable to generate diff (%s -> %s)",
 		    oid_to_hex(&parent->commit->object.oid),
 		    oid_to_hex(&target->commit->object.oid));
 	/* The rest are the same as the parent */
-	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent);
+	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent, 0);
 	*d.dstq = NULL;
 	queue_blames(sb, parent, newdest);
 
diff --git a/blame.h b/blame.h
index be3a895043e0..3fe71a59d372 100644
--- a/blame.h
+++ b/blame.h
@@ -92,6 +92,7 @@ struct blame_entry {
 	 * scanning the lines over and over.
 	 */
 	unsigned score;
+	int ignored;
 };
 
 /*
@@ -117,6 +118,8 @@ struct blame_scoreboard {
 	/* linked list of blames */
 	struct blame_entry *ent;
 
+	struct oidset ignores;
+
 	/* look-up a line in the final buffer */
 	int num_lines;
 	int *lineno;
diff --git a/builtin/blame.c b/builtin/blame.c
index 6d798f99392e..698834426771 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -516,8 +516,13 @@ static void emit_other(struct blame_scoreboard *sb, struct blame_entry *ent, int
 						   ci.author_tz.buf,
 						   show_raw_time));
 			}
-			printf(" %*d) ",
-			       max_digits, ent->lno + 1 + cnt);
+			if (ent->ignored) {
+				printf(" %*d%c) ", max_digits - 1,
+				       ent->lno + 1 + cnt, '*');
+			} else {
+				printf(" %*d) ", max_digits,
+				       ent->lno + 1 + cnt);
+			}
 		}
 		if (reset)
 			fputs(reset, stdout);
@@ -603,6 +608,7 @@ static void find_alignment(struct blame_scoreboard *sb, int *option)
 {
 	int longest_src_lines = 0;
 	int longest_dst_lines = 0;
+	int has_ignore = 0;
 	unsigned largest_score = 0;
 	struct blame_entry *e;
 	int compute_auto_abbrev = (abbrev < 0);
@@ -639,9 +645,11 @@ static void find_alignment(struct blame_scoreboard *sb, int *option)
 			longest_dst_lines = num;
 		if (largest_score < blame_entry_score(sb, e))
 			largest_score = blame_entry_score(sb, e);
+		if (e->ignored)
+			has_ignore = 1;
 	}
 	max_orig_digits = decimal_width(longest_src_lines);
-	max_digits = decimal_width(longest_dst_lines);
+	max_digits = decimal_width(longest_dst_lines) + has_ignore;
 	max_score_digits = decimal_width(largest_score);
 
 	if (compute_auto_abbrev)
@@ -774,6 +782,43 @@ static int is_a_rev(const char *name)
 	return OBJ_NONE < oid_object_info(the_repository, &oid, NULL);
 }
 
+static void handle_ignore_list(struct blame_scoreboard *sb,
+			       struct string_list *ignores)
+{
+	struct string_list_item *i;
+	struct object_id oid;
+
+	oidset_init(&sb->ignores, 0);
+	for_each_string_list_item(i, ignores) {
+		if (get_oid_committish(i->string, &oid))
+			die(_("Can't find revision '%s' to ignore"), i->string);
+		oidset_insert(&sb->ignores, &oid);
+	}
+}
+
+static int handle_ignore_file(const char *path, struct string_list *ignores)
+{
+	FILE *fp = fopen(path, "r");
+	struct strbuf sb = STRBUF_INIT;
+
+	if (!fp)
+		return -1;
+	while (!strbuf_getline(&sb, fp)) {
+		const char *hash;
+
+		hash = strchr(sb.buf, '#');
+		if (hash)
+			strbuf_setlen(&sb, hash - sb.buf);
+		strbuf_trim(&sb);
+		if (!sb.len)
+			continue;
+		string_list_append(ignores, sb.buf);
+	}
+	fclose(fp);
+	strbuf_release(&sb);
+	return 0;
+}
+
 int cmd_blame(int argc, const char **argv, const char *prefix)
 {
 	struct rev_info revs;
@@ -785,8 +830,11 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	struct progress_info pi = { NULL, 0 };
 
 	struct string_list range_list = STRING_LIST_INIT_NODUP;
+	struct string_list ignore_list = STRING_LIST_INIT_DUP;
 	int output_option = 0, opt = 0;
 	int show_stats = 0;
+	int no_default_ignores = 0;
+	const char *ignore_file = NULL;
 	const char *revs_file = NULL;
 	const char *contents_from = NULL;
 	const struct option options[] = {
@@ -806,6 +854,9 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 		OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
 		OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
 		OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+		OPT_STRING_LIST('i', NULL, &ignore_list, N_("rev"), N_("Ignore <rev> when blaming")),
+		OPT_BOOL(0, "no-default-ignores", &no_default_ignores, N_("Do not ignore revisions from the .git-blame-ignore-revs file")),
+		OPT_STRING(0, "ignore-file", &ignore_file, N_("file"), N_("Ignore revisions from <file>")),
 		OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
 		OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
 
@@ -987,6 +1038,13 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 		argv[argc - 1] = "--";
 	}
 
+	if (!no_default_ignores)
+		handle_ignore_file(".git-blame-ignore-revs", &ignore_list);
+	if (ignore_file) {
+		if (handle_ignore_file(ignore_file, &ignore_list))
+			die(_("Unable to open ignore-file '%s'"), ignore_file);
+	}
+
 	revs.disable_stdin = 1;
 	setup_revisions(argc, argv, &revs, NULL);
 
@@ -995,6 +1053,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	sb.contents_from = contents_from;
 	sb.reverse = reverse;
 	sb.repo = the_repository;
+	handle_ignore_list(&sb, &ignore_list);
+	string_list_clear(&ignore_list, 0);
 	setup_scoreboard(&sb, path, &o);
 	lno = sb.num_lines;
 
-- 
2.20.1.97.g81188d93c3-goog


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-07 21:30 [PATCH] blame: add the ability to ignore commits Barret Rhoden
@ 2019-01-07 23:13 ` Junio C Hamano
  2019-01-08 16:27   ` Barret Rhoden
  2019-01-08 13:12 ` Ævar Arnfjörð Bjarmason
  2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
  2 siblings, 1 reply; 39+ messages in thread
From: Junio C Hamano @ 2019-01-07 23:13 UTC (permalink / raw)
  To: Barret Rhoden; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

Barret Rhoden <brho@google.com> writes:

> Commits that make formatting changes or renames are often not
> interesting when blaming a file.  This commit, similar to
> git-hyper-blame, allows one to specify certain revisions to ignore during
> the blame process.
>
> To ignore a revision, put its committish in a file specified by
> --ignore-file=<file> or use -i <rev>, which can be repeated.

If I read it correctly, this gives a very limited form of -S, in the
sense that anything this can do can be expressed by using -S but the
reverse is not true, but is designed to be easier to use, in the
sense that unlike -S, this does not have to describe the part of the
history you do not have to lie about.  The documentation should say
something about these pros-and-cons to help readers decide which
feature to use.

I somehow feel that this is rare enough that it should not squat on
short-and-sweet '-i'.  We would want to reserve it to something like
"--ignore-case", for example.

> The file .git-blame-ignore-revs is checked by default.

Giving the projects a way to easily help participants to share the
same distorted view of the history is a good idea, but I do not
think we should allow projects to do so to those who merely clone it
without their consent.  IOW, "by default" is a terrible idea.

Perhaps paying attention to blame.skipRevsFile configuration
variable that is set by the end user would be sufficient----the
project can ship .blame-skip-revs (or any random filename of their
choice) in-tree as tracked contents and tell its users that they can
optionally use it.

> It's useful to be alerted to the presence of an ignored commit in the
> history of a line.  Those lines will be marked with '*' in the
> non-porcelain output.  The '*' is attached to the line number to keep
> from breaking tools that rely on the whitespace between columns.

A policy decision like the above two shouldn't be hardcoded in the
feature like this, but should be done as a separate option.  By
default, these shouldn't be marked with '*', as the same tools you
said you are afraid of breaking would be expecting a word with only
digits and no asterisk in the column where line numbers appear and
will get broken by this change if done unconditionally.

In general, I find this patch trying to change too many things at
the same time, without sufficient justification.  Perhaps do these
different things as separate steps in a single series?

> A blame_entry attributed to an ignored commit will get passed to its
> parent.

Obviously, an interesting consideration is what happens when a merge
commit is skipped.  Is it sufficient to change this description to
"...will get passed to its parentS", or would the code do completely
nonsensical things without further tweaks (a possible simple tweak
could be to refuse skipping merges)?

> If an ignored commit changed a line, an ancestor that changed
> the line will get blamed.  However, if an ignored commit added lines, a
> commit changing a nearby line may get blamed.  If no commit is found,
> the original commit for the file will get blamed.

The above somehow does not read as describing a carefully designed
behaviour; rather, it sounds as if it is saying "the code does
whatever random things it happens to do".  For example, when there
is a newly added line how is "A" commit changing a nearby line
chosen (a line has lines before it and after it---they may be
touched by different commits, and before and after that skipped
commit, so there are multiple commits to choose from)?

> diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt
> index 16323eb80e31..e41375374892 100644
> --- a/Documentation/git-blame.txt
> +++ b/Documentation/git-blame.txt
> @@ -10,6 +10,7 @@ SYNOPSIS
>  [verse]
>  'git blame' [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-e] [-p] [-w] [--incremental]
>  	    [-L <range>] [-S <revs-file>] [-M] [-C] [-C] [-C] [--since=<date>]
> +	    [-i <rev>] [--no-default-ignores] [--ignore-file=<file>]
>  	    [--progress] [--abbrev=<n>] [<rev> | --contents <file> | --reverse <rev>..<rev>]
>  	    [--] <file>
>  
> @@ -84,6 +85,20 @@ include::blame-options.txt[]
>  	Ignore whitespace when comparing the parent's version and
>  	the child's to find where the lines came from.
>  
> +-i <rev>::
> +	Ignore revision when assigning blame.  Lines that were changed by an
> +	ignored commit will be marked with a `*` in the blame output.  Lines
> +	that were added by an ignored commit may be attributed commits making
> +	nearby changes or to the first commit touching the file.

It probably deserves to be told that this option can be given
multiple times and used cumulatively (unlike usual "last one wins"
rule).

> +--no-default-ignores::
> +	Do not automatically ignore revisions in the file
> +	`.git-blame-ignore-revs`.

This should not be "opt-out" like this.

> +--ignore-file=<file>::
> +	Ignore revisions listed in `file`, one revision per line.  Whitespace
> +	and comments beginning with `#` are ignored.

Should it be capable to take two or more ignore-files?  Or should we
use the usual "the last one wins" rule?

I think we should support blame.skipRevFile configuration variable
so that the users do not have to constantly give the option from the
command line.  And with that, there is no need to have a hardcoded
filename .git-blame-ignore-revs or anything like that.

If we are to use configuration variable, however, we'd need a way to
override its use from the command line, too.  Perhaps a sane
arrangement would be

    - if one or more --skip-revs-file=<file> are given from the
      command line, use all of them and ignore blame.skipRevsFile
      configuration variable.

    - if no --skip-revs-file=<file> is given from the command line,
      use blame.skipRevsFile configuration variable.

    - regardless of the above two, pay attention to --skip-rev=<rev>
      command line option(s).


Somehow the damage to blame.c codebase looks way too noisy for what
the code wants to do.  If all we want is to pretend in a history,
e.g.

    ---O---A---B---C---D

that commit B does not exist, i.e. use a distorted view of the
history

    ---O---A-------C---D

wouldn't it be sufficient to modify pass_blame(), i.e. the only the
caller of the pass_blame_to_parent(), where we find the parent
commits of "C" and dig the history to pass blame to "B", and have it
pretend as if "B" does not exist and pass blame directly to "A"?

Thanks.  I am personally not all that interested in this yet.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-07 21:30 [PATCH] blame: add the ability to ignore commits Barret Rhoden
  2019-01-07 23:13 ` Junio C Hamano
@ 2019-01-08 13:12 ` Ævar Arnfjörð Bjarmason
  2019-01-08 16:41   ` Barret Rhoden
  2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
  2 siblings, 1 reply; 39+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-08 13:12 UTC (permalink / raw)
  To: Barret Rhoden; +Cc: git, Stefan Beller, Jeff Smith, Junio C Hamano, Jeff King


On Mon, Jan 07 2019, Barret Rhoden wrote:

> +static int handle_ignore_file(const char *path, struct string_list *ignores)
> +{
> +	FILE *fp = fopen(path, "r");
> +	struct strbuf sb = STRBUF_INIT;
> +
> +	if (!fp)
> +		return -1;
> +	while (!strbuf_getline(&sb, fp)) {
> +		const char *hash;
> +
> +		hash = strchr(sb.buf, '#');
> +		if (hash)
> +			strbuf_setlen(&sb, hash - sb.buf);
> +		strbuf_trim(&sb);
> +		if (!sb.len)
> +			continue;
> +		string_list_append(ignores, sb.buf);
> +	}
> +	fclose(fp);
> +	strbuf_release(&sb);
> +	return 0;
> +}

Aside from other comments on this patch that Junio had either you mostly
copy-pasted this from init_skiplist() or you've come up with almost the
same code on your own.

In any case, if we're going to integrate something like this patch let's
split this "parse file with SHA-1s or comments/whitespace" into a
utility function that both this and init_skiplist() can call.

Then we could split up the description for the fsck.skipList config
variable to reference that format, and say that both it and this new
thing should consult those docs for how it's parsed.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-07 23:13 ` Junio C Hamano
@ 2019-01-08 16:27   ` Barret Rhoden
  2019-01-08 18:26     ` Junio C Hamano
  0 siblings, 1 reply; 39+ messages in thread
From: Barret Rhoden @ 2019-01-08 16:27 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

On 2019-01-07 at 15:13 Junio C Hamano <gitster@pobox.com> wrote:
> If I read it correctly, this gives a very limited form of -S, in the
> sense that anything this can do can be expressed by using -S but the
> reverse is not true, but is designed to be easier to use, in the
> sense that unlike -S, this does not have to describe the part of the
> history you do not have to lie about.  The documentation should say
> something about these pros-and-cons to help readers decide which
> feature to use.

Yeah, -S lists the revs to use, this lists the revs to *not* use.  I'll
add a note.

> I somehow feel that this is rare enough that it should not squat on
> short-and-sweet '-i'.  We would want to reserve it to something like
> "--ignore-case", for example.

Can do.  I'll change the interface to your suggestion from down below.

> > The file .git-blame-ignore-revs is checked by default.  
> 
> Giving the projects a way to easily help participants to share the
> same distorted view of the history is a good idea, but I do not
> think we should allow projects to do so to those who merely clone it
> without their consent.  IOW, "by default" is a terrible idea.
> 
> Perhaps paying attention to blame.skipRevsFile configuration
> variable that is set by the end user would be sufficient----the
> project can ship .blame-skip-revs (or any random filename of their
> choice) in-tree as tracked contents and tell its users that they can
> optionally use it.

A blame config option works for me.  I'd like the users/cloners of a
repo to not need to do anything extravagant, but a one-time config
would be fine.

> > It's useful to be alerted to the presence of an ignored commit in the
> > history of a line.  Those lines will be marked with '*' in the
> > non-porcelain output.  The '*' is attached to the line number to keep
> > from breaking tools that rely on the whitespace between columns.  
> 
> A policy decision like the above two shouldn't be hardcoded in the
> feature like this, but should be done as a separate option.  By
> default, these shouldn't be marked with '*', as the same tools you
> said you are afraid of breaking would be expecting a word with only
> digits and no asterisk in the column where line numbers appear and
> will get broken by this change if done unconditionally.

Since users are already opting-in to the blame-ignore, do you also want
them to opt-in to the annotation?  I can make a separate config option
to turn on the annotation.  Any preference for how it is marked?

> In general, I find this patch trying to change too many things at
> the same time, without sufficient justification.  Perhaps do these
> different things as separate steps in a single series?
> 
> > A blame_entry attributed to an ignored commit will get passed to its
> > parent.  
> 
> Obviously, an interesting consideration is what happens when a merge
> commit is skipped.  Is it sufficient to change this description to
> "...will get passed to its parentS", or would the code do completely
> nonsensical things without further tweaks (a possible simple tweak
> could be to refuse skipping merges)?

If we skip a merge commit, it might pick the wrong parent.  For
example, this line was brought in via a merge:

$ ~/src/git/git-blame include/linux/mm.h | grep VM_SYNC
b6fb293f2497a (Jan Kara 2017-11-01 16:36:41 +0100  204) #define VM_SYNC

It's from merge: a3841f94c7ec ("Merge tag 'libnvdimm-for-4.15', and if
we ignore it:

$ ~/src/git/git-blame -i a3841f94c7ecb include/linux/mm.h | grep VM_SYNC
cc2383ec06be0 (Konstantin Khlebnikov 2012-10-08 16:28:37 -0700  204*) #define VM_SYNC

The wrong commit is blamed.

I can put a note in the doc about it and die if we're given a merge
commit.  Is there a convenient helper for detecting a merge, or can I
just check for multiple parents?  (something like commit->parents &&
commit->parents->next)
 
> > If an ignored commit changed a line, an ancestor that changed
> > the line will get blamed.  However, if an ignored commit added lines, a
> > commit changing a nearby line may get blamed.  If no commit is found,
> > the original commit for the file will get blamed.  
> 
> The above somehow does not read as describing a carefully designed
> behaviour; rather, it sounds as if it is saying "the code does
> whatever random things it happens to do".  For example, when there
> is a newly added line how is "A" commit changing a nearby line
> chosen (a line has lines before it and after it---they may be
> touched by different commits, and before and after that skipped
> commit, so there are multiple commits to choose from)?

This was more of a commentary about its behavior.  If you ignore a
commit that added lines, it'd be nice to get a hint of what might have
caused it, and picking a commit that affected an adjacent line seemed
fine.  But yeah, it's not doing anything crazy.

> > diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt
> > index 16323eb80e31..e41375374892 100644
> > --- a/Documentation/git-blame.txt
> > +++ b/Documentation/git-blame.txt
> > @@ -10,6 +10,7 @@ SYNOPSIS
> >  [verse]
> >  'git blame' [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-e] [-p] [-w] [--incremental]
> >  	    [-L <range>] [-S <revs-file>] [-M] [-C] [-C] [-C] [--since=<date>]
> > +	    [-i <rev>] [--no-default-ignores] [--ignore-file=<file>]
> >  	    [--progress] [--abbrev=<n>] [<rev> | --contents <file> | --reverse <rev>..<rev>]
> >  	    [--] <file>
> >  
> > @@ -84,6 +85,20 @@ include::blame-options.txt[]
> >  	Ignore whitespace when comparing the parent's version and
> >  	the child's to find where the lines came from.
> >  
> > +-i <rev>::
> > +	Ignore revision when assigning blame.  Lines that were changed by an
> > +	ignored commit will be marked with a `*` in the blame output.  Lines
> > +	that were added by an ignored commit may be attributed commits making
> > +	nearby changes or to the first commit touching the file.  
> 
> It probably deserves to be told that this option can be given
> multiple times and used cumulatively (unlike usual "last one wins"
> rule).
> 
> > +--no-default-ignores::
> > +	Do not automatically ignore revisions in the file
> > +	`.git-blame-ignore-revs`.  
> 
> This should not be "opt-out" like this.
> 
> > +--ignore-file=<file>::
> > +	Ignore revisions listed in `file`, one revision per line.  Whitespace
> > +	and comments beginning with `#` are ignored.  
> 
> Should it be capable to take two or more ignore-files?  Or should we
> use the usual "the last one wins" rule?
> 
> I think we should support blame.skipRevFile configuration variable
> so that the users do not have to constantly give the option from the
> command line.  And with that, there is no need to have a hardcoded
> filename .git-blame-ignore-revs or anything like that.
> 
> If we are to use configuration variable, however, we'd need a way to
> override its use from the command line, too.  Perhaps a sane
> arrangement would be
> 
>     - if one or more --skip-revs-file=<file> are given from the
>       command line, use all of them and ignore blame.skipRevsFile
>       configuration variable.
> 
>     - if no --skip-revs-file=<file> is given from the command line,
>       use blame.skipRevsFile configuration variable.
> 
>     - regardless of the above two, pay attention to --skip-rev=<rev>
>       command line option(s).

Sounds fine to me.

> Somehow the damage to blame.c codebase looks way too noisy for what
> the code wants to do.  If all we want is to pretend in a history,
> e.g.
> 
>     ---O---A---B---C---D
> 
> that commit B does not exist, i.e. use a distorted view of the
> history
> 
>     ---O---A-------C---D
> 
> wouldn't it be sufficient to modify pass_blame(), i.e. the only the
> caller of the pass_blame_to_parent(), where we find the parent
> commits of "C" and dig the history to pass blame to "B", and have it
> pretend as if "B" does not exist and pass blame directly to "A"?

I originally tried to skip 'B' in pass_blame() when B popped up as a
scapegoat.  That broke the offsets of the blame_entries in the
parent.  By running diff_hunks(), we get the opportunity to adjust the
offsets.  Also, when it comes to marking the blame_entries for marking
the output, we want to know the specific diffs and to break them up at
the boundaries of [tlno,same) in blame_chunk().

> Thanks.  I am personally not all that interested in this yet.

Thanks for taking a look.

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-08 13:12 ` Ævar Arnfjörð Bjarmason
@ 2019-01-08 16:41   ` Barret Rhoden
  2019-01-08 18:04     ` Barret Rhoden
  0 siblings, 1 reply; 39+ messages in thread
From: Barret Rhoden @ 2019-01-08 16:41 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Stefan Beller, Jeff Smith, Junio C Hamano, Jeff King

On 2019-01-08 at 14:12 Ævar Arnfjörð Bjarmason <avarab@gmail.com> wrote:
> On Mon, Jan 07 2019, Barret Rhoden wrote:
> 
> > +static int handle_ignore_file(const char *path, struct string_list *ignores)
> > +{
> > +	FILE *fp = fopen(path, "r");
> > +	struct strbuf sb = STRBUF_INIT;
> > +
> > +	if (!fp)
> > +		return -1;
> > +	while (!strbuf_getline(&sb, fp)) {
> > +		const char *hash;
> > +
> > +		hash = strchr(sb.buf, '#');
> > +		if (hash)
> > +			strbuf_setlen(&sb, hash - sb.buf);
> > +		strbuf_trim(&sb);
> > +		if (!sb.len)
> > +			continue;
> > +		string_list_append(ignores, sb.buf);
> > +	}
> > +	fclose(fp);
> > +	strbuf_release(&sb);
> > +	return 0;
> > +}  
> 
> Aside from other comments on this patch that Junio had either you mostly
> copy-pasted this from init_skiplist() or you've come up with almost the
> same code on your own.
> 
> In any case, if we're going to integrate something like this patch let's
> split this "parse file with SHA-1s or comments/whitespace" into a
> utility function that both this and init_skiplist() can call.

One minor difference is that fsck wants an unabbreviated SHA-1, using
parse_oid_hex() instead of get_oid_committish().  Would you be OK with
also changing fsck to take a committish instead of a full SHA-1?

Is there a good place for the common helper?  Since it's an oidset, I
could put it in oidset.c.  oidset_parse_file() or something.

> Then we could split up the description for the fsck.skipList config
> variable to reference that format, and say that both it and this new
> thing should consult those docs for how it's parsed.

Is there a good spot for the generic skipList documentation?  The only
common text would be: 

	... comments ('#'), empty lines, and any leading and trailing
	whitespace is ignored

Thanks,

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-08 16:41   ` Barret Rhoden
@ 2019-01-08 18:04     ` Barret Rhoden
  0 siblings, 0 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-08 18:04 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Stefan Beller, Jeff Smith, Junio C Hamano, Jeff King

On 2019-01-08 at 11:41 Barret Rhoden <brho@google.com> wrote:
> Would you be OK with
> also changing fsck to take a committish instead of a full SHA-1?

Actually, in retrospect, I can keep the unabbreviated SHA-1 for the
file inputs and use get_oid_committish() for the one-off --skip-rev=
cases.

Thanks,

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-08 16:27   ` Barret Rhoden
@ 2019-01-08 18:26     ` Junio C Hamano
  2019-01-09 20:48       ` Barret Rhoden
  2019-01-09 21:21       ` Stefan Beller
  0 siblings, 2 replies; 39+ messages in thread
From: Junio C Hamano @ 2019-01-08 18:26 UTC (permalink / raw)
  To: Barret Rhoden; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

Barret Rhoden <brho@google.com> writes:

>> A policy decision like the above two shouldn't be hardcoded in the
>> feature like this, but should be done as a separate option.  By
>> default, these shouldn't be marked with '*', as the same tools you
>> said you are afraid of breaking would be expecting a word with only
>> digits and no asterisk in the column where line numbers appear and
>> will get broken by this change if done unconditionally.
>
> Since users are already opting-in to the blame-ignore, do you also want
> them to opt-in to the annotation?

Absolutely.

After all, the users of a moral equivalent that is -S
never needed such an extra annotation, which tells us two things.
(1) the claim "It's useful to be alerted to the presence of an
ignored commit" in the proposed log message is merely a personal
preference and universal users' requirement; (2) if it is useful to
mark a blame-source whose parents (used while blaming) do not match
the actual parents, such an annotation would also be useful while
running -S.  So probably it should be a separate option that can be
given when any of the --skip-commit=<rev>, --skip-commits-file=<file>,
r -S<file> option is given.

>> Obviously, an interesting consideration is what happens when a merge
>> commit is skipped.  Is it sufficient to change this description to
>> "...will get passed to its parentS", or would the code do completely
>> nonsensical things without further tweaks (a possible simple tweak
>> could be to refuse skipping merges)?
>
> If we skip a merge commit, it might pick the wrong parent.

Actually after thinking about it a bit more, I no longer think a
merge is so special.  In this topology (as usual, time flows from
left to right), if you are skipping M,

        ---A---M---C---D
              /
         ----B

you'd simply pretend that the ancestry is like this and you'd be
fine, no?


        ---A-------C---D
                  /
         --------B

That is, when inspecting C, pass_blame() would enumerate its true
parents, notice that M to be skipped, which would cause it to
pretend as if C has M's parents instead of M itself as its parents.
If M in the true history is the first parent of C, then M's first
parent in the true history becomes C's first parent, M's second
parent in the true history becomes C's second parent, etc. (if C
were a merge in the true history, such a rewriting would make C an
octopus in the distorted history)

> The wrong commit is blamed.

So... I still suspect that it is merely a bug in your implementation
and there is nothing inherent that forces us to avoid skipping a
merge.

>> Somehow the damage to blame.c codebase looks way too noisy for what
>> the code wants to do.  If all we want is to pretend in a history,
>> e.g.
>> 
>>     ---O---A---B---C---D
>> 
>> that commit B does not exist, i.e. use a distorted view of the
>> history
>> 
>>     ---O---A-------C---D
>> 
>> wouldn't it be sufficient to modify pass_blame(), i.e. the only the
>> caller of the pass_blame_to_parent(), where we find the parent
>> commits of "C" and dig the history to pass blame to "B", and have it
>> pretend as if "B" does not exist and pass blame directly to "A"?
>
> I originally tried to skip 'B' in pass_blame() when B popped up as a
> scapegoat.  That broke the offsets of the blame_entries in the
> parent.

Why?  When inspecting C in order to exonerate it from as many lines
as possible, we'd run "git diff $P C" for each parent of C, but in
the distorted history (i.e. instead of using $P==B, we use $P==A).
As long as the code reads from "git diff A C", I do not see why "the
offsets in the parent" can be broken.  Care to elaborate a bit more?


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-08 18:26     ` Junio C Hamano
@ 2019-01-09 20:48       ` Barret Rhoden
  2019-01-10 22:29         ` Junio C Hamano
  2019-01-09 21:21       ` Stefan Beller
  1 sibling, 1 reply; 39+ messages in thread
From: Barret Rhoden @ 2019-01-09 20:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

Hi -

On 2019-01-08 at 10:26 Junio C Hamano <gitster@pobox.com> wrote:
> >> Obviously, an interesting consideration is what happens when a merge
> >> commit is skipped.  Is it sufficient to change this description to
> >> "...will get passed to its parentS", or would the code do completely
> >> nonsensical things without further tweaks (a possible simple tweak
> >> could be to refuse skipping merges)?  
> >
> > If we skip a merge commit, it might pick the wrong parent.  
> 
> Actually after thinking about it a bit more, I no longer think a
> merge is so special.  In this topology (as usual, time flows from
> left to right), if you are skipping M,
> 
>         ---A---M---C---D
>               /
>          ----B
> 
> you'd simply pretend that the ancestry is like this and you'd be
> fine, no?
> 
> 
>         ---A-------C---D
>                   /
>          --------B
> 
> That is, when inspecting C, pass_blame() would enumerate its true
> parents, notice that M to be skipped, which would cause it to
> pretend as if C has M's parents instead of M itself as its parents.
> If M in the true history is the first parent of C, then M's first
> parent in the true history becomes C's first parent, M's second
> parent in the true history becomes C's second parent, etc. (if C
> were a merge in the true history, such a rewriting would make C an
> octopus in the distorted history)
> 
> > The wrong commit is blamed.  
> 
> So... I still suspect that it is merely a bug in your implementation
> and there is nothing inherent that forces us to avoid skipping a
> merge.

Probably!  =)  I tried a bunch of things initially, and might have had
things broken for other reasons.  One reason was not calling
parse_commit() at the right time.

I took another look and had first_scapegoat() scan the parent list, and
for any parent in the list, replace it with its own parent list.  e.g.
when removing 'A':

         ---X---A---B---C
               /
              Y 
              
(B's parent was A, A is the skipped commit, and A has parents X and Y)

becomes:

         ---X---B---C
               /
              Y

But just yanking skipped commits from the scapegoat list doesn't help,
since B gets blamed for the changes made by A, since the file B and
{X,Y} differ, specifically due to the changes A made.  

You'd think that would have worked, since it sounds similar to what -S
revs-file does, but it turns out I want different behavior than -S for
the ignore/skip.  With -S, you blame a nearby commit (at least based on
my testing - what is the desired behavior there?).  With ignore/skip, I
want to blame the last commit that modified the line.

For instance, commit X does this:

-foo(x,y);
+foo(x,y,z);

Then commit Y comes along to reformat it:

-foo(x,y,z);
+foo(x, y, z);

And the history / rev-list for the file looks like:

---O---A---X---B---C---D---Y---E---F

I want to ignore/skip Y and see X in the blame output.  

Sorry that I wasn't clear about that from the beginning; I can see how
one could have different expectations for what happens when a commit is
skipped.

> >> Somehow the damage to blame.c codebase looks way too noisy for what
> >> the code wants to do.  If all we want is to pretend in a history,
> >> e.g.
> >> 
> >>     ---O---A---B---C---D
> >> 
> >> that commit B does not exist, i.e. use a distorted view of the
> >> history
> >> 
> >>     ---O---A-------C---D
> >> 
> >> wouldn't it be sufficient to modify pass_blame(), i.e. the only the
> >> caller of the pass_blame_to_parent(), where we find the parent
> >> commits of "C" and dig the history to pass blame to "B", and have it
> >> pretend as if "B" does not exist and pass blame directly to "A"?  
> >
> > I originally tried to skip 'B' in pass_blame() when B popped up as a
> > scapegoat.  That broke the offsets of the blame_entries in the
> > parent.  
> 
> Why?  When inspecting C in order to exonerate it from as many lines
> as possible, we'd run "git diff $P C" for each parent of C, but in
> the distorted history (i.e. instead of using $P==B, we use $P==A).
> As long as the code reads from "git diff A C", I do not see why "the
> offsets in the parent" can be broken.  Care to elaborate a bit more?

When we run "git diff $P C" with $P == A, the diffs from B (the one we
want to remove) will pop up and be attributed to C.  This is what
happened when I tried the "yank B from the history" approach from
above.  If we ever diff over a skipped commit like that, we'll blame
the wrong one. (C, here).

The first thing I tried was to simply move the blame_entry list from C
to B to A without calling diff_hunks().  That's where the "file offsets
were broken," since e->s_lno should be adjusted when passing an entry
from the target blame_origin to its parent blame_origin.  An example of
this is in blame_chunk() when we hand every blame_entry before tlno to
the parent in the "e->s_lno < tlno" block.

Without diffing commit B, we can't propagate the blame_entries.  And
if we just diff A and C, we'll get the "wrong" commit.

The other reason to run the diffs was to identify and mark the
blame_entry that would have been blamed for the skipped commit.  That's
for the annotations during the output phase.  

If we don't track and taint the blame entries, you could just call
blame_chunk() from blame_chunk_cb() with tlno = same, which in essence
tells the blame code that "this diff belongs to the parent."  But that
loses the blame_entry boundaries, and is almost nearly as much of a
change to blame.c.

When it comes to skipping merges, my existing code couldn't do it, but
I found a way to make it work:

         ---A---M---C---D
               /
       ---X---B

Say the commit changing the line/blame_entry was X, and we try to skip
M.  'A' made no changes to the line.

If B is the first scapegoat, pass_blame_to_parent() will see no diff
between B and M for that line, and the blame will get passed to B, and
we're done.  This is the same both with my change and without.

However, if A is the first scapegoat, the line will have a diff, and
from the perspective of the ---A---M---C graph, it looks like M
introduced the change.  (In fact it did - it was via B though).  The
existing code would keep the blame on M and then check the next
scapegoat: B.  

With my change, it couldn't tell the difference between M introducing
a change that we wanted to skip and M having another parent that
introduced the change.  Both look like a diff from A..M.

I changed my version to handle that case.  In pass_blame(), if it fails
to find a scapegoat in the loop that calls pass_blame_to_parent(),
it'll make another loop through the scapegoats, where it will pass the
blame to the first scapegoat that has a diff.

This way, we get a first attempt to pass the blame for real (e.g. to
B), and then when we failed, we skip the commit.  In other words, we
only skip the commit when we need to.

With that change, from my earlier example:

This was wrong:
$ git-blame --skip-rev a3841f94c7ecb include/linux/mm.h | grep VM_SYNC
cc2383ec06be0 (Konstantin Khlebnikov 2012-10-08 16:28:37 -0700  204*) #define VM_SYNC

This is right:
$ git-blame --skip-rev a3841f94c7ecb include/linux/mm.h | grep VM_SYNC
b6fb293f2497a (Jan Kara              2017-11-01 16:36:41 +0100  204) #define VM_SYNC

Also note that in the 'right' case it didn't annotate it with the *.
That's because we didn't need to ignore the commit.  b6fb29 was the
correct one, and we didn't need to ignore M.

Anyway, thanks for reading and for the feedback.  I'll clean things up
and explain it better in another patchset.

Thanks,

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-08 18:26     ` Junio C Hamano
  2019-01-09 20:48       ` Barret Rhoden
@ 2019-01-09 21:21       ` Stefan Beller
  1 sibling, 0 replies; 39+ messages in thread
From: Stefan Beller @ 2019-01-09 21:21 UTC (permalink / raw)
  To: Junio C Hamano, Jonathan Tan
  Cc: Barret Rhoden, git, Stefan Beller, Jeff Smith, Jeff King

On Tue, Jan 8, 2019 at 10:26 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Barret Rhoden <brho@google.com> writes:
>
> >> A policy decision like the above two shouldn't be hardcoded in the
> >> feature like this, but should be done as a separate option.  By
> >> default, these shouldn't be marked with '*', as the same tools you
> >> said you are afraid of breaking would be expecting a word with only
> >> digits and no asterisk in the column where line numbers appear and
> >> will get broken by this change if done unconditionally.
> >
> > Since users are already opting-in to the blame-ignore, do you also want
> > them to opt-in to the annotation?
>
> Absolutely.
>
> After all, the users of a moral equivalent that is -S
> never needed such an extra annotation, which tells us two things.
> (1) the claim "It's useful to be alerted to the presence of an
> ignored commit" in the proposed log message is merely a personal
> preference and universal users' requirement; (2) if it is useful to
> mark a blame-source whose parents (used while blaming) do not match
> the actual parents, such an annotation would also be useful while
> running -S.  So probably it should be a separate option that can be
> given when any of the --skip-commit=<rev>, --skip-commits-file=<file>,
> r -S<file> option is given.

From a users point of view it may be desirable to express all this
in the grafts file, i.e. -S <file> where the syntax of that file is extended.
For example we could introduce
    !<hash>
to make it exclude that commit.
Of course this could lead to confusion, as this puts 2 very different
concepts into the same option/file.


Speaking of the implementation:
This patch proposes an oid-set that is handled specially
in blame itself. I wonder if this could be generalized.

Jonathan Tan (cc'd) refactored and extended revision walking
for git-fetch and its negotiation leading to
7c85ee6c58 (Merge branch 'jt/fetch-negotiator-skipping',
2018-08-02), and 3390e42adb (fetch-pack:
support negotiation tip whitelist, 2018-07-02)
which implements another revision walking
algorithm that can be used to fine-tune revisions
walked when fetching.

I wonder if that work could be generalized more
to have "generic" revision walking algorithms
and then making use of them in either fetch or
blame.

For git-fetch there is a new algorithm that increases
step size between commits, which would be funny to
try for blame here. It would give the wrong blamed
commit, but would speed up blaming a lot.

Omitting some revisions seems to be applicable to
more than just blame/fetch, too.

Stefan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-09 20:48       ` Barret Rhoden
@ 2019-01-10 22:29         ` Junio C Hamano
  2019-01-14 15:19           ` Barret Rhoden
  0 siblings, 1 reply; 39+ messages in thread
From: Junio C Hamano @ 2019-01-10 22:29 UTC (permalink / raw)
  To: Barret Rhoden; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

Barret Rhoden <brho@google.com> writes:

> For instance, commit X does this:
>
> -foo(x,y);
> +foo(x,y,z);
>
> Then commit Y comes along to reformat it:
>
> -foo(x,y,z);
> +foo(x, y, z);
>
> And the history / rev-list for the file looks like:
>
> ---O---A---X---B---C---D---Y---E---F
>
> I want to ignore/skip Y and see X in the blame output.  

If you skip Y, the altered history would have "foo(x, y, z)" in E,
"foo(x,y,z)" in X, and "foo(x,y)" in A.  If you start blaming from
F, you'd get E as the commit that explains the latest state.  If you
do not skip Y, you'd get Y.  I am not sure how you'd get X in either
case.  

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-10 22:29         ` Junio C Hamano
@ 2019-01-14 15:19           ` Barret Rhoden
  2019-01-14 17:45             ` Junio C Hamano
  0 siblings, 1 reply; 39+ messages in thread
From: Barret Rhoden @ 2019-01-14 15:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

On 2019-01-10 at 14:29 Junio C Hamano <gitster@pobox.com> wrote:
> > For instance, commit X does this:
> >
> > -foo(x,y);
> > +foo(x,y,z);
> >
> > Then commit Y comes along to reformat it:
> >
> > -foo(x,y,z);
> > +foo(x, y, z);
> >
> > And the history / rev-list for the file looks like:
> >
> > ---O---A---X---B---C---D---Y---E---F
> >
> > I want to ignore/skip Y and see X in the blame output.    
> 
> If you skip Y, the altered history would have "foo(x, y, z)" in E,
> "foo(x,y,z)" in X, and "foo(x,y)" in A.  If you start blaming from
> F, you'd get E as the commit that explains the latest state.  If you
> do not skip Y, you'd get Y.  I am not sure how you'd get X in either
> case.  

The way to do it is to let the blames get passed to Y - don't yank it
from the graph.  Then when trying to pass the blames from Y to its
parent, when we get a diff chunk that Y is responsible for, instead of
keeping it on Y's suspect list, we hand the blame_entry to D.  That
blame_entry will get passed all the way back to X, which also has a
diff that touches that line.

Basically we do the same blame processing as usual, but just don't let
any blames stick to Y.

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-14 15:19           ` Barret Rhoden
@ 2019-01-14 17:45             ` Junio C Hamano
  2019-01-14 18:26               ` Randall S. Becker
  0 siblings, 1 reply; 39+ messages in thread
From: Junio C Hamano @ 2019-01-14 17:45 UTC (permalink / raw)
  To: Barret Rhoden; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

Barret Rhoden <brho@google.com> writes:

> On 2019-01-10 at 14:29 Junio C Hamano <gitster@pobox.com> wrote:
>> > For instance, commit X does this:
>> >
>> > -foo(x,y);
>> > +foo(x,y,z);
>> >
>> > Then commit Y comes along to reformat it:
>> >
>> > -foo(x,y,z);
>> > +foo(x, y, z);
>> >
>> > And the history / rev-list for the file looks like:
>> >
>> > ---O---A---X---B---C---D---Y---E---F
>> >
>> > I want to ignore/skip Y and see X in the blame output.    
>> 
>> If you skip Y, the altered history would have "foo(x, y, z)" in E,
>> "foo(x,y,z)" in X, and "foo(x,y)" in A.  If you start blaming from
>> F, you'd get E as the commit that explains the latest state.  If you
>> do not skip Y, you'd get Y.  I am not sure how you'd get X in either
>> case.  
>
> The way to do it is ...

Sorry, I made a too-fuzzy statement.  What I meant was, that unless
you are ignoring E, I do not know why you "would want to" attribute
a line "foo(x, y, z)" that appears in F to X.  Starting from X up to
D (and to Y in real history, but you are ignoring Y), the line was
"foo(x,y,z)", after E, it is "foo(x, y, z)".  I didn't mean to ask
how you "would show" such a result---as I do not yet understand why
you would want such a result to begin with.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* RE: [PATCH] blame: add the ability to ignore commits
  2019-01-14 17:45             ` Junio C Hamano
@ 2019-01-14 18:26               ` Randall S. Becker
  2019-01-14 19:03                 ` Barret Rhoden
  2019-01-15  6:08                 ` Junio C Hamano
  0 siblings, 2 replies; 39+ messages in thread
From: Randall S. Becker @ 2019-01-14 18:26 UTC (permalink / raw)
  To: Junio C Hamano, Barret Rhoden; +Cc: git, Stefan Beller, Jeff Smith, Jeff King

On January 14, 2019 12:46, Junio C Hamano wrote:
> Barret Rhoden <brho@google.com> writes:
> 
> > On 2019-01-10 at 14:29 Junio C Hamano <gitster@pobox.com> wrote:
> >> > For instance, commit X does this:
> >> >
> >> > -foo(x,y);
> >> > +foo(x,y,z);
> >> >
> >> > Then commit Y comes along to reformat it:
> >> >
> >> > -foo(x,y,z);
> >> > +foo(x, y, z);
> >> >
> >> > And the history / rev-list for the file looks like:
> >> >
> >> > ---O---A---X---B---C---D---Y---E---F
> >> >
> >> > I want to ignore/skip Y and see X in the blame output.
> >>
> >> If you skip Y, the altered history would have "foo(x, y, z)" in E,
> >> "foo(x,y,z)" in X, and "foo(x,y)" in A.  If you start blaming from F,
> >> you'd get E as the commit that explains the latest state.  If you do
> >> not skip Y, you'd get Y.  I am not sure how you'd get X in either
> >> case.
> >
> > The way to do it is ...
> 
> Sorry, I made a too-fuzzy statement.  What I meant was, that unless you
are
> ignoring E, I do not know why you "would want to" attribute a line "foo(x,
y,
> z)" that appears in F to X.  Starting from X up to D (and to Y in real
history, but
> you are ignoring Y), the line was "foo(x,y,z)", after E, it is "foo(x, y,
z)".  I
> didn't mean to ask how you "would show" such a result---as I do not yet
> understand why you would want such a result to begin with.

From my own community, this came up also. The intent was to show everyone
who touched a particular line, throughout history, not just the current one.
Perhaps that is what Barret is going for.

Regards,
Randall


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-14 18:26               ` Randall S. Becker
@ 2019-01-14 19:03                 ` Barret Rhoden
  2019-01-15  6:08                 ` Junio C Hamano
  1 sibling, 0 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-14 19:03 UTC (permalink / raw)
  To: Randall S. Becker
  Cc: Junio C Hamano, git, Stefan Beller, Jeff Smith, Jeff King

On 2019-01-14 at 13:26 "Randall S. Becker" <rsbecker@nexbridge.com>
wrote:
> > 
> > Sorry, I made a too-fuzzy statement.  What I meant was, that unless you  
> are
> > ignoring E, I do not know why you "would want to" attribute a line "foo(x,  
> y,
> > z)" that appears in F to X.  Starting from X up to D (and to Y in real  
> history, but
> > you are ignoring Y), the line was "foo(x,y,z)", after E, it is "foo(x, y,  
> z)".  I
> > didn't mean to ask how you "would show" such a result---as I do not yet
> > understand why you would want such a result to begin with.  
> 
> From my own community, this came up also. The intent was to show everyone
> who touched a particular line, throughout history, not just the current one.
> Perhaps that is what Barret is going for.

Yeah.  I want to find the most recent commit that changed a line, minus
a set of commits that are deemed 'not interesting' by the user.  

The primary reason for this is to not see a blame attributed to a
commit that does nothing but reformatting the code base.

One could also use this feature for one-off investigation with the
command-line switch.  Imagine a manually driven git-blame + git-log,
where you do a git-blame, check the commit, decide you want to see the
next one back, and rerun git-blame with --skip-rev=SHA1.

The Chromium depot_tools has a python tool that can do this, called
hyper-blame: 
(https://commondatastorage.googleapis.com/chrome-infra-docs/flat/depot_tools/docs/html/git-hyper-blame.html).

I thought it was a useful feature for all of git's users.  And I also
didn't want people to have to install depot_tools.

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] blame: add the ability to ignore commits
  2019-01-14 18:26               ` Randall S. Becker
  2019-01-14 19:03                 ` Barret Rhoden
@ 2019-01-15  6:08                 ` Junio C Hamano
  1 sibling, 0 replies; 39+ messages in thread
From: Junio C Hamano @ 2019-01-15  6:08 UTC (permalink / raw)
  To: Randall S. Becker
  Cc: Barret Rhoden, git, Stefan Beller, Jeff Smith, Jeff King

"Randall S. Becker" <rsbecker@nexbridge.com> writes:

> On January 14, 2019 12:46, Junio C Hamano wrote:
>> Barret Rhoden <brho@google.com> writes:
>> 
>> > On 2019-01-10 at 14:29 Junio C Hamano <gitster@pobox.com> wrote:
>> >> > For instance, commit X does this:
>> >> >
>> >> > -foo(x,y);
>> >> > +foo(x,y,z);
>> >> >
>> >> > Then commit Y comes along to reformat it:
>> >> >
>> >> > -foo(x,y,z);
>> >> > +foo(x, y, z);
>> >> >
>> >> > And the history / rev-list for the file looks like:
>> >> >
>> >> > ---O---A---X---B---C---D---Y---E---F
>> >> >
>> >> > I want to ignore/skip Y and see X in the blame output.
>> >>
>> >> If you skip Y, the altered history would have "foo(x, y, z)" in E,
>> >> "foo(x,y,z)" in X, and "foo(x,y)" in A.  If you start blaming from F,
>> >> you'd get E as the commit that explains the latest state.  If you do
>> >> not skip Y, you'd get Y.  I am not sure how you'd get X in either
>> >> case.
>> >
>> ...
>
> From my own community, this came up also. The intent was to show everyone
> who touched a particular line, throughout history, not just the current one.
> Perhaps that is what Barret is going for.

I think I now understand what is going on.  In short, what Barret
wanted is way more ambitious than "blame in a hypothetical history
that lacks certain commits".  There is no similiarity with existing
"-S <revs>" option that I asked about in my initial review.  This is
quite a different beast (and I kind-a like it ;-).

Instead of "skipping" Y, Barret wants to pretend that the effect of
commit Y, in addition to the commit Y itself, never existed in the
history.  Even when blaming back from F (where it has "foo(x, y,
z)", which is the same as E and down to Y), the algorithm wants to
pretend as if E and F had that line in "foo(x,y,z)" form, i.e.
before Y touched it, and also pretend that the user started blaming
a line in that shape.

As there is no general solution [*1*] to "how did this line looked
like before this commit", such a blame may not have a meaningful
answer.  When Y added a block of text without removing anything
(i.e. "git show Y" has no line that begins with '-'), and when the
user asked to blame that block of text in F, there is no sensible
answer to the question: "what got replaced by Y in this text?", so a
blame that "ignores" Y would not be able to answer "where did these
lines come from?" question.  But in some cases (e.g. when "git show
Y" has only one '-' line, immediately followed by '+' line, both
with quite a similar contents and no other change in the patch---it
is highly plausible that in such a change, the former became the
latter, like the example in Barret's message I was responding to),
we can say "this was how the line that was modified in Y looked like
before Y" and blaming while ignoring Y can have a reasonable answer.


[Footnote]

*1* Here, I use the word "solution" in the same sense as "solution
to a math equation", not in the sense to describe the steps to
derive that "solution".

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 0/3] blame: add the ability to ignore commits
  2019-01-07 21:30 [PATCH] blame: add the ability to ignore commits Barret Rhoden
  2019-01-07 23:13 ` Junio C Hamano
  2019-01-08 13:12 ` Ævar Arnfjörð Bjarmason
@ 2019-01-17 20:29 ` " Barret Rhoden
  2019-01-17 20:29   ` [PATCH v2 1/3] Move init_skiplist() outside of fsck Barret Rhoden
                     ` (3 more replies)
  2 siblings, 4 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-17 20:29 UTC (permalink / raw)
  To: git
  Cc: Ævar Arnfjörð Bjarmason, David Kastrup, Jeff King,
	Jeff Smith, Johannes Schindelin, Junio C Hamano,
	René Scharfe, Stefan Beller

Adds the ability to ignore a set of commits and their changes when
blaming.  This can be used to ignore a commit deemed 'not interesting,'
such as reformatting.

v1: https://public-inbox.org/git/20190107213013.231514-1-brho@google.com/
v1 -> v2
- extracted the skiplist from fsck to avoid duplicating code
- overhauled the interface and options
- split out markIgnoredFiles
- handled merges

Barret Rhoden (3):
  Move init_skiplist() outside of fsck
  blame: add the ability to ignore commits and their changes
  blame: add a config option to mark ignored lines

 Documentation/blame-options.txt | 15 +++++++++
 Documentation/git-blame.txt     |  1 +
 blame.c                         | 56 +++++++++++++++++++++++++++------
 blame.h                         |  3 ++
 builtin/blame.c                 | 33 +++++++++++++++++++
 fsck.c                          | 37 +---------------------
 oidset.c                        | 35 +++++++++++++++++++++
 oidset.h                        |  7 +++++
 8 files changed, 142 insertions(+), 45 deletions(-)

-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
@ 2019-01-17 20:29   ` Barret Rhoden
  2019-01-18  9:25     ` Johannes Schindelin
  2019-01-18  9:45     ` Ævar Arnfjörð Bjarmason
  2019-01-17 20:29   ` [PATCH v2 2/3] blame: add the ability to ignore commits and their changes Barret Rhoden
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-17 20:29 UTC (permalink / raw)
  To: git
  Cc: Ævar Arnfjörð Bjarmason, David Kastrup, Jeff King,
	Jeff Smith, Johannes Schindelin, Junio C Hamano,
	René Scharfe, Stefan Beller

init_skiplist() took a file consisting of SHA-1s and comments and added
the objects to an oidset.  This functionality is useful for other
commands.

Signed-off-by: Barret Rhoden <brho@google.com>
---
 fsck.c   | 37 +------------------------------------
 oidset.c | 35 +++++++++++++++++++++++++++++++++++
 oidset.h |  7 +++++++
 3 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/fsck.c b/fsck.c
index 68502ce85b11..80b53e6f4968 100644
--- a/fsck.c
+++ b/fsck.c
@@ -181,41 +181,6 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
 	return msg_type;
 }
 
-static void init_skiplist(struct fsck_options *options, const char *path)
-{
-	FILE *fp;
-	struct strbuf sb = STRBUF_INIT;
-	struct object_id oid;
-
-	fp = fopen(path, "r");
-	if (!fp)
-		die("Could not open skip list: %s", path);
-	while (!strbuf_getline(&sb, fp)) {
-		const char *p;
-		const char *hash;
-
-		/*
-		 * Allow trailing comments, leading whitespace
-		 * (including before commits), and empty or whitespace
-		 * only lines.
-		 */
-		hash = strchr(sb.buf, '#');
-		if (hash)
-			strbuf_setlen(&sb, hash - sb.buf);
-		strbuf_trim(&sb);
-		if (!sb.len)
-			continue;
-
-		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
-			die("Invalid SHA-1: %s", sb.buf);
-		oidset_insert(&options->skiplist, &oid);
-	}
-	if (ferror(fp))
-		die_errno("Could not read '%s'", path);
-	fclose(fp);
-	strbuf_release(&sb);
-}
-
 static int parse_msg_type(const char *str)
 {
 	if (!strcmp(str, "error"))
@@ -284,7 +249,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
 		if (!strcmp(buf, "skiplist")) {
 			if (equal == len)
 				die("skiplist requires a path");
-			init_skiplist(options, buf + equal + 1);
+			oidset_parse_file(&options->skiplist, buf + equal + 1);
 			buf += len + 1;
 			continue;
 		}
diff --git a/oidset.c b/oidset.c
index fe4eb921df81..a4f38a040320 100644
--- a/oidset.c
+++ b/oidset.c
@@ -35,3 +35,38 @@ void oidset_clear(struct oidset *set)
 	kh_release_oid(&set->set);
 	oidset_init(set, 0);
 }
+
+void oidset_parse_file(struct oidset *set, const char *path)
+{
+	FILE *fp;
+	struct strbuf sb = STRBUF_INIT;
+	struct object_id oid;
+
+	fp = fopen(path, "r");
+	if (!fp)
+		die("Could not open skip list: %s", path);
+	while (!strbuf_getline(&sb, fp)) {
+		const char *p;
+		const char *hash;
+
+		/*
+		 * Allow trailing comments, leading whitespace
+		 * (including before commits), and empty or whitespace
+		 * only lines.
+		 */
+		hash = strchr(sb.buf, '#');
+		if (hash)
+			strbuf_setlen(&sb, hash - sb.buf);
+		strbuf_trim(&sb);
+		if (!sb.len)
+			continue;
+
+		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
+			die("Invalid SHA-1: %s", sb.buf);
+		oidset_insert(set, &oid);
+	}
+	if (ferror(fp))
+		die_errno("Could not read '%s'", path);
+	fclose(fp);
+	strbuf_release(&sb);
+}
diff --git a/oidset.h b/oidset.h
index c9d0f6d3cc8b..a3452eb7de84 100644
--- a/oidset.h
+++ b/oidset.h
@@ -73,6 +73,13 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
  */
 void oidset_clear(struct oidset *set);
 
+/**
+ * Add the contents of the file 'path' to an initialized oidset.  Each line is
+ * an unabbreviated SHA-1.  Comments begin with '#', and trailing comments are
+ * allowed.  Leading whitespace and empty or white-space only lines are ignored.
+ */
+void oidset_parse_file(struct oidset *set, const char *path);
+
 struct oidset_iter {
 	kh_oid_t *set;
 	khiter_t iter;
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
  2019-01-17 20:29   ` [PATCH v2 1/3] Move init_skiplist() outside of fsck Barret Rhoden
@ 2019-01-17 20:29   ` Barret Rhoden
  2019-01-18  9:47     ` Johannes Schindelin
  2019-01-20 18:19     ` René Scharfe
  2019-01-17 20:29   ` [PATCH v2 3/3] blame: add a config option to mark ignored lines Barret Rhoden
  2019-01-18  9:52   ` [PATCH v2 0/3] blame: add the ability to ignore commits Ævar Arnfjörð Bjarmason
  3 siblings, 2 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-17 20:29 UTC (permalink / raw)
  To: git
  Cc: Ævar Arnfjörð Bjarmason, David Kastrup, Jeff King,
	Jeff Smith, Johannes Schindelin, Junio C Hamano,
	René Scharfe, Stefan Beller

Commits that make formatting changes or function renames are often not
interesting when blaming a file.  A user may deem such a commit as 'not
interesting' and want to ignore and its changes it when assigning blame.

For example, say a file has the following git history / rev-list:

---O---A---X---B---C---D---Y---E---F

Commits X and Y both touch a particular line, and the other commits do
not:

X: "Take a third parameter"
-MyFunc(1, 2);
+MyFunc(1, 2, 3);

Y: "Remove camelcase"
-MyFunc(1, 2, 3);
+my_func(1, 2, 3);

git-blame will blame Y for the change.  I'd like to be able to ignore Y:
both the existence of the commit as well as any changes it made.  This
differs from -S rev-list, which specifies the list of commits to
process for the blame.  We still process Y, we just don't let the blame
'stick.'

Users can ignore a revision with --ignore-rev=rev, which may be
repeated.  They can specify a file of full SHA-1 hashes of revs (one per
line) with the blame.ignoreRevFile config option.  They can also specify
a file with --ignore-rev-file=file, which overrides the config option.

For a typical use case, projects will maintain the file containing
revisions for commits that perform mass reformatting, and their users
have the optional to ignore all of the commits in that file.

Additionally, a user can use the --ignore-rev option for one-off
investigation.  To go back to the example above, X was a substantive
change to the function, but not the change the user is interested in.
The user inspected X, but wanted to find the previous change to that
line - perhaps a commit that introduced that function call.

To make this work, we can't simply remove all ignored commits from the
rev-list.  We need to diff the changes introduced by Y so that we can
ignore them.  We let the blames get passed to Y, just like when
processing normally.  When Y is the target, we make sure that Y does not
*keep* any blames.  Any changes that Y is responsible for get passed to
its parent.  Note we make one pass through all of the scapegoats
(parents) to attempt to pass blame normally; we don't know if we *need*
to ignore the commit until we've checked all of the parents.

The blame_entry will get passed up the tree until we find a commit that
has a diff chunk that affects those lines.  If an ignored commit added
more lines than it removed, the blame will fall on a commit that made a
change nearby.  There is no general solution here, just a best-effort
approach.  For a trivial example, consider ignoring this commit:

Z: "Adding Lines"
 foo
+No commit
+ever touched
+these lines
 bar

Signed-off-by: Barret Rhoden <brho@google.com>
---
 Documentation/blame-options.txt | 13 +++++++++
 Documentation/git-blame.txt     |  1 +
 blame.c                         | 48 +++++++++++++++++++++++++++------
 blame.h                         |  2 ++
 builtin/blame.c                 | 24 +++++++++++++++++
 5 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
index dc41957afab2..424a63f0b45c 100644
--- a/Documentation/blame-options.txt
+++ b/Documentation/blame-options.txt
@@ -110,5 +110,18 @@ commit. And the default value is 40. If there are more than one
 `-C` options given, the <num> argument of the last `-C` will
 take effect.
 
+--ignore-rev <rev>::
+	Ignore changes made by the revision when assigning blame, as if the
+	change never happened.  Lines that were changed or added by an ignored
+	commit will be blamed on the previous commit that changed that line or
+	nearby lines.  This option may be specified multiple times to ignore
+	more than one revision.
+
+--ignore-revs-file <file>::
+	Ignore revisions listed in `file`, one full SHA-1 hash per line.
+	Whitespace and comments beginning with `#` are ignored.  This overrides
+	the `blame.ignoreRevsFile` config option, which specifies a default
+	file.
+
 -h::
 	Show help message.
diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt
index 16323eb80e31..7e8154199635 100644
--- a/Documentation/git-blame.txt
+++ b/Documentation/git-blame.txt
@@ -10,6 +10,7 @@ SYNOPSIS
 [verse]
 'git blame' [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-e] [-p] [-w] [--incremental]
 	    [-L <range>] [-S <revs-file>] [-M] [-C] [-C] [-C] [--since=<date>]
+	    [--ignore-rev <rev>] [--ignore-revs-file <file>]
 	    [--progress] [--abbrev=<n>] [<rev> | --contents <file> | --reverse <rev>..<rev>]
 	    [--] <file>
 
diff --git a/blame.c b/blame.c
index d84c93778080..0b91fba2d04c 100644
--- a/blame.c
+++ b/blame.c
@@ -845,10 +845,10 @@ static struct blame_entry *reverse_blame(struct blame_entry *head,
  */
 static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			int tlno, int offset, int same,
-			struct blame_origin *parent)
+			struct blame_origin *parent, int ignore_diffs)
 {
 	struct blame_entry *e = **srcq;
-	struct blame_entry *samep = NULL, *diffp = NULL;
+	struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL;
 
 	while (e && e->s_lno < tlno) {
 		struct blame_entry *next = e->next;
@@ -925,10 +925,23 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			n->next = samep;
 			samep = n;
 		}
-		e->next = diffp;
-		diffp = e;
+		if (ignore_diffs) {
+			/* These go to the parent, like the ones before tlno. */
+			blame_origin_decref(e->suspect);
+			e->suspect = blame_origin_incref(parent);
+			e->s_lno += offset;
+			e->next = ignoredp;
+			ignoredp = e;
+		} else {
+			e->next = diffp;
+			diffp = e;
+		}
 		e = next;
 	}
+	if (ignoredp) {
+		**dstq = reverse_blame(ignoredp, **dstq);
+		*dstq = &ignoredp->next;
+	}
 	**srcq = reverse_blame(diffp, reverse_blame(samep, e));
 	/* Move across elements that are in the unblamable portion */
 	if (diffp)
@@ -938,6 +951,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 struct blame_chunk_cb_data {
 	struct blame_origin *parent;
 	long offset;
+	int ignore_diffs;
 	struct blame_entry **dstq;
 	struct blame_entry **srcq;
 };
@@ -950,7 +964,7 @@ static int blame_chunk_cb(long start_a, long count_a,
 	if (start_a - start_b != d->offset)
 		die("internal error in blame::blame_chunk_cb");
 	blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
-		    start_b + count_b, d->parent);
+		    start_b + count_b, d->parent, d->ignore_diffs);
 	d->offset = start_a + count_a - (start_b + count_b);
 	return 0;
 }
@@ -962,7 +976,7 @@ static int blame_chunk_cb(long start_a, long count_a,
  */
 static void pass_blame_to_parent(struct blame_scoreboard *sb,
 				 struct blame_origin *target,
-				 struct blame_origin *parent)
+				 struct blame_origin *parent, int ignore_diffs)
 {
 	mmfile_t file_p, file_o;
 	struct blame_chunk_cb_data d;
@@ -973,6 +987,7 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 
 	d.parent = parent;
 	d.offset = 0;
+	d.ignore_diffs = ignore_diffs;
 	d.dstq = &newdest; d.srcq = &target->suspects;
 
 	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
@@ -984,7 +999,7 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 		    oid_to_hex(&parent->commit->object.oid),
 		    oid_to_hex(&target->commit->object.oid));
 	/* The rest are the same as the parent */
-	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent);
+	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent, 0);
 	*d.dstq = NULL;
 	queue_blames(sb, parent, newdest);
 
@@ -1489,11 +1504,28 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
 			blame_origin_incref(porigin);
 			origin->previous = porigin;
 		}
-		pass_blame_to_parent(sb, origin, porigin);
+		pass_blame_to_parent(sb, origin, porigin, 0);
 		if (!origin->suspects)
 			goto finish;
 	}
 
+	/*
+	 * Pass remaining suspects for ignored commits to their parents.
+	 */
+	if (oidset_contains(&sb->ignore_list, &commit->object.oid)) {
+		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
+		     i < num_sg && sg;
+		     sg = sg->next, i++) {
+			struct blame_origin *porigin = sg_origin[i];
+
+			if (!porigin)
+				continue;
+			pass_blame_to_parent(sb, origin, porigin, 1);
+			if (!origin->suspects)
+				goto finish;
+		}
+	}
+
 	/*
 	 * Optionally find moves in parents' files.
 	 */
diff --git a/blame.h b/blame.h
index be3a895043e0..086b92915e4b 100644
--- a/blame.h
+++ b/blame.h
@@ -117,6 +117,8 @@ struct blame_scoreboard {
 	/* linked list of blames */
 	struct blame_entry *ent;
 
+	struct oidset ignore_list;
+
 	/* look-up a line in the final buffer */
 	int num_lines;
 	int *lineno;
diff --git a/builtin/blame.c b/builtin/blame.c
index 6d798f99392e..2f9183fb5fbd 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -52,6 +52,7 @@ static int no_whole_file_rename;
 static int show_progress;
 static char repeated_meta_color[COLOR_MAXLEN];
 static int coloring_mode;
+static const char *ignore_revs_file;
 
 static struct date_mode blame_date_mode = { DATE_ISO8601 };
 static size_t blame_date_width;
@@ -695,6 +696,8 @@ static int git_blame_config(const char *var, const char *value, void *cb)
 		parse_date_format(value, &blame_date_mode);
 		return 0;
 	}
+	if (!strcmp(var, "blame.ignorerevsfile"))
+		return git_config_pathname(&ignore_revs_file, var, value);
 	if (!strcmp(var, "color.blame.repeatedlines")) {
 		if (color_parse_mem(value, strlen(value), repeated_meta_color))
 			warning(_("invalid color '%s' in color.blame.repeatedLines"),
@@ -774,6 +777,22 @@ static int is_a_rev(const char *name)
 	return OBJ_NONE < oid_object_info(the_repository, &oid, NULL);
 }
 
+static void build_ignorelist(struct blame_scoreboard *sb,
+			     struct string_list *ignore_rev_list)
+{
+	struct string_list_item *i;
+	struct object_id oid;
+
+	oidset_init(&sb->ignore_list, 0);
+	if (ignore_revs_file)
+		oidset_parse_file(&sb->ignore_list, ignore_revs_file);
+	for_each_string_list_item(i, ignore_rev_list) {
+		if (get_oid_committish(i->string, &oid))
+			die(_("Can't find revision '%s' to ignore"), i->string);
+		oidset_insert(&sb->ignore_list, &oid);
+	}
+}
+
 int cmd_blame(int argc, const char **argv, const char *prefix)
 {
 	struct rev_info revs;
@@ -785,6 +804,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	struct progress_info pi = { NULL, 0 };
 
 	struct string_list range_list = STRING_LIST_INIT_NODUP;
+	struct string_list ignore_rev_list = STRING_LIST_INIT_NODUP;
 	int output_option = 0, opt = 0;
 	int show_stats = 0;
 	const char *revs_file = NULL;
@@ -806,6 +826,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 		OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
 		OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
 		OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+		OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("Ignore <rev> when blaming")),
+		OPT_STRING(0, "ignore-revs-file", &ignore_revs_file, N_("file"), N_("Ignore revisions from <file>")),
 		OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
 		OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
 
@@ -995,6 +1017,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	sb.contents_from = contents_from;
 	sb.reverse = reverse;
 	sb.repo = the_repository;
+	build_ignorelist(&sb, &ignore_rev_list);
+	string_list_clear(&ignore_rev_list, 0);
 	setup_scoreboard(&sb, path, &o);
 	lno = sb.num_lines;
 
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 3/3] blame: add a config option to mark ignored lines
  2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
  2019-01-17 20:29   ` [PATCH v2 1/3] Move init_skiplist() outside of fsck Barret Rhoden
  2019-01-17 20:29   ` [PATCH v2 2/3] blame: add the ability to ignore commits and their changes Barret Rhoden
@ 2019-01-17 20:29   ` Barret Rhoden
  2019-01-18 10:03     ` Johannes Schindelin
  2019-01-18  9:52   ` [PATCH v2 0/3] blame: add the ability to ignore commits Ævar Arnfjörð Bjarmason
  3 siblings, 1 reply; 39+ messages in thread
From: Barret Rhoden @ 2019-01-17 20:29 UTC (permalink / raw)
  To: git
  Cc: Ævar Arnfjörð Bjarmason, David Kastrup, Jeff King,
	Jeff Smith, Johannes Schindelin, Junio C Hamano,
	René Scharfe, Stefan Beller

When ignoring commits, the commit that is blamed might not be
responsible for the change.  Users might want to know when a particular
line has a potentially inaccurate blame.

By specifying blame.markIgnoredFiles, each blame line is marked with an
'*'.  For example:

278b6158d6fdb (Barret Rhoden  2016-04-11 13:57:54 -0400 26)

appears as:

*278b6158d6fd (Barret Rhoden  2016-04-11 13:57:54 -0400 26)

where the '*' is placed before the commit, and the hash has one fewer
characters.

Signed-off-by: Barret Rhoden <brho@google.com>
---
 Documentation/blame-options.txt | 4 +++-
 blame.c                         | 8 +++++++-
 blame.h                         | 1 +
 builtin/blame.c                 | 9 +++++++++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
index 424a63f0b45c..92787ae951ac 100644
--- a/Documentation/blame-options.txt
+++ b/Documentation/blame-options.txt
@@ -115,7 +115,9 @@ take effect.
 	change never happened.  Lines that were changed or added by an ignored
 	commit will be blamed on the previous commit that changed that line or
 	nearby lines.  This option may be specified multiple times to ignore
-	more than one revision.
+	more than one revision.  If the `blame.markIgnoredLines` config option
+	is set, then lines that were changed by an ignored commit will be
+	marked with a `*` in the blame output.
 
 --ignore-revs-file <file>::
 	Ignore revisions listed in `file`, one full SHA-1 hash per line.
diff --git a/blame.c b/blame.c
index 0b91fba2d04c..b1805633fb23 100644
--- a/blame.c
+++ b/blame.c
@@ -474,7 +474,8 @@ void blame_coalesce(struct blame_scoreboard *sb)
 
 	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 		if (ent->suspect == next->suspect &&
-		    ent->s_lno + ent->num_lines == next->s_lno) {
+		    ent->s_lno + ent->num_lines == next->s_lno &&
+		    ent->ignored == next->ignored) {
 			ent->num_lines += next->num_lines;
 			ent->next = next->next;
 			blame_origin_decref(next->suspect);
@@ -726,6 +727,8 @@ static void split_overlap(struct blame_entry *split,
 	int chunk_end_lno;
 	memset(split, 0, sizeof(struct blame_entry [3]));
 
+	split[0].ignored = split[1].ignored = split[2].ignored = e->ignored;
+
 	if (e->s_lno < tlno) {
 		/* there is a pre-chunk part not blamed on parent */
 		split[0].suspect = blame_origin_incref(e->suspect);
@@ -862,6 +865,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			int len = tlno - e->s_lno;
 			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
 			n->suspect = e->suspect;
+			n->ignored = e->ignored;
 			n->lno = e->lno + len;
 			n->s_lno = e->s_lno + len;
 			n->num_lines = e->num_lines - len;
@@ -916,6 +920,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			int len = same - e->s_lno;
 			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
 			n->suspect = blame_origin_incref(e->suspect);
+			n->ignored = e->ignored;
 			n->lno = e->lno + len;
 			n->s_lno = e->s_lno + len;
 			n->num_lines = e->num_lines - len;
@@ -930,6 +935,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			blame_origin_decref(e->suspect);
 			e->suspect = blame_origin_incref(parent);
 			e->s_lno += offset;
+			e->ignored = 1;
 			e->next = ignoredp;
 			ignoredp = e;
 		} else {
diff --git a/blame.h b/blame.h
index 086b92915e4b..56aeff582b01 100644
--- a/blame.h
+++ b/blame.h
@@ -92,6 +92,7 @@ struct blame_entry {
 	 * scanning the lines over and over.
 	 */
 	unsigned score;
+	int ignored;
 };
 
 /*
diff --git a/builtin/blame.c b/builtin/blame.c
index 2f9183fb5fbd..8c3c5e435c9c 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -53,6 +53,7 @@ static int show_progress;
 static char repeated_meta_color[COLOR_MAXLEN];
 static int coloring_mode;
 static const char *ignore_revs_file;
+static int mark_ignored_lines;
 
 static struct date_mode blame_date_mode = { DATE_ISO8601 };
 static size_t blame_date_width;
@@ -480,6 +481,10 @@ static void emit_other(struct blame_scoreboard *sb, struct blame_entry *ent, int
 			}
 		}
 
+		if (mark_ignored_lines && ent->ignored) {
+			length--;
+			putchar('*');
+		}
 		printf("%.*s", length, hex);
 		if (opt & OUTPUT_ANNOTATE_COMPAT) {
 			const char *name;
@@ -698,6 +703,10 @@ static int git_blame_config(const char *var, const char *value, void *cb)
 	}
 	if (!strcmp(var, "blame.ignorerevsfile"))
 		return git_config_pathname(&ignore_revs_file, var, value);
+	if (!strcmp(var, "blame.markignoredlines")) {
+		mark_ignored_lines = git_config_bool(var, value);
+		return 0;
+	}
 	if (!strcmp(var, "color.blame.repeatedlines")) {
 		if (color_parse_mem(value, strlen(value), repeated_meta_color))
 			warning(_("invalid color '%s' in color.blame.repeatedLines"),
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-17 20:29   ` [PATCH v2 1/3] Move init_skiplist() outside of fsck Barret Rhoden
@ 2019-01-18  9:25     ` Johannes Schindelin
  2019-01-18  9:45     ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 39+ messages in thread
From: Johannes Schindelin @ 2019-01-18  9:25 UTC (permalink / raw)
  To: Barret Rhoden
  Cc: git, Ævar Arnfjörð Bjarmason, David Kastrup,
	Jeff King, Jeff Smith, Junio C Hamano, René Scharfe,
	Stefan Beller

Hi Barret,

On Thu, 17 Jan 2019, Barret Rhoden wrote:

> init_skiplist() took a file consisting of SHA-1s and comments and added
> the objects to an oidset.  This functionality is useful for other
> commands.
> 
> Signed-off-by: Barret Rhoden <brho@google.com>

This patch looks good, I have just one small suggestion: SHA-1's days are
counted. We already know the roadmap, that we want to use SHA-256 instead
at some stage. Why not talk about "object hashes" instead of "SHA-1s"?

Thanks,
Johannes

> ---
>  fsck.c   | 37 +------------------------------------
>  oidset.c | 35 +++++++++++++++++++++++++++++++++++
>  oidset.h |  7 +++++++
>  3 files changed, 43 insertions(+), 36 deletions(-)
> 
> diff --git a/fsck.c b/fsck.c
> index 68502ce85b11..80b53e6f4968 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -181,41 +181,6 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
>  	return msg_type;
>  }
>  
> -static void init_skiplist(struct fsck_options *options, const char *path)
> -{
> -	FILE *fp;
> -	struct strbuf sb = STRBUF_INIT;
> -	struct object_id oid;
> -
> -	fp = fopen(path, "r");
> -	if (!fp)
> -		die("Could not open skip list: %s", path);
> -	while (!strbuf_getline(&sb, fp)) {
> -		const char *p;
> -		const char *hash;
> -
> -		/*
> -		 * Allow trailing comments, leading whitespace
> -		 * (including before commits), and empty or whitespace
> -		 * only lines.
> -		 */
> -		hash = strchr(sb.buf, '#');
> -		if (hash)
> -			strbuf_setlen(&sb, hash - sb.buf);
> -		strbuf_trim(&sb);
> -		if (!sb.len)
> -			continue;
> -
> -		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
> -			die("Invalid SHA-1: %s", sb.buf);
> -		oidset_insert(&options->skiplist, &oid);
> -	}
> -	if (ferror(fp))
> -		die_errno("Could not read '%s'", path);
> -	fclose(fp);
> -	strbuf_release(&sb);
> -}
> -
>  static int parse_msg_type(const char *str)
>  {
>  	if (!strcmp(str, "error"))
> @@ -284,7 +249,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
>  		if (!strcmp(buf, "skiplist")) {
>  			if (equal == len)
>  				die("skiplist requires a path");
> -			init_skiplist(options, buf + equal + 1);
> +			oidset_parse_file(&options->skiplist, buf + equal + 1);
>  			buf += len + 1;
>  			continue;
>  		}
> diff --git a/oidset.c b/oidset.c
> index fe4eb921df81..a4f38a040320 100644
> --- a/oidset.c
> +++ b/oidset.c
> @@ -35,3 +35,38 @@ void oidset_clear(struct oidset *set)
>  	kh_release_oid(&set->set);
>  	oidset_init(set, 0);
>  }
> +
> +void oidset_parse_file(struct oidset *set, const char *path)
> +{
> +	FILE *fp;
> +	struct strbuf sb = STRBUF_INIT;
> +	struct object_id oid;
> +
> +	fp = fopen(path, "r");
> +	if (!fp)
> +		die("Could not open skip list: %s", path);
> +	while (!strbuf_getline(&sb, fp)) {
> +		const char *p;
> +		const char *hash;
> +
> +		/*
> +		 * Allow trailing comments, leading whitespace
> +		 * (including before commits), and empty or whitespace
> +		 * only lines.
> +		 */
> +		hash = strchr(sb.buf, '#');
> +		if (hash)
> +			strbuf_setlen(&sb, hash - sb.buf);
> +		strbuf_trim(&sb);
> +		if (!sb.len)
> +			continue;
> +
> +		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
> +			die("Invalid SHA-1: %s", sb.buf);
> +		oidset_insert(set, &oid);
> +	}
> +	if (ferror(fp))
> +		die_errno("Could not read '%s'", path);
> +	fclose(fp);
> +	strbuf_release(&sb);
> +}
> diff --git a/oidset.h b/oidset.h
> index c9d0f6d3cc8b..a3452eb7de84 100644
> --- a/oidset.h
> +++ b/oidset.h
> @@ -73,6 +73,13 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
>   */
>  void oidset_clear(struct oidset *set);
>  
> +/**
> + * Add the contents of the file 'path' to an initialized oidset.  Each line is
> + * an unabbreviated SHA-1.  Comments begin with '#', and trailing comments are
> + * allowed.  Leading whitespace and empty or white-space only lines are ignored.
> + */
> +void oidset_parse_file(struct oidset *set, const char *path);
> +
>  struct oidset_iter {
>  	kh_oid_t *set;
>  	khiter_t iter;
> -- 
> 2.20.1.321.g9e740568ce-goog
> 
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-17 20:29   ` [PATCH v2 1/3] Move init_skiplist() outside of fsck Barret Rhoden
  2019-01-18  9:25     ` Johannes Schindelin
@ 2019-01-18  9:45     ` Ævar Arnfjörð Bjarmason
  2019-01-18 17:36       ` Junio C Hamano
  1 sibling, 1 reply; 39+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-18  9:45 UTC (permalink / raw)
  To: Barret Rhoden
  Cc: git, David Kastrup, Jeff King, Jeff Smith, Johannes Schindelin,
	Junio C Hamano, René Scharfe, Stefan Beller


On Thu, Jan 17 2019, Barret Rhoden wrote:

> -		die("Could not open skip list: %s", path);
> [...]
> +		die("Could not open skip list: %s", path);

You're just moving this around, but now that this has two uses let's say
"Could not open SHA-1 list; %s" or something like that.

> +			die("Invalid SHA-1: %s", sb.buf);

Unlike Johannes I think it's fine to leave this. This file-format is
SHA-1 only now. We can cross the bridge of making it (and others)
SHA-256 somehow when we come to that, whether that'll be allowing
variable width or a different file.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-17 20:29   ` [PATCH v2 2/3] blame: add the ability to ignore commits and their changes Barret Rhoden
@ 2019-01-18  9:47     ` Johannes Schindelin
  2019-01-18 17:33       ` Barret Rhoden
  2019-01-20 18:19     ` René Scharfe
  1 sibling, 1 reply; 39+ messages in thread
From: Johannes Schindelin @ 2019-01-18  9:47 UTC (permalink / raw)
  To: Barret Rhoden
  Cc: git, Ævar Arnfjörð Bjarmason, David Kastrup,
	Jeff King, Jeff Smith, Junio C Hamano, René Scharfe,
	Stefan Beller

Hi Barret,

On Thu, 17 Jan 2019, Barret Rhoden wrote:

> [...]
> 
> Users can ignore a revision with --ignore-rev=rev, which may be
> repeated.  They can specify a file of full SHA-1 hashes of revs (one per
> line) with the blame.ignoreRevFile config option.  They can also specify
> a file with --ignore-rev-file=file, which overrides the config option.

This sounds like that is already the case in Git, but the truth is: this
patch *introduces* that feature. Maybe start the paragraph with "With this
patch, ..."?

I cannot speak for the correctness of the changes to blame.c, others on
the Cc: list are already much more familiar with that code, so I'll leave
it to them to comment on that.

However, I am missing a regression test for this behavior, the best idea
would be likely to add t/t8013-blame-ignore-revs.sh (copy-edit it from
t/t8009-blame-vs-topicbranches.sh, maybe).

And there is another thing:

> diff --git a/builtin/blame.c b/builtin/blame.c
> index 6d798f99392e..2f9183fb5fbd 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -52,6 +52,7 @@ static int no_whole_file_rename;
>  static int show_progress;
>  static char repeated_meta_color[COLOR_MAXLEN];
>  static int coloring_mode;
> +static const char *ignore_revs_file;

... this...

>  
>  static struct date_mode blame_date_mode = { DATE_ISO8601 };
>  static size_t blame_date_width;
> @@ -695,6 +696,8 @@ static int git_blame_config(const char *var, const char *value, void *cb)
>  		parse_date_format(value, &blame_date_mode);
>  		return 0;
>  	}
> +	if (!strcmp(var, "blame.ignorerevsfile"))
> +		return git_config_pathname(&ignore_revs_file, var, value);

... this...

>  	if (!strcmp(var, "color.blame.repeatedlines")) {
>  		if (color_parse_mem(value, strlen(value), repeated_meta_color))
>  			warning(_("invalid color '%s' in color.blame.repeatedLines"),
> @@ -806,6 +826,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
>  		OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
>  		OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
>  		OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
> +		OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("Ignore <rev> when blaming")),
> +		OPT_STRING(0, "ignore-revs-file", &ignore_revs_file, N_("file"), N_("Ignore revisions from <file>")),

... and this change limit the user to specifying a single file, for no
good reason. Worse: specifying two different files via two
`--ignore-revs-file` parameters will only heed the latter and skip the
former without any warning.

A better idea IMHO would be to use an OPT_STRING_LIST() for
`--ignore-revs-file`, too, and to allow for multiple
`blame.ignoreRevsFile` config entries (with our usual trick of handling an
empty setting by resetting the list of paths that were accumulated so
far, see e.g. how `credential.helper` is handled).

Ciao,
Johannes

>  		OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
>  		OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
>  
> @@ -995,6 +1017,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
>  	sb.contents_from = contents_from;
>  	sb.reverse = reverse;
>  	sb.repo = the_repository;
> +	build_ignorelist(&sb, &ignore_rev_list);
> +	string_list_clear(&ignore_rev_list, 0);
>  	setup_scoreboard(&sb, path, &o);
>  	lno = sb.num_lines;
>  
> -- 
> 2.20.1.321.g9e740568ce-goog
> 
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 0/3] blame: add the ability to ignore commits
  2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
                     ` (2 preceding siblings ...)
  2019-01-17 20:29   ` [PATCH v2 3/3] blame: add a config option to mark ignored lines Barret Rhoden
@ 2019-01-18  9:52   ` Ævar Arnfjörð Bjarmason
  3 siblings, 0 replies; 39+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-18  9:52 UTC (permalink / raw)
  To: Barret Rhoden
  Cc: git, David Kastrup, Jeff King, Jeff Smith, Johannes Schindelin,
	Junio C Hamano, René Scharfe, Stefan Beller


On Thu, Jan 17 2019, Barret Rhoden wrote:

> Adds the ability to ignore a set of commits and their changes when
> blaming.  This can be used to ignore a commit deemed 'not interesting,'
> such as reformatting.
>
> v1: https://public-inbox.org/git/20190107213013.231514-1-brho@google.com/
> v1 -> v2
> - extracted the skiplist from fsck to avoid duplicating code
> - overhauled the interface and options
> - split out markIgnoredFiles
> - handled merges
>
> Barret Rhoden (3):
>   Move init_skiplist() outside of fsck
>   blame: add the ability to ignore commits and their changes
>   blame: add a config option to mark ignored lines

Looks much better this time around, I skimmed it, but quickly discovered
that there's no tests for any of it, which is a prerequisite for getting
something like this in.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 3/3] blame: add a config option to mark ignored lines
  2019-01-17 20:29   ` [PATCH v2 3/3] blame: add a config option to mark ignored lines Barret Rhoden
@ 2019-01-18 10:03     ` Johannes Schindelin
  0 siblings, 0 replies; 39+ messages in thread
From: Johannes Schindelin @ 2019-01-18 10:03 UTC (permalink / raw)
  To: Barret Rhoden
  Cc: git, Ævar Arnfjörð Bjarmason, David Kastrup,
	Jeff King, Jeff Smith, Junio C Hamano, René Scharfe,
	Stefan Beller

Hi Barret,

On Thu, 17 Jan 2019, Barret Rhoden wrote:

> When ignoring commits, the commit that is blamed might not be
> responsible for the change.  Users might want to know when a particular
> line has a potentially inaccurate blame.
> 
> By specifying blame.markIgnoredFiles, each blame line is marked with an
> '*'.  For example:
> 
> 278b6158d6fdb (Barret Rhoden  2016-04-11 13:57:54 -0400 26)
> 
> appears as:
> 
> *278b6158d6fd (Barret Rhoden  2016-04-11 13:57:54 -0400 26)
> 
> where the '*' is placed before the commit, and the hash has one fewer
> characters.
> 
> Signed-off-by: Barret Rhoden <brho@google.com>

Again, I cannot comment on blame.c, there are more competent people Cc:ed.
But I do have to point out that Git prefers commit messages in the
imperative form rather than the present tense (reading the commit message
could leave the inclined reader wondering what the patch changes, if Git
already does all that).

Ciao,
Johannes

> ---
>  Documentation/blame-options.txt | 4 +++-
>  blame.c                         | 8 +++++++-
>  blame.h                         | 1 +
>  builtin/blame.c                 | 9 +++++++++
>  4 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
> index 424a63f0b45c..92787ae951ac 100644
> --- a/Documentation/blame-options.txt
> +++ b/Documentation/blame-options.txt
> @@ -115,7 +115,9 @@ take effect.
>  	change never happened.  Lines that were changed or added by an ignored
>  	commit will be blamed on the previous commit that changed that line or
>  	nearby lines.  This option may be specified multiple times to ignore
> -	more than one revision.
> +	more than one revision.  If the `blame.markIgnoredLines` config option
> +	is set, then lines that were changed by an ignored commit will be
> +	marked with a `*` in the blame output.
>  
>  --ignore-revs-file <file>::
>  	Ignore revisions listed in `file`, one full SHA-1 hash per line.
> diff --git a/blame.c b/blame.c
> index 0b91fba2d04c..b1805633fb23 100644
> --- a/blame.c
> +++ b/blame.c
> @@ -474,7 +474,8 @@ void blame_coalesce(struct blame_scoreboard *sb)
>  
>  	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
>  		if (ent->suspect == next->suspect &&
> -		    ent->s_lno + ent->num_lines == next->s_lno) {
> +		    ent->s_lno + ent->num_lines == next->s_lno &&
> +		    ent->ignored == next->ignored) {
>  			ent->num_lines += next->num_lines;
>  			ent->next = next->next;
>  			blame_origin_decref(next->suspect);
> @@ -726,6 +727,8 @@ static void split_overlap(struct blame_entry *split,
>  	int chunk_end_lno;
>  	memset(split, 0, sizeof(struct blame_entry [3]));
>  
> +	split[0].ignored = split[1].ignored = split[2].ignored = e->ignored;
> +
>  	if (e->s_lno < tlno) {
>  		/* there is a pre-chunk part not blamed on parent */
>  		split[0].suspect = blame_origin_incref(e->suspect);
> @@ -862,6 +865,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
>  			int len = tlno - e->s_lno;
>  			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
>  			n->suspect = e->suspect;
> +			n->ignored = e->ignored;
>  			n->lno = e->lno + len;
>  			n->s_lno = e->s_lno + len;
>  			n->num_lines = e->num_lines - len;
> @@ -916,6 +920,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
>  			int len = same - e->s_lno;
>  			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
>  			n->suspect = blame_origin_incref(e->suspect);
> +			n->ignored = e->ignored;
>  			n->lno = e->lno + len;
>  			n->s_lno = e->s_lno + len;
>  			n->num_lines = e->num_lines - len;
> @@ -930,6 +935,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
>  			blame_origin_decref(e->suspect);
>  			e->suspect = blame_origin_incref(parent);
>  			e->s_lno += offset;
> +			e->ignored = 1;
>  			e->next = ignoredp;
>  			ignoredp = e;
>  		} else {
> diff --git a/blame.h b/blame.h
> index 086b92915e4b..56aeff582b01 100644
> --- a/blame.h
> +++ b/blame.h
> @@ -92,6 +92,7 @@ struct blame_entry {
>  	 * scanning the lines over and over.
>  	 */
>  	unsigned score;
> +	int ignored;
>  };
>  
>  /*
> diff --git a/builtin/blame.c b/builtin/blame.c
> index 2f9183fb5fbd..8c3c5e435c9c 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -53,6 +53,7 @@ static int show_progress;
>  static char repeated_meta_color[COLOR_MAXLEN];
>  static int coloring_mode;
>  static const char *ignore_revs_file;
> +static int mark_ignored_lines;
>  
>  static struct date_mode blame_date_mode = { DATE_ISO8601 };
>  static size_t blame_date_width;
> @@ -480,6 +481,10 @@ static void emit_other(struct blame_scoreboard *sb, struct blame_entry *ent, int
>  			}
>  		}
>  
> +		if (mark_ignored_lines && ent->ignored) {
> +			length--;
> +			putchar('*');
> +		}
>  		printf("%.*s", length, hex);
>  		if (opt & OUTPUT_ANNOTATE_COMPAT) {
>  			const char *name;
> @@ -698,6 +703,10 @@ static int git_blame_config(const char *var, const char *value, void *cb)
>  	}
>  	if (!strcmp(var, "blame.ignorerevsfile"))
>  		return git_config_pathname(&ignore_revs_file, var, value);
> +	if (!strcmp(var, "blame.markignoredlines")) {
> +		mark_ignored_lines = git_config_bool(var, value);
> +		return 0;
> +	}
>  	if (!strcmp(var, "color.blame.repeatedlines")) {
>  		if (color_parse_mem(value, strlen(value), repeated_meta_color))
>  			warning(_("invalid color '%s' in color.blame.repeatedLines"),
> -- 
> 2.20.1.321.g9e740568ce-goog
> 
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-18  9:47     ` Johannes Schindelin
@ 2019-01-18 17:33       ` Barret Rhoden
  0 siblings, 0 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-18 17:33 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: git, Ævar Arnfjörð Bjarmason, David Kastrup,
	Jeff King, Jeff Smith, Junio C Hamano, René Scharfe,
	Stefan Beller

On 2019-01-18 at 10:47 Johannes Schindelin <Johannes.Schindelin@gmx.de>
wrote:
> ... and this change limit the user to specifying a single file, for no
> good reason. Worse: specifying two different files via two
> `--ignore-revs-file` parameters will only heed the latter and skip the
> former without any warning.
> 
> A better idea IMHO would be to use an OPT_STRING_LIST() for
> `--ignore-revs-file`, too, and to allow for multiple
> `blame.ignoreRevsFile` config entries (with our usual trick of handling an
> empty setting by resetting the list of paths that were accumulated so
> far, see e.g. how `credential.helper` is handled).

I can do this if you all want, though I was modeling it off the behavior
of -S, which also only takes a single file.  

Thanks, everyone, for all of the feedback.

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-18  9:45     ` Ævar Arnfjörð Bjarmason
@ 2019-01-18 17:36       ` Junio C Hamano
  2019-01-18 20:59         ` Johannes Schindelin
  0 siblings, 1 reply; 39+ messages in thread
From: Junio C Hamano @ 2019-01-18 17:36 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Barret Rhoden, git, David Kastrup, Jeff King, Jeff Smith,
	Johannes Schindelin, René Scharfe, Stefan Beller

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> On Thu, Jan 17 2019, Barret Rhoden wrote:
>
>> -		die("Could not open skip list: %s", path);
>> [...]
>> +		die("Could not open skip list: %s", path);
>
> You're just moving this around, but now that this has two uses let's say
> "Could not open SHA-1 list; %s" or something like that.
>
>> +			die("Invalid SHA-1: %s", sb.buf);
>
> Unlike Johannes I think it's fine to leave this. This file-format is
> SHA-1 only now. We can cross the bridge of making it (and others)
> SHA-256 somehow when we come to that, whether that'll be allowing
> variable width or a different file.

I tend to agree.  The Documentation/glossary-contents.txt makes it
clear that "object name" is the most formal term to use here, with
synonyms like "object identifier" and much less formal "hash".  For
now, "SHA-1" is good enough, even though "object name" is acceptable
if we really want to future-proof.  But I would suspect that people
would colloquially keep saying Shaah-one even when we start using
different hash function(s), so such a future-proofing may not be
worth it ;-)


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-18 17:36       ` Junio C Hamano
@ 2019-01-18 20:59         ` Johannes Schindelin
  2019-01-18 21:30           ` Jeff King
  0 siblings, 1 reply; 39+ messages in thread
From: Johannes Schindelin @ 2019-01-18 20:59 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Ævar Arnfjörð Bjarmason, Barret Rhoden, git,
	David Kastrup, Jeff King, Jeff Smith, René Scharfe,
	Stefan Beller

[-- Attachment #1: Type: text/plain, Size: 2145 bytes --]

Hi,

On Fri, 18 Jan 2019, Junio C Hamano wrote:

> Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:
> 
> > On Thu, Jan 17 2019, Barret Rhoden wrote:
> >
> >> -		die("Could not open skip list: %s", path);
> >> [...]
> >> +		die("Could not open skip list: %s", path);
> >
> > You're just moving this around, but now that this has two uses let's say
> > "Could not open SHA-1 list; %s" or something like that.
> >
> >> +			die("Invalid SHA-1: %s", sb.buf);
> >
> > Unlike Johannes I think it's fine to leave this. This file-format is
> > SHA-1 only now. We can cross the bridge of making it (and others)
> > SHA-256 somehow when we come to that, whether that'll be allowing
> > variable width or a different file.
> 
> I tend to agree.  The Documentation/glossary-contents.txt makes it
> clear that "object name" is the most formal term to use here, with
> synonyms like "object identifier" and much less formal "hash".  For
> now, "SHA-1" is good enough, even though "object name" is acceptable
> if we really want to future-proof.  But I would suspect that people
> would colloquially keep saying Shaah-one even when we start using
> different hash function(s), so such a future-proofing may not be
> worth it ;-)

By that reasoning all the preparatory work for switching to SHA-256 and
making the references in the Git code base less tied to SHA-1 would be
irrelevant now, "because we can cross that bridge when we reach it".

You are suggesting to incur technical debt here. Let's be smarter about
this. We do not *have* to incur said technical debt. Nothing (except
mental laziness) makes use do that.

Instead, we can make our load "when we reach that bridge" a lot lighter
by already doing the right thing.

BTW I totally disagree that the skip list is bound to be SHA-1. It is
bound to be a list of object names, that's what its purpose is, and just
because we happen to not yet support other hash algorithms but SHA-1 does
not mean that the skip list is fixed to SHA-1. It'll always be whatever
hash algorithm is used in the current repository.

So no, introducing mentions of "SHA-1" *now* is not a smart thing to do.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-18 20:59         ` Johannes Schindelin
@ 2019-01-18 21:30           ` Jeff King
  2019-01-18 22:26             ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 39+ messages in thread
From: Jeff King @ 2019-01-18 21:30 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Junio C Hamano, Ævar Arnfjörð Bjarmason,
	Barret Rhoden, git, David Kastrup, Jeff Smith, René Scharfe,
	Stefan Beller

On Fri, Jan 18, 2019 at 09:59:21PM +0100, Johannes Schindelin wrote:

> By that reasoning all the preparatory work for switching to SHA-256 and
> making the references in the Git code base less tied to SHA-1 would be
> irrelevant now, "because we can cross that bridge when we reach it".
> 
> You are suggesting to incur technical debt here. Let's be smarter about
> this. We do not *have* to incur said technical debt. Nothing (except
> mental laziness) makes use do that.
> 
> Instead, we can make our load "when we reach that bridge" a lot lighter
> by already doing the right thing.
> 
> BTW I totally disagree that the skip list is bound to be SHA-1. It is
> bound to be a list of object names, that's what its purpose is, and just
> because we happen to not yet support other hash algorithms but SHA-1 does
> not mean that the skip list is fixed to SHA-1. It'll always be whatever
> hash algorithm is used in the current repository.

Yeah, I agree with this. In particular, the code has already been
modified to use "struct object_id" and parse_oid_hex(). So it is not
even like somebody will have to come through later and fix the
implementation here, and while they're at it change the "SHA-1" in the
message. It has literally already been fixed, and is just waiting on
parse_oid_hex() to learn about the new hashes behind the scenes.

IMHO the conversion to object_id probably would have been the time to
fix that message so we would not even have to be revisiting the
discussion now. But that conversion was such a monumental pain it is
hard to fault the authors for not picking up every scrap at that moment. ;)

That is no excuse not to do it now, though.

-Peff

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-18 21:30           ` Jeff King
@ 2019-01-18 22:26             ` Ævar Arnfjörð Bjarmason
  2019-01-22  7:12               ` Jeff King
  0 siblings, 1 reply; 39+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-18 22:26 UTC (permalink / raw)
  To: Jeff King
  Cc: Johannes Schindelin, Junio C Hamano, Barret Rhoden, git,
	David Kastrup, Jeff Smith, René Scharfe, Stefan Beller


On Fri, Jan 18 2019, Jeff King wrote:

> On Fri, Jan 18, 2019 at 09:59:21PM +0100, Johannes Schindelin wrote:
>
>> By that reasoning all the preparatory work for switching to SHA-256 and
>> making the references in the Git code base less tied to SHA-1 would be
>> irrelevant now, "because we can cross that bridge when we reach it".
>>
>> You are suggesting to incur technical debt here. Let's be smarter about
>> this. We do not *have* to incur said technical debt. Nothing (except
>> mental laziness) makes use do that.
>>
>> Instead, we can make our load "when we reach that bridge" a lot lighter
>> by already doing the right thing.
>>
>> BTW I totally disagree that the skip list is bound to be SHA-1. It is
>> bound to be a list of object names, that's what its purpose is, and just
>> because we happen to not yet support other hash algorithms but SHA-1 does
>> not mean that the skip list is fixed to SHA-1. It'll always be whatever
>> hash algorithm is used in the current repository.
>
> Yeah, I agree with this. In particular, the code has already been
> modified to use "struct object_id" and parse_oid_hex(). So it is not
> even like somebody will have to come through later and fix the
> implementation here, and while they're at it change the "SHA-1" in the
> message. It has literally already been fixed, and is just waiting on
> parse_oid_hex() to learn about the new hashes behind the scenes.
>
> IMHO the conversion to object_id probably would have been the time to
> fix that message so we would not even have to be revisiting the
> discussion now. But that conversion was such a monumental pain it is
> hard to fault the authors for not picking up every scrap at that moment. ;)
>
> That is no excuse not to do it now, though.

I stand corrected, I thought these still needed to be updated to parse
anything that wasn't 40 chars, since I hadn't seen anything about these
formats in the hash transition document.

So fair enough, let's change that while we're at it, but this seems like
something that needs to be planned for in more detail / documented in
the hash transition doc.

I.e. many (e.g. me) maintain some system-wide skiplist for strict fsck
cloning of legacy repos. So I can see there being some need for a
SHA1<->SHA256 map in this case, but since these files might stretch
across repo boundaries and not be checked into the repo itself this is a
new use-case that needs thinking about.

But now that I think about it this sort of thing would be a good
use-case for just fixing these various historical fsck issues while
we're at it when possible, e.g. "missing space before email" (probably
not all could be unambiguously fixed). So instead of sha256<->sha1
fn(sha256)<->fn(sha1)[1]?

1. https://public-inbox.org/git/87ftyyedqd.fsf@evledraar.gmail.com/

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-17 20:29   ` [PATCH v2 2/3] blame: add the ability to ignore commits and their changes Barret Rhoden
  2019-01-18  9:47     ` Johannes Schindelin
@ 2019-01-20 18:19     ` René Scharfe
  2019-01-22 15:26       ` Barret Rhoden
  2019-01-22 18:14       ` Junio C Hamano
  1 sibling, 2 replies; 39+ messages in thread
From: René Scharfe @ 2019-01-20 18:19 UTC (permalink / raw)
  To: Barret Rhoden, git
  Cc: Ævar Arnfjörð Bjarmason, David Kastrup, Jeff King,
	Jeff Smith, Johannes Schindelin, Junio C Hamano, Stefan Beller

Am 17.01.2019 um 21:29 schrieb Barret Rhoden:
> The blame_entry will get passed up the tree until we find a commit that
> has a diff chunk that affects those lines.  If an ignored commit added
> more lines than it removed, the blame will fall on a commit that made a
> change nearby.  There is no general solution here, just a best-effort
> approach.  For a trivial example, consider ignoring this commit:
>
> Z: "Adding Lines"
>  foo
> +No commit
> +ever touched
> +these lines
>  bar

Wouldn't it make more sense to assign such lines to unknown, perhaps
represented by an all-zero commit ID, instead of blaming a semi-random
bystander?

René

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-18 22:26             ` Ævar Arnfjörð Bjarmason
@ 2019-01-22  7:12               ` Jeff King
  2019-01-22  9:46                 ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 39+ messages in thread
From: Jeff King @ 2019-01-22  7:12 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Johannes Schindelin, Junio C Hamano, Barret Rhoden, git,
	David Kastrup, Jeff Smith, René Scharfe, Stefan Beller

On Fri, Jan 18, 2019 at 11:26:29PM +0100, Ævar Arnfjörð Bjarmason wrote:

> I stand corrected, I thought these still needed to be updated to parse
> anything that wasn't 40 chars, since I hadn't seen anything about these
> formats in the hash transition document.
> 
> So fair enough, let's change that while we're at it, but this seems like
> something that needs to be planned for in more detail / documented in
> the hash transition doc.
> 
> I.e. many (e.g. me) maintain some system-wide skiplist for strict fsck
> cloning of legacy repos. So I can see there being some need for a
> SHA1<->SHA256 map in this case, but since these files might stretch
> across repo boundaries and not be checked into the repo itself this is a
> new use-case that needs thinking about.

My assumption had been that changing your local repository would be a
(local) flag day, and you'd update any ancillary files like skiplists,
mailmap.blob, etc at the same time. I'm not opposed to making those
features more clever, though.

> But now that I think about it this sort of thing would be a good
> use-case for just fixing these various historical fsck issues while
> we're at it when possible, e.g. "missing space before email" (probably
> not all could be unambiguously fixed). So instead of sha256<->sha1
> fn(sha256)<->fn(sha1)[1]?

That is a very tempting thing to do, but I think it comes with its own
complications. We do not want to do fn(sha1), I don't think; the reason
we care about sha1 at all is that those hashes are already set in stone.

There could be a "clean up the data as we convert to sha256" operation,
but:

  - it needs to be set in stone from day 1, I'd think. The last thing we
    want is to modify it after conversions are in the wild

  - I think we need to be bi-directional. So it must be a mapping that
    can be undone to retrieve the original bytes, so we can compute
    their "real" sha1.

At which point, I think it might be simpler to just make git more
permissive with respect to those minor data errors (and in fact, we are
already pretty permissive for the most part in non-fsck operations).

-Peff

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-22  7:12               ` Jeff King
@ 2019-01-22  9:46                 ` Ævar Arnfjörð Bjarmason
  2019-01-22 17:54                   ` Junio C Hamano
  2019-01-22 18:28                   ` Jeff King
  0 siblings, 2 replies; 39+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-01-22  9:46 UTC (permalink / raw)
  To: Jeff King
  Cc: Johannes Schindelin, Junio C Hamano, Barret Rhoden, git,
	David Kastrup, Jeff Smith, René Scharfe, Stefan Beller


On Tue, Jan 22 2019, Jeff King wrote:

> On Fri, Jan 18, 2019 at 11:26:29PM +0100, Ævar Arnfjörð Bjarmason wrote:
>
>> I stand corrected, I thought these still needed to be updated to parse
>> anything that wasn't 40 chars, since I hadn't seen anything about these
>> formats in the hash transition document.
>>
>> So fair enough, let's change that while we're at it, but this seems like
>> something that needs to be planned for in more detail / documented in
>> the hash transition doc.
>>
>> I.e. many (e.g. me) maintain some system-wide skiplist for strict fsck
>> cloning of legacy repos. So I can see there being some need for a
>> SHA1<->SHA256 map in this case, but since these files might stretch
>> across repo boundaries and not be checked into the repo itself this is a
>> new use-case that needs thinking about.
>
> My assumption had been that changing your local repository would be a
> (local) flag day, and you'd update any ancillary files like skiplists,
> mailmap.blob, etc at the same time. I'm not opposed to making those
> features more clever, though.
>
>> But now that I think about it this sort of thing would be a good
>> use-case for just fixing these various historical fsck issues while
>> we're at it when possible, e.g. "missing space before email" (probably
>> not all could be unambiguously fixed). So instead of sha256<->sha1
>> fn(sha256)<->fn(sha1)[1]?
>
> That is a very tempting thing to do, but I think it comes with its own
> complications. We do not want to do fn(sha1), I don't think; the reason
> we care about sha1 at all is that those hashes are already set in stone.
>
> There could be a "clean up the data as we convert to sha256" operation,
> but:
>
>   - it needs to be set in stone from day 1, I'd think. The last thing we
>     want is to modify it after conversions are in the wild
>
>   - I think we need to be bi-directional. So it must be a mapping that
>     can be undone to retrieve the original bytes, so we can compute
>     their "real" sha1.

It needing to be bidirectional is a very good point, and I think that
makes my suggestion a non-starter. Thanks.

> At which point, I think it might be simpler to just make git more
> permissive with respect to those minor data errors (and in fact, we are
> already pretty permissive for the most part in non-fsck operations).

Yeah it's probably better to make some of these "errors" softer
warnings.

The X-Y issue I have is that I turned on transfer.fsckObjects, so then I
can't clone repos with various minor historical issues in commit headers
etc., so I maintain a big skip list. But what I was actually after was
fsck checks like the .gitmodules security check.

Of course I could chase them all down and turn them into
warn/error/ignore individually, but it would be better if we e.g. had
some way to say "serious things error, minor things warn", maybe with
the option of only having the looser version on fetch but not recieve
with the principle that we should be loose in what we accept from
existing data but strict with new data #leftoverbits

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-20 18:19     ` René Scharfe
@ 2019-01-22 15:26       ` Barret Rhoden
  2019-01-22 18:14       ` Junio C Hamano
  1 sibling, 0 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-01-22 15:26 UTC (permalink / raw)
  To: René Scharfe
  Cc: git, Ævar Arnfjörð Bjarmason, David Kastrup,
	Jeff King, Jeff Smith, Johannes Schindelin, Junio C Hamano,
	Stefan Beller

On 2019-01-20 at 19:19 René Scharfe <l.s.r@web.de> wrote:
> Am 17.01.2019 um 21:29 schrieb Barret Rhoden:
> > The blame_entry will get passed up the tree until we find a commit that
> > has a diff chunk that affects those lines.  If an ignored commit added
> > more lines than it removed, the blame will fall on a commit that made a
> > change nearby.  There is no general solution here, just a best-effort
> > approach.  For a trivial example, consider ignoring this commit:
> >
> > Z: "Adding Lines"
> >  foo
> > +No commit
> > +ever touched
> > +these lines
> >  bar  
> 
> Wouldn't it make more sense to assign such lines to unknown, perhaps
> represented by an all-zero commit ID, instead of blaming a semi-random
> bystander?

I don't know if we can algorithmicly determine whether or not an
assignment was the commit the user wanted.  Maybe we could add a set
of heuristics, like "any diff hunk that only added", as in my
example.  Maybe if count_a == 0 in blame_chunk_cb, that would work for
that case; I'm not too familiar with that code.

Either way, in my next commit I provide the option to mark the commit
with a '*', which could serve the same purpose as the all-zero commit
ID: flag the commit so the user knows it might not be correct.

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-22  9:46                 ` Ævar Arnfjörð Bjarmason
@ 2019-01-22 17:54                   ` Junio C Hamano
  2019-01-22 18:28                   ` Jeff King
  1 sibling, 0 replies; 39+ messages in thread
From: Junio C Hamano @ 2019-01-22 17:54 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Jeff King, Johannes Schindelin, Barret Rhoden, git,
	David Kastrup, Jeff Smith, René Scharfe, Stefan Beller

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> It needing to be bidirectional is a very good point, and I think that
> makes my suggestion a non-starter. Thanks.

Yes, it is a bit sad that we need to carry the mistakes forward
while moving to the new hash, but bidi convertibility is a must
for the transition to work smoothly, I think.

Thanks for a good discussion.  FWIW, on the original issue that
brought it up, I think using "object name" from the glossary to
move away from saying "SHA-1" would be good.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-20 18:19     ` René Scharfe
  2019-01-22 15:26       ` Barret Rhoden
@ 2019-01-22 18:14       ` Junio C Hamano
  2019-01-22 19:35         ` Barret Rhoden
  1 sibling, 1 reply; 39+ messages in thread
From: Junio C Hamano @ 2019-01-22 18:14 UTC (permalink / raw)
  To: René Scharfe
  Cc: Barret Rhoden, git, Ævar Arnfjörð Bjarmason,
	David Kastrup, Jeff King, Jeff Smith, Johannes Schindelin,
	Stefan Beller

René Scharfe <l.s.r@web.de> writes:

> Am 17.01.2019 um 21:29 schrieb Barret Rhoden:
>> The blame_entry will get passed up the tree until we find a commit that
>> has a diff chunk that affects those lines.  If an ignored commit added
>> more lines than it removed, the blame will fall on a commit that made a
>> change nearby.  There is no general solution here, just a best-effort
>> approach.  For a trivial example, consider ignoring this commit:
>>
>> Z: "Adding Lines"
>>  foo
>> +No commit
>> +ever touched
>> +these lines
>>  bar
>
> Wouldn't it make more sense to assign such lines to unknown, perhaps
> represented by an all-zero commit ID, instead of blaming a semi-random
> bystander?

I share the sentiment.

Isn't it, however, showing a bigger problem in the "feature"?

Your "a change that adds lines without removing any" is an obvious
case where these added lines have no corresponding lines in the
preimage, but most of the time it is unclear what line corresponds
to what previous line.  If a commit being "ignored" brought a change
like this:

     1
    -four
    -three
    +3
    +4
     5

did "+3" come from "-three"?

Or did "+4" (read: "added '4'") come from "-three" (read: "removed
'three'")?  Did it come from "-four"?  Or was it genuinely added by
that ignored commit?  Your suggestion deals with the case where we
decide that "+4" had no corresponding lines in the preimage (and
paint it as "no blame can be assigned").  But when we decide that
"+4" came from "-four" or "-three", we continue drilling down from
that ignored commit and assign the blame to either the commit that
introduced "four" or the commit that introduced "three", which would
obviously give us different result.  Worse yet, if a reader expected
us to consider "+4" came from "-four" at that ignored commit, but
the algorithm decided that "+4" corresponded to "-three", when we
show the commit that eventually gets blamed for that line that has
"4" has no "four" (it has "three"), which I suspect would confuse
the reader of the output.

So... I dunno.



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 1/3] Move init_skiplist() outside of fsck
  2019-01-22  9:46                 ` Ævar Arnfjörð Bjarmason
  2019-01-22 17:54                   ` Junio C Hamano
@ 2019-01-22 18:28                   ` Jeff King
  1 sibling, 0 replies; 39+ messages in thread
From: Jeff King @ 2019-01-22 18:28 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Johannes Schindelin, Junio C Hamano, Barret Rhoden, git,
	David Kastrup, Jeff Smith, René Scharfe, Stefan Beller

On Tue, Jan 22, 2019 at 10:46:56AM +0100, Ævar Arnfjörð Bjarmason wrote:

> > At which point, I think it might be simpler to just make git more
> > permissive with respect to those minor data errors (and in fact, we are
> > already pretty permissive for the most part in non-fsck operations).
> 
> Yeah it's probably better to make some of these "errors" softer
> warnings.
> 
> The X-Y issue I have is that I turned on transfer.fsckObjects, so then I
> can't clone repos with various minor historical issues in commit headers
> etc., so I maintain a big skip list. But what I was actually after was
> fsck checks like the .gitmodules security check.
>
> Of course I could chase them all down and turn them into
> warn/error/ignore individually, but it would be better if we e.g. had
> some way to say "serious things error, minor things warn", maybe with
> the option of only having the looser version on fetch but not recieve
> with the principle that we should be loose in what we accept from
> existing data but strict with new data #leftoverbits

Yeah, I think the current state here is rather unfortunate. The worst
part is that many of the things _are_ marked as warnings, but we reject
transfers even for warnings. So now we have "info" as well, which is
really just silly.

I think the big blocker to simply loosening "warning" is that the
current severities are pretty arbitrary. MISSING_NAME_BEFORE_EMAIL
probably ought to be warning, but it's an warning. Whereas HAS_DOTGIT is
a warning, but has pretty serious security implications.

So that does not save you from chasing them all down, but if you do, at
least the work could benefit everybody. ;)

-Peff

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-22 18:14       ` Junio C Hamano
@ 2019-01-22 19:35         ` Barret Rhoden
  2019-01-23 19:26           ` Junio C Hamano
  0 siblings, 1 reply; 39+ messages in thread
From: Barret Rhoden @ 2019-01-22 19:35 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: René Scharfe, git, Ævar Arnfjörð Bjarmason,
	David Kastrup, Jeff King, Jeff Smith, Johannes Schindelin,
	Stefan Beller

On 2019-01-22 at 10:14 Junio C Hamano <gitster@pobox.com> wrote:
> René Scharfe <l.s.r@web.de> writes:
> 
> > Am 17.01.2019 um 21:29 schrieb Barret Rhoden:  
> >> The blame_entry will get passed up the tree until we find a commit that
> >> has a diff chunk that affects those lines.  If an ignored commit added
> >> more lines than it removed, the blame will fall on a commit that made a
> >> change nearby.  There is no general solution here, just a best-effort
> >> approach.  For a trivial example, consider ignoring this commit:
> >>
> >> Z: "Adding Lines"
> >>  foo
> >> +No commit
> >> +ever touched
> >> +these lines
> >>  bar  
> >
> > Wouldn't it make more sense to assign such lines to unknown, perhaps
> > represented by an all-zero commit ID, instead of blaming a semi-random
> > bystander?  
> 
> I share the sentiment.
> 
> Isn't it, however, showing a bigger problem in the "feature"?
> 
> Your "a change that adds lines without removing any" is an obvious
> case where these added lines have no corresponding lines in the
> preimage, but most of the time it is unclear what line corresponds
> to what previous line.  If a commit being "ignored" brought a change
> like this:
> 
>      1
>     -four
>     -three
>     +3
>     +4
>      5
> 
> did "+3" come from "-three"?
> 
> Or did "+4" (read: "added '4'") come from "-three" (read: "removed
> 'three'")?  Did it come from "-four"?  Or was it genuinely added by
> that ignored commit?  Your suggestion deals with the case where we
> decide that "+4" had no corresponding lines in the preimage (and
> paint it as "no blame can be assigned").  But when we decide that
> "+4" came from "-four" or "-three", we continue drilling down from
> that ignored commit and assign the blame to either the commit that
> introduced "four" or the commit that introduced "three", which would
> obviously give us different result.  Worse yet, if a reader expected
> us to consider "+4" came from "-four" at that ignored commit, but
> the algorithm decided that "+4" corresponded to "-three", when we
> show the commit that eventually gets blamed for that line that has
> "4" has no "four" (it has "three"), which I suspect would confuse
> the reader of the output.
> 
> So... I dunno.

I guess if you swap the lines as well as change them, then we're not
going to be able to detect that.  Just to be clear, if you did this:

Commit A:
---------
 other_stuff
+one
 other_stuff

Commit B:
---------
 other_stuff
 one
+two
 other_stuff

Commit C:
---------
 other_stuff
-one
-two
+1
+2
 other_stuff

And ignore commit C, my change will properly identify commit A and B,
e.g.:

OTHER_HASH Author Date 11) other_stuff
*A_HASH    Author Date 12) 1
*B_HASH    Author Date 13) 2
OTHER_HASH Author Date 14) other_stuff

But if you swapped the lines in addition to change names to numbers:

Commit C-swap:
--------------
 other_stuff
-one
-two
+2
+1
 other_stuff

Then it won't have the semantic knowledge that "one" == "1".  If a user
is ignoring a commit, we don't have an oracle that knows exactly what
that commit did to determine what commit the user wants blamed.  The
current change attempts to find the last commit that touched a line.

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-22 19:35         ` Barret Rhoden
@ 2019-01-23 19:26           ` Junio C Hamano
  2019-02-01 20:37             ` Barret Rhoden
  0 siblings, 1 reply; 39+ messages in thread
From: Junio C Hamano @ 2019-01-23 19:26 UTC (permalink / raw)
  To: Barret Rhoden
  Cc: René Scharfe, git, Ævar Arnfjörð Bjarmason,
	David Kastrup, Jeff King, Jeff Smith, Johannes Schindelin,
	Stefan Beller

Barret Rhoden <brho@google.com> writes:

>> So... I dunno.
>
> I guess if you swap the lines as well as change them,...
> ... Then it won't have the semantic knowledge that "one" == "1".  If a user
> is ignoring a commit, we don't have an oracle that knows exactly what
> that commit did to determine what commit the user wants blamed.

Yeah, and if the original had two adjacent lines, and replacement
has three adjacent lines, the algorithm would not even know if 

 - the first line in the original was split into first two in the
   update and the second line was modified in place; or

 - the first line in the original was modified in place and the
   second line was split into the latter two lines in the update

In short, there is no answer to "what is the corresponding line of
this line before this commit changed it?" in general, and any
algorithm, as long as it tries to see what was the "corresponding
line" of the line that is blamed to a commit, would not produce
results human readers would expect all the time.

As you said, heuristics may get us far enough to be useful, though
;-).





^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 2/3] blame: add the ability to ignore commits and their changes
  2019-01-23 19:26           ` Junio C Hamano
@ 2019-02-01 20:37             ` Barret Rhoden
  0 siblings, 0 replies; 39+ messages in thread
From: Barret Rhoden @ 2019-02-01 20:37 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: René Scharfe, git, Ævar Arnfjörð Bjarmason,
	David Kastrup, Jeff King, Jeff Smith, Johannes Schindelin,
	Stefan Beller

On 2019-01-23 at 11:26 Junio C Hamano <gitster@pobox.com> wrote:
> Yeah, and if the original had two adjacent lines, and replacement
> has three adjacent lines, the algorithm would not even know if 
> 
>  - the first line in the original was split into first two in the
>    update and the second line was modified in place; or
> 
>  - the first line in the original was modified in place and the
>    second line was split into the latter two lines in the update
> 
> In short, there is no answer to "what is the corresponding line of
> this line before this commit changed it?" in general, and any
> algorithm, as long as it tries to see what was the "corresponding
> line" of the line that is blamed to a commit, would not produce
> results human readers would expect all the time.
> 
> As you said, heuristics may get us far enough to be useful, though
> ;-).

Yeah.  We can do one more thing: when we ignore a change that added
more lines than it removed, we can at least not report totally
unrelated commits.  

For example, the parent of an ignored commit has this, say at line 11:

commit-a 11) void new_func_1(void *x, void *y);
commit-b 12) void new_func_2(void *x, void *y);
commit-c 13) some_line_c
commit-d 14) some_line_d

After a commit 'X', we have:

commit-X 11) void new_func_1(void *x,
commit-X 12)                 void *y);
commit-X 13) void new_func_2(void *x,
commit-X 14)                 void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d

In my existing code, if you ignore commit X, the blames look like this:

commit-a 11) void new_func_1(void *x,
commit-b 12)                 void *y);
commit-c 13) void new_func_2(void *x,
commit-d 14)                 void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d

Lines 13 and 14 are blamed on the nearby commits C and D.  The reason
is the blame entry for X is four lines long, rooted at line 11, but when
we look back through the history, we'll look at the parent's image of
the file where that diff hunk is only two lines long.  The extra two
lines just blame whatever follows the diff hunk in the parent's image.
In this case, it is just the lines C and D repeated again.

I can detect this situation when we ignore the diffs from commit X.  If
X added more lines than it removed, then I only pass the number of
lines to the parent that the parent had.  The rest get their own
blame_entry, marked 'unblamable', which I'll catch when we create the
output.  The parent can't find blame for lines that don't exist in its
image of the file.  

With that change, the above example blames like this:

commit-a 11) void new_func_1(void *x,
commit-b 12)                 void *y);
00000000 13) void new_func_2(void *x,
00000000 14)                 void *y);
commit-c 15) some_line_c
commit-d 16) some_line_d

As we discussed, we still can never be certain about which commits
*should* be blamed for which lines.  (Note that line 12 was blamed on
B, though B was the commit for new_func_2(), not new_func_1()).  But I
can avoid blaming commits that just happen to be in the area and would
only be 'correct' due to dumb luck.

This also handles cases where an ignored commit only adds lines,
which is a specific case of "added more than removed."

Handling this case is a surprisingly small change.  I'll post it once I
sort out the tests and other comments on this patch series.  

For now, I went with unconditionally marking the commit as all 0s for
the case where we know it is 'wrong.'  I can do that conditionally
based on blame.markIgnoredLines if you want, though I think any commit
attributed to those lines would be misleading.

Thanks,

Barret


^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, back to index

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-07 21:30 [PATCH] blame: add the ability to ignore commits Barret Rhoden
2019-01-07 23:13 ` Junio C Hamano
2019-01-08 16:27   ` Barret Rhoden
2019-01-08 18:26     ` Junio C Hamano
2019-01-09 20:48       ` Barret Rhoden
2019-01-10 22:29         ` Junio C Hamano
2019-01-14 15:19           ` Barret Rhoden
2019-01-14 17:45             ` Junio C Hamano
2019-01-14 18:26               ` Randall S. Becker
2019-01-14 19:03                 ` Barret Rhoden
2019-01-15  6:08                 ` Junio C Hamano
2019-01-09 21:21       ` Stefan Beller
2019-01-08 13:12 ` Ævar Arnfjörð Bjarmason
2019-01-08 16:41   ` Barret Rhoden
2019-01-08 18:04     ` Barret Rhoden
2019-01-17 20:29 ` [PATCH v2 0/3] " Barret Rhoden
2019-01-17 20:29   ` [PATCH v2 1/3] Move init_skiplist() outside of fsck Barret Rhoden
2019-01-18  9:25     ` Johannes Schindelin
2019-01-18  9:45     ` Ævar Arnfjörð Bjarmason
2019-01-18 17:36       ` Junio C Hamano
2019-01-18 20:59         ` Johannes Schindelin
2019-01-18 21:30           ` Jeff King
2019-01-18 22:26             ` Ævar Arnfjörð Bjarmason
2019-01-22  7:12               ` Jeff King
2019-01-22  9:46                 ` Ævar Arnfjörð Bjarmason
2019-01-22 17:54                   ` Junio C Hamano
2019-01-22 18:28                   ` Jeff King
2019-01-17 20:29   ` [PATCH v2 2/3] blame: add the ability to ignore commits and their changes Barret Rhoden
2019-01-18  9:47     ` Johannes Schindelin
2019-01-18 17:33       ` Barret Rhoden
2019-01-20 18:19     ` René Scharfe
2019-01-22 15:26       ` Barret Rhoden
2019-01-22 18:14       ` Junio C Hamano
2019-01-22 19:35         ` Barret Rhoden
2019-01-23 19:26           ` Junio C Hamano
2019-02-01 20:37             ` Barret Rhoden
2019-01-17 20:29   ` [PATCH v2 3/3] blame: add a config option to mark ignored lines Barret Rhoden
2019-01-18 10:03     ` Johannes Schindelin
2019-01-18  9:52   ` [PATCH v2 0/3] blame: add the ability to ignore commits Ævar Arnfjörð Bjarmason

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox