git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Barret Rhoden <brho@google.com>
To: git@vger.kernel.org
Cc: "Michael Platings" <michael@platin.gs>,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"David Kastrup" <dak@gnu.org>, "Jeff King" <peff@peff.net>,
	"Jeff Smith" <whydoubt@gmail.com>,
	"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
	"Junio C Hamano" <gitster@pobox.com>,
	"René Scharfe" <l.s.r@web.de>,
	"Stefan Beller" <stefanbeller@gmail.com>
Subject: [PATCH v6 5/6] blame: optionally track line fingerprints during fill_origin_blob()
Date: Wed, 10 Apr 2019 12:24:08 -0400	[thread overview]
Message-ID: <20190410162409.117264-6-brho@google.com> (raw)
In-Reply-To: <20190410162409.117264-1-brho@google.com>

fill_origin_blob() is a convenient place to store data that we will use
throughout the lifetime of a blame_origin.  Some heuristics for
ignoring commits during a blame session can make use of this storage.
In particular, we will calculate a fingerprint for each line of a file
for blame_origins involved in an ignored commit.

In this commit, we only calculate the line_starts, reusing the existing
line-indexing code that prepare_lines() used for the scoreboard, and
allocate a zeroed placeholder array for the fingerprints.  In an upcoming
commit, we will actually compute the fingerprints.

This feature will be used when we attempt to pass blame entries to
parents when we "ignore" a commit.  Most uses of fill_origin_blob()
will not require this feature, hence the flag parameter.  Multiple calls
to fill_origin_blob() are idempotent, and any of them can request the
creation of the fingerprints structure.

Suggested-by: Michael Platings <michael@platin.gs>
Signed-off-by: Barret Rhoden <brho@google.com>
---
 blame.c | 95 +++++++++++++++++++++++++++++++++++++++------------------
 blame.h |  2 ++
 2 files changed, 67 insertions(+), 30 deletions(-)

diff --git a/blame.c b/blame.c
index a98ae00e2cfc..a42dff80b1a5 100644
--- a/blame.c
+++ b/blame.c
@@ -311,12 +311,63 @@ static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
 	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
 }
 
+static const char *get_next_line(const char *start, const char *end)
+{
+	const char *nl = memchr(start, '\n', end - start);
+
+	return nl ? nl + 1 : end;
+}
+
+static int find_line_starts(int **line_starts, const char *buf,
+			    unsigned long len)
+{
+	const char *end = buf + len;
+	const char *p;
+	int *lineno;
+	int num = 0;
+
+	for (p = buf; p < end; p = get_next_line(p, end))
+		num++;
+
+	ALLOC_ARRAY(*line_starts, num + 1);
+	lineno = *line_starts;
+
+	for (p = buf; p < end; p = get_next_line(p, end))
+		*lineno++ = p - buf;
+
+	*lineno = len;
+
+	return num;
+}
+
+static void fill_origin_fingerprints(struct blame_origin *o, mmfile_t *file)
+{
+	int *line_starts;
+
+	if (o->fingerprints)
+		return;
+	o->num_lines = find_line_starts(&line_starts, o->file.ptr,
+					o->file.size);
+	/* TODO: Will fill in fingerprints in a future commit */
+	o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
+	free(line_starts);
+}
+
+static void drop_origin_fingerprints(struct blame_origin *o)
+{
+	if (o->fingerprints) {
+		o->num_lines = 0;
+		FREE_AND_NULL(o->fingerprints);
+	}
+}
+
 /*
  * Given an origin, prepare mmfile_t structure to be used by the
  * diff machinery
  */
 static void fill_origin_blob(struct diff_options *opt,
-			     struct blame_origin *o, mmfile_t *file, int *num_read_blob)
+			     struct blame_origin *o, mmfile_t *file,
+			     int *num_read_blob, int fill_fingerprints)
 {
 	if (!o->file.ptr) {
 		enum object_type type;
@@ -340,11 +391,14 @@ static void fill_origin_blob(struct diff_options *opt,
 	}
 	else
 		*file = o->file;
+	if (fill_fingerprints)
+		fill_origin_fingerprints(o, file);
 }
 
 static void drop_origin_blob(struct blame_origin *o)
 {
 	FREE_AND_NULL(o->file.ptr);
+	drop_origin_fingerprints(o);
 }
 
 /*
@@ -1136,8 +1190,10 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 	d.ignore_diffs = ignore_diffs;
 	d.dstq = &newdest; d.srcq = &target->suspects;
 
-	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
-	fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+			 &sb->num_read_blob, ignore_diffs);
+	fill_origin_blob(&sb->revs->diffopt, target, &file_o,
+			 &sb->num_read_blob, ignore_diffs);
 	sb->num_get_patch++;
 
 	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
@@ -1348,7 +1404,8 @@ static void find_move_in_parent(struct blame_scoreboard *sb,
 	if (!unblamed)
 		return; /* nothing remains for this target */
 
-	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+			 &sb->num_read_blob, 0);
 	if (!file_p.ptr)
 		return;
 
@@ -1477,7 +1534,8 @@ static void find_copy_in_parent(struct blame_scoreboard *sb,
 			norigin = get_origin(parent, p->one->path);
 			oidcpy(&norigin->blob_oid, &p->one->oid);
 			norigin->mode = p->one->mode;
-			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob);
+			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p,
+					 &sb->num_read_blob, 0);
 			if (!file_p.ptr)
 				continue;
 
@@ -1816,37 +1874,14 @@ void assign_blame(struct blame_scoreboard *sb, int opt)
 	}
 }
 
-static const char *get_next_line(const char *start, const char *end)
-{
-	const char *nl = memchr(start, '\n', end - start);
-	return nl ? nl + 1 : end;
-}
-
 /*
  * To allow quick access to the contents of nth line in the
  * final image, prepare an index in the scoreboard.
  */
 static int prepare_lines(struct blame_scoreboard *sb)
 {
-	const char *buf = sb->final_buf;
-	unsigned long len = sb->final_buf_size;
-	const char *end = buf + len;
-	const char *p;
-	int *lineno;
-	int num = 0;
-
-	for (p = buf; p < end; p = get_next_line(p, end))
-		num++;
-
-	ALLOC_ARRAY(sb->lineno, num + 1);
-	lineno = sb->lineno;
-
-	for (p = buf; p < end; p = get_next_line(p, end))
-		*lineno++ = p - buf;
-
-	*lineno = len;
-
-	sb->num_lines = num;
+	sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf,
+					 sb->final_buf_size);
 	return sb->num_lines;
 }
 
diff --git a/blame.h b/blame.h
index 53df8b4c5b3f..5dd877bb78fc 100644
--- a/blame.h
+++ b/blame.h
@@ -51,6 +51,8 @@ struct blame_origin {
 	 */
 	struct blame_entry *suspects;
 	mmfile_t file;
+	int num_lines;
+	void *fingerprints;
 	struct object_id blob_oid;
 	unsigned mode;
 	/* guilty gets set when shipping any suspects to the final
-- 
2.21.0.392.gf8f6787159e-goog


  parent reply	other threads:[~2019-04-10 16:24 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-10 16:24 [PATCH v6 0/6] blame: add the ability to ignore commits Barret Rhoden
2019-04-10 16:24 ` [PATCH v6 1/6] Move init_skiplist() outside of fsck Barret Rhoden
2019-04-10 19:04   ` Ævar Arnfjörð Bjarmason
2019-04-15 13:32     ` Barret Rhoden
2019-04-10 16:24 ` [PATCH v6 2/6] blame: use a helper function in blame_chunk() Barret Rhoden
2019-04-10 16:24 ` [PATCH v6 3/6] blame: add the ability to ignore commits and their changes Barret Rhoden
2019-04-10 19:00   ` Ævar Arnfjörð Bjarmason
2019-04-14 10:42     ` Michael Platings
2019-04-15 13:32       ` Barret Rhoden
2019-04-15 13:34     ` Barret Rhoden
2019-04-10 16:24 ` [PATCH v6 4/6] blame: add config options to handle output for ignored lines Barret Rhoden
2019-04-14  3:45   ` Junio C Hamano
2019-04-14 10:09     ` Michael Platings
2019-04-14 10:24       ` Junio C Hamano
2019-04-14 11:27         ` Michael Platings
2019-04-15 13:51           ` Barret Rhoden
2019-04-10 16:24 ` Barret Rhoden [this message]
2019-04-10 16:24 ` [PATCH v6 6/6] blame: use a fingerprint heuristic to match " Barret Rhoden
2019-04-14  3:54   ` Junio C Hamano
2019-04-14  9:41     ` Michael Platings
2019-04-15 14:03     ` Barret Rhoden
2019-04-16  4:10       ` Junio C Hamano
2019-04-14 21:10 ` [PATCH v6 0/6] blame: add the ability to ignore commits Michael Platings
2019-04-15 13:23   ` Barret Rhoden
2019-04-15 21:54     ` Michael Platings

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190410162409.117264-6-brho@google.com \
    --to=brho@google.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=avarab@gmail.com \
    --cc=dak@gnu.org \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=l.s.r@web.de \
    --cc=michael@platin.gs \
    --cc=peff@peff.net \
    --cc=stefanbeller@gmail.com \
    --cc=whydoubt@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).