git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>,
	Junio C Hamano <gitster@pobox.com>,
	Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCH 11/19] built-in add -p: implement the hunk splitting feature
Date: Fri, 13 Dec 2019 08:07:58 +0000	[thread overview]
Message-ID: <2cd9855fffe97ce31640dfa5ed22bffa98047403.1576224486.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.173.git.1576224486.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

If this developer's workflow is any indication, then this is *the* most
useful feature of Git's interactive `add `command.

Note: once again, this is not a verbatim conversion from the Perl code
to C: the `hunk_splittable()` function, for example, essentially did all
the work of splitting the hunk, just to find out whether more than one
hunk would have been the result (and then tossed that result into the
trash). In C we instead count the number of resulting hunks (without
actually doing the work of splitting, but just counting the transitions
from non-context lines to context lines), and store that information
with the hunk, and we do that *while* parsing the diff in the first
place.

Another deviation: the built-in `git add -p` was designed with a single
strbuf holding the diff (and another one holding the colored diff, if
that one was asked for) in mind, and hunks essentially store just the
start and end offsets pointing into that strbuf. As a consequence, when
we split hunks, we now use a special mode where the hunk header is
generated dynamically, and only the rest of the hunk is stored using
such start/end offsets. This way, we also avoid the frequent
formatting/re-parsing of the hunk header of the Perl version.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 add-patch.c                | 215 ++++++++++++++++++++++++++++++++++++-
 t/t3701-add-interactive.sh |  12 +++
 2 files changed, 225 insertions(+), 2 deletions(-)

diff --git a/add-patch.c b/add-patch.c
index 171025b08d..2d34ddd7f4 100644
--- a/add-patch.c
+++ b/add-patch.c
@@ -28,7 +28,7 @@ struct hunk_header {
 };
 
 struct hunk {
-	size_t start, end, colored_start, colored_end;
+	size_t start, end, colored_start, colored_end, splittable_into;
 	enum { UNDECIDED_HUNK = 0, SKIP_HUNK, USE_HUNK } use;
 	struct hunk_header header;
 };
@@ -155,7 +155,7 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
 	struct argv_array args = ARGV_ARRAY_INIT;
 	struct strbuf *plain = &s->plain, *colored = NULL;
 	struct child_process cp = CHILD_PROCESS_INIT;
-	char *p, *pend, *colored_p = NULL, *colored_pend = NULL;
+	char *p, *pend, *colored_p = NULL, *colored_pend = NULL, marker = '\0';
 	size_t file_diff_alloc = 0, i, color_arg_index;
 	struct file_diff *file_diff = NULL;
 	struct hunk *hunk = NULL;
@@ -217,6 +217,7 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
 			hunk->start = p - plain->buf;
 			if (colored_p)
 				hunk->colored_start = colored_p - colored->buf;
+			marker = '\0';
 		} else if (p == plain->buf)
 			BUG("diff starts with unexpected line:\n"
 			    "%.*s\n", (int)(eol - p), p);
@@ -225,6 +226,13 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
 		else if (starts_with(p, "@@ ") ||
 			 (hunk == &file_diff->head &&
 			  skip_prefix(p, "deleted file", &deleted))) {
+			if (marker == '-' || marker == '+')
+				/*
+				 * Should not happen; previous hunk did not end
+				 * in a context line? Handle it anyway.
+				 */
+				hunk->splittable_into++;
+
 			file_diff->hunk_nr++;
 			ALLOC_GROW(file_diff->hunk, file_diff->hunk_nr,
 				   file_diff->hunk_alloc);
@@ -239,6 +247,12 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
 				file_diff->deleted = 1;
 			else if (parse_hunk_header(s, hunk) < 0)
 				return -1;
+
+			/*
+			 * Start counting into how many hunks this one can be
+			 * split
+			 */
+			marker = *p;
 		} else if (hunk == &file_diff->head &&
 			   skip_prefix(p, "old mode ", &mode_change) &&
 			   is_octal(mode_change, eol - mode_change)) {
@@ -286,6 +300,11 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
 			    (int)(eol - (plain->buf + file_diff->head.start)),
 			    plain->buf + file_diff->head.start);
 
+		if ((marker == '-' || marker == '+') && *p == ' ')
+			hunk->splittable_into++;
+		if (marker && *p != '\\')
+			marker = *p;
+
 		p = eol == pend ? pend : eol + 1;
 		hunk->end = p - plain->buf;
 
@@ -311,9 +330,30 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps)
 		}
 	}
 
+	if (marker == '-' || marker == '+')
+		/*
+		 * Last hunk ended in non-context line (i.e. it appended lines
+		 * to the file, so there are no trailing context lines).
+		 */
+		hunk->splittable_into++;
+
 	return 0;
 }
 
+static size_t find_next_line(struct strbuf *sb, size_t offset)
+{
+	char *eol;
+
+	if (offset >= sb->len)
+		BUG("looking for next line beyond buffer (%d >= %d)\n%s",
+		    (int)offset, (int)sb->len, sb->buf);
+
+	eol = memchr(sb->buf + offset, '\n', sb->len - offset);
+	if (!eol)
+		return sb->len;
+	return eol - sb->buf + 1;
+}
+
 static void render_hunk(struct add_p_state *s, struct hunk *hunk,
 			ssize_t delta, int colored, struct strbuf *out)
 {
@@ -412,6 +452,165 @@ static void reassemble_patch(struct add_p_state *s,
 	}
 }
 
+static int split_hunk(struct add_p_state *s, struct file_diff *file_diff,
+		       size_t hunk_index)
+{
+	int colored = !!s->colored.len, first = 1;
+	struct hunk *hunk = file_diff->hunk + hunk_index;
+	size_t splittable_into;
+	size_t end, colored_end, current, colored_current = 0, context_line_count;
+	struct hunk_header remaining, *header;
+	char marker, ch;
+
+	if (hunk_index >= file_diff->hunk_nr)
+		BUG("invalid hunk index: %d (must be >= 0 and < %d)",
+		    (int)hunk_index, (int)file_diff->hunk_nr);
+
+	if (hunk->splittable_into < 2)
+		return 0;
+	splittable_into = hunk->splittable_into;
+
+	end = hunk->end;
+	colored_end = hunk->colored_end;
+
+	remaining = hunk->header;
+
+	file_diff->hunk_nr += splittable_into - 1;
+	ALLOC_GROW(file_diff->hunk, file_diff->hunk_nr, file_diff->hunk_alloc);
+	if (hunk_index + splittable_into < file_diff->hunk_nr)
+		memmove(file_diff->hunk + hunk_index + splittable_into,
+			file_diff->hunk + hunk_index + 1,
+			(file_diff->hunk_nr - hunk_index - splittable_into)
+			* sizeof(*hunk));
+	hunk = file_diff->hunk + hunk_index;
+	hunk->splittable_into = 1;
+	memset(hunk + 1, 0, (splittable_into - 1) * sizeof(*hunk));
+
+	header = &hunk->header;
+	header->old_count = header->new_count = 0;
+
+	current = hunk->start;
+	if (colored)
+		colored_current = hunk->colored_start;
+	marker = '\0';
+	context_line_count = 0;
+
+	while (splittable_into > 1) {
+		ch = s->plain.buf[current];
+
+		if (!ch)
+			BUG("buffer overrun while splitting hunks");
+
+		/*
+		 * Is this the first context line after a chain of +/- lines?
+		 * Then record the start of the next split hunk.
+		 */
+		if ((marker == '-' || marker == '+') && ch == ' ') {
+			first = 0;
+			hunk[1].start = current;
+			if (colored)
+				hunk[1].colored_start = colored_current;
+			context_line_count = 0;
+		}
+
+		/*
+		 * Was the previous line a +/- one? Alternatively, is this the
+		 * first line (and not a +/- one)?
+		 *
+		 * Then just increment the appropriate counter and continue
+		 * with the next line.
+		 */
+		if (marker != ' ' || (ch != '-' && ch != '+')) {
+next_hunk_line:
+			/* Comment lines are attached to the previous line */
+			if (ch == '\\')
+				ch = marker ? marker : ' ';
+
+			/* current hunk not done yet */
+			if (ch == ' ')
+				context_line_count++;
+			else if (ch == '-')
+				header->old_count++;
+			else if (ch == '+')
+				header->new_count++;
+			else
+				BUG("unhandled diff marker: '%c'", ch);
+			marker = ch;
+			current = find_next_line(&s->plain, current);
+			if (colored)
+				colored_current =
+					find_next_line(&s->colored,
+						       colored_current);
+			continue;
+		}
+
+		/*
+		 * We got us the start of a new hunk!
+		 *
+		 * This is a context line, so it is shared with the previous
+		 * hunk, if any.
+		 */
+
+		if (first) {
+			if (header->old_count || header->new_count)
+				BUG("counts are off: %d/%d",
+				    (int)header->old_count,
+				    (int)header->new_count);
+
+			header->old_count = context_line_count;
+			header->new_count = context_line_count;
+			context_line_count = 0;
+			first = 0;
+			goto next_hunk_line;
+		}
+
+		remaining.old_offset += header->old_count;
+		remaining.old_count -= header->old_count;
+		remaining.new_offset += header->new_count;
+		remaining.new_count -= header->new_count;
+
+		/* initialize next hunk header's offsets */
+		hunk[1].header.old_offset =
+			header->old_offset + header->old_count;
+		hunk[1].header.new_offset =
+			header->new_offset + header->new_count;
+
+		/* add one split hunk */
+		header->old_count += context_line_count;
+		header->new_count += context_line_count;
+
+		hunk->end = current;
+		if (colored)
+			hunk->colored_end = colored_current;
+
+		hunk++;
+		hunk->splittable_into = 1;
+		hunk->use = hunk[-1].use;
+		header = &hunk->header;
+
+		header->old_count = header->new_count = context_line_count;
+		context_line_count = 0;
+
+		splittable_into--;
+		marker = ch;
+	}
+
+	/* last hunk simply gets the rest */
+	if (header->old_offset != remaining.old_offset)
+		BUG("miscounted old_offset: %lu != %lu",
+		    header->old_offset, remaining.old_offset);
+	if (header->new_offset != remaining.new_offset)
+		BUG("miscounted new_offset: %lu != %lu",
+		    header->new_offset, remaining.new_offset);
+	header->old_count = remaining.old_count;
+	header->new_count = remaining.new_count;
+	hunk->end = end;
+	if (colored)
+		hunk->colored_end = colored_end;
+
+	return 0;
+}
+
 static const char help_patch_text[] =
 N_("y - stage this hunk\n"
    "n - do not stage this hunk\n"
@@ -421,6 +620,7 @@ N_("y - stage this hunk\n"
    "J - leave this hunk undecided, see next hunk\n"
    "k - leave this hunk undecided, see previous undecided hunk\n"
    "K - leave this hunk undecided, see previous hunk\n"
+   "s - split the current hunk into smaller hunks\n"
    "? - print help\n");
 
 static int patch_update_file(struct add_p_state *s,
@@ -477,6 +677,8 @@ static int patch_update_file(struct add_p_state *s,
 			strbuf_addstr(&s->buf, ",j");
 		if (hunk_index + 1 < file_diff->hunk_nr)
 			strbuf_addstr(&s->buf, ",J");
+		if (hunk->splittable_into > 1)
+			strbuf_addstr(&s->buf, ",s");
 
 		if (file_diff->deleted)
 			prompt_mode_type = PROMPT_DELETION;
@@ -539,6 +741,15 @@ static int patch_update_file(struct add_p_state *s,
 				hunk_index = undecided_next;
 			else
 				err(s, _("No next hunk"));
+		} else if (s->answer.buf[0] == 's') {
+			size_t splittable_into = hunk->splittable_into;
+			if (splittable_into < 2)
+				err(s, _("Sorry, cannot split this hunk"));
+			else if (!split_hunk(s, file_diff,
+					     hunk - file_diff->hunk))
+				color_fprintf_ln(stdout, s->s.header_color,
+						 _("Split into %d hunks."),
+						 (int)splittable_into);
 		} else
 			color_fprintf(stdout, s->s.help_color,
 				      _(help_patch_text));
diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh
index 5db6432e33..fe383be50e 100755
--- a/t/t3701-add-interactive.sh
+++ b/t/t3701-add-interactive.sh
@@ -442,6 +442,18 @@ test_expect_failure 'split hunk "add -p (no, yes, edit)"' '
 	! grep "^+31" actual
 '
 
+test_expect_success 'split hunk with incomplete line at end' '
+	git reset --hard &&
+	printf "missing LF" >>test &&
+	git add test &&
+	test_write_lines before 10 20 30 40 50 60 70 >test &&
+	git grep --cached missing &&
+	test_write_lines s n y q | git add -p &&
+	test_must_fail git grep --cached missing &&
+	git grep before &&
+	test_must_fail git grep --cached before
+'
+
 test_expect_failure 'edit, adding lines to the first hunk' '
 	test_write_lines 10 11 20 30 40 50 51 60 >test &&
 	git reset &&
-- 
gitgitgadget


  parent reply	other threads:[~2019-12-13  8:08 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-13  8:07 [PATCH 00/19] Implement the git add --patch backend in C Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 01/19] built-in add -i: start implementing the `patch` functionality " Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 02/19] built-in add -i: wire up the new C code for the `patch` command Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 03/19] built-in add -p: show colored hunks by default Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 04/19] built-in add -p: adjust hunk headers as needed Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 05/19] built-in add -p: color the prompt and the help text Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 06/19] built-in add -p: offer a helpful error message when hunk navigation failed Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 07/19] built-in add -p: support multi-file diffs Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 08/19] built-in add -p: handle deleted empty files Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 09/19] built-in app -p: allow selecting a mode change as a "hunk" Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` [PATCH 10/19] built-in add -p: show different prompts for mode changes and deletions Johannes Schindelin via GitGitGadget
2019-12-13  8:07 ` Johannes Schindelin via GitGitGadget [this message]
2019-12-13  8:07 ` [PATCH 12/19] built-in add -p: coalesce hunks after splitting them Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 13/19] strbuf: add a helper function to call the editor "on an strbuf" Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 14/19] built-in add -p: implement hunk editing Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 15/19] built-in add -p: implement the 'g' ("goto") command Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 16/19] built-in add -p: implement the '/' ("search regex") command Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 17/19] built-in add -p: implement the 'q' ("quit") command Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 18/19] built-in add -p: only show the applicable parts of the help text Johannes Schindelin via GitGitGadget
2019-12-13  8:08 ` [PATCH 19/19] built-in add -p: show helpful hint when nothing can be staged Johannes Schindelin via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2cd9855fffe97ce31640dfa5ed22bffa98047403.1576224486.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=johannes.schindelin@gmx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).