git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Stefan Beller <sbeller@google.com>
To: sbeller@google.com
Cc: git@vger.kernel.org, gitster@pobox.com, simon@ruderich.org
Subject: [PATCHv2 21/25] diff.c: buffer all output if asked to
Date: Fri, 30 Jun 2017 13:53:06 -0700	[thread overview]
Message-ID: <20170630205310.7380-22-sbeller@google.com> (raw)
In-Reply-To: <20170630205310.7380-1-sbeller@google.com>

Introduce a new option 'emitted_symbols' in the struct diff_options which
controls whether all output is buffered up until all output is available.
It is set internally in diff.c when necessary.

We'll have a new struct 'emitted_string' in diff.c which will be used to
buffer each line.  The emitted_string will duplicate the memory of the
line to buffer as that is easiest to reason about for now. In a future
patch we may want to decrease the memory usage by not duplicating all
output for buffering but rather we may want to store offsets into the
file or in case of hunk descriptions such as the similarity score, we
could just store the relevant number and reproduce the text later on.

This approach was chosen as a first step because it is quite simple
compared to the alternative with less memory footprint.

emit_diff_symbol factors out the emission part and depending on the
diff_options->emitted_symbols the emission will be performed directly
when calling emit_diff_symbol or after the whole process is done, i.e.
by buffering we have add the possibility for a second pass over the
whole output before doing the actual output.

In 6440d34 (2012-03-14, diff: tweak a _copy_ of diff_options with
word-diff) we introduced a duplicate diff options struct for word
emissions as we may have different regex settings in there.
When buffering the output, we need to operate on just one buffer,
so we have to copy back the emissions of the word buffer into the
main buffer.

Unconditionally enable output via buffer in this patch as it yields
a great opportunity for testing, i.e. all the diff tests from the
test suite pass without having reordering issues (i.e. only parts
of the output got buffered, and we forgot to buffer other parts).
The test suite passes, which gives confidence that we converted all
functions to use emit_string for output.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 diff.h |   2 ++
 2 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 2db0d7c0f5..31720abf8f 100644
--- a/diff.c
+++ b/diff.c
@@ -605,6 +605,47 @@ enum diff_symbol {
 #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
 #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK)
 
+/*
+ * This struct is used when we need to buffer the output of the diff output.
+ *
+ * NEEDSWORK: Instead of storing a copy of the line, add an offset pointer
+ * into the pre/post image file. This pointer could be a union with the
+ * line pointer. By storing an offset into the file instead of the literal line,
+ * we can decrease the memory footprint for the buffered output. At first we
+ * may want to only have indirection for the content lines, but we could also
+ * enhance the state for emitting prefabricated lines, e.g. the similarity
+ * score line or hunk/file headers would only need to store a number or path
+ * and then the output can be constructed later on depending on state.
+ */
+struct emitted_diff_symbol {
+	const char *line;
+	int len;
+	int flags;
+	enum diff_symbol s;
+};
+#define EMITTED_DIFF_SYMBOL_INIT {NULL}
+
+struct emitted_diff_symbols {
+	struct emitted_diff_symbol *buf;
+	int nr, alloc;
+};
+#define EMITTED_DIFF_SYMBOLS_INIT {NULL, 0, 0}
+
+static void append_emitted_diff_symbol(struct diff_options *o,
+				       struct emitted_diff_symbol *e)
+{
+	struct emitted_diff_symbol *f;
+
+	ALLOC_GROW(o->emitted_symbols->buf,
+		   o->emitted_symbols->nr + 1,
+		   o->emitted_symbols->alloc);
+	f = &o->emitted_symbols->buf[o->emitted_symbols->nr++];
+
+	memcpy(f, e, sizeof(struct emitted_diff_symbol));
+	f->line = e->line ? xmemdupz(e->line, e->len) : NULL;
+}
+
+
 static void emit_line_ws_markup(struct diff_options *o,
 				const char *set, const char *reset,
 				const char *line, int len, char sign,
@@ -631,12 +672,18 @@ static void emit_line_ws_markup(struct diff_options *o,
 	}
 }
 
-static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
-			     const char *line, int len, unsigned flags)
+static void emit_diff_symbol_from_struct(struct diff_options *o,
+					 struct emitted_diff_symbol *eds)
 {
 	static const char *nneof = " No newline at end of file\n";
 	const char *context, *reset, *set, *meta, *fraginfo;
 	struct strbuf sb = STRBUF_INIT;
+
+	enum diff_symbol s = eds->s;
+	const char *line = eds->line;
+	int len = eds->len;
+	unsigned flags = eds->flags;
+
 	switch (s) {
 	case DIFF_SYMBOL_NO_LF_EOF:
 		context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -778,6 +825,17 @@ static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
 	strbuf_release(&sb);
 }
 
+static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
+			     const char *line, int len, unsigned flags)
+{
+	struct emitted_diff_symbol e = {line, len, flags, s};
+
+	if (o->emitted_symbols)
+		append_emitted_diff_symbol(o, &e);
+	else
+		emit_diff_symbol_from_struct(o, &e);
+}
+
 void diff_emit_submodule_del(struct diff_options *o, const char *line)
 {
 	emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_DEL, line, strlen(line), 0);
@@ -1374,9 +1432,29 @@ static void diff_words_show(struct diff_words_data *diff_words)
 /* In "color-words" mode, show word-diff of words accumulated in the buffer */
 static void diff_words_flush(struct emit_callback *ecbdata)
 {
+	struct diff_options *wo = ecbdata->diff_words->opt;
+
 	if (ecbdata->diff_words->minus.text.size ||
 	    ecbdata->diff_words->plus.text.size)
 		diff_words_show(ecbdata->diff_words);
+
+	if (wo->emitted_symbols) {
+		struct diff_options *o = ecbdata->opt;
+		struct emitted_diff_symbols *wol = wo->emitted_symbols;
+		int i;
+
+		/*
+		 * NEEDSWORK:
+		 * Instead of appending each, concat all words to a line?
+		 */
+		for (i = 0; i < wol->nr; i++)
+			append_emitted_diff_symbol(o, &wol->buf[i]);
+
+		for (i = 0; i < wol->nr; i++)
+			free((void *)wol->buf[i].line);
+
+		wol->nr = 0;
+	}
 }
 
 static void diff_filespec_load_driver(struct diff_filespec *one)
@@ -1412,6 +1490,11 @@ static void init_diff_words_data(struct emit_callback *ecbdata,
 		xcalloc(1, sizeof(struct diff_words_data));
 	ecbdata->diff_words->type = o->word_diff;
 	ecbdata->diff_words->opt = o;
+
+	if (orig_opts->emitted_symbols)
+		o->emitted_symbols =
+			xcalloc(1, sizeof(struct emitted_diff_symbols));
+
 	if (!o->word_regex)
 		o->word_regex = userdiff_word_regex(one);
 	if (!o->word_regex)
@@ -1446,6 +1529,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 {
 	if (ecbdata->diff_words) {
 		diff_words_flush(ecbdata);
+		free (ecbdata->diff_words->opt->emitted_symbols);
 		free (ecbdata->diff_words->opt);
 		free (ecbdata->diff_words->minus.text.ptr);
 		free (ecbdata->diff_words->minus.orig);
@@ -4996,16 +5080,37 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc)
 static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 {
 	int i;
+	static struct emitted_diff_symbols esm = EMITTED_DIFF_SYMBOLS_INIT;
 	struct diff_queue_struct *q = &diff_queued_diff;
 
 	if (WSEH_NEW & WS_RULE_MASK)
 		die("BUG: WS rules bit mask overlaps with diff symbol flags");
 
+	/*
+	 * For testing purposes we want to make sure the diff machinery
+	 * works completely with the buffer. If there is anything emitted
+	 * outside the emit_string, then the order is screwed
+	 * up and the tests will fail.
+	 *
+	 * TODO (later in this series):
+	 * We'll unset this pointer in a later patch.
+	 */
+	o->emitted_symbols = &esm;
+
 	for (i = 0; i < q->nr; i++) {
 		struct diff_filepair *p = q->queue[i];
 		if (check_pair_status(p))
 			diff_flush_patch(p, o);
 	}
+
+	if (o->emitted_symbols) {
+		for (i = 0; i < esm.nr; i++)
+			emit_diff_symbol_from_struct(o, &esm.buf[i]);
+
+		for (i = 0; i < esm.nr; i++)
+			free((void *)esm.buf[i].line);
+	}
+	esm.nr = 0;
 }
 
 void diff_flush(struct diff_options *options)
diff --git a/diff.h b/diff.h
index d6094a1eed..4a3b9bde40 100644
--- a/diff.h
+++ b/diff.h
@@ -186,6 +186,8 @@ struct diff_options {
 	void *output_prefix_data;
 
 	int diff_path_counter;
+
+	struct emitted_diff_symbols *emitted_symbols;
 };
 
 void diff_emit_submodule_del(struct diff_options *o, const char *line);
-- 
2.13.0.31.g9b732c453e


  parent reply	other threads:[~2017-06-30 20:53 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-30  0:06 [PATCH 00/25] Reroll of sb/diff-color-moved Stefan Beller
2017-06-30  0:06 ` [PATCH 01/25] diff.c: readability fix Stefan Beller
2017-06-30  0:06 ` [PATCH 02/25] diff.c: move line ending check into emit_hunk_header Stefan Beller
2017-06-30  0:06 ` [PATCH 03/25] diff.c: factor out diff_flush_patch_all_file_pairs Stefan Beller
2017-06-30  0:06 ` [PATCH 04/25] diff.c: introduce emit_diff_symbol Stefan Beller
2017-06-30  0:06 ` [PATCH 05/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_MARKER Stefan Beller
2017-06-30  0:06 ` [PATCH 06/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_FRAGINFO Stefan Beller
2017-06-30  0:06 ` [PATCH 07/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_NO_LF_EOF Stefan Beller
2017-06-30  0:06 ` [PATCH 08/25] diff.c: migrate emit_line_checked to use emit_diff_symbol Stefan Beller
2017-06-30  0:06 ` [PATCH 09/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_WORDS[_PORCELAIN] Stefan Beller
2017-06-30  0:06 ` [PATCH 10/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_INCOMPLETE Stefan Beller
2017-06-30  0:06 ` [PATCH 11/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_FILEPAIR_{PLUS, MINUS} Stefan Beller
2017-06-30  0:06 ` [PATCH 12/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_HEADER Stefan Beller
2017-06-30  0:06 ` [PATCH 13/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_BINARY_FILES Stefan Beller
2017-06-30  0:06 ` [PATCH 14/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_REWRITE_DIFF Stefan Beller
2017-06-30  0:07 ` [PATCH 15/25] submodule.c: migrate diff output to use emit_diff_symbol Stefan Beller
2017-06-30  0:07 ` [PATCH 16/25] diff.c: convert emit_binary_diff_body " Stefan Beller
2017-06-30  0:07 ` [PATCH 17/25] diff.c: convert show_stats " Stefan Beller
2017-06-30  0:07 ` [PATCH 18/25] diff.c: convert word diffing " Stefan Beller
2017-06-30  0:07 ` [PATCH 19/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_STAT_SEP Stefan Beller
2017-06-30  0:07 ` [PATCH 20/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_SUMMARY Stefan Beller
2017-06-30  0:07 ` [PATCH 21/25] diff.c: buffer all output if asked to Stefan Beller
2017-06-30  0:07 ` [PATCH 22/25] diff.c: color moved lines differently Stefan Beller
2017-06-30 17:54   ` Junio C Hamano
2017-06-30 18:13     ` Stefan Beller
2017-06-30 18:41       ` Junio C Hamano
2017-06-30  0:07 ` [PATCH 23/25] diff.c: color moved lines differently, plain mode Stefan Beller
2017-06-30  0:07 ` [PATCH 24/25] diff.c: add dimming to moved line detection Stefan Beller
2017-06-30  0:07 ` [PATCH 25/25] diff: document the new --color-moved setting Stefan Beller
2017-06-30  7:26   ` Simon Ruderich
2017-06-30 16:04     ` Stefan Beller
2017-06-30 20:31       ` Simon Ruderich
2017-06-30 20:33         ` Stefan Beller
2017-06-30 20:52 ` [PATCHv2 00/25] Reroll of sb/diff-color-moved Stefan Beller
2017-06-30 20:52   ` [PATCHv2 01/25] diff.c: readability fix Stefan Beller
2017-06-30 20:52   ` [PATCHv2 02/25] diff.c: move line ending check into emit_hunk_header Stefan Beller
2017-06-30 20:52   ` [PATCHv2 03/25] diff.c: factor out diff_flush_patch_all_file_pairs Stefan Beller
2017-06-30 20:52   ` [PATCHv2 04/25] diff.c: introduce emit_diff_symbol Stefan Beller
2017-06-30 20:52   ` [PATCHv2 05/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_MARKER Stefan Beller
2017-06-30 20:52   ` [PATCHv2 06/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_FRAGINFO Stefan Beller
2017-06-30 20:52   ` [PATCHv2 07/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_NO_LF_EOF Stefan Beller
2017-06-30 20:52   ` [PATCHv2 08/25] diff.c: migrate emit_line_checked to use emit_diff_symbol Stefan Beller
2017-06-30 20:52   ` [PATCHv2 09/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_WORDS[_PORCELAIN] Stefan Beller
2017-06-30 20:52   ` [PATCHv2 10/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_CONTEXT_INCOMPLETE Stefan Beller
2017-06-30 20:52   ` [PATCHv2 11/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_FILEPAIR_{PLUS, MINUS} Stefan Beller
2017-06-30 20:52   ` [PATCHv2 12/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_HEADER Stefan Beller
2017-06-30 20:52   ` [PATCHv2 13/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_BINARY_FILES Stefan Beller
2017-06-30 20:52   ` [PATCHv2 14/25] diff.c: emit_diff_symbol learns DIFF_SYMBOL_REWRITE_DIFF Stefan Beller
2017-06-30 20:53   ` [PATCHv2 15/25] submodule.c: migrate diff output to use emit_diff_symbol Stefan Beller
2017-06-30 20:53   ` [PATCHv2 16/25] diff.c: convert emit_binary_diff_body " Stefan Beller
2017-06-30 20:53   ` [PATCHv2 17/25] diff.c: convert show_stats " Stefan Beller
2017-06-30 20:53   ` [PATCHv2 18/25] diff.c: convert word diffing " Stefan Beller
2017-06-30 20:53   ` [PATCHv2 19/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_STAT_SEP Stefan Beller
2017-06-30 20:53   ` [PATCHv2 20/25] diff.c: emit_diff_symbol learns about DIFF_SYMBOL_SUMMARY Stefan Beller
2017-06-30 20:53   ` Stefan Beller [this message]
2017-06-30 20:53   ` [PATCHv2 22/25] diff.c: color moved lines differently Stefan Beller
2017-06-30 21:11     ` Junio C Hamano
2017-06-30 21:21       ` Stefan Beller
2017-06-30 21:36         ` Junio C Hamano
2017-06-30 20:53   ` [PATCHv2 23/25] diff.c: color moved lines differently, plain mode Stefan Beller
2017-06-30 20:53   ` [PATCHv2 24/25] diff.c: add dimming to moved line detection Stefan Beller
2017-06-30 20:53   ` [PATCHv2 25/25] diff: document the new --color-moved setting Stefan Beller
2017-06-30 21:49   ` [PATCHv2 00/25] Reroll of sb/diff-color-moved Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2017-06-20  2:47 [PATCH 00/26] reroll " Stefan Beller
2017-06-23  1:28 ` [PATCHv2 00/25] " Stefan Beller
2017-06-23  1:29   ` [PATCHv2 21/25] diff.c: buffer all output if asked to Stefan Beller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170630205310.7380-22-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=simon@ruderich.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).