git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH v5 1/2] grep: refactor next_match() and match_one_pattern() for external use
@ 2021-09-16 14:09 Hamza Mahfooz
  2021-09-16 14:09 ` [PATCH v5 2/2] pretty: colorize pattern matches in commit messages Hamza Mahfooz
  0 siblings, 1 reply; 6+ messages in thread
From: Hamza Mahfooz @ 2021-09-16 14:09 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Hamza Mahfooz

These changes are made in preparation of, the colorization support for the
"git log" subcommands that, rely on regex functionality (i.e. "--author",
"--committer" and "--grep"). These changes are necessary primarily because
the format of "bol" differs in the context that I require to use
match_one_pattern() in and because next_match() doesn't handle the case of
"ctx == GREP_CONTEXT_HEAD" at all. So, teach each function how to handle
the new cases.

Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
---
v5: separate grep changes from pretty changes.
---
 grep.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
 grep.h |  3 +++
 2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/grep.c b/grep.c
index 424a39591b..e77c8643d2 100644
--- a/grep.c
+++ b/grep.c
@@ -951,31 +951,38 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 			     enum grep_context ctx,
 			     regmatch_t *pmatch, int eflags)
 {
+	const char *field;
+	size_t len;
 	int hit = 0;
 	int saved_ch = 0;
 	const char *start = bol;
+	const char *end = eol;
 
 	if ((p->token != GREP_PATTERN) &&
-	    ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
+	    ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)) &&
+	    ((p->token == GREP_PATTERN_BODY) != (ctx == GREP_CONTEXT_BODY)))
 		return 0;
 
 	if (p->token == GREP_PATTERN_HEAD) {
-		const char *field;
-		size_t len;
-		assert(p->field < ARRAY_SIZE(header_field));
-		field = header_field[p->field].field;
-		len = header_field[p->field].len;
-		if (strncmp(bol, field, len))
-			return 0;
-		bol += len;
 		switch (p->field) {
 		case GREP_HEADER_AUTHOR:
 		case GREP_HEADER_COMMITTER:
 			saved_ch = strip_timestamp(bol, &eol);
+			if (eol == end)
+				goto again;
 			break;
 		default:
 			break;
 		}
+
+		assert(p->field < ARRAY_SIZE(header_field));
+		field = header_field[p->field].field;
+		len = header_field[p->field].len;
+
+		if (strncmp(bol, field, len))
+			goto restore;
+
+		bol += len;
 	}
 
  again:
@@ -1021,12 +1028,17 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 				goto again;
 		}
 	}
-	if (p->token == GREP_PATTERN_HEAD && saved_ch)
-		*eol = saved_ch;
+
 	if (hit) {
 		pmatch[0].rm_so += bol - start;
 		pmatch[0].rm_eo += bol - start;
 	}
+
+restore:
+	if (p->token == GREP_PATTERN_HEAD && saved_ch)
+		*eol = saved_ch;
+
+
 	return hit;
 }
 
@@ -1159,21 +1171,27 @@ static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
 	return 1;
 }
 
-static int next_match(struct grep_opt *opt, char *bol, char *eol,
-		      enum grep_context ctx, regmatch_t *pmatch, int eflags)
+int grep_next_match(struct grep_opt *opt, char *bol, char *eol,
+		    enum grep_context ctx, regmatch_t *pmatch,
+		    enum grep_header_field field, int eflags)
 {
 	struct grep_pat *p;
 	int hit = 0;
 
 	pmatch->rm_so = pmatch->rm_eo = -1;
 	if (bol < eol) {
-		for (p = opt->pattern_list; p; p = p->next) {
+		for (p = ((ctx == GREP_CONTEXT_HEAD)
+			   ? opt->header_list : opt->pattern_list);
+			  p; p = p->next) {
 			switch (p->token) {
 			case GREP_PATTERN: /* atom */
 			case GREP_PATTERN_HEAD:
 			case GREP_PATTERN_BODY:
-				hit |= match_next_pattern(p, bol, eol, ctx,
-							  pmatch, eflags);
+				if ((field == GREP_HEADER_FIELD_MAX) ||
+				    (p->field == field))
+					hit |= match_next_pattern(p, bol, eol,
+								  ctx, pmatch,
+								  eflags);
 				break;
 			default:
 				break;
@@ -1262,7 +1280,8 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
 				line_color = opt->colors[GREP_COLOR_FUNCTION];
 		}
 		*eol = '\0';
-		while (next_match(opt, bol, eol, ctx, &match, eflags)) {
+		while (grep_next_match(opt, bol, eol, ctx, &match,
+				       GREP_HEADER_FIELD_MAX, eflags)) {
 			if (match.rm_so == match.rm_eo)
 				break;
 
diff --git a/grep.h b/grep.h
index 72f82b1e30..d2943e29ea 100644
--- a/grep.h
+++ b/grep.h
@@ -177,6 +177,9 @@ void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const
 void compile_grep_patterns(struct grep_opt *opt);
 void free_grep_patterns(struct grep_opt *opt);
 int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size);
+int grep_next_match(struct grep_opt *opt, char *bol, char *eol,
+		    enum grep_context ctx, regmatch_t *pmatch,
+		    enum grep_header_field field, int eflags);
 
 struct grep_source {
 	char *name;
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v5 2/2] pretty: colorize pattern matches in commit messages
  2021-09-16 14:09 [PATCH v5 1/2] grep: refactor next_match() and match_one_pattern() for external use Hamza Mahfooz
@ 2021-09-16 14:09 ` Hamza Mahfooz
  2021-09-17  7:10   ` Eric Sunshine
  0 siblings, 1 reply; 6+ messages in thread
From: Hamza Mahfooz @ 2021-09-16 14:09 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Hamza Mahfooz

The "git log" command limits its output to the commits that contain strings
matched by a pattern when the "--grep=<pattern>" option is used, but unlike
output from "git grep -e <pattern>", the matches are not highlighted,
making them harder to spot.

Teach the pretty-printer code to highlight matches from the
"--grep=<pattern>", "--author=<pattern>" and "--committer=<pattern>"
options (to view the last one, you may have to ask for --pretty=fuller).

Also, it must be noted that we are effectively greping the content twice,
however it only slows down "git log --author=^H" on this repository by
around 1-2% (compared to v2.33.0), so it should be a small enough slow
down to justify the addition of the feature.

Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
---
v2: make the commit message whole (add the missing ingredients), rename
    append_matched_line() to append_line_with_color(), use
    colors[GREP_COLOR_MATCH_SELECTED] instead of
    colors[GREP_COLOR_MATCH_CONTEXT], allow the background color to be
    customized, don't copy strings to a buffer when not coloring in
    append_line_with_color(), rename next_match() to grep_next_match(),
    repurpose grep_next_match()/match_one_pattern() for use in
    append_line_with_color() (allowing us to remove duplicated matching
    code in append_line_with_color()), document how to customize the
    feature and modify some of the tests to fit the feature better.

v3: fix a formatting issue with the added documentation.

v4: add strbuf_add_with_color(), use the correct color code scheme in the
    unit tests and add more unit tests.

v5: separate grep changes from pretty changes and add some performance
    analysis in the commit message.
---
 Documentation/git-log.txt |   8 +++
 pretty.c                  | 110 +++++++++++++++++++++++++++++++++-----
 t/t4202-log.sh            |  55 +++++++++++++++++++
 3 files changed, 161 insertions(+), 12 deletions(-)

diff --git a/Documentation/git-log.txt b/Documentation/git-log.txt
index 0498e7bacb..c689f7d235 100644
--- a/Documentation/git-log.txt
+++ b/Documentation/git-log.txt
@@ -241,6 +241,14 @@ This setting can be disabled by the `--no-notes` option,
 overridden by the `GIT_NOTES_DISPLAY_REF` environment variable,
 and overridden by the `--notes=<ref>` option.
 
+color.grep.selected::
+	Determines the non matching text (background) color of selected lines,
+	when `--grep`, `--author` or `--committer` are used.
+
+color.grep.matchSelected::
+	Determines the matching text (foreground) color of selected lines, when
+	`--grep`, `--author` or `--committer` are used.
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/pretty.c b/pretty.c
index 73b5ead509..77969a623b 100644
--- a/pretty.c
+++ b/pretty.c
@@ -431,15 +431,74 @@ const char *show_ident_date(const struct ident_split *ident,
 	return show_date(date, tz, mode);
 }
 
+static inline void strbuf_add_with_color(struct strbuf *sb, const char *color,
+					 char *buf, size_t buflen)
+{
+	strbuf_add(sb, color, strlen(color));
+	strbuf_add(sb, buf, buflen);
+	if (strlen(color))
+		strbuf_add(sb, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
+}
+
+static void append_line_with_color(struct strbuf *sb, struct grep_opt *opt,
+				   const char *line, size_t linelen,
+				   int color, enum grep_context ctx,
+				   enum grep_header_field field)
+{
+	char *buf, *eol;
+	const char *line_color, *match_color;
+	regmatch_t match;
+	struct strbuf tmp_sb;
+	int eflags = 0;
+
+	if (!opt || !want_color(color) || opt->invert) {
+		strbuf_add(sb, line, linelen);
+		return;
+	}
+
+	strbuf_init(&tmp_sb, linelen + 1);
+	strbuf_add(&tmp_sb, line, linelen);
+
+	buf = tmp_sb.buf;
+	eol = buf + linelen;
+	line_color = opt->colors[GREP_COLOR_SELECTED];
+	match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
+
+	while (grep_next_match(opt, buf, eol, ctx, &match, field, eflags)) {
+		if (match.rm_so == match.rm_eo)
+			break;
+
+		strbuf_grow(sb, strlen(line_color) + strlen(match_color) +
+			    (2 * strlen(GIT_COLOR_RESET)));
+		strbuf_add_with_color(sb, line_color, buf, match.rm_so);
+		strbuf_add_with_color(sb, match_color, buf + match.rm_so,
+				      match.rm_eo - match.rm_so);
+		buf += match.rm_eo;
+		eflags = REG_NOTBOL;
+	}
+
+	if (buf != line) {
+		strbuf_grow(sb, strlen(line_color) + strlen(GIT_COLOR_RESET));
+		strbuf_add_with_color(sb, line_color, buf, eol - buf);
+	} else {
+		strbuf_add(sb, buf, eol - buf);
+	}
+
+	strbuf_release(&tmp_sb);
+}
+
 void pp_user_info(struct pretty_print_context *pp,
 		  const char *what, struct strbuf *sb,
 		  const char *line, const char *encoding)
 {
+	struct strbuf id;
 	struct ident_split ident;
 	char *line_end;
 	const char *mailbuf, *namebuf;
 	size_t namelen, maillen;
 	int max_length = 78; /* per rfc2822 */
+	enum grep_header_field field = GREP_HEADER_FIELD_MAX;
+	struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
 
 	if (pp->fmt == CMIT_FMT_ONELINE)
 		return;
@@ -496,9 +555,22 @@ void pp_user_info(struct pretty_print_context *pp,
 			strbuf_addch(sb, '\n');
 		strbuf_addf(sb, " <%.*s>\n", (int)maillen, mailbuf);
 	} else {
-		strbuf_addf(sb, "%s: %.*s%.*s <%.*s>\n", what,
-			    (pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, "    ",
-			    (int)namelen, namebuf, (int)maillen, mailbuf);
+		strbuf_init(&id, namelen + maillen + 4);
+
+		if (!strcmp(what, "Author"))
+			field = GREP_HEADER_AUTHOR;
+		else if (!strcmp(what, "Commit"))
+			field = GREP_HEADER_COMMITTER;
+
+		strbuf_addf(sb, "%s: %.*s", what,
+			    (pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, "    ");
+		strbuf_addf(&id, "%.*s <%.*s>", (int)namelen, namebuf,
+			    (int)maillen, mailbuf);
+
+		append_line_with_color(sb, opt, id.buf, id.len, pp->color,
+				       GREP_CONTEXT_HEAD, field);
+		strbuf_addch(sb, '\n');
+		strbuf_release(&id);
 	}
 
 	switch (pp->fmt) {
@@ -1939,8 +2011,9 @@ static int pp_utf8_width(const char *start, const char *end)
 	return width;
 }
 
-static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
-				 const char *line, int linelen)
+static void strbuf_add_tabexpand(struct strbuf *sb, struct grep_opt *opt,
+				 int color, int tabwidth, const char *line,
+				 int linelen)
 {
 	const char *tab;
 
@@ -1957,7 +2030,9 @@ static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
 			break;
 
 		/* Output the data .. */
-		strbuf_add(sb, line, tab - line);
+		append_line_with_color(sb, opt, line, tab - line, color,
+				       GREP_CONTEXT_BODY,
+				       GREP_HEADER_FIELD_MAX);
 
 		/* .. and the de-tabified tab */
 		strbuf_addchars(sb, ' ', tabwidth - (width % tabwidth));
@@ -1972,7 +2047,8 @@ static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
 	 * worrying about width - there's nothing more to
 	 * align.
 	 */
-	strbuf_add(sb, line, linelen);
+	append_line_with_color(sb, opt, line, linelen, color, GREP_CONTEXT_BODY,
+			       GREP_HEADER_FIELD_MAX);
 }
 
 /*
@@ -1984,11 +2060,16 @@ static void pp_handle_indent(struct pretty_print_context *pp,
 			     struct strbuf *sb, int indent,
 			     const char *line, int linelen)
 {
+	struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
+
 	strbuf_addchars(sb, ' ', indent);
 	if (pp->expand_tabs_in_log)
-		strbuf_add_tabexpand(sb, pp->expand_tabs_in_log, line, linelen);
+		strbuf_add_tabexpand(sb, opt, pp->color, pp->expand_tabs_in_log,
+				     line, linelen);
 	else
-		strbuf_add(sb, line, linelen);
+		append_line_with_color(sb, opt, line, linelen, pp->color,
+				       GREP_CONTEXT_BODY,
+				       GREP_HEADER_FIELD_MAX);
 }
 
 static int is_mboxrd_from(const char *line, int len)
@@ -2006,7 +2087,9 @@ void pp_remainder(struct pretty_print_context *pp,
 		  struct strbuf *sb,
 		  int indent)
 {
+	struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
 	int first = 1;
+
 	for (;;) {
 		const char *line = *msg_p;
 		int linelen = get_one_line(line);
@@ -2027,14 +2110,17 @@ void pp_remainder(struct pretty_print_context *pp,
 		if (indent)
 			pp_handle_indent(pp, sb, indent, line, linelen);
 		else if (pp->expand_tabs_in_log)
-			strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
-					     line, linelen);
+			strbuf_add_tabexpand(sb, opt, pp->color,
+					     pp->expand_tabs_in_log, line,
+					     linelen);
 		else {
 			if (pp->fmt == CMIT_FMT_MBOXRD &&
 					is_mboxrd_from(line, linelen))
 				strbuf_addch(sb, '>');
 
-			strbuf_add(sb, line, linelen);
+			append_line_with_color(sb, opt, line, linelen,
+					       pp->color, GREP_CONTEXT_BODY,
+					       GREP_HEADER_FIELD_MAX);
 		}
 		strbuf_addch(sb, '\n');
 	}
diff --git a/t/t4202-log.sh b/t/t4202-log.sh
index 9dfead936b..943c00e338 100755
--- a/t/t4202-log.sh
+++ b/t/t4202-log.sh
@@ -449,6 +449,61 @@ test_expect_success !FAIL_PREREQS 'log with various grep.patternType configurati
 	)
 '
 
+cat > expect << EOF
+Author: <BOLD;RED>A U<RESET> Thor <author@example.com>
+EOF
+
+test_expect_success 'log --author' '
+	git log -1 --color=always --author="A U" >log &&
+	grep Author log >actual.raw &&
+	test_decode_color <actual.raw >actual &&
+	test_cmp expect actual
+'
+
+cat > expect << EOF
+Commit:     C O Mitter <committer@<BOLD;RED>example<RESET>.com>
+EOF
+
+test_expect_success 'log --committer' '
+	git log -1 --color=always --pretty=fuller --committer="example" >log &&
+	grep "Commit:" log >actual.raw &&
+	test_decode_color <actual.raw >actual &&
+	test_cmp expect actual
+'
+
+cat > expect << EOF
+    <BOLD;RED>Sec<RESET>ond
+    <BOLD;RED>sec<RESET>ond
+EOF
+
+test_expect_success 'log -i --grep with color' '
+	git log --color=always -i --grep=sec >log &&
+	grep -i sec log >actual.raw &&
+	test_decode_color <actual.raw >actual &&
+	test_cmp expect actual
+'
+cat > expect << EOF
+    <GREEN>th<RESET><BOLD;RED>ir<RESET><GREEN>d<RESET>
+EOF
+
+test_expect_success '-c color.grep.selected log --grep' '
+	git -c color.grep.selected="green" log --color=always --grep=ir >log &&
+	grep ir log >actual.raw &&
+	test_decode_color <actual.raw >actual &&
+	test_cmp expect actual
+'
+
+cat > expect << EOF
+    <BLUE>i<RESET>n<BLUE>i<RESET>t<BLUE>i<RESET>al
+EOF
+
+test_expect_success '-c color.grep.matchSelected log --grep' '
+	git -c color.grep.matchSelected="blue" log --color=always --grep=i >log &&
+	grep al log >actual.raw &&
+	test_decode_color <actual.raw >actual &&
+	test_cmp expect actual
+'
+
 cat > expect <<EOF
 * Second
 * sixth
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 2/2] pretty: colorize pattern matches in commit messages
  2021-09-16 14:09 ` [PATCH v5 2/2] pretty: colorize pattern matches in commit messages Hamza Mahfooz
@ 2021-09-17  7:10   ` Eric Sunshine
  2021-09-17 21:14     ` Hamza Mahfooz
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Sunshine @ 2021-09-17  7:10 UTC (permalink / raw)
  To: Hamza Mahfooz; +Cc: Git List, Junio C Hamano

On Thu, Sep 16, 2021 at 10:10 AM Hamza Mahfooz
<someguy@effective-light.com> wrote:
> [...]
> Teach the pretty-printer code to highlight matches from the
> "--grep=<pattern>", "--author=<pattern>" and "--committer=<pattern>"
> options (to view the last one, you may have to ask for --pretty=fuller).
> [...]
> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>

A few relatively superficial comments below...

> diff --git a/Documentation/git-log.txt b/Documentation/git-log.txt
> @@ -241,6 +241,14 @@ This setting can be disabled by the `--no-notes` option,
> +color.grep.selected::
> +       Determines the non matching text (background) color of selected lines,
> +       when `--grep`, `--author` or `--committer` are used.
> +
> +color.grep.matchSelected::
> +       Determines the matching text (foreground) color of selected lines, when
> +       `--grep`, `--author` or `--committer` are used.

Should these be documented instead in Documentation/config/color.txt
as entries in the table under `color.grep.<slot>`?

> diff --git a/pretty.c b/pretty.c
> @@ -431,15 +431,74 @@ const char *show_ident_date(const struct ident_split *ident,
> +static inline void strbuf_add_with_color(struct strbuf *sb, const char *color,
> +                                        char *buf, size_t buflen)
> +{
> +       strbuf_add(sb, color, strlen(color));

There is no need to call strlen() here; instead use strbuf_addstr():

    strbuf_addstr(sb, color);

> +       strbuf_add(sb, buf, buflen);
> +       if (strlen(color))
> +               strbuf_add(sb, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));

Likewise, no need for strlen() in the conditional or when adding the
RESET color:

    if (*color)
        strbuf_addstr(sb, GIT_COLOR_RESET);

> +}
> +
> +static void append_line_with_color(struct strbuf *sb, struct grep_opt *opt,
> +                                  const char *line, size_t linelen,
> +                                  int color, enum grep_context ctx,
> +                                  enum grep_header_field field)
> +{
> +       strbuf_init(&tmp_sb, linelen + 1);

What is the +1 for? Is that to account for the NUL byte at the end of
the string? If so, there's no need to do so manually since strbuf will
take that into account itself.

> +       strbuf_add(&tmp_sb, line, linelen);
> +
> +       buf = tmp_sb.buf;
> +       eol = buf + linelen;

`buf` and `eol` seem like an accident waiting to happen...

> +       line_color = opt->colors[GREP_COLOR_SELECTED];
> +       match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
> +
> +       while (grep_next_match(opt, buf, eol, ctx, &match, field, eflags)) {
> +               if (match.rm_so == match.rm_eo)
> +                       break;
> +
> +               strbuf_grow(sb, strlen(line_color) + strlen(match_color) +
> +                           (2 * strlen(GIT_COLOR_RESET)));

... because strbuf_grow() may reallocate the underlying buffer, which
means that `buf` and `eol` will end up pointing at freed memory, which
will be accessed by the next call to grep_next_match().

I also wonder if these manual calls to strbuf_grow() and the "hint"
passed to strbuf_init() are actually helping considering that strbuf
should do a pretty good job of managing its underlying buffer growth
without this sort of micromanagement. Have you done performance
testing which indicates that such manual management is beneficial (and
that this isn't a case of premature optimization)?

> +               strbuf_add_with_color(sb, line_color, buf, match.rm_so);
> +               strbuf_add_with_color(sb, match_color, buf + match.rm_so,
> +                                     match.rm_eo - match.rm_so);
> +               buf += match.rm_eo;
> +               eflags = REG_NOTBOL;
> +       }
> +
> +       if (buf != line) {
> +               strbuf_grow(sb, strlen(line_color) + strlen(GIT_COLOR_RESET));
> +               strbuf_add_with_color(sb, line_color, buf, eol - buf);
> +       } else {
> +               strbuf_add(sb, buf, eol - buf);
> +       }

I'm confused by this. How can `buf` ever equal `line` considering that
the above code does:

    strbuf_add(&tmp_sb, line, linelen);
    ...
    buf = tmp_sb.buf;

so that the strbuf contains a _copy_ of `line`, and `buf` is pointing
into the strbuf.

> +       strbuf_release(&tmp_sb);
> +}
> +
>  void pp_user_info(struct pretty_print_context *pp,
>                   const char *what, struct strbuf *sb,
>                   const char *line, const char *encoding)
>  {
> +       struct strbuf id;
> +       enum grep_header_field field = GREP_HEADER_FIELD_MAX;
> +       struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;

These new variables only ever seem to be used...

>         if (pp->fmt == CMIT_FMT_ONELINE)
>                 return;
> @@ -496,9 +555,22 @@ void pp_user_info(struct pretty_print_context *pp,
>                         strbuf_addch(sb, '\n');
>                 strbuf_addf(sb, " <%.*s>\n", (int)maillen, mailbuf);
>         } else {
> -               strbuf_addf(sb, "%s: %.*s%.*s <%.*s>\n", what,
> -                           (pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, "    ",
> -                           (int)namelen, namebuf, (int)maillen, mailbuf);

... within this block, so they should be declared here rather than in
the outer block in order to reduce their scope.

> +               strbuf_init(&id, namelen + maillen + 4);

Again, I wonder if this micromanagement of strbuf allocation is a case
of premature optimization.

> +               if (!strcmp(what, "Author"))
> +                       field = GREP_HEADER_AUTHOR;
> +               else if (!strcmp(what, "Commit"))
> +                       field = GREP_HEADER_COMMITTER;
> +
> +               strbuf_addf(sb, "%s: %.*s", what,
> +                           (pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, "    ");

It's subjective, but this may be overly clever and unnecessarily
compact. The slightly more verbose:

    strbuf_addf(sb, "%s: ", what);
    if (pp->fmt == CMIT_FMT_FULLER)
        strbuf_addchars(sb, ' ', 4);

is easier to understand at-a-glance, as is this equivalent:

    strbuf_addf(sb, "%s: ", what);
    if (pp->fmt == CMIT_FMT_FULLER)
        strbuf_addstr(sb, "    ");

> +               strbuf_addf(&id, "%.*s <%.*s>", (int)namelen, namebuf,
> +                           (int)maillen, mailbuf);
> +
> +               append_line_with_color(sb, opt, id.buf, id.len, pp->color,
> +                                      GREP_CONTEXT_HEAD, field);
> +               strbuf_addch(sb, '\n');
> +               strbuf_release(&id);
>         }
> diff --git a/t/t4202-log.sh b/t/t4202-log.sh
> @@ -449,6 +449,61 @@ test_expect_success !FAIL_PREREQS 'log with various grep.patternType configurati
> +cat > expect << EOF
> +Author: <BOLD;RED>A U<RESET> Thor <author@example.com>
> +EOF
> +
> +test_expect_success 'log --author' '
> +       git log -1 --color=always --author="A U" >log &&
> +       grep Author log >actual.raw &&
> +       test_decode_color <actual.raw >actual &&
> +       test_cmp expect actual
> +'

I realize that you're mirroring how this is done in a few existing
tests in this script, but these days we like to place all code,
including creation of the `expect` file, inside the test body. Also,
style guidelines state that there shouldn't be any whitespace
following the `>` and `<<` operators. Finally, since no variables need
to be interpolated into the here-doc content, we use `\EOF` instead of
`EOF`, and since we want to indent the here-doc content inside the
test body, we use `-\EOF`. So:

    test_expect_success 'log --author' '
        cat >expect <<-\EOF
        Author: <BOLD;RED>A U<RESET> Thor <author@example.com>
        EOF
        ...

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 2/2] pretty: colorize pattern matches in commit messages
  2021-09-17  7:10   ` Eric Sunshine
@ 2021-09-17 21:14     ` Hamza Mahfooz
  2021-09-17 22:00       ` Jeff King
  0 siblings, 1 reply; 6+ messages in thread
From: Hamza Mahfooz @ 2021-09-17 21:14 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List, Junio C Hamano


On Fri, Sep 17 2021 at 03:10:12 AM -0400, Eric Sunshine 
<sunshine@sunshineco.com> wrote:
> `buf` and `eol` seem like an accident waiting to happen...
> 
>>  +       line_color = opt->colors[GREP_COLOR_SELECTED];
>>  +       match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
>>  +
>>  +       while (grep_next_match(opt, buf, eol, ctx, &match, field, 
>> eflags)) {
>>  +               if (match.rm_so == match.rm_eo)
>>  +                       break;
>>  +
>>  +               strbuf_grow(sb, strlen(line_color) + 
>> strlen(match_color) +
>>  +                           (2 * strlen(GIT_COLOR_RESET)));
> 
> ... because strbuf_grow() may reallocate the underlying buffer, which
> means that `buf` and `eol` will end up pointing at freed memory, which
> will be accessed by the next call to grep_next_match().

I don't see how it's problematic, since `tmp_sb` isn't modified after 
`buf`
is initialized (until strbuf_release() is called, of course).



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 2/2] pretty: colorize pattern matches in commit messages
  2021-09-17 21:14     ` Hamza Mahfooz
@ 2021-09-17 22:00       ` Jeff King
  2021-09-17 22:15         ` Eric Sunshine
  0 siblings, 1 reply; 6+ messages in thread
From: Jeff King @ 2021-09-17 22:00 UTC (permalink / raw)
  To: Hamza Mahfooz; +Cc: Eric Sunshine, Git List, Junio C Hamano

On Fri, Sep 17, 2021 at 05:14:56PM -0400, Hamza Mahfooz wrote:

> 
> On Fri, Sep 17 2021 at 03:10:12 AM -0400, Eric Sunshine
> <sunshine@sunshineco.com> wrote:
> > `buf` and `eol` seem like an accident waiting to happen...
> > 
> > >  +       line_color = opt->colors[GREP_COLOR_SELECTED];
> > >  +       match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
> > >  +
> > >  +       while (grep_next_match(opt, buf, eol, ctx, &match, field,
> > > eflags)) {
> > >  +               if (match.rm_so == match.rm_eo)
> > >  +                       break;
> > >  +
> > >  +               strbuf_grow(sb, strlen(line_color) +
> > > strlen(match_color) +
> > >  +                           (2 * strlen(GIT_COLOR_RESET)));
> > 
> > ... because strbuf_grow() may reallocate the underlying buffer, which
> > means that `buf` and `eol` will end up pointing at freed memory, which
> > will be accessed by the next call to grep_next_match().
> 
> I don't see how it's problematic, since `tmp_sb` isn't modified after `buf`
> is initialized (until strbuf_release() is called, of course).

Yes, you are correct. However, I do think the code would be much clearer
if you skipped the strbuf entirely, like:

  char *line_as_string;
  ...
  line_as_string = xmemdupz(line, linelen);
  ...
  buf = line_as_string;
  eol = buf + linelen;
  ...
  free(line_as_string);

which makes it much clearer that you don't intend to modify it further
(especially with all those other calls operating on the _other_ strbuf,
it's hard to see immediately that this is the case).

The "as_string" name is assuming the purpose is to get a NUL-terminated
string. I'm not sure why we need one, though, since we pass the buf/eol
pointers to grep_next_match(). A comment above the xmemdupz() line might
be a good place to explain that (which would help especially if the
reasons change later and we can get rid of it).

-Peff

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 2/2] pretty: colorize pattern matches in commit messages
  2021-09-17 22:00       ` Jeff King
@ 2021-09-17 22:15         ` Eric Sunshine
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Sunshine @ 2021-09-17 22:15 UTC (permalink / raw)
  To: Jeff King; +Cc: Hamza Mahfooz, Git List, Junio C Hamano

On Fri, Sep 17, 2021 at 6:01 PM Jeff King <peff@peff.net> wrote:
> On Fri, Sep 17, 2021 at 05:14:56PM -0400, Hamza Mahfooz wrote:
> > On Fri, Sep 17 2021 at 03:10:12 AM -0400, Eric Sunshine
> > <sunshine@sunshineco.com> wrote:
> > > `buf` and `eol` seem like an accident waiting to happen...
> > > ... because strbuf_grow() may reallocate the underlying buffer, which
> > > means that `buf` and `eol` will end up pointing at freed memory, which
> > > will be accessed by the next call to grep_next_match().
> >
> > I don't see how it's problematic, since `tmp_sb` isn't modified after `buf`
> > is initialized (until strbuf_release() is called, of course).
>
> Yes, you are correct.

Indeed, I got confused by the multiple strbufs with similar names. However...

> However, I do think the code would be much clearer
> if you skipped the strbuf entirely, like:
>
>   line_as_string = xmemdupz(line, linelen);
>   ...
>   buf = line_as_string;
>   eol = buf + linelen;
>
> which makes it much clearer that you don't intend to modify it further
> (especially with all those other calls operating on the _other_ strbuf,
> it's hard to see immediately that this is the case).
>
> The "as_string" name is assuming the purpose is to get a NUL-terminated
> string. I'm not sure why we need one, though, since we pass the buf/eol
> pointers to grep_next_match(). A comment above the xmemdupz() line might
> be a good place to explain that (which would help especially if the
> reasons change later and we can get rid of it).

... as Peff points out, it indeed is unclear why a copy of `line` is
needed at all. It seems like a simple:

    buf = line;
    eol = buf + linelen;

would more than suffice without making any copies. If you do that, then the:

    if (buf != line) {

later in the code -- and which I questioned in my review as making no
sense -- suddenly makes sense.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-09-17 22:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-16 14:09 [PATCH v5 1/2] grep: refactor next_match() and match_one_pattern() for external use Hamza Mahfooz
2021-09-16 14:09 ` [PATCH v5 2/2] pretty: colorize pattern matches in commit messages Hamza Mahfooz
2021-09-17  7:10   ` Eric Sunshine
2021-09-17 21:14     ` Hamza Mahfooz
2021-09-17 22:00       ` Jeff King
2021-09-17 22:15         ` Eric Sunshine

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).