segmentation fault (nullpointer) with git log --submodule -p

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* segmentation fault (nullpointer) with git log --submodule -p
@ 2013-01-23 14:38 Armin
  2013-01-23 20:02 ` Jeff King
  0 siblings, 1 reply; 24+ messages in thread
From: Armin @ 2013-01-23 14:38 UTC (permalink / raw
  To: git; +Cc: netzverweigerer

Hello dear git people.

I experience a reproducible segmentation fault on one of my repositories when doing a "git log --submodule -p", tested with newest version on Arch Linux (git version 1.8.1.1) and built fresh (git version 1.8.1.1.347.g9591fcc), tried on 2 separate systems:


Program terminated with signal 11, Segmentation fault.
#0  0x00000000004b51e5 in parse_commit_header (context=0x7ffff69b6980) at pretty.c:752
752     for (i = 0; msg[i]; i++) {
    (gdb) bt
#0  0x00000000004b51e5 in parse_commit_header (context=0x7ffff69b6980) at pretty.c:752
#1  format_commit_one (context=<optimized out>, placeholder=0x526b1e "s", sb=0x7ffff69b6ad0) at pretty.c:1157
#2  format_commit_item (sb=0x7ffff69b6ad0, placeholder=0x526b1e "s", context=<optimized out>) at pretty.c:1224
#3  0x00000000004dacd9 in strbuf_expand (sb=sb@entry=0x7ffff69b6ad0, format=0x526b1e "s", format@entry=0x526b18 "  %m %s", fn=fn@entry=0x4b4730 <format_commit_item>, context=context@entry=0x7ffff69b6980)
    at strbuf.c:247
#4  0x00000000004b5816 in format_commit_message (commit=commit@entry=0x1ffafd8, format=format@entry=0x526b18 "  %m %s", sb=sb@entry=0x7ffff69b6ad0, pretty_ctx=pretty_ctx@entry=0x7ffff69b6af0) at pretty.c:1284
#5  0x00000000004dde52 in print_submodule_summary (reset=0x754640 "\033[m", add=0x754708 "\033[32m", del=0x7546e0 "\033[31m", f=0x7f0685bac7a0, rev=0x7ffff69b6b40) at submodule.c:236
#6  show_submodule_summary (f=0x7f0685bac7a0, path=<optimized out>, one=one@entry=0x1ff2af0 "\020\\vC\371\070\vJ\352\344\205\340\226u\273\021\372\330\234\004", 
    two=two@entry=0x2030a60 "\301a(\350\371\372\340mb[խo_\272\301\223V˙", dirty_submodule=<optimized out>, meta=meta@entry=0x754690 "\033[1m", del=del@entry=0x7546e0 "\033[31m", add=0x754708 "\033[32m", 
        reset=reset@entry=0x754640 "\033[m") at submodule.c:307
#7  0x000000000048dd1d in builtin_diff (name_a=name_a@entry=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", name_b=name_b@entry=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", 
    one=one@entry=0x1ff2af0, two=two@entry=0x2030a60, xfrm_msg=0x2039a20 "\033[1mindex 105c764..c16128e 160000\033[m\n", must_show_header=must_show_header@entry=0, o=o@entry=0x7ffff69b7b88, 
        complete_rewrite=complete_rewrite@entry=0) at diff.c:2267
#8  0x000000000048e60e in run_diff_cmd (pgm=pgm@entry=0x0, name=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", other=<optimized out>, 
    attr_path=attr_path@entry=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", one=one@entry=0x1ff2af0, two=two@entry=0x2030a60, msg=msg@entry=0x7ffff69b74b0, o=o@entry=0x7ffff69b7b88, p=p@entry=0x20371b0)
    at diff.c:3057

(gdb) bt
#0  0x00000000004b51e5 in parse_commit_header (context=0x7ffff69b6980) at pretty.c:752
#1  format_commit_one (context=<optimized out>, placeholder=0x526b1e "s", sb=0x7ffff69b6ad0) at pretty.c:1157
#2  format_commit_item (sb=0x7ffff69b6ad0, placeholder=0x526b1e "s", context=<optimized out>) at pretty.c:1224
#3  0x00000000004dacd9 in strbuf_expand (sb=sb@entry=0x7ffff69b6ad0, format=0x526b1e "s", format@entry=0x526b18 "  %m %s", fn=fn@entry=0x4b4730 <format_commit_item>, context=context@entry=0x7ffff69b6980)
    at strbuf.c:247
#4  0x00000000004b5816 in format_commit_message (commit=commit@entry=0x1ffafd8, format=format@entry=0x526b18 "  %m %s", sb=sb@entry=0x7ffff69b6ad0, pretty_ctx=pretty_ctx@entry=0x7ffff69b6af0) at pretty.c:1284
#5  0x00000000004dde52 in print_submodule_summary (reset=0x754640 "\033[m", add=0x754708 "\033[32m", del=0x7546e0 "\033[31m", f=0x7f0685bac7a0, rev=0x7ffff69b6b40) at submodule.c:236
#6  show_submodule_summary (f=0x7f0685bac7a0, path=<optimized out>, one=one@entry=0x1ff2af0 "\020\\vC\371\070\vJ\352\344\205\340\226u\273\021\372\330\234\004", 
    two=two@entry=0x2030a60 "\301a(\350\371\372\340mb[խo_\272\301\223V˙", dirty_submodule=<optimized out>, meta=meta@entry=0x754690 "\033[1m", del=del@entry=0x7546e0 "\033[31m", add=0x754708 "\033[32m", 
    reset=reset@entry=0x754640 "\033[m") at submodule.c:307
#7  0x000000000048dd1d in builtin_diff (name_a=name_a@entry=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", name_b=name_b@entry=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", 
    one=one@entry=0x1ff2af0, two=two@entry=0x2030a60, xfrm_msg=0x2039a20 "\033[1mindex 105c764..c16128e 160000\033[m\n", must_show_header=must_show_header@entry=0, o=o@entry=0x7ffff69b7b88, 
    complete_rewrite=complete_rewrite@entry=0) at diff.c:2267
#8  0x000000000048e60e in run_diff_cmd (pgm=pgm@entry=0x0, name=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", other=<optimized out>, 
    attr_path=attr_path@entry=0x1ff2b50 "Packages/Application/Amadeus.Somea.Dialog", one=one@entry=0x1ff2af0, two=two@entry=0x2030a60, msg=msg@entry=0x7ffff69b74b0, o=o@entry=0x7ffff69b7b88, p=p@entry=0x20371b0)
    at diff.c:3057
#9  0x000000000048eb3d in run_diff (o=0x7ffff69b7b88, p=0x20371b0) at diff.c:3145
#10 diff_flush_patch (o=0x7ffff69b7b88, p=0x20371b0) at diff.c:3979
#11 diff_flush_patch (p=0x20371b0, o=0x7ffff69b7b88) at diff.c:3970
#12 0x000000000048f15f in diff_flush (options=options@entry=0x7ffff69b7b88) at diff.c:4500
#13 0x00000000004a211a in log_tree_diff_flush (opt=opt@entry=0x7ffff69b7850) at log-tree.c:776
#14 0x00000000004a22b2 in log_tree_diff (log=0x7ffff69b7720, commit=0x1ffdf60, opt=0x7ffff69b7850) at log-tree.c:836
#15 log_tree_commit (opt=opt@entry=0x7ffff69b7850, commit=commit@entry=0x1ffdf60) at log-tree.c:859
#16 0x00000000004393d3 in cmd_log_walk (rev=rev@entry=0x7ffff69b7850) at builtin/log.c:310
#17 0x0000000000439f38 in cmd_log (argc=3, argv=0x7ffff69b80c0, prefix=0x0) at builtin/log.c:582
#18 0x0000000000405978 in run_builtin (argv=0x7ffff69b80c0, argc=3, p=0x74fd20) at git.c:281
#19 handle_internal_command (argc=3, argv=0x7ffff69b80c0) at git.c:442
#20 0x0000000000404de2 in run_argv (argv=0x7ffff69b7f50, argcp=0x7ffff69b7f5c) at git.c:488
#21 main (argc=3, argv=0x7ffff69b80c0) at git.c:563
(gdb) f 0
#0  0x00000000004b51e5 in parse_commit_header (context=0x7ffff69b6980) at pretty.c:752
752     for (i = 0; msg[i]; i++) {
(gdb) l
747 static void parse_commit_header(struct format_commit_context *context)
748 {
749     const char *msg = context->message;
750     int i;
751 
752     for (i = 0; msg[i]; i++) {
753         int eol;
754         for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
755             ; /* do nothing */
756 
(gdb) p msg
$7 = <optimized out>
(gdb) p context->message
$8 = 0x0
(gdb) x/8i $pc
=> 0x4b51e5 <format_commit_item+2741>:  movzbl (%rcx),%eax
   0x4b51e8 <format_commit_item+2744>:  mov    %rcx,0x18(%rsp)
   0x4b51ed <format_commit_item+2749>:  mov    %rcx,%r10
   0x4b51f0 <format_commit_item+2752>:  test   %al,%al
   0x4b51f2 <format_commit_item+2754>:  je     0x4b52a3 <format_commit_item+2931>
   0x4b51f8 <format_commit_item+2760>:  nopl   0x0(%rax,%rax,1)
   0x4b5200 <format_commit_item+2768>:  test   %al,%al
   0x4b5202 <format_commit_item+2770>:  je     0x4b529e <format_commit_item+2926>
(gdb) i r rcx
rcx            0x0  0


Does this help in any way? Can I provide any further information that helps?

Many thanks for reading this and all the best,


Armin

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-23 14:38 segmentation fault (nullpointer) with git log --submodule -p Armin
@ 2013-01-23 20:02 ` Jeff King
  2013-01-24 12:11   ` Stefan Näwe
  0 siblings, 1 reply; 24+ messages in thread
From: Jeff King @ 2013-01-23 20:02 UTC (permalink / raw
  To: Armin; +Cc: git

On Wed, Jan 23, 2013 at 03:38:16PM +0100, Armin wrote:

> Hello dear git people.
> 
> I experience a reproducible segmentation fault on one of my
> repositories when doing a "git log --submodule -p", tested with newest
> version on Arch Linux (git version 1.8.1.1) and built fresh (git
> version 1.8.1.1.347.g9591fcc), tried on 2 separate systems:
> 
> 
> Program terminated with signal 11, Segmentation fault.
> #0  0x00000000004b51e5 in parse_commit_header (context=0x7ffff69b6980) at pretty.c:752
> 752     for (i = 0; msg[i]; i++) {
> [...]
> (gdb) l
> 747 static void parse_commit_header(struct format_commit_context *context)
> 748 {
> 749     const char *msg = context->message;
> 750     int i;
> 751 
> 752     for (i = 0; msg[i]; i++) {
> 753         int eol;
> 754         for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
> 755             ; /* do nothing */
> 756 
> (gdb) p msg
> $7 = <optimized out>
> (gdb) p context->message
> $8 = 0x0

Yeah, that should definitely not be NULL. I can't reproduce here with a
few simple examples, though.

Does it fail with older versions of git? If so, can you bisect?

Is it possible for you to make your repo available?

-Peff

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-23 20:02 ` Jeff King
@ 2013-01-24 12:11   ` Stefan Näwe
  2013-01-24 13:40     ` Duy Nguyen
  0 siblings, 1 reply; 24+ messages in thread
From: Stefan Näwe @ 2013-01-24 12:11 UTC (permalink / raw
  To: Jeff King; +Cc: Armin, git@vger.kernel.org

Am 23.01.2013 21:02, schrieb Jeff King:
> On Wed, Jan 23, 2013 at 03:38:16PM +0100, Armin wrote:
> 
>> Hello dear git people.
>>
>> I experience a reproducible segmentation fault on one of my
>> repositories when doing a "git log --submodule -p", tested with newest
>> version on Arch Linux (git version 1.8.1.1) and built fresh (git
>> version 1.8.1.1.347.g9591fcc), tried on 2 separate systems:
>>
>>
>> Program terminated with signal 11, Segmentation fault.
>> #0  0x00000000004b51e5 in parse_commit_header (context=0x7ffff69b6980) at pretty.c:752
>> 752     for (i = 0; msg[i]; i++) {
>> [...]
>> (gdb) l
>> 747 static void parse_commit_header(struct format_commit_context *context)
>> 748 {
>> 749     const char *msg = context->message;
>> 750     int i;
>> 751 
>> 752     for (i = 0; msg[i]; i++) {
>> 753         int eol;
>> 754         for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
>> 755             ; /* do nothing */
>> 756 
>> (gdb) p msg
>> $7 = <optimized out>
>> (gdb) p context->message
>> $8 = 0x0
> 
> Yeah, that should definitely not be NULL. I can't reproduce here with a
> few simple examples, though.
> 
> Does it fail with older versions of git? If so, can you bisect?

I did. My bisection told me this is the suspect:

ccdc603 (parse_object: try internal cache before reading object db)

My git-fu is not good enough to analyze that...

> Is it possible for you to make your repo available?

Unfortunately not. It crashes with one particular repo (using submodules)
that I can't make available, but not with another which is available
at https://github.com/snaewe/super.git

HTH

Stefan
-- 
----------------------------------------------------------------
/dev/random says: There must be more to life than compile-and-go.
python -c "print '73746566616e2e6e616577654061746c61732d656c656b74726f6e696b2e636f6d'.decode('hex')"

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 12:11   ` Stefan Näwe
@ 2013-01-24 13:40     ` Duy Nguyen
  2013-01-24 14:06       ` Stefan Näwe
  0 siblings, 1 reply; 24+ messages in thread
From: Duy Nguyen @ 2013-01-24 13:40 UTC (permalink / raw
  To: Stefan Näwe; +Cc: Jeff King, Armin, git@vger.kernel.org

On Thu, Jan 24, 2013 at 7:11 PM, Stefan Näwe
<stefan.naewe@atlas-elektronik.com> wrote:
>> Does it fail with older versions of git? If so, can you bisect?
>
> I did. My bisection told me this is the suspect:
>
> ccdc603 (parse_object: try internal cache before reading object db)

diff --git a/object.c b/object.c
index d8d09f9..6b06297 100644
--- a/object.c
+++ b/object.c
@@ -191,10 +191,15 @@ struct object *parse_object(const unsigned char *sha1)
        enum object_type type;
        int eaten;
        const unsigned char *repl = lookup_replace_object(sha1);
-       void *buffer = read_sha1_file(sha1, &type, &size);
+       void *buffer;
+       struct object *obj;
+
+       obj = lookup_object(sha1);
+       if (obj && obj->parsed)
+               return obj;

Any chance obj->parsed is 1 but ((struct commit*)obj)->buffer is NULL?
What if you change that "if" to

if (obj && obj->parsed && (obj->type != OBJ_COMMIT || ((struct commit
*)obj)->buffer))

??

Also you did not encode commits in any specific encoding, nor set
i18n.logOutputEncoding?
-- 
Duy

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 13:40     ` Duy Nguyen
@ 2013-01-24 14:06       ` Stefan Näwe
  2013-01-24 14:14         ` Duy Nguyen
  0 siblings, 1 reply; 24+ messages in thread
From: Stefan Näwe @ 2013-01-24 14:06 UTC (permalink / raw
  To: Duy Nguyen; +Cc: Jeff King, Armin, git@vger.kernel.org

Am Donnerstag, 24. Januar 2013 14:40:47 schrieb Duy Nguyen:
> On Thu, Jan 24, 2013 at 7:11 PM, Stefan Näwe
> <stefan.naewe@atlas-elektronik.com> wrote:
>>> Does it fail with older versions of git? If so, can you bisect?
>>
>> I did. My bisection told me this is the suspect:
>>
>> ccdc603 (parse_object: try internal cache before reading object db)
>
> diff --git a/object.c b/object.c
> index d8d09f9..6b06297 100644
> --- a/object.c
> +++ b/object.c
> @@ -191,10 +191,15 @@ struct object *parse_object(const unsigned char *sha1)
>         enum object_type type;
>         int eaten;
>         const unsigned char *repl = lookup_replace_object(sha1);
> -       void *buffer = read_sha1_file(sha1, &type, &size);
> +       void *buffer;
> +       struct object *obj;
> +
> +       obj = lookup_object(sha1);
> +       if (obj && obj->parsed)
> +               return obj;
>
> Any chance obj->parsed is 1 but ((struct commit*)obj)->buffer is NULL?
> What if you change that "if" to
>
> if (obj && obj->parsed && (obj->type != OBJ_COMMIT || ((struct commit
> *)obj)->buffer))
>

No more segfault!

> Also you did not encode commits in any specific encoding,

We're using Git for Windows and some commits contain 'umlauts' (äöü).
But those characters should be encoded in UTF-8, shouldn't they?
But the 'git log...' only crashes on a Debian/Linux machine.

> nor set i18n.logOutputEncoding?

It's not set.

(only i18n.filesEncoding is set to utf-8 on my machine)

Oh, and it's not crashing if I do:

git log -p --submodule |cat

Stefan
--
----------------------------------------------------------------
/dev/random says: Dumb luck beats sound planning every time. Trust me.
python -c "print 
'73746566616e2e6e616577654061746c61732d656c656b74726f6e696b2e636f6d'.decode('hex')"

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 14:06       ` Stefan Näwe
@ 2013-01-24 14:14         ` Duy Nguyen
  2013-01-24 23:27           ` Jeff King
  0 siblings, 1 reply; 24+ messages in thread
From: Duy Nguyen @ 2013-01-24 14:14 UTC (permalink / raw
  To: Stefan Näwe; +Cc: Jeff King, Armin, git@vger.kernel.org

On Thu, Jan 24, 2013 at 9:06 PM, Stefan Näwe
<stefan.naewe@atlas-elektronik.com> wrote:
> Am Donnerstag, 24. Januar 2013 14:40:47 schrieb Duy Nguyen:
>> On Thu, Jan 24, 2013 at 7:11 PM, Stefan Näwe
>> <stefan.naewe@atlas-elektronik.com> wrote:
>>>> Does it fail with older versions of git? If so, can you bisect?
>>>
>>> I did. My bisection told me this is the suspect:
>>>
>>> ccdc603 (parse_object: try internal cache before reading object db)
>>
>> diff --git a/object.c b/object.c
>> index d8d09f9..6b06297 100644
>> --- a/object.c
>> +++ b/object.c
>> @@ -191,10 +191,15 @@ struct object *parse_object(const unsigned char *sha1)
>>         enum object_type type;
>>         int eaten;
>>         const unsigned char *repl = lookup_replace_object(sha1);
>> -       void *buffer = read_sha1_file(sha1, &type, &size);
>> +       void *buffer;
>> +       struct object *obj;
>> +
>> +       obj = lookup_object(sha1);
>> +       if (obj && obj->parsed)
>> +               return obj;
>>
>> Any chance obj->parsed is 1 but ((struct commit*)obj)->buffer is NULL?
>> What if you change that "if" to
>>
>> if (obj && obj->parsed && (obj->type != OBJ_COMMIT || ((struct commit
>> *)obj)->buffer))
>>
>
> No more segfault!

Sweet. I have no idea how that fixes it. Maybe Jeff can give some
explanation after he wakes up.
-- 
Duy

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 14:14         ` Duy Nguyen
@ 2013-01-24 23:27           ` Jeff King
  2013-01-24 23:56             ` Junio C Hamano
  0 siblings, 1 reply; 24+ messages in thread
From: Jeff King @ 2013-01-24 23:27 UTC (permalink / raw
  To: Duy Nguyen
  Cc: Stefan Näwe, Armin, Junio C Hamano, Jonathon Mah,
	git@vger.kernel.org

On Thu, Jan 24, 2013 at 09:14:47PM +0700, Nguyen Thai Ngoc Duy wrote:

> >>> I did. My bisection told me this is the suspect:
> >>>
> >>> ccdc603 (parse_object: try internal cache before reading object db)
> >>
> >> diff --git a/object.c b/object.c
> >> index d8d09f9..6b06297 100644
> >> --- a/object.c
> >> +++ b/object.c
> >> @@ -191,10 +191,15 @@ struct object *parse_object(const unsigned char *sha1)
> >>         enum object_type type;
> >>         int eaten;
> >>         const unsigned char *repl = lookup_replace_object(sha1);
> >> -       void *buffer = read_sha1_file(sha1, &type, &size);
> >> +       void *buffer;
> >> +       struct object *obj;
> >> +
> >> +       obj = lookup_object(sha1);
> >> +       if (obj && obj->parsed)
> >> +               return obj;
> >>
> >> Any chance obj->parsed is 1 but ((struct commit*)obj)->buffer is NULL?
> >> What if you change that "if" to
> >>
> >> if (obj && obj->parsed && (obj->type != OBJ_COMMIT || ((struct commit
> >> *)obj)->buffer))
> >>
> >
> > No more segfault!
> 
> Sweet. I have no idea how that fixes it. Maybe Jeff can give some
> explanation after he wakes up.

Ugh. I think I know why it fixes it. We free the commit's buffer as part
of the log traversal, but then later want to access it as part of the
diff. We presumably call parse_object somewhere in the middle to make
sure it is parsed.

Before ccdc603, a side effect of parse_object is that even for a parsed
object, we would fill in the buffer field of a commit or tree. See
parse_object_buffer:

        } else if (type == OBJ_COMMIT) {
                struct commit *commit = lookup_commit(sha1);
                if (commit) {
                        if (parse_commit_buffer(commit, buffer, size))
                                return NULL;
                        if (!commit->buffer) {
                                commit->buffer = buffer;
                                eaten = 1;
                        }
                        obj = &commit->object;
                }

When this patch was originally proposed, I wrote[1]:

  On Thu, Jan 05, 2012 at 01:55:22PM -0800, Junio C Hamano wrote:
  > > So I think it is safe short of somebody doing some horrible manual
  > > munging of a "struct object".
  >
  > Yeah, I was worried about codepaths like commit-pretty-printing
  > might be mucking with the contents of commit->buffer, perhaps
  > reencoding the text and then calling parse_object() to get the
  > unmodified original back, or something silly like that. But the
  > lookup_object() call at the beginning of the parse_object() already
  > prevents us from doing such a thing, so we should be OK, I would
  > think.

  [...]

  What saves you is that the parse_*_buffer functions all do nothing
  when the object.parsed flag is set, and the code I added makes sure
  that object.parsed is set in the object that lookup_object returns.

  So yeah, anytime you tweak the contents of commit->buffer but don't
  unset the "parsed" flag, you are asking for trouble.

Which is true, but obviously I missed that in addition to calling
parse_*_buffer, which will be a no-op, we _also_ set the buffer
independently. So parse_object was functioning as a belt-and-suspenders
measure for that case. And I think this is probably the same root cause as the
segfault which came up here:

  http://thread.gmane.org/gmane.comp.version-control.git/214366

So what to do?

We can revert ccdc603, but I do not think we need to. We can catch the
problematic cases with something like your patch, but still get the
optimization when the buffer really is already filled in. I think we'd
need to extend your patch to handle trees, too, to be totally correct.

But there are still some loose ends that I note:

  1. Making such a change would be parse_object erring on the side of
     providing the buffer. But it doesn't actually know if the buffer is
     desired or not. For instance, upload-pack benefited from this
     optimization, but does not need save_commit_buffer turned on at all. So
     commit->buffer is _always_ NULL there, and that's just fine; we
     really don't need to read the object.

     Now this may be a bad example, because due to my follow-on patches,
     we avoid calling parse_object at all in most cases, so I don't
     think it matters any longer to upload-pack. But I suspect there are
     other places with similar circumstances. Fundamentally parse_object
     doesn't know what the caller is interested in.

  2. This means that parse_commit and parse_object behave differently in
     this regard. The former will leave the buffer unfilled. Meaning we
     may still have issues with code paths that munge the buffer without
     resetting the parsed flag, independent of ccdc603 and fixing this.

To me, these highlight that our commit->buffer management is fragile and
is largely about guessing in various circumstances whether somebody will
later want the buffer. I'm not sure of the right solution, though. It
seems like something that inherently blurs the lines between bits of
code (e.g., how should "log" know that a submodule diff might later want
to see the same entry? Should we optimistically free and then make it
easier for the later user to reliably ensure the buffer is primed? Or
should we err on the side of keeping it in place?).

-Peff

[1] http://article.gmane.org/gmane.comp.version-control.git/188000

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 23:27           ` Jeff King
@ 2013-01-24 23:56             ` Junio C Hamano
  2013-01-25  0:55               ` Jeff King
  2013-01-25  3:59               ` Junio C Hamano
  0 siblings, 2 replies; 24+ messages in thread
From: Junio C Hamano @ 2013-01-24 23:56 UTC (permalink / raw
  To: Jeff King
  Cc: Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

Jeff King <peff@peff.net> writes:

> ... (e.g., how should "log" know that a submodule diff might later want
> to see the same entry? Should we optimistically free and then make it
> easier for the later user to reliably ensure the buffer is primed? Or
> should we err on the side of keeping it in place?).

My knee-jerk reaction is that we should consider that commit->buffer
belongs to the revision traversal machinery.  Any other uses bolted
on later can borrow it if buffer still exists (I do not think pretty
code rewrites the buffer contents in place in any way), or they can
ask read_sha1_file() to read it themselves and free when they are
done.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 23:56             ` Junio C Hamano
@ 2013-01-25  0:55               ` Jeff King
  2013-01-25  2:05                 ` Duy Nguyen
  2013-01-25  3:59               ` Junio C Hamano
  1 sibling, 1 reply; 24+ messages in thread
From: Jeff King @ 2013-01-25  0:55 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

On Thu, Jan 24, 2013 at 03:56:23PM -0800, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > ... (e.g., how should "log" know that a submodule diff might later want
> > to see the same entry? Should we optimistically free and then make it
> > easier for the later user to reliably ensure the buffer is primed? Or
> > should we err on the side of keeping it in place?).
> 
> My knee-jerk reaction is that we should consider that commit->buffer
> belongs to the revision traversal machinery.  Any other uses bolted
> on later can borrow it if buffer still exists (I do not think pretty
> code rewrites the buffer contents in place in any way), or they can
> ask read_sha1_file() to read it themselves and free when they are
> done.

Yeah, that is probably the sanest way forward. It at least leaves
ownership in one place, and everybody else is an opportunistic consumer.
We do need to annotate each user site though with something like the
"ensure" function I mentioned.

If they are not owners, then the better pattern is probably something
like:

  /*
   * Get the commit buffer, either opportunistically using
   * the cached version attached to the commit object, or loading it
   * from disk if necessary.
   */
  const char *use_commit_buffer(struct commit *c)
  {
          enum object_type type;
          unsigned long size;

          if (c->buffer)
                  return c->buffer;
          /*
           * XXX check type == OBJ_COMMIT?
           * XXX die() on NULL as a convenience to callers?
           */
          return read_sha1_file(c->object.sha1, &type, &size);
  }

  void unuse_commit_buffer(const char *buf, struct commit *c)
  {
          /*
           * If we used the cached copy attached to the commit,
           * we don't want to free it; it's not our responsibility.
           */
          if (buf == c->buffer)
                  return;

          free((char *)buf);
  }

I suspect that putting a use/unuse pair inside format_commit_message
would handle most cases.

-Peff

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  0:55               ` Jeff King
@ 2013-01-25  2:05                 ` Duy Nguyen
  0 siblings, 0 replies; 24+ messages in thread
From: Duy Nguyen @ 2013-01-25  2:05 UTC (permalink / raw
  To: Jeff King
  Cc: Junio C Hamano, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

On Fri, Jan 25, 2013 at 7:55 AM, Jeff King <peff@peff.net> wrote:
> On Thu, Jan 24, 2013 at 03:56:23PM -0800, Junio C Hamano wrote:
>
>> Jeff King <peff@peff.net> writes:
>>
>> > ... (e.g., how should "log" know that a submodule diff might later want
>> > to see the same entry? Should we optimistically free and then make it
>> > easier for the later user to reliably ensure the buffer is primed? Or
>> > should we err on the side of keeping it in place?).
>>
>> My knee-jerk reaction is that we should consider that commit->buffer
>> belongs to the revision traversal machinery.  Any other uses bolted
>> on later can borrow it if buffer still exists (I do not think pretty
>> code rewrites the buffer contents in place in any way), or they can
>> ask read_sha1_file() to read it themselves and free when they are
>> done.
>
> Yeah, that is probably the sanest way forward. It at least leaves
> ownership in one place, and everybody else is an opportunistic consumer.
> We do need to annotate each user site though with something like the
> "ensure" function I mentioned.
>
> If they are not owners, then the better pattern is probably something
> like:

You probably should rename "buffer" (to _buffer or something) and let
the compiler catch all the places where commit->buffer is illegally used.

>
>   /*
>    * Get the commit buffer, either opportunistically using
>    * the cached version attached to the commit object, or loading it
>    * from disk if necessary.
>    */
>   const char *use_commit_buffer(struct commit *c)
>   {
>           enum object_type type;
>           unsigned long size;
>
>           if (c->buffer)
>                   return c->buffer;
>           /*
>            * XXX check type == OBJ_COMMIT?
>            * XXX die() on NULL as a convenience to callers?
>            */
>           return read_sha1_file(c->object.sha1, &type, &size);
>   }
>
>   void unuse_commit_buffer(const char *buf, struct commit *c)
>   {
>           /*
>            * If we used the cached copy attached to the commit,
>            * we don't want to free it; it's not our responsibility.
>            */
>           if (buf == c->buffer)
>                   return;
>
>           free((char *)buf);
>   }
>
> I suspect that putting a use/unuse pair inside format_commit_message
> would handle most cases.
>
> -Peff
-- 
Duy

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-24 23:56             ` Junio C Hamano
  2013-01-25  0:55               ` Jeff King
@ 2013-01-25  3:59               ` Junio C Hamano
  2013-01-25  4:08                 ` Jeff King
  2013-01-25  5:53                 ` Jonathan Nieder
  1 sibling, 2 replies; 24+ messages in thread
From: Junio C Hamano @ 2013-01-25  3:59 UTC (permalink / raw
  To: Jeff King
  Cc: Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

Junio C Hamano <gitster@pobox.com> writes:

> Jeff King <peff@peff.net> writes:
>
>> ... (e.g., how should "log" know that a submodule diff might later want
>> to see the same entry? Should we optimistically free and then make it
>> easier for the later user to reliably ensure the buffer is primed? Or
>> should we err on the side of keeping it in place?).
>
> My knee-jerk reaction is that we should consider that commit->buffer
> belongs to the revision traversal machinery.  Any other uses bolted
> on later can borrow it if buffer still exists (I do not think pretty
> code rewrites the buffer contents in place in any way), or they can
> ask read_sha1_file() to read it themselves and free when they are
> done.

I've been toying with an idea along this line.

 commit.h        | 16 ++++++++++++++++
 builtin/blame.c | 27 ++++++++-------------------
 commit.c        | 20 ++++++++++++++++++++
 3 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/commit.h b/commit.h
index c16c8a7..b559535 100644
--- a/commit.h
+++ b/commit.h
@@ -226,4 +226,20 @@ extern void print_commit_list(struct commit_list *list,
 			      const char *format_cur,
 			      const char *format_last);
 
+extern int ensure_commit_buffer(struct commit *);
+extern void discard_commit_buffer(struct commit *);
+
+#define with_commit_buffer(commit) \
+	do { \
+		int had_buffer_ = !!commit->buffer; \
+		if (!had_buffer_) \
+			ensure_commit_buffer(commit); \
+		do
+
+#define done_with_commit_buffer(commit) \
+		while (0); \
+		if (!had_buffer_) \
+			discard_commit_buffer(commit); \
+	} while (0)
+
 #endif /* COMMIT_H */
diff --git a/builtin/blame.c b/builtin/blame.c
index b431ba3..8b2e4a5 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1424,25 +1424,14 @@ static void get_commit_info(struct commit *commit,
 
 	commit_info_init(ret);
 
-	/*
-	 * We've operated without save_commit_buffer, so
-	 * we now need to populate them for output.
-	 */
-	if (!commit->buffer) {
-		enum object_type type;
-		unsigned long size;
-		commit->buffer =
-			read_sha1_file(commit->object.sha1, &type, &size);
-		if (!commit->buffer)
-			die("Cannot read commit %s",
-			    sha1_to_hex(commit->object.sha1));
-	}
-	encoding = get_log_output_encoding();
-	reencoded = logmsg_reencode(commit, encoding);
-	message   = reencoded ? reencoded : commit->buffer;
-	get_ac_line(message, "\nauthor ",
-		    &ret->author, &ret->author_mail,
-		    &ret->author_time, &ret->author_tz);
+	with_commit_buffer(commit) {
+		encoding = get_log_output_encoding();
+		reencoded = logmsg_reencode(commit, encoding);
+		message   = reencoded ? reencoded : commit->buffer;
+		get_ac_line(message, "\nauthor ",
+			    &ret->author, &ret->author_mail,
+			    &ret->author_time, &ret->author_tz);
+	} done_with_commit_buffer(commit);
 
 	if (!detailed) {
 		free(reencoded);
diff --git a/commit.c b/commit.c
index e8eb0ae..a627eea 100644
--- a/commit.c
+++ b/commit.c
@@ -1357,3 +1357,23 @@ void print_commit_list(struct commit_list *list,
 		printf(format, sha1_to_hex(list->item->object.sha1));
 	}
 }
+
+int ensure_commit_buffer(struct commit *commit)
+{
+	enum object_type type;
+	unsigned long size;
+
+	if (commit->buffer)
+		return 0;
+	commit->buffer = read_sha1_file(commit->object.sha1, &type, &size);
+	if (commit->buffer)
+		return 0;
+	else
+		return -1;
+}
+
+void discard_commit_buffer(struct commit *commit)
+{
+	free(commit->buffer);
+	commit->buffer = NULL;
+}

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  3:59               ` Junio C Hamano
@ 2013-01-25  4:08                 ` Jeff King
  2013-01-25  4:21                   ` Junio C Hamano
  2013-01-25  5:53                 ` Jonathan Nieder
  1 sibling, 1 reply; 24+ messages in thread
From: Jeff King @ 2013-01-25  4:08 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

On Thu, Jan 24, 2013 at 07:59:58PM -0800, Junio C Hamano wrote:

> Junio C Hamano <gitster@pobox.com> writes:
> 
> > Jeff King <peff@peff.net> writes:
> >
> >> ... (e.g., how should "log" know that a submodule diff might later want
> >> to see the same entry? Should we optimistically free and then make it
> >> easier for the later user to reliably ensure the buffer is primed? Or
> >> should we err on the side of keeping it in place?).
> >
> > My knee-jerk reaction is that we should consider that commit->buffer
> > belongs to the revision traversal machinery.  Any other uses bolted
> > on later can borrow it if buffer still exists (I do not think pretty
> > code rewrites the buffer contents in place in any way), or they can
> > ask read_sha1_file() to read it themselves and free when they are
> > done.
> 
> I've been toying with an idea along this line.
> 
>  commit.h        | 16 ++++++++++++++++
>  builtin/blame.c | 27 ++++++++-------------------
>  commit.c        | 20 ++++++++++++++++++++
>  3 files changed, 44 insertions(+), 19 deletions(-)

I think we are on the same page as far as what needs to happen at the
call sites.

My suggested implementation had a separate buffer, but you are right
that we may need to actually set "commit->buffer" because sub-functions
expect to find it there (the alternative might be cleaning up the
sub-function interfaces). I haven't looked at the call-sites yet.

This:

> +extern int ensure_commit_buffer(struct commit *);
> +extern void discard_commit_buffer(struct commit *);
> +
> +#define with_commit_buffer(commit) \
> +	do { \
> +		int had_buffer_ = !!commit->buffer; \
> +		if (!had_buffer_) \
> +			ensure_commit_buffer(commit); \
> +		do
> +
> +#define done_with_commit_buffer(commit) \
> +		while (0); \
> +		if (!had_buffer_) \
> +			discard_commit_buffer(commit); \
> +	} while (0)

is pretty nasty, though. I know it gets the job done, but in my
experience, macros which do not behave syntactically like functions are
usually a good sign that you are doing something gross and
unmaintainable.

I dunno.

-Peff

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  4:08                 ` Jeff King
@ 2013-01-25  4:21                   ` Junio C Hamano
  0 siblings, 0 replies; 24+ messages in thread
From: Junio C Hamano @ 2013-01-25  4:21 UTC (permalink / raw
  To: Jeff King
  Cc: Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

Jeff King <peff@peff.net> writes:

> ... I know it gets the job done, but in my
> experience, macros which do not behave syntactically like functions are
> usually a good sign that you are doing something gross and
> unmaintainable.

Yeah, it is a bit too cute for my taste, too, even though it was fun
;-)

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  3:59               ` Junio C Hamano
  2013-01-25  4:08                 ` Jeff King
@ 2013-01-25  5:53                 ` Jonathan Nieder
  2013-01-25  7:27                   ` Junio C Hamano
  2013-01-25  7:32                   ` Jonathon Mah
  1 sibling, 2 replies; 24+ messages in thread
From: Jonathan Nieder @ 2013-01-25  5:53 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Jeff King, Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

Hi,

Junio C Hamano wrote:

> I've been toying with an idea along this line.

Heh.  Just for fun, here's an uglier version:

	struct wcb_data {
		int had_buffer;
		int using_buffer;
	};
	#define WITH_COMMIT_BUFFER_DATA_INIT { 0, 0 }

	extern void acquire_commit_buffer(struct commit *, struct wcb_data *);
	extern void done_with_commit_buffer(struct commit *, struct wcb_data *);

	/*
	 * usage:
	 *	struct wcb_data buf = WITH_COMMIT_BUFFER_DATA_INIT;
	 *
	 *	with_commit_buffer(commit, buf) {
	 *		...
	 *	}
	 */
	#define with_commit_buffer(commit, i) \
		for (acquire_commit_buffer(commit, &i); \
		     i.using_buffer; \
		     done_with_commit_buffer(commit, &i))

	void acquire_commit_buffer(struct commit *commit, struct wcb_data *i)
	{
		enum object_type type;
		unsigned long size;

		assert(!i->using_buffer);
		i->using_buffer = 1;
		i->had_buffer = !!commit->buffer;

		if (i->had_buffer)
			return;
		commit->buffer = read_sha1_file(commit->object.sha1, &type, &size);
		if (!commit->buffer)
			die("unable to read commit %s", sha1_to_hex(commit->object.sha1));
	}

	void done_with_commit_buffer(struct commit *commit, struct wcb_data *i)
	{
		assert(i->using_buffer);
		i->using_buffer = 0;

		if (!i->had_buffer) {
			free(commit->buffer);
			commit->buffer = NULL;
		}
	}

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  5:53                 ` Jonathan Nieder
@ 2013-01-25  7:27                   ` Junio C Hamano
  2013-01-25  7:32                   ` Jonathon Mah
  1 sibling, 0 replies; 24+ messages in thread
From: Junio C Hamano @ 2013-01-25  7:27 UTC (permalink / raw
  To: Jonathan Nieder
  Cc: Jeff King, Duy Nguyen, Stefan Näwe, Armin, Jonathon Mah,
	git@vger.kernel.org

Jonathan Nieder <jrnieder@gmail.com> writes:

> Hi,
>
> Junio C Hamano wrote:
>
>> I've been toying with an idea along this line.
>
> Heh.  Just for fun, here's an uglier version:

Much nicer, though.

>
> 	struct wcb_data {
> 		int had_buffer;
> 		int using_buffer;
> 	};
> 	#define WITH_COMMIT_BUFFER_DATA_INIT { 0, 0 }
>
> 	extern void acquire_commit_buffer(struct commit *, struct wcb_data *);
> 	extern void done_with_commit_buffer(struct commit *, struct wcb_data *);
>
> 	/*
> 	 * usage:
> 	 *	struct wcb_data buf = WITH_COMMIT_BUFFER_DATA_INIT;
> 	 *
> 	 *	with_commit_buffer(commit, buf) {
> 	 *		...
> 	 *	}
> 	 */
> 	#define with_commit_buffer(commit, i) \
> 		for (acquire_commit_buffer(commit, &i); \
> 		     i.using_buffer; \
> 		     done_with_commit_buffer(commit, &i))
>
> 	void acquire_commit_buffer(struct commit *commit, struct wcb_data *i)
> 	{
> 		enum object_type type;
> 		unsigned long size;
>
> 		assert(!i->using_buffer);
> 		i->using_buffer = 1;
> 		i->had_buffer = !!commit->buffer;
>
> 		if (i->had_buffer)
> 			return;
> 		commit->buffer = read_sha1_file(commit->object.sha1, &type, &size);
> 		if (!commit->buffer)
> 			die("unable to read commit %s", sha1_to_hex(commit->object.sha1));
> 	}
>
> 	void done_with_commit_buffer(struct commit *commit, struct wcb_data *i)
> 	{
> 		assert(i->using_buffer);
> 		i->using_buffer = 0;
>
> 		if (!i->had_buffer) {
> 			free(commit->buffer);
> 			commit->buffer = NULL;
> 		}
> 	}

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  5:53                 ` Jonathan Nieder
  2013-01-25  7:27                   ` Junio C Hamano
@ 2013-01-25  7:32                   ` Jonathon Mah
  2013-01-25 15:36                     ` Junio C Hamano
  1 sibling, 1 reply; 24+ messages in thread
From: Jonathon Mah @ 2013-01-25  7:32 UTC (permalink / raw
  To: Jonathan Nieder
  Cc: Junio C Hamano, Jeff King, Duy Nguyen, Stefan Näwe, Armin,
	git@vger.kernel.org

Just to note, the proposals so far don't prevent a "smart-ass" function from freeing the buffer when it's called underneath the use/release scope, as in:

with_commit_buffer(commit); {
	fn1_needing_buffer(commit);
	walk_rev_tree_or_something();
	fn2_needing_buffer(commit);
} done_with_commit_buffer(commit);

walk_rev_tree_or_something() might need to read commits to do its thing, and it could still choose to free their buffers (as in rev-list.c finish_commit()). If those commits include the one being "retained", the call to fn2 will still see NULL despite it being in a 'protected scope'.

Are there objections to using a reference count?

Jonathon Mah
me@JonathonMah.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: segmentation fault (nullpointer) with git log --submodule -p
  2013-01-25  7:32                   ` Jonathon Mah
@ 2013-01-25 15:36                     ` Junio C Hamano
  2013-01-26  9:40                       ` [PATCH 0/3] lazily load commit->buffer Jeff King
  0 siblings, 1 reply; 24+ messages in thread
From: Junio C Hamano @ 2013-01-25 15:36 UTC (permalink / raw
  To: Jonathon Mah
  Cc: Jonathan Nieder, Jeff King, Duy Nguyen, Stefan Näwe, Armin,
	git@vger.kernel.org

Jonathon Mah <jmah@me.com> writes:

> Just to note, the proposals so far don't prevent a "smart-ass"
> function from freeing the buffer when it's called underneath the
> use/release scope, as in:
>
> with_commit_buffer(commit); {
> 	fn1_needing_buffer(commit);
> 	walk_rev_tree_or_something();
> 	fn2_needing_buffer(commit);
> } done_with_commit_buffer(commit);

I think the goal of everybody discussing these ideas is to make sure
that all code follows the simple ownership policy proposed at the
beginning of this subthread: commit->buffer belongs to the revision
traversal machinery, and other users could borrow it when available.

Even though your sample code will break, from that point of view, I
do not think it is something worth worrying about.  If the function
"walk_rev_tree_or_something()" discards commit->buffer, it by
definition must be a part of the revision traversal machinery, and
any code that calls it inside with_commit_buffer() or uses the field
after such a call without revalidating commit->buffer, is already in
violation.  With or without such a macro, we would need to be
careful about enforcing the ownership rule, and I think a code
structure like the above example is easier to spot problems in
during the review than the current code.

Because retaining commit->buffer is done for the benefit of the
next/future users of the data, and not for the users that _are_
using them right now, I do not think the usual refcounting that
discards when nobody references the data is a good match to the
problem we are discussing.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 0/3] lazily load commit->buffer
  2013-01-25 15:36                     ` Junio C Hamano
@ 2013-01-26  9:40                       ` Jeff King
  2013-01-26  9:42                         ` [PATCH 1/3] commit: drop useless xstrdup of commit message Jeff King
                                           ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Jeff King @ 2013-01-26  9:40 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

On Fri, Jan 25, 2013 at 07:36:18AM -0800, Junio C Hamano wrote:

> Jonathon Mah <jmah@me.com> writes:
> 
> > Just to note, the proposals so far don't prevent a "smart-ass"
> > function from freeing the buffer when it's called underneath the
> > use/release scope, as in:
> >
> > with_commit_buffer(commit); {
> > 	fn1_needing_buffer(commit);
> > 	walk_rev_tree_or_something();
> > 	fn2_needing_buffer(commit);
> > } done_with_commit_buffer(commit);
> 
> I think the goal of everybody discussing these ideas is to make sure
> that all code follows the simple ownership policy proposed at the
> beginning of this subthread: commit->buffer belongs to the revision
> traversal machinery, and other users could borrow it when available.

Yeah, agreed. I started to fix this up with a use/unuse pattern and
realized something: all of the call sites are calling logmsg_reencode
anyway, because that is the next logical step in doing anything with the
buffer that is not just parsing out the parent/timestamp/tree info. And
since that function already might allocate (for the re-encoded copy),
callers have to handle the maybe-borrowed-maybe-free situation already.

So I came up with this patch series, which I think should fix the
problem, and actually makes the call-sites easier to read, rather than
harder.

  [1/3]: commit: drop useless xstrdup of commit message
  [2/3]: logmsg_reencode: never return NULL
  [3/3]: logmsg_reencode: lazily load missing commit buffers

Here's the diffstat:

 builtin/blame.c                  | 22 ++-------
 builtin/commit.c                 | 14 +-----
 commit.h                         |  1 +
 pretty.c                         | 93 ++++++++++++++++++++++++++---------
 t/t4042-diff-textconv-caching.sh |  8 +++
 5 files changed, 85 insertions(+), 53 deletions(-)

Not too bad, and 27 of the lines added in pretty.c are new comments
explaining the flow of logmsg_reencode. So even if this doesn't get
every case, I think it's a nice cleanup.

-Peff

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/3] commit: drop useless xstrdup of commit message
  2013-01-26  9:40                       ` [PATCH 0/3] lazily load commit->buffer Jeff King
@ 2013-01-26  9:42                         ` Jeff King
  2013-01-26  9:44                         ` [PATCH 2/3] logmsg_reencode: never return NULL Jeff King
                                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Jeff King @ 2013-01-26  9:42 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

When git-commit is asked to reuse a commit message via "-c",
we call read_commit_message, which looks up the commit and
hands back either the re-encoded result, or a copy of the
original. We make a copy in the latter case so that the
ownership semantics of the return value are clear (in either
case, it can be freed).

However, since we return a "const char *", and since the
resulting buffer's lifetime is the same as that of the whole
program, we never bother to free it at all.

Let's just drop the copy. That saves us a copy in the common
case. While it does mean we leak in the re-encode case, it
doesn't matter, since we are relying on program exit to free
the memory anyway.

Signed-off-by: Jeff King <peff@peff.net>
---
This one isn't strictly necessary, but it makes it a lot more obvious
what is going on with the memory ownership of this code in the next
patch.

 builtin/commit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 38b9a9c..fbbb40f 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -962,7 +962,7 @@ static const char *read_commit_message(const char *name)
 	 * encodings are identical.
 	 */
 	if (out == NULL)
-		out = xstrdup(commit->buffer);
+		out = commit->buffer;
 	return out;
 }

-- 
1.8.0.2.16.g72e2fc9

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/3] logmsg_reencode: never return NULL
  2013-01-26  9:40                       ` [PATCH 0/3] lazily load commit->buffer Jeff King
  2013-01-26  9:42                         ` [PATCH 1/3] commit: drop useless xstrdup of commit message Jeff King
@ 2013-01-26  9:44                         ` Jeff King
  2013-01-26  9:44                         ` [PATCH 3/3] logmsg_reencode: lazily load missing commit buffers Jeff King
  2013-01-26 21:26                         ` [PATCH 0/3] lazily load commit->buffer Junio C Hamano
  3 siblings, 0 replies; 24+ messages in thread
From: Jeff King @ 2013-01-26  9:44 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

The logmsg_reencode function will return the reencoded
commit buffer, or NULL if reencoding failed or no reencoding
was necessary. Since every caller then ends up checking for NULL
and just using the commit's original buffer, anyway, we can
be a bit more helpful and just return that buffer when we
would have returned NULL.

Since the resulting string may or may not need to be freed,
we introduce a logmsg_free, which checks whether the buffer
came from the commit object or not (callers either
implemented the same check already, or kept two separate
pointers, one to mark the buffer to be used, and one for the
to-be-freed string).

Pushing this logic into logmsg_* simplifies the callers, and
will let future patches lazily load the commit buffer in a
single place.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/blame.c  |  9 ++++-----
 builtin/commit.c | 14 ++------------
 commit.h         |  1 +
 pretty.c         | 38 ++++++++++++++++++++++----------------
 4 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index b431ba3..962e4e3 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1420,7 +1420,7 @@ static void get_commit_info(struct commit *commit,
 {
 	int len;
 	const char *subject, *encoding;
-	char *reencoded, *message;
+	char *message;
 
 	commit_info_init(ret);
 
@@ -1438,14 +1438,13 @@ static void get_commit_info(struct commit *commit,
 			    sha1_to_hex(commit->object.sha1));
 	}
 	encoding = get_log_output_encoding();
-	reencoded = logmsg_reencode(commit, encoding);
-	message   = reencoded ? reencoded : commit->buffer;
+	message = logmsg_reencode(commit, encoding);
 	get_ac_line(message, "\nauthor ",
 		    &ret->author, &ret->author_mail,
 		    &ret->author_time, &ret->author_tz);
 
 	if (!detailed) {
-		free(reencoded);
+		logmsg_free(message, commit);
 		return;
 	}
 
@@ -1459,7 +1458,7 @@ static void get_commit_info(struct commit *commit,
 	else
 		strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1));
 
-	free(reencoded);
+	logmsg_free(message, commit);
 }
 
 /*
diff --git a/builtin/commit.c b/builtin/commit.c
index fbbb40f..6169f1e 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -946,24 +946,14 @@ static const char *read_commit_message(const char *name)
 
 static const char *read_commit_message(const char *name)
 {
-	const char *out_enc, *out;
+	const char *out_enc;
 	struct commit *commit;
 
 	commit = lookup_commit_reference_by_name(name);
 	if (!commit)
 		die(_("could not lookup commit %s"), name);
 	out_enc = get_commit_output_encoding();
-	out = logmsg_reencode(commit, out_enc);
-
-	/*
-	 * If we failed to reencode the buffer, just copy it
-	 * byte for byte so the user can try to fix it up.
-	 * This also handles the case where input and output
-	 * encodings are identical.
-	 */
-	if (out == NULL)
-		out = commit->buffer;
-	return out;
+	return logmsg_reencode(commit, out_enc);
 }
 
 static int parse_and_validate_options(int argc, const char *argv[],
diff --git a/commit.h b/commit.h
index c16c8a7..e770649 100644
--- a/commit.h
+++ b/commit.h
@@ -101,6 +101,7 @@ extern char *logmsg_reencode(const struct commit *commit,
 struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
 extern char *logmsg_reencode(const struct commit *commit,
 			     const char *output_encoding);
+extern void logmsg_free(char *msg, const struct commit *commit);
 extern void get_commit_format(const char *arg, struct rev_info *);
 extern const char *format_subject(struct strbuf *sb, const char *msg,
 				  const char *line_separator);
diff --git a/pretty.c b/pretty.c
index 07fc062..c675349 100644
--- a/pretty.c
+++ b/pretty.c
@@ -524,10 +524,11 @@ static char *get_header(const struct commit *commit, const char *key)
 	strbuf_addch(sb, '\n');
 }
 
-static char *get_header(const struct commit *commit, const char *key)
+static char *get_header(const struct commit *commit, const char *msg,
+			const char *key)
 {
 	int key_len = strlen(key);
-	const char *line = commit->buffer;
+	const char *line = msg;
 
 	while (line) {
 		const char *eol = strchr(line, '\n'), *next;
@@ -588,17 +589,18 @@ char *logmsg_reencode(const struct commit *commit,
 	static const char *utf8 = "UTF-8";
 	const char *use_encoding;
 	char *encoding;
+	char *msg = commit->buffer;
 	char *out;
 
 	if (!output_encoding || !*output_encoding)
-		return NULL;
-	encoding = get_header(commit, "encoding");
+		return msg;
+	encoding = get_header(commit, msg, "encoding");
 	use_encoding = encoding ? encoding : utf8;
 	if (same_encoding(use_encoding, output_encoding))
 		if (encoding) /* we'll strip encoding header later */
 			out = xstrdup(commit->buffer);
 		else
-			return NULL; /* nothing to do */
+			return msg; /* nothing to do */
 	else
 		out = reencode_string(commit->buffer,
 				      output_encoding, use_encoding);
@@ -606,7 +608,17 @@ char *logmsg_reencode(const struct commit *commit,
 		out = replace_encoding_header(out, output_encoding);
 
 	free(encoding);
-	return out;
+	/*
+	 * If the re-encoding failed, out might be NULL here; in that
+	 * case we just return the commit message verbatim.
+	 */
+	return out ? out : msg;
+}
+
+void logmsg_free(char *msg, const struct commit *commit)
+{
+	if (msg != commit->buffer)
+		free(msg);
 }
 
 static int mailmap_name(const char **email, size_t *email_len,
@@ -1278,14 +1290,11 @@ void format_commit_message(const struct commit *commit,
 	context.pretty_ctx = pretty_ctx;
 	context.wrap_start = sb->len;
 	context.message = logmsg_reencode(commit, output_enc);
-	if (!context.message)
-		context.message = commit->buffer;
 
 	strbuf_expand(sb, format, format_commit_item, &context);
 	rewrap_message_tail(sb, &context, 0, 0, 0);
 
-	if (context.message != commit->buffer)
-		free(context.message);
+	logmsg_free(context.message, commit);
 	free(context.signature.gpg_output);
 	free(context.signature.signer);
 }
@@ -1432,7 +1441,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
 {
 	unsigned long beginning_of_body;
 	int indent = 4;
-	const char *msg = commit->buffer;
+	const char *msg;
 	char *reencoded;
 	const char *encoding;
 	int need_8bit_cte = pp->need_8bit_cte;
@@ -1443,10 +1452,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
 	}
 
 	encoding = get_log_output_encoding();
-	reencoded = logmsg_reencode(commit, encoding);
-	if (reencoded) {
-		msg = reencoded;
-	}
+	msg = reencoded = logmsg_reencode(commit, encoding);
 
 	if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
 		indent = 0;
@@ -1503,7 +1509,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
 	if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
 		strbuf_addch(sb, '\n');
 
-	free(reencoded);
+	logmsg_free(reencoded, commit);
 }
 
 void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit,
-- 
1.8.0.2.16.g72e2fc9

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/3] logmsg_reencode: lazily load missing commit buffers
  2013-01-26  9:40                       ` [PATCH 0/3] lazily load commit->buffer Jeff King
  2013-01-26  9:42                         ` [PATCH 1/3] commit: drop useless xstrdup of commit message Jeff King
  2013-01-26  9:44                         ` [PATCH 2/3] logmsg_reencode: never return NULL Jeff King
@ 2013-01-26  9:44                         ` Jeff King
  2013-01-26 21:26                         ` [PATCH 0/3] lazily load commit->buffer Junio C Hamano
  3 siblings, 0 replies; 24+ messages in thread
From: Jeff King @ 2013-01-26  9:44 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

Usually a commit that makes it to logmsg_reencode will have
been parsed, and the commit->buffer struct member will be
valid. However, some code paths will free commit buffers
after having used them (for example, the log traversal
machinery will do so to keep memory usage down).

Most of the time this is fine; log should only show a commit
once, and then exits. However, there are some code paths
where this does not work. At least two are known:

  1. A commit may be shown as part of a regular ref, and
     then it may be shown again as part of a submodule diff
     (e.g., if a repo contains refs to both the superproject
     and subproject).

  2. A notes-cache commit may be shown during "log --all",
     and then later used to access a textconv cache during a
     diff.

Lazily loading in logmsg_reencode does not necessarily catch
all such cases, but it should catch most of them. Users of
the commit buffer tend to be either parsing for structure
(in which case they will call parse_commit, and either we will
already have parsed, or we will load commit->buffer lazily
there), or outputting (either to the user, or fetching a
part of the commit message via format_commit_message). In
the latter case, we should always be using logmsg_reencode
anyway (and typically we do so via the pretty-print
machinery).

If there are any cases that this misses, we can fix them up
to use logmsg_reencode (or handle them on a case-by-case
basis if that is inappropriate).

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/blame.c                  | 13 ---------
 pretty.c                         | 57 ++++++++++++++++++++++++++++++++++------
 t/t4042-diff-textconv-caching.sh |  8 ++++++
 3 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 962e4e3..86100e9 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1424,19 +1424,6 @@ static void get_commit_info(struct commit *commit,
 
 	commit_info_init(ret);
 
-	/*
-	 * We've operated without save_commit_buffer, so
-	 * we now need to populate them for output.
-	 */
-	if (!commit->buffer) {
-		enum object_type type;
-		unsigned long size;
-		commit->buffer =
-			read_sha1_file(commit->object.sha1, &type, &size);
-		if (!commit->buffer)
-			die("Cannot read commit %s",
-			    sha1_to_hex(commit->object.sha1));
-	}
 	encoding = get_log_output_encoding();
 	message = logmsg_reencode(commit, encoding);
 	get_ac_line(message, "\nauthor ",
diff --git a/pretty.c b/pretty.c
index c675349..eae57ad 100644
--- a/pretty.c
+++ b/pretty.c
@@ -592,18 +592,59 @@ char *logmsg_reencode(const struct commit *commit,
 	char *msg = commit->buffer;
 	char *out;
 
+	if (!msg) {
+		enum object_type type;
+		unsigned long size;
+
+		msg = read_sha1_file(commit->object.sha1, &type, &size);
+		if (!msg)
+			die("Cannot read commit object %s",
+			    sha1_to_hex(commit->object.sha1));
+		if (type != OBJ_COMMIT)
+			die("Expected commit for '%s', got %s",
+			    sha1_to_hex(commit->object.sha1), typename(type));
+	}
+
 	if (!output_encoding || !*output_encoding)
 		return msg;
 	encoding = get_header(commit, msg, "encoding");
 	use_encoding = encoding ? encoding : utf8;
-	if (same_encoding(use_encoding, output_encoding))
-		if (encoding) /* we'll strip encoding header later */
-			out = xstrdup(commit->buffer);
-		else
-			return msg; /* nothing to do */
-	else
-		out = reencode_string(commit->buffer,
-				      output_encoding, use_encoding);
+	if (same_encoding(use_encoding, output_encoding)) {
+		/*
+		 * No encoding work to be done. If we have no encoding header
+		 * at all, then there's nothing to do, and we can return the
+		 * message verbatim (whether newly allocated or not).
+		 */
+		if (!encoding)
+			return msg;
+
+		/*
+		 * Otherwise, we still want to munge the encoding header in the
+		 * result, which will be done by modifying the buffer. If we
+		 * are using a fresh copy, we can reuse it. But if we are using
+		 * the cached copy from commit->buffer, we need to duplicate it
+		 * to avoid munging commit->buffer.
+		 */
+		out = msg;
+		if (out == commit->buffer)
+			out = xstrdup(out);
+	}
+	else {
+		/*
+		 * There's actual encoding work to do. Do the reencoding, which
+		 * still leaves the header to be replaced in the next step. At
+		 * this point, we are done with msg. If we allocated a fresh
+		 * copy, we can free it.
+		 */
+		out = reencode_string(msg, output_encoding, use_encoding);
+		if (out && msg != commit->buffer)
+			free(msg);
+	}
+
+	/*
+	 * This replacement actually consumes the buffer we hand it, so we do
+	 * not have to worry about freeing the old "out" here.
+	 */
 	if (out)
 		out = replace_encoding_header(out, output_encoding);
 
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 91f8198..04a44d5 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,12 @@ test_expect_success 'switching diff driver produces correct results' '
 	test_cmp expect actual
 '
 
+# The point here is to test that we can log the notes cache and still use it to
+# produce a diff later (older versions of git would segfault on this). It's
+# much more likely to come up in the real world with "log --all -p", but using
+# --no-walk lets us reliably reproduce the order of traversal.
+test_expect_success 'log notes cache and still use cache for -p' '
+	git log --no-walk -p refs/notes/textconv/magic HEAD
+'
+
 test_done
-- 
1.8.0.2.16.g72e2fc9

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/3] lazily load commit->buffer
  2013-01-26  9:40                       ` [PATCH 0/3] lazily load commit->buffer Jeff King
                                           ` (2 preceding siblings ...)
  2013-01-26  9:44                         ` [PATCH 3/3] logmsg_reencode: lazily load missing commit buffers Jeff King
@ 2013-01-26 21:26                         ` Junio C Hamano
  2013-01-26 22:14                           ` Jeff King
  3 siblings, 1 reply; 24+ messages in thread
From: Junio C Hamano @ 2013-01-26 21:26 UTC (permalink / raw
  To: Jeff King
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

Jeff King <peff@peff.net> writes:

> Yeah, agreed. I started to fix this up with a use/unuse pattern and
> realized something: all of the call sites are calling logmsg_reencode
> anyway, because that is the next logical step in doing anything with the
> buffer that is not just parsing out the parent/timestamp/tree info. And
> since that function already might allocate (for the re-encoded copy),
> callers have to handle the maybe-borrowed-maybe-free situation already.
>
> So I came up with this patch series, which I think should fix the
> problem, and actually makes the call-sites easier to read, rather than
> harder.
>
>   [1/3]: commit: drop useless xstrdup of commit message
>   [2/3]: logmsg_reencode: never return NULL
>   [3/3]: logmsg_reencode: lazily load missing commit buffers
>
> Here's the diffstat:
>
>  builtin/blame.c                  | 22 ++-------
>  builtin/commit.c                 | 14 +-----
>  commit.h                         |  1 +
>  pretty.c                         | 93 ++++++++++++++++++++++++++---------
>  t/t4042-diff-textconv-caching.sh |  8 +++
>  5 files changed, 85 insertions(+), 53 deletions(-)
>
> Not too bad, and 27 of the lines added in pretty.c are new comments
> explaining the flow of logmsg_reencode. So even if this doesn't get
> every case, I think it's a nice cleanup.

This looks very good.

I wonder if this lets us get rid of the hack in cmd_log_walk() that
does this:

        while ((commit = get_revision(rev)) != NULL) {
                if (!log_tree_commit(rev, commit) &&
                    rev->max_count >= 0)
                        rev->max_count++;
!               if (!rev->reflog_info) {
!                       /* we allow cycles in reflog ancestry */
                        free(commit->buffer);
                        commit->buffer = NULL;
!               }
                free_commit_list(commit->parents);
                commit->parents = NULL;

After log_tree_commit() handles the commit, using the buffer, we
discard the memory associated to it because we know we no longer
will use it in normal cases.

The "do not do that if rev->reflog_info is true" was added in
a6c7306 (--walk-reflogs: do not crash with cyclic reflog ancestry,
2007-01-20) because the second and subsequent display of "commit"
(which happens to occur only when walking reflogs) needs to look at
commit->buffer again, and this hack forces us to retain the buffer
for _all_ commit objects.

But your patches could be seen as a different (and more correct) way
to fix the same issue.  Once the display side learns how to re-read
the log text of the commit object, the above becomes unnecessary, no?

We may still be helped if the majority of commit objects that appear in
the reflog appear more than once, in which case retaining the buffer
for _all_ commits could be an overall win.  Not having to read the
buffer for the same commit each time it is shown for majority of
multiply-appearing commits, in exchange for having to keep the
buffer for commits that appear only once, which are the minority, and
suffering increased page cache pressure.  That could be seen as an
optimization.

But that is a performance thing, not a correctness issue, so "we
allow cycles" implying "therefore if we discard the buffer, we will
show wrong output" becomes an incorrect justification.

I happen to have HEAD reflog that is 30k entries long; more than 26k
represent a checkout of a unique commit.  So I suspect that the above
hack to excessively retain commit->buffer for already used commits will
not help us performance-wise, either.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/3] lazily load commit->buffer
  2013-01-26 21:26                         ` [PATCH 0/3] lazily load commit->buffer Junio C Hamano
@ 2013-01-26 22:14                           ` Jeff King
  2013-01-27  5:32                             ` Junio C Hamano
  0 siblings, 1 reply; 24+ messages in thread
From: Jeff King @ 2013-01-26 22:14 UTC (permalink / raw
  To: Junio C Hamano
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

On Sat, Jan 26, 2013 at 01:26:53PM -0800, Junio C Hamano wrote:

> This looks very good.
> 
> I wonder if this lets us get rid of the hack in cmd_log_walk() that
> does this:
> 
>         while ((commit = get_revision(rev)) != NULL) {
>                 if (!log_tree_commit(rev, commit) &&
>                     rev->max_count >= 0)
>                         rev->max_count++;
> !               if (!rev->reflog_info) {
> !                       /* we allow cycles in reflog ancestry */
>                         free(commit->buffer);
>                         commit->buffer = NULL;
> !               }
>                 free_commit_list(commit->parents);
>                 commit->parents = NULL;
> 
> After log_tree_commit() handles the commit, using the buffer, we
> discard the memory associated to it because we know we no longer
> will use it in normal cases.
> [...]
> But that is a performance thing, not a correctness issue, so "we
> allow cycles" implying "therefore if we discard the buffer, we will
> show wrong output" becomes an incorrect justification.

Right. I think the correctness issue goes away with my patches, and it
is just a question of estimating the workload for performance. I doubt
it makes a big difference either way, especially when compared to
actually showing the commit (even a single pathspec limiter, or doing
"-p", would likely dwarf a few extra commit decompressions).

My HEAD has about 400/3000 non-unique commits, which matches your
numbers percentage-wise. Dropping the lines above (and always freeing)
takes my best-of-five for "git log -g" from 0.085s to 0.080s. Which is
well within the noise.  Doing "git log -g Makefile" ended up at 0.183s
both before and after.

So I suspect it does not matter at all in normal cases, and the time is
indeed dwarfed by adding even a rudimentary pathspec. I'd be in favor of
dropping the lines just to decrease complexity of the code.

-Peff

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/3] lazily load commit->buffer
  2013-01-26 22:14                           ` Jeff King
@ 2013-01-27  5:32                             ` Junio C Hamano
  0 siblings, 0 replies; 24+ messages in thread
From: Junio C Hamano @ 2013-01-27  5:32 UTC (permalink / raw
  To: Jeff King
  Cc: Jonathon Mah, Jonathan Nieder, Duy Nguyen, Stefan Näwe,
	Armin, git@vger.kernel.org

Jeff King <peff@peff.net> writes:

> My HEAD has about 400/3000 non-unique commits, which matches your
> numbers percentage-wise. Dropping the lines above (and always freeing)
> takes my best-of-five for "git log -g" from 0.085s to 0.080s. Which is
> well within the noise.  Doing "git log -g Makefile" ended up at 0.183s
> both before and after.
>
> ... I'd be in favor of
> dropping the lines just to decrease complexity of the code.

I think we are in agreement, then.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2013-01-27  5:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2013-01-23 14:38 segmentation fault (nullpointer) with git log --submodule -p Armin
2013-01-23 20:02 ` Jeff King
2013-01-24 12:11   ` Stefan Näwe
2013-01-24 13:40     ` Duy Nguyen
2013-01-24 14:06       ` Stefan Näwe
2013-01-24 14:14         ` Duy Nguyen
2013-01-24 23:27           ` Jeff King
2013-01-24 23:56             ` Junio C Hamano
2013-01-25  0:55               ` Jeff King
2013-01-25  2:05                 ` Duy Nguyen
2013-01-25  3:59               ` Junio C Hamano
2013-01-25  4:08                 ` Jeff King
2013-01-25  4:21                   ` Junio C Hamano
2013-01-25  5:53                 ` Jonathan Nieder
2013-01-25  7:27                   ` Junio C Hamano
2013-01-25  7:32                   ` Jonathon Mah
2013-01-25 15:36                     ` Junio C Hamano
2013-01-26  9:40                       ` [PATCH 0/3] lazily load commit->buffer Jeff King
2013-01-26  9:42                         ` [PATCH 1/3] commit: drop useless xstrdup of commit message Jeff King
2013-01-26  9:44                         ` [PATCH 2/3] logmsg_reencode: never return NULL Jeff King
2013-01-26  9:44                         ` [PATCH 3/3] logmsg_reencode: lazily load missing commit buffers Jeff King
2013-01-26 21:26                         ` [PATCH 0/3] lazily load commit->buffer Junio C Hamano
2013-01-26 22:14                           ` Jeff King
2013-01-27  5:32                             ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).