git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] builtins: search builtin commands via binary search.
@ 2013-07-26 20:50 Stefan Beller
  2013-07-26 20:57 ` Jonathan Nieder
  2013-07-27  0:26 ` Eric Sunshine
  0 siblings, 2 replies; 8+ messages in thread
From: Stefan Beller @ 2013-07-26 20:50 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

There are currently 115 commands built into the git executable.
Before this commit, it was iterated over these commands in a linear
order, i.e. each command was checked.

As it turns out the commands are already sorted alphabetically, it is easy
to perform a binary search instead of linear searching.
This results in 7 lookups in the worst case.

Signed-off-by: Stefan Beller <stefanbeller@googlemail.com>
---
 git.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/git.c b/git.c
index 2025f77..0d7a9b5 100644
--- a/git.c
+++ b/git.c
@@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 	return 0;
 }
 
+static int compare_internal_command(const void *a, const void *b) {
+	/* The first parameter is of type char* describing the name,
+	   the second is a struct cmd_struct */
+	const char *name = (const char*)a;
+	const struct cmd_struct *cmd_struct = (struct cmd_struct*)b;
+	return strcmp(name, cmd_struct->cmd);
+}
+
 static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
+	/* commands must be sorted alphabetically */
 	static struct cmd_struct commands[] = {
 		{ "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
 		{ "annotate", cmd_annotate, RUN_SETUP },
@@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv)
 		argv[0] = cmd = "help";
 	}
 
-	for (i = 0; i < ARRAY_SIZE(commands); i++) {
-		struct cmd_struct *p = commands+i;
-		if (strcmp(p->cmd, cmd))
-			continue;
+	struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
+				ARRAY_SIZE(commands), sizeof(struct cmd_struct),
+				compare_internal_command);
+
+	if (p)
 		exit(run_builtin(p, argc, argv));
-	}
 }
 
 static void execv_dashed_external(const char **argv)
-- 
1.8.4.rc0.1.g8f6a3e5

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] builtins: search builtin commands via binary search.
  2013-07-26 20:50 [PATCH] builtins: search builtin commands via binary search Stefan Beller
@ 2013-07-26 20:57 ` Jonathan Nieder
  2013-07-27  8:49   ` Stefan Beller
  2013-07-27  0:26 ` Eric Sunshine
  1 sibling, 1 reply; 8+ messages in thread
From: Jonathan Nieder @ 2013-07-26 20:57 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git

Hi,

Stefan Beller wrote:

> --- a/git.c
> +++ b/git.c
> @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
>  	return 0;
>  }
>  
> +static int compare_internal_command(const void *a, const void *b) {
> +	/* The first parameter is of type char* describing the name,
> +	   the second is a struct cmd_struct */

Style:

	/*
	 * Multi-line comments in git look like this, with an initial
	 * "/*" line, a leading "*" on each line with text, and a line
	 * with '*' '/' at the end.
	 */

[...]
> @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv)
>  		argv[0] = cmd = "help";
>  	}
>  
> -	for (i = 0; i < ARRAY_SIZE(commands); i++) {
> -		struct cmd_struct *p = commands+i;
> -		if (strcmp(p->cmd, cmd))
> -			continue;
> +	struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
> +				ARRAY_SIZE(commands), sizeof(struct cmd_struct),
> +				compare_internal_command);

No need to cast --- this is C.

Fun.  Does this result in a measurable speedup, or is it just for more
pleasant reading?

Thanks and hope that helps,
Jonathan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] builtins: search builtin commands via binary search.
  2013-07-26 20:50 [PATCH] builtins: search builtin commands via binary search Stefan Beller
  2013-07-26 20:57 ` Jonathan Nieder
@ 2013-07-27  0:26 ` Eric Sunshine
  1 sibling, 0 replies; 8+ messages in thread
From: Eric Sunshine @ 2013-07-27  0:26 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Git List

On Fri, Jul 26, 2013 at 4:50 PM, Stefan Beller
<stefanbeller@googlemail.com> wrote:
> There are currently 115 commands built into the git executable.
> Before this commit, it was iterated over these commands in a linear
> order, i.e. each command was checked.
>
> As it turns out the commands are already sorted alphabetically, it is easy
> to perform a binary search instead of linear searching.
> This results in 7 lookups in the worst case.
>
> Signed-off-by: Stefan Beller <stefanbeller@googlemail.com>
> ---
>  git.c | 19 ++++++++++++++-----
>  1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/git.c b/git.c
> index 2025f77..0d7a9b5 100644
> --- a/git.c
> +++ b/git.c
> @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
>         return 0;
>  }
>
> +static int compare_internal_command(const void *a, const void *b) {
> +       /* The first parameter is of type char* describing the name,
> +          the second is a struct cmd_struct */
> +       const char *name = (const char*)a;
> +       const struct cmd_struct *cmd_struct = (struct cmd_struct*)b;

Comments typically exist to elucidate something non-obvious in the
code, however, in this case the code and comment say the same thing,
making the comment redundant. Such redundancy can make code harder to
read since the reader has to take extra time to figure out if the
comment is really explaining something not obvious in the code. Thus,
this comment can be removed without loss of clarity.

> +       return strcmp(name, cmd_struct->cmd);
> +}
> +
>  static void handle_internal_command(int argc, const char **argv)
>  {
>         const char *cmd = argv[0];
> +       /* commands must be sorted alphabetically */
>         static struct cmd_struct commands[] = {

This new comment, on the other hand does explain something not obvious
at this point in the code.

>                 { "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
>                 { "annotate", cmd_annotate, RUN_SETUP },
> @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv)
>                 argv[0] = cmd = "help";
>         }
>
> -       for (i = 0; i < ARRAY_SIZE(commands); i++) {
> -               struct cmd_struct *p = commands+i;
> -               if (strcmp(p->cmd, cmd))
> -                       continue;
> +       struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
> +                               ARRAY_SIZE(commands), sizeof(struct cmd_struct),
> +                               compare_internal_command);

Since this will break down if the commands[] array becomes unsorted,
it would make sense to protect against such a failure. For instance,
you could add a check in Makefile which triggers when git.c is edited.
It might do something like this:

  awk '/cmd_struct commands/,/};/ { if (match($2,/"/)) print $2 }'
    <git.c >builtin.actual &&
  sort <builtin.actual >builtin.expect &&
  cmp -s builtin.expect builtin.actual &&
  rm builtin.expect builtin.actual

> +
> +       if (p)
>                 exit(run_builtin(p, argc, argv));
> -       }
>  }
>
>  static void execv_dashed_external(const char **argv)
> --

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] builtins: search builtin commands via binary search.
  2013-07-26 20:57 ` Jonathan Nieder
@ 2013-07-27  8:49   ` Stefan Beller
  2013-07-27  8:50     ` Stefan Beller
                       ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Stefan Beller @ 2013-07-27  8:49 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, Eric Sunshine

[-- Attachment #1: Type: text/plain, Size: 2762 bytes --]

On 07/26/2013 10:57 PM, Jonathan Nieder wrote:
> Hi,
> 
> Stefan Beller wrote:
> 
>> --- a/git.c
>> +++ b/git.c
>> @@ -309,9 +309,18 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
>>  	return 0;
>>  }
>>  
>> +static int compare_internal_command(const void *a, const void *b) {
>> +	/* The first parameter is of type char* describing the name,
>> +	   the second is a struct cmd_struct */
> 
> Style:
> 
> 	/*
> 	 * Multi-line comments in git look like this, with an initial
> 	 * "/*" line, a leading "*" on each line with text, and a line
> 	 * with '*' '/' at the end.
> 	 */
> 
> [...]

Thanks for noting, however as Eric points out, that comment was not enlightening, so I removed it.


>> @@ -447,12 +456,12 @@ static void handle_internal_command(int argc, const char **argv)
>>  		argv[0] = cmd = "help";
>>  	}
>>  
>> -	for (i = 0; i < ARRAY_SIZE(commands); i++) {
>> -		struct cmd_struct *p = commands+i;
>> -		if (strcmp(p->cmd, cmd))
>> -			continue;
>> +	struct cmd_struct *p = (struct cmd_struct *)bsearch(cmd, commands,
>> +				ARRAY_SIZE(commands), sizeof(struct cmd_struct),
>> +				compare_internal_command);
> 
> No need to cast --- this is C.

Also removed.

> 
> Fun.  Does this result in a measurable speedup, or is it just for more
> pleasant reading?
> 
> Thanks and hope that helps,
> Jonathan
> 

premature optimization is the root of all evil....
I tried hard to come up with a benchmark, but this is lost in the
noise. I could not figure out a way to reproducably make sure this
patch is really faster.
So I tried to `time git add COPYING` to show it's not getting slower
for the first entries in the list as well as `git fast-external-command`
whereas the fast-external-command is just an int main() {return 0; }
to check if the external commands, which are executed after searching 
through all the internals come up faster.

However I could not find a speedup.
So if the patch is accepted, it would only be for readability.

I was fiddling around with make now to include the suggestion of Eric to
check the arguments for being sorted in make. However I do not 
seem to fully understand the syntax yet.
My approach would have been:

sorted_internal_cmds: git.c
	{ awk '/cmd_struct commands/,/};/ { if (match($2,/"/)) print $2 }' <git.c >builtin.actual && \
	sort <builtin.actual >builtin.expect && \
	cmp -s builtin.expect builtin.actual && \
	rm builtin.expect builtin.actual \
	}

all:: sorted_internal_cmds

But then there is 
$ make 
...
	}
/bin/sh: 5: Syntax error: end of file unexpected (expecting "}")

So I suspect the { within the shell code inside the awk parameter is messing up?

Thanks,
Stefan



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 899 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] builtins: search builtin commands via binary search.
  2013-07-27  8:49   ` Stefan Beller
@ 2013-07-27  8:50     ` Stefan Beller
  2013-07-27  9:13     ` Andreas Schwab
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Stefan Beller @ 2013-07-27  8:50 UTC (permalink / raw)
  To: git, jrnieder, sunshine; +Cc: Stefan Beller

There are currently 115 commands built into the git executable.
Before this commit, it was iterated over these commands in a linear
order, i.e. each command was checked.

As it turns out the commands are already sorted alphabetically, it is easy
to perform a binary search instead of linear searching.
This results in 7 lookups in the worst case.

Signed-off-by: Stefan Beller <stefanbeller@googlemail.com>
---
 git.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/git.c b/git.c
index 2025f77..6d4de2b 100644
--- a/git.c
+++ b/git.c
@@ -309,9 +309,14 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 	return 0;
 }
 
+static int compare_internal_command(const void *name, const void *cmd) {
+	return strcmp((const char*)name, ((const struct cmd_struct*)cmd)->cmd);
+}
+
 static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
+	/* commands must be sorted alphabetically for binary search */
 	static struct cmd_struct commands[] = {
 		{ "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
 		{ "annotate", cmd_annotate, RUN_SETUP },
@@ -447,12 +452,12 @@ static void handle_internal_command(int argc, const char **argv)
 		argv[0] = cmd = "help";
 	}
 
-	for (i = 0; i < ARRAY_SIZE(commands); i++) {
-		struct cmd_struct *p = commands+i;
-		if (strcmp(p->cmd, cmd))
-			continue;
+	struct cmd_struct *p = bsearch(cmd, commands,
+				ARRAY_SIZE(commands), sizeof(struct cmd_struct),
+				compare_internal_command);
+
+	if (p)
 		exit(run_builtin(p, argc, argv));
-	}
 }
 
 static void execv_dashed_external(const char **argv)
-- 
1.8.4.rc0.1.g8f6a3e5

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] builtins: search builtin commands via binary search.
  2013-07-27  8:49   ` Stefan Beller
  2013-07-27  8:50     ` Stefan Beller
@ 2013-07-27  9:13     ` Andreas Schwab
  2013-07-28  9:05     ` Eric Sunshine
  2013-07-29 16:18     ` Junio C Hamano
  3 siblings, 0 replies; 8+ messages in thread
From: Andreas Schwab @ 2013-07-27  9:13 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Eric Sunshine

Stefan Beller <stefanbeller@googlemail.com> writes:

> My approach would have been:
>
> sorted_internal_cmds: git.c
> 	{ awk '/cmd_struct commands/,/};/ { if (match($2,/"/)) print $2 }' <git.c >builtin.actual && \
> 	sort <builtin.actual >builtin.expect && \
> 	cmp -s builtin.expect builtin.actual && \
> 	rm builtin.expect builtin.actual \
> 	}

You need a semicolon before the closing }.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] builtins: search builtin commands via binary search.
  2013-07-27  8:49   ` Stefan Beller
  2013-07-27  8:50     ` Stefan Beller
  2013-07-27  9:13     ` Andreas Schwab
@ 2013-07-28  9:05     ` Eric Sunshine
  2013-07-29 16:18     ` Junio C Hamano
  3 siblings, 0 replies; 8+ messages in thread
From: Eric Sunshine @ 2013-07-28  9:05 UTC (permalink / raw)
  To: Stefan Beller, Jonathan Nieder; +Cc: Git List

On Sat, Jul 27, 2013 at 4:49 AM, Stefan Beller
<stefanbeller@googlemail.com> wrote:
> I was fiddling around with make now to include the suggestion of Eric to
> check the arguments for being sorted in make. However I do not
> seem to fully understand the syntax yet.
> My approach would have been:
>
> sorted_internal_cmds: git.c
>         { awk '/cmd_struct commands/,/};/ { if (match($2,/"/)) print $2 }' <git.c >builtin.actual && \
>         sort <builtin.actual >builtin.expect && \
>         cmp -s builtin.expect builtin.actual && \
>         rm builtin.expect builtin.actual \
>         }
>
> all:: sorted_internal_cmds
>
> But then there is
> $ make
> ...
>         }
> /bin/sh: 5: Syntax error: end of file unexpected (expecting "}")
>
> So I suspect the { within the shell code inside the awk parameter is messing up?

As Andreas noted, you need a semicolon before the closing shell '}',
however it's not clear why you added the braces since they are not
needed. The following works (after fixing whitespace corruption):

-->8--
diff --git a/Makefile b/Makefile
index ef442eb..82e727c 100644
--- a/Makefile
+++ b/Makefile
@@ -1681,6 +1681,15 @@ shell_compatibility_test:
please_set_SHELL_PATH_to_a_more_modern_shell
 strip: $(PROGRAMS) git$X
  $(STRIP) $(STRIP_OPTS) $^

+.PHONY: sorted_internal_cmds
+all:: sorted_internal_cmds
+
+sorted_internal_cmds: git.c
+ @awk '/cmd_struct commands/,/};/ { if (match($$2,/"/)) print $$2 }'
<git.c >builtin.actual && \
+ sort <builtin.actual >builtin.expect && \
+ cmp -s builtin.expect builtin.actual && \
+ rm builtin.expect builtin.actual
+
 ### Target-specific flags and dependencies

 # The generic compilation pattern rule and automatically
-->8--

Note the $$2 in awk expression. Also the .PHONY is a good idea.

However, perhaps, it is better for this check to be part of the test
suite. It might look like this (after fixing whitespace corruption):

-->8--
diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh
index 10be52b..e5ba504 100755
--- a/t/t0000-basic.sh
+++ b/t/t0000-basic.sh
@@ -387,6 +387,14 @@ test_expect_success 'tests clean up even on failures' "
 ################################################################
 # Basics of the basics

+# git.c commands[] of builtins is properly sorted
+test_expect_success 'builtin commands[] sorted' '
+ awk "/cmd_struct commands/,/};/ { if (match(\$2,/\"/)) print \$2 }" \
+ <../../git.c >actual && \
+ sort <actual >expect && \
+ test_cmp expect actual
+'
+
 # updating a new file without --add should fail.
 test_expect_success 'git update-index without --add should fail adding' '
  test_must_fail git update-index should-be-empty
-->8--

I'm not sure that referencing ../../git.c from within the test suite
is kosher. Perhaps Jonathan can say something about that.

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] builtins: search builtin commands via binary search.
  2013-07-27  8:49   ` Stefan Beller
                       ` (2 preceding siblings ...)
  2013-07-28  9:05     ` Eric Sunshine
@ 2013-07-29 16:18     ` Junio C Hamano
  3 siblings, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2013-07-29 16:18 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, git, Eric Sunshine

Stefan Beller <stefanbeller@googlemail.com> writes:

> However I could not find a speedup.
> So if the patch is accepted, it would only be for readability.

This adds a maintenance burden.  It is very easy for somebody to
mistakenly run "sort" on the region in her editor under non C
locale.

Perhaps with a change like the attached, if we were to do so.  Then
have a test that runs "git --check-builtins-sorted" will ensure we
won't ever screw it up.

FOR ILLUSTRATION PURPOSES ONLY: diff --git a/git.c b/git.c
index 2025f77..3155273 100644
--- a/git.c
+++ b/git.c
@@ -34,6 +34,150 @@ static void commit_pager_choice(void) {
 	}
 }
 
+struct cmd_struct {
+	const char *cmd;
+	int (*fn)(int, const char **, const char *);
+	int option;
+};
+
+#define RUN_SETUP		(1<<0)
+#define RUN_SETUP_GENTLY	(1<<1)
+#define USE_PAGER		(1<<2)
+/*
+ * require working tree to be present -- anything uses this needs
+ * RUN_SETUP for reading from the configuration file.
+ */
+#define NEED_WORK_TREE		(1<<3)
+
+static struct cmd_struct builtin_commands[] = {
+	{ "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
+	{ "annotate", cmd_annotate, RUN_SETUP },
+	...
+	{ "whatchanged", cmd_whatchanged, RUN_SETUP },
+	{ "write-tree", cmd_write_tree, RUN_SETUP },
+};
+
+static void make_sure_the_builtin_array_is_sorted(void)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(builtin_commands) - 1; i++) {
+		struct cmd_struct *it = &builtin_commands[i];
+		if (strcmp(it[0].cmd, it[1].cmd) >= 0)
+			die("BUG: builtin command list not sorted %s > %s",
+			    it[0].cmd, it[1].cmd);
+	}
+}
+
 static int handle_options(const char ***argv, int *argc, int *envchanged)
 {
 	const char **orig_argv = *argv;
@@ -62,6 +206,9 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 				puts(git_exec_path());
 				exit(0);
 			}
+		} else if (!strcmp(cmd, "--check-builtins-sorted")) {
+			make_sure_the_builtin_array_is_sorted();
+			exit(0);
 		} else if (!strcmp(cmd, "--html-path")) {
 			puts(system_path(GIT_HTML_PATH));
 			exit(0);
@@ -241,21 +388,6 @@ static int handle_alias(int *argcp, const char ***argv)
 	return ret;
 }
 
-#define RUN_SETUP		(1<<0)
-#define RUN_SETUP_GENTLY	(1<<1)
-#define USE_PAGER		(1<<2)
-/*
- * require working tree to be present -- anything uses this needs
- * RUN_SETUP for reading from the configuration file.
- */
-#define NEED_WORK_TREE		(1<<3)
-
-struct cmd_struct {
-	const char *cmd;
-	int (*fn)(int, const char **, const char *);
-	int option;
-};
-
 static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 {
 	int status, help;
@@ -312,123 +444,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
-	static struct cmd_struct commands[] = {
-		{ "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
-		{ "annotate", cmd_annotate, RUN_SETUP },
-		...
-		{ "whatchanged", cmd_whatchanged, RUN_SETUP },
-		{ "write-tree", cmd_write_tree, RUN_SETUP },
-	};
 	int i;
 	static const char ext[] = STRIP_EXTENSION;
 
@@ -447,8 +462,8 @@ static void handle_internal_command(int argc, const char **argv)
 		argv[0] = cmd = "help";
 	}
 
-	for (i = 0; i < ARRAY_SIZE(commands); i++) {
-		struct cmd_struct *p = commands+i;
+	for (i = 0; i < ARRAY_SIZE(builtin_commands); i++) {
+		struct cmd_struct *p = builtin_commands+i;
 		if (strcmp(p->cmd, cmd))
 			continue;
 		exit(run_builtin(p, argc, argv));

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-07-29 16:18 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-26 20:50 [PATCH] builtins: search builtin commands via binary search Stefan Beller
2013-07-26 20:57 ` Jonathan Nieder
2013-07-27  8:49   ` Stefan Beller
2013-07-27  8:50     ` Stefan Beller
2013-07-27  9:13     ` Andreas Schwab
2013-07-28  9:05     ` Eric Sunshine
2013-07-29 16:18     ` Junio C Hamano
2013-07-27  0:26 ` Eric Sunshine

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).