git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] userdiff: add build-in pattern for shell
@ 2017-03-29 16:53 Ivan Tham
  2017-03-29 17:39 ` Junio C Hamano
  2017-03-30 18:08 ` [PATCH v2] " Ivan Tham
  0 siblings, 2 replies; 6+ messages in thread
From: Ivan Tham @ 2017-03-29 16:53 UTC (permalink / raw)
  To: git

Shells are widely used but come with lots of different patterns. The
built-in pattern aims for POSIX-compatible shells with some additions:

- Notably ${g//re/s} and ${g#cut}
- "function" from bash

Signed-off-by: Ivan Tham <pickfire@riseup.net>
---
 Documentation/gitattributes.txt |  2 ++
 t/t4034-diff-words.sh           |  1 +
 t/t4034/sh/expect               | 14 ++++++++++++++
 t/t4034/sh/post                 |  7 +++++++
 t/t4034/sh/pre                  |  7 +++++++
 userdiff.c                      |  5 +++++
 6 files changed, 36 insertions(+)
 create mode 100644 t/t4034/sh/expect
 create mode 100644 t/t4034/sh/post
 create mode 100644 t/t4034/sh/pre

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index a53d093ca..1bad72df2 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -706,6 +706,8 @@ patterns are available:
 
 - `ruby` suitable for source code in the Ruby language.
 
+- `sh` suitable for source code in POSIX-compatible shells.
+
 - `tex` suitable for source code for LaTeX documents.
 
 
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 912df9122..2eb662f89 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -313,6 +313,7 @@ test_language_driver perl
 test_language_driver php
 test_language_driver python
 test_language_driver ruby
+test_language_driver sh
 test_language_driver tex
 
 test_expect_success 'word-diff with diff.sbe' '
diff --git a/t/t4034/sh/expect b/t/t4034/sh/expect
new file mode 100644
index 000000000..e7b0a9ae3
--- /dev/null
+++ b/t/t4034/sh/expect
@@ -0,0 +1,14 @@
+<BOLD>diff --git a/pre b/post<RESET>
+<BOLD>index 7bb0d15..df3845b 100644<RESET>
+<BOLD>--- a/pre<RESET>
+<BOLD>+++ b/post<RESET>
+<CYAN>@@ -1,7 +1,7 @@<RESET>
+echo <GREEN>"<RESET>Hello world<RED>!<RESET>
+<RED>bomb<RESET><GREEN>?"<RESET>
+<GREEN>fork<RESET>(){ <RED>bomb<RESET><GREEN>fork<RESET>|<RED>bomb<RESET><GREEN>fork<RESET>& }
+<RED>; bomb<RESET>
+<RED>a<RESET><GREEN>x<RESET>=<RED>1 a<RESET><GREEN>2 x<RESET>=$((<RED>a<RESET><GREEN>x<RESET>+<RED>1<RESET><GREEN>2<RESET>)) <RED>a<RESET><GREEN>x<RESET>=$((<RED>a-1<RESET><GREEN>x-2<RESET>)) <RED>a<RESET><GREEN>x<RESET>=$((<RED>a<RESET><GREEN>x<RESET>*<RED>1<RESET><GREEN>2<RESET>)) <RED>a<RESET><GREEN>x<RESET>=$((<RED>a<RESET><GREEN>x<RESET>/<RED>1<RESET><GREEN>2<RESET>))
+<RED>a<RESET><GREEN>x<RESET>=$(<RED>a<RESET><GREEN>x<RESET>) <RED>a<RESET><GREEN>x<RESET>=`<RED>a<RESET><GREEN>x<RESET>` <RED>a<RESET><GREEN>x<RESET>=${<RED>a<RESET><GREEN>x<RESET>#<RED>a<RESET><GREEN>x<RESET>*} <RED>a<RESET><GREEN>x<RESET>=${<RED>a<RESET><GREEN>x<RESET>%<RED>a<RESET><GREEN>x<RESET>*} <RED>a<RESET><GREEN>x<RESET>=${<RED>a<RESET><GREEN>x<RESET>//<RED>a<RESET><GREEN>x<RESET>/<RED>a<RESET><GREEN>x<RESET>}
+command <RED>-h -v<RESET><GREEN>--help=all -q<RESET> | xargs -- echo <GREEN>2<RESET>><RED>&1 &<RESET><GREEN>/dev/null<RESET>
+[ <RED>$a -eq $b<RESET><GREEN>$x -ne $y<RESET> ]&&echo <RED>aa<RESET><GREEN>xx<RESET>||echo <RED>bb<RESET><GREEN>yy<RESET>
+[ "<RED>$a<RESET><GREEN>$x<RESET>"!=<RED>1<RESET><GREEN>2<RESET> ] && echo <RED>a<RESET><GREEN>x<RESET> || echo <RED>b<RESET><GREEN>y<RESET>
diff --git a/t/t4034/sh/post b/t/t4034/sh/post
new file mode 100644
index 000000000..df3845b4f
--- /dev/null
+++ b/t/t4034/sh/post
@@ -0,0 +1,7 @@
+echo "Hello world?"
+fork(){ fork|fork& }
+x=2 x=$((x+2)) x=$((x-2)) x=$((x*2)) x=$((x/2))
+x=$(x) x=`x` x=${x#x*} x=${x%x*} x=${x//x/x}
+command --help=all -q | xargs -- echo 2>/dev/null
+[ $x -ne $y ]&&echo xx||echo yy
+[ "$x"!=2 ] && echo x || echo y
diff --git a/t/t4034/sh/pre b/t/t4034/sh/pre
new file mode 100644
index 000000000..7bb0d1562
--- /dev/null
+++ b/t/t4034/sh/pre
@@ -0,0 +1,7 @@
+echo Hello world!
+bomb(){ bomb|bomb& }; bomb
+a=1 a=$((a+1)) a=$((a-1)) a=$((a*1)) a=$((a/1))
+a=$(a) a=`a` a=${a#a*} a=${a%a*} a=${a//a/a}
+command -h -v | xargs -- echo >&1 &
+[ $a -eq $b ]&&echo aa||echo bb
+[ "$a"!=1 ] && echo a || echo b
diff --git a/userdiff.c b/userdiff.c
index 8b732e40b..8d5127fb6 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -148,6 +148,11 @@ PATTERNS("csharp",
 	 "[a-zA-Z_][a-zA-Z0-9_]*"
 	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
 	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"),
+PATTERNS("sh",
+	 "^[ \t]*(function )?[A-Za-z_][A-Za-z_0-9]*[ \t]*()[\t]*\\{?$",
+	 /* -- */
+	 "(\\$|--?)?([a-zA-Z_][a-zA-Z0-9._]*|[0-9]+|#)|--" /* command/param */
+	 "|\\$[({]|[)}]|[-+*/=!]=?|[\\]&%#/|]{1,2}|[<>]{1,3}|[ \t]#.*"),
 IPATTERN("css",
 	 "![:;][[:space:]]*$\n"
 	 "^[_a-z0-9].*$",
-- 
2.12.2.609.gf7d0c115f


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] userdiff: add build-in pattern for shell
  2017-03-29 16:53 [PATCH] userdiff: add build-in pattern for shell Ivan Tham
@ 2017-03-29 17:39 ` Junio C Hamano
  2017-03-30  2:28   ` Pickfire
  2017-03-30 18:08 ` [PATCH v2] " Ivan Tham
  1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2017-03-29 17:39 UTC (permalink / raw)
  To: Ivan Tham; +Cc: git

Ivan Tham <pickfire@riseup.net> writes:

> Shell are widely used but comes with lots of different patterns. The
> build-in pattern aim for POSIX-compatible shells with some additions:
>
> - Notably ${g//re/s} and ${g#cut}
> - "function" from bash
>
> Signed-off-by: Ivan Tham <pickfire@riseup.net>
> ---
>  Documentation/gitattributes.txt |  2 ++
>  t/t4034-diff-words.sh           |  1 +
>  t/t4034/sh/expect               | 14 ++++++++++++++
>  t/t4034/sh/post                 |  7 +++++++
>  t/t4034/sh/pre                  |  7 +++++++
>  userdiff.c                      |  5 +++++
>  6 files changed, 36 insertions(+)
>  create mode 100644 t/t4034/sh/expect
>  create mode 100644 t/t4034/sh/post
>  create mode 100644 t/t4034/sh/pre
>
> diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
> index a53d093ca..1bad72df2 100644
> --- a/Documentation/gitattributes.txt
> +++ b/Documentation/gitattributes.txt
> @@ -706,6 +706,8 @@ patterns are available:
>  
>  - `ruby` suitable for source code in the Ruby language.
>  
> +- `sh` suitable for source code in POSIX-compatible shells.

The new test you added seems to show that this is not limited to
POSIX shells but also understands bashisms like ${x//x/x}.  Perhaps
drop "POSIX-compatible" from here.

> diff --git a/userdiff.c b/userdiff.c
> index 8b732e40b..8d5127fb6 100644
> --- a/userdiff.c
> +++ b/userdiff.c
> @@ -148,6 +148,11 @@ PATTERNS("csharp",
>  	 "[a-zA-Z_][a-zA-Z0-9_]*"
>  	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
>  	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"),
> +PATTERNS("sh",
> +	 "^[ \t]*(function )?[A-Za-z_][A-Za-z_0-9]*[ \t]*()[\t]*\\{?$",

There is something funky going on around parentheses on this line.
The ones around "function " is meant to be syntactic metacharacters
to produce a group in the regexp so that you can apply '?'
(i.e. zero or one occurrence) to it.  But I think the second pair of
parentheses that appears later on the line, which enclose nothing,
are meant to be literal?  E.g. "hello (){\n\techo world;\n}\n"  They
would need some quoting, perhaps like

	...[ \t]*\\(\\)[\t]*....

> +	 /* -- */
> +	 "(\\$|--?)?([a-zA-Z_][a-zA-Z0-9._]*|[0-9]+|#)|--" /* command/param */

TBH, I have no idea what this line-noise is doing.

$foobar, $4, --foobar, foobar, 123 and -- can be seen easily out of
these patterns.  I am not sure what --# would be (perhaps you meant
to only catch $# and --# is included by accident, in which case it
is understandable).  It feels a bit strange to see that $# is
supported but not $?; --foo but not --foo=bar; foobar but not "foo
bar" inside a dq-pair.

> +	 "|\\$[({]|[)}]|[-+*/=!]=?|[\\]&%#/|]{1,2}|[<>]{1,3}|[ \t]#.*"),

And this one is even more dense.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Re: [PATCH] userdiff: add build-in pattern for shell
  2017-03-29 17:39 ` Junio C Hamano
@ 2017-03-30  2:28   ` Pickfire
  2017-03-30  6:25     ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Pickfire @ 2017-03-30  2:28 UTC (permalink / raw)
  To: gitster; +Cc: git

Junio C Hamano <gitster@pobox.com> wrote:

> Ivan Tham <pickfire@riseup.net> writes:
> 
> > Shell are widely used but comes with lots of different patterns. The
> > build-in pattern aim for POSIX-compatible shells with some additions:
> >
> > - Notably ${g//re/s} and ${g#cut}
> > - "function" from bash
> >
> > Signed-off-by: Ivan Tham <pickfire@riseup.net>
> > ---
> >  Documentation/gitattributes.txt |  2 ++
> >  t/t4034-diff-words.sh           |  1 +
> >  t/t4034/sh/expect               | 14 ++++++++++++++
> >  t/t4034/sh/post                 |  7 +++++++
> >  t/t4034/sh/pre                  |  7 +++++++
> >  userdiff.c                      |  5 +++++
> >  6 files changed, 36 insertions(+)
> >  create mode 100644 t/t4034/sh/expect
> >  create mode 100644 t/t4034/sh/post
> >  create mode 100644 t/t4034/sh/pre
> >
> > diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
> > index a53d093ca..1bad72df2 100644
> > --- a/Documentation/gitattributes.txt
> > +++ b/Documentation/gitattributes.txt
> > @@ -706,6 +706,8 @@ patterns are available:
> >  
> >  - `ruby` suitable for source code in the Ruby language.
> >  
> > +- `sh` suitable for source code in POSIX-compatible shells.
> 
> The new test you added seems to show that this is not limited to
> POSIX shells but also understands bashisms like ${x//x/x}.  Perhaps
> drop "POSIX-compatible" from here

Those shells are still POSIX-compatible, so I think it is accurate to say
that; otherwise, something like the fish shell would break, since it is
also a shell but its syntax is totally different.

> > diff --git a/userdiff.c b/userdiff.c
> > index 8b732e40b..8d5127fb6 100644
> > --- a/userdiff.c
> > +++ b/userdiff.c
> > @@ -148,6 +148,11 @@ PATTERNS("csharp",
> >  	 "[a-zA-Z_][a-zA-Z0-9_]*"
> >  	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
> >  	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"),
> > +PATTERNS("sh",
> > +	 "^[ \t]*(function )?[A-Za-z_][A-Za-z_0-9]*[ \t]*()[\t]*\\{?$",
> 
> There is something funky going on around parentheses on this line.
> The ones around "function " is meant to be syntactic metacharacters
> to produce a group in the regexp so that you can apply '?'
> (i.e. zero or one occurrence) to it.  But I think the second pair of
> parentheses that appears later on the line, which enclose nothing,
> are meant to be literal?  E.g. "hello (){\n\techo world;\n}\n"  They
> would need some quoting, perhaps like
> 
> 	...[ \t]*\\(\\)[\t]*....

Ah, I think I forgot to escape the literal ( and ). I will send in another
patch for that.

> > +	 /* -- */
> > +	 "(\\$|--?)?([a-zA-Z_][a-zA-Z0-9._]*|[0-9]+|#)|--" /* command/param */
> 
> TBH, I have no idea what this line-noise is doing.

That breaks words into "a", "$a" and "-a" as well as "$1" and "$#". I tried
supporting $? by adding +|#|\\?)--" but it doesn't seem to be working.

> $foobar, $4, --foobar, foobar, 123 and -- can be seen easily out of
> these patterns.  I am not sure what --# would be (perhaps you meant
> to only catch $# and --# is included by accident, in which case it
> is understandable).  It feels a bit strange to see that $# is
> supported but not $?; --foo but not --foo=bar; foobar but not "foo
> bar" inside a dq-pair.

Yes, getting --# will be very rare in shell. I think it is better to separate
the --foo=bar into --foo and bar. I don't get what you mean by the dq-pair.

> > +	 "|\\$[({]|[)}]|[-+*/=!]=?|[\\]&%#/|]{1,2}|[<>]{1,3}|[ \t]#.*"),
> 
> And this one is even more dense.

Yes, that takes care of the operators, special symbols and stuff.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] userdiff: add build-in pattern for shell
  2017-03-30  2:28   ` Pickfire
@ 2017-03-30  6:25     ` Junio C Hamano
  2017-03-30  7:20       ` Ivan Tham
  0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2017-03-30  6:25 UTC (permalink / raw)
  To: Pickfire; +Cc: git

Pickfire <pickfire@riseup.net> writes:

>> > +- `sh` suitable for source code in POSIX-compatible shells.
>> 
>> The new test you added seems to show that this is not limited to
>> POSIX shells but also understands bashisms like ${x//x/x}.  Perhaps
>> drop "POSIX-compatible" from here
>
> Those shells are still POSIX-compatible so I think it is true to put
> that or otherwise, something like fish shell will break since it is
> as well a shell but the syntax is totally different.

Scripts with bash-isms are not necessarily usable by POSIX
compatible shells (think "dash") and this highlighter recognises
bash specific enhancements (which by the way is a plus), so if you
absolutely want to say "POSIX something" in order to clarify that
csh and friends do not apply, say "POSIX-like".

>> 	...[ \t]*\\(\\)[\t]*....
>
> Ah, I think I forgot to escape the quoting of ( and ). I will send in another
> patch for that.

OK.  Note that we usually avoid applying a patch whose brokenness
was noticed during review (which then necessitates a follow up patch
"oops, the previous was botched; here is a fix-up").  The "another
patch" needs to be a v2, i.e. pretending as if the version of the
patch we are discussing never happened, not an incremental on top of
the patch we are discussing.

>> > +	 /* -- */
>> > +	 "(\\$|--?)?([a-zA-Z_][a-zA-Z0-9._]*|[0-9]+|#)|--" /* command/param */
>> 
>> TBH, I have no idea what this line-noise is doing.
>
> That breaks word into "a", "$a" and "-a" as well as "$1" and "$#". I tried
> supporting $? by adding +|#|\\?)--" but it doesn't seemed like it is working.

This ...

>> $foobar, $4, --foobar, foobar, 123 and -- can be seen easily out of
>> these patterns.  I am not sure what --# would be (perhaps you meant
>> to only catch $# and --# is included by accident, in which case it
>> is understandable).  It feels a bit strange to see that $# is
>> supported but not $?; --foo but not --foo=bar; foobar but not "foo
>> bar" inside a dq-pair.
>
> Yes, getting --# will be very rare in shell. I think it is better to seperate
> the --foo=bar into --foo and bar. I don't get what you man by the dq-pair.

These design decisions (e.g. what you decided are the tokens to be
taken as a word---taking "--foo" and "bar" as separate things when
given "--foo=bar" is a good example but with this regexp you are
making many other design decisions) need to be explained in the log
message.  A string inside a double-quote pair is taken as a single
parameter to the shell, e.g.

    cmd "arg that is quoted inside double-quote pair" $#

It is unclear what your regexp is doing to such an argument.

>> > +	 "|\\$[({]|[)}]|[-+*/=!]=?|[\\]&%#/|]{1,2}|[<>]{1,3}|[ \t]#.*"),
>> 
>> And this one is even more dense.

FYI, this is also pointing out the need to explain what kind of
things you wanted to recognise as words; explaining in a reply
message is a good first step, as the questioner may find the
explanation in your response still inadequate, in which case you
have a chance to refine it, but the ultimate goal is to put the
polished explanation that would help people who later want to
understand what you added to the codebase by describing what you
wanted to do with the change in either in-code comment or commit log
message when you send an updated patch.

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] userdiff: add build-in pattern for shell
  2017-03-30  6:25     ` Junio C Hamano
@ 2017-03-30  7:20       ` Ivan Tham
  0 siblings, 0 replies; 6+ messages in thread
From: Ivan Tham @ 2017-03-30  7:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Wed, Mar 29, 2017 at 11:25:15PM -0700, Junio C Hamano wrote:
>Pickfire <pickfire@riseup.net> writes:
>
>>> > +- `sh` suitable for source code in POSIX-compatible shells.
>>>
>>> The new test you added seems to show that this is not limited to
>>> POSIX shells but also understands bashisms like ${x//x/x}.  Perhaps
>>> drop "POSIX-compatible" from here
>>
>> Those shells are still POSIX-compatible so I think it is true to put
>> that or otherwise, something like fish shell will break since it is
>> as well a shell but the syntax is totally different.

Okay, I will change it from POSIX-compatible to POSIX-like.

>Scripts with bash-isms are not necessarily usable by POSIX compatible
>shells (think "dash") and this highlighter recognises bash specific
>enhancements (which by the way is a plus), so if you absolutely want to
>say "POSIX something" in order to clarify that csh and friends do not
>apply, say "POSIX-like".
>
>>> 	...[ \t]*\\(\\)[\t]*....
>>
>> Ah, I think I forgot to escape the quoting of ( and ). I will send in
>> another patch for that.
>
>OK.  Note that we usually avoid applying a patch whose brokenness was
>noticed while review (which then necessitates a follow up patch "oops,
>the previous was botched; here is a fix-up").  The "another patch"
>needs to be a v2, i.e. pretending as if the version of the patch we are
>discussing never happened, not an incremental on top of the patch we
>are discussing..

Yes, I will put in a V2 which comes with "[PATCH v2]" in reply to this
thread.

>>> > +	 /* -- */ +
>>> > "(\\$|--?)?([a-zA-Z_][a-zA-Z0-9._]*|[0-9]+|#)|--" /* command/param
>>> > */
>>>
>>> TBH, I have no idea what this line-noise is doing.
>>
>> That breaks word into "a", "$a" and "-a" as well as "$1" and "$#". I
>> tried supporting $? by adding +|#|\\?)--" but it doesn't seemed like
>> it is working.
>
>This ...

???

>>> $foobar, $4, --foobar, foobar, 123 and -- can be seen easily out of
>>> these patterns.  I am not sure what --# would be (perhaps you meant
>>> to only catch $# and --# is included by accident, in which case it
>>> is understandable).  It feels a bit strange to see that $# is
>>> supported but not $?; --foo but not --foo=bar; foobar but not "foo
>>> bar" inside a dq-pair.
>>
>> Yes, getting --# will be very rare in shell. I think it is better to
>> seperate the --foo=bar into --foo and bar. I don't get what you man
>> by the dq-pair.
>
>These design decisions (e.g. what you decided are the tokens to be
>taken as a word---taking "--foo" and "bar" as separate things when
>given "--foo=bar" is a good example but with this regexp you are making
>many other design decisions) need to be explained in the log message.
>A string inside a double-quote pair is taken as a single parameter to
>the shell, e.g.
>
>    cmd "arg that is quoted inside double-quote pair" $#
>
>It is unclear what your regexp is doing to such an argument.

Okay, I will put that into the log. I still don't quite know what you
want to achieve with:

    cmd "arg that is quoted inside double-quote pair" $#

If I am correct, you are probably talking about:

    "cmd "arg that is quoted inside double-quote pair" $#"

That will be handled together with the other words.

>>> > +	 "|\\$[({]|[)}]|[-+*/=!]=?|[\\]&%#/|]{1,2}|[<>]{1,3}|[ \t]#.*"),
>>>
>>> And this one is even more dense.
>
>FYI, this is also pointing out the need to explain what kind of things
>you wanted to recognise as words; explaining in a reply message is a
>good first step, as the questioner may find the explanation in your
>response still inadequate, in which case you have a chance to refine
>it, but the ultimate goal is to put the polished explanation that would
>help people who later want to understand what you added to the codebase
>by describing what you wanted to do with the change in either in-code
>comment or commit log message when you send an updated patch.

Ah, I can point it out here:

\\$[({]           start of $( or ${
[)}]              ends ^
[-+*/=!]=?        operators
[\\]&%#/|]{1,2}   pipes and stuff like ${a##a} or &&
[<>]{1,3}         io redirections
[ \t]#.*          comments

I hope that makes it clear and concise.

-- 
Do what you like, like what you do.  -- Pickfire

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2] userdiff: add build-in pattern for shell
  2017-03-29 16:53 [PATCH] userdiff: add build-in pattern for shell Ivan Tham
  2017-03-29 17:39 ` Junio C Hamano
@ 2017-03-30 18:08 ` Ivan Tham
  1 sibling, 0 replies; 6+ messages in thread
From: Ivan Tham @ 2017-03-30 18:08 UTC (permalink / raw)
  To: git

Shells are widely used but come with lots of different patterns. The
built-in pattern aims for POSIX-like shells with some additions:

- Notably ${g//re/s} and ${g#cut}
- Bashisms such as "function"

Signed-off-by: Ivan Tham <pickfire@riseup.net>
---
 Documentation/gitattributes.txt |  2 ++
 t/t4034-diff-words.sh           |  1 +
 t/t4034/sh/expect               | 14 ++++++++++++++
 t/t4034/sh/post                 |  7 +++++++
 t/t4034/sh/pre                  |  7 +++++++
 userdiff.c                      |  5 +++++
 6 files changed, 36 insertions(+)
 create mode 100644 t/t4034/sh/expect
 create mode 100644 t/t4034/sh/post
 create mode 100644 t/t4034/sh/pre

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index a53d093ca..f8440119d 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -706,6 +706,8 @@ patterns are available:
 
 - `ruby` suitable for source code in the Ruby language.
 
+- `sh` suitable for source code in POSIX-like shells.
+
 - `tex` suitable for source code for LaTeX documents.
 
 
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 912df9122..2eb662f89 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -313,6 +313,7 @@ test_language_driver perl
 test_language_driver php
 test_language_driver python
 test_language_driver ruby
+test_language_driver sh
 test_language_driver tex
 
 test_expect_success 'word-diff with diff.sbe' '
diff --git a/t/t4034/sh/expect b/t/t4034/sh/expect
new file mode 100644
index 000000000..e7b0a9ae3
--- /dev/null
+++ b/t/t4034/sh/expect
@@ -0,0 +1,14 @@
+<BOLD>diff --git a/pre b/post<RESET>
+<BOLD>index 7bb0d15..df3845b 100644<RESET>
+<BOLD>--- a/pre<RESET>
+<BOLD>+++ b/post<RESET>
+<CYAN>@@ -1,7 +1,7 @@<RESET>
+echo <GREEN>"<RESET>Hello world<RED>!<RESET>
+<RED>bomb<RESET><GREEN>?"<RESET>
+<GREEN>fork<RESET>(){ <RED>bomb<RESET><GREEN>fork<RESET>|<RED>bomb<RESET><GREEN>fork<RESET>& }
+<RED>; bomb<RESET>
+<RED>a<RESET><GREEN>x<RESET>=<RED>1 a<RESET><GREEN>2 x<RESET>=$((<RED>a<RESET><GREEN>x<RESET>+<RED>1<RESET><GREEN>2<RESET>)) <RED>a<RESET><GREEN>x<RESET>=$((<RED>a-1<RESET><GREEN>x-2<RESET>)) <RED>a<RESET><GREEN>x<RESET>=$((<RED>a<RESET><GREEN>x<RESET>*<RED>1<RESET><GREEN>2<RESET>)) <RED>a<RESET><GREEN>x<RESET>=$((<RED>a<RESET><GREEN>x<RESET>/<RED>1<RESET><GREEN>2<RESET>))
+<RED>a<RESET><GREEN>x<RESET>=$(<RED>a<RESET><GREEN>x<RESET>) <RED>a<RESET><GREEN>x<RESET>=`<RED>a<RESET><GREEN>x<RESET>` <RED>a<RESET><GREEN>x<RESET>=${<RED>a<RESET><GREEN>x<RESET>#<RED>a<RESET><GREEN>x<RESET>*} <RED>a<RESET><GREEN>x<RESET>=${<RED>a<RESET><GREEN>x<RESET>%<RED>a<RESET><GREEN>x<RESET>*} <RED>a<RESET><GREEN>x<RESET>=${<RED>a<RESET><GREEN>x<RESET>//<RED>a<RESET><GREEN>x<RESET>/<RED>a<RESET><GREEN>x<RESET>}
+command <RED>-h -v<RESET><GREEN>--help=all -q<RESET> | xargs -- echo <GREEN>2<RESET>><RED>&1 &<RESET><GREEN>/dev/null<RESET>
+[ <RED>$a -eq $b<RESET><GREEN>$x -ne $y<RESET> ]&&echo <RED>aa<RESET><GREEN>xx<RESET>||echo <RED>bb<RESET><GREEN>yy<RESET>
+[ "<RED>$a<RESET><GREEN>$x<RESET>"!=<RED>1<RESET><GREEN>2<RESET> ] && echo <RED>a<RESET><GREEN>x<RESET> || echo <RED>b<RESET><GREEN>y<RESET>
diff --git a/t/t4034/sh/post b/t/t4034/sh/post
new file mode 100644
index 000000000..df3845b4f
--- /dev/null
+++ b/t/t4034/sh/post
@@ -0,0 +1,7 @@
+echo "Hello world?"
+fork(){ fork|fork& }
+x=2 x=$((x+2)) x=$((x-2)) x=$((x*2)) x=$((x/2))
+x=$(x) x=`x` x=${x#x*} x=${x%x*} x=${x//x/x}
+command --help=all -q | xargs -- echo 2>/dev/null
+[ $x -ne $y ]&&echo xx||echo yy
+[ "$x"!=2 ] && echo x || echo y
diff --git a/t/t4034/sh/pre b/t/t4034/sh/pre
new file mode 100644
index 000000000..7bb0d1562
--- /dev/null
+++ b/t/t4034/sh/pre
@@ -0,0 +1,7 @@
+echo Hello world!
+bomb(){ bomb|bomb& }; bomb
+a=1 a=$((a+1)) a=$((a-1)) a=$((a*1)) a=$((a/1))
+a=$(a) a=`a` a=${a#a*} a=${a%a*} a=${a//a/a}
+command -h -v | xargs -- echo >&1 &
+[ $a -eq $b ]&&echo aa||echo bb
+[ "$a"!=1 ] && echo a || echo b
diff --git a/userdiff.c b/userdiff.c
index 8b732e40b..72ffd85f1 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -148,6 +148,11 @@ PATTERNS("csharp",
 	 "[a-zA-Z_][a-zA-Z0-9_]*"
 	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
 	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"),
+PATTERNS("sh",
+	 "^[ \t]*(function )?[A-Za-z_][A-Za-z_0-9]*[ \t]*\\(\\)[\t]*\\{?$",
+	 /* -- */
+	 "(\\$|--?)?([a-zA-Z_][a-zA-Z0-9._]*|[0-9]+|#|\\?)|--" /* command/param */
+	 "|\\$[({]|[)}]|[-+*/=!]=?|[\\]&%#/|]{1,2}|[<>]{1,3}|[ \t]#.*"),
 IPATTERN("css",
 	 "![:;][[:space:]]*$\n"
 	 "^[_a-z0-9].*$",
-- 
2.12.2.609.gf7d0c115f


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-03-30 18:08 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-29 16:53 [PATCH] userdiff: add build-in pattern for shell Ivan Tham
2017-03-29 17:39 ` Junio C Hamano
2017-03-30  2:28   ` Pickfire
2017-03-30  6:25     ` Junio C Hamano
2017-03-30  7:20       ` Ivan Tham
2017-03-30 18:08 ` [PATCH v2] " Ivan Tham

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).