From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org
Cc: David Turner <dturner@twitter.com>
Subject: [PATCH v10 1/1] refs.c: SSE2 optimizations for check_refname_component
Date: Tue, 17 Jun 2014 13:13:19 -0400 [thread overview]
Message-ID: <1403025199-21360-2-git-send-email-dturner@twitter.com> (raw)
In-Reply-To: <1403025199-21360-1-git-send-email-dturner@twitter.com>
Optimize check_refname_component using SSE2 on x86_64.
git rev-parse HEAD is a good test-case for this, since it does almost
nothing except parse refs. For one particular repo with about 60k
refs, almost all packed, the timings are:
Look up table: 29 ms
SSE2: 23 ms
This cuts about 20% off of the runtime.
Ondřej Bílka <neleai@seznam.cz> suggested an SSE2 approach to the
substring searches, which netted a speed boost over the SSE4.2 code I
had initially written.
Signed-off-by: David Turner <dturner@twitter.com>
---
git-compat-util.h | 11 +++
refs.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++++-----
t/t5511-refspec.sh | 20 +++++
3 files changed, 236 insertions(+), 18 deletions(-)
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..0aec58e 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -668,6 +668,17 @@ void git_qsort(void *base, size_t nmemb, size_t size,
#endif
#endif
+#if defined(__GNUC__) && defined(__x86_64__)
+#include <emmintrin.h>
+/*
+ * This is the system memory page size; it's used so that we can read
+ * outside the bounds of an allocation without segfaulting.
+ */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+#endif
+
#ifdef UNRELIABLE_FSTAT
#define fstat_is_reliable() 0
#else
diff --git a/refs.c b/refs.c
index 46139d2..5c2f68b 100644
--- a/refs.c
+++ b/refs.c
@@ -8,20 +8,22 @@
/*
* How to handle various characters in refnames:
* 0: An acceptable character for refs
- * 1: End-of-component
- * 2: ., look for a preceding . to reject .. in refs
- * 3: {, look for a preceding @ to reject @{ in refs
- * 4: A bad character: ASCII control characters, "~", "^", ":" or SP
+ * 1: \0: End-of-component and string
+ * 2: /: End-of-component
+ * 3: ., look for a preceding . to reject .. in refs
+ * 4: {, look for a preceding @ to reject @{ in refs
+ * 5: *, usually a bad character except, once as a wildcard
+ * 6: A bad character except * (see check_refname_component below)
*/
static unsigned char refname_disposition[256] = {
- 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
+ 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 6, 6
};
/*
@@ -33,8 +35,9 @@ static unsigned char refname_disposition[256] = {
* - any path component of it begins with ".", or
* - it has double dots "..", or
* - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
- * - it ends with a "/".
- * - it ends with ".lock"
+ * - it has pattern-matching notation "*", "?", "[", anywhere, or
+ * - it ends with a "/", or
+ * - it ends with ".lock", or
* - it contains a "\" (backslash)
*/
static int check_refname_component(const char *refname, int flags)
@@ -46,17 +49,19 @@ static int check_refname_component(const char *refname, int flags)
int ch = *cp & 255;
unsigned char disp = refname_disposition[ch];
switch(disp) {
- case 1:
- goto out;
+ case 1: /* fall-through */
case 2:
+ goto out;
+ case 3:
if (last == '.')
return -1; /* Refname contains "..". */
break;
- case 3:
+ case 4:
if (last == '@')
return -1; /* Refname contains "@{". */
break;
- case 4:
+ case 5: /* fall-through */
+ case 6:
return -1;
}
last = ch;
@@ -79,14 +84,13 @@ out:
return cp - refname;
}
-int check_refname_format(const char *refname, int flags)
+static int check_refname_format_bytewise(const char *refname, int flags)
{
int component_len, component_count = 0;
if (!strcmp(refname, "@"))
/* Refname is a single character '@'. */
return -1;
-
while (1) {
/* We are at the start of a path component. */
component_len = check_refname_component(refname, flags);
@@ -115,6 +119,189 @@ int check_refname_format(const char *refname, int flags)
return 0;
}
+#if defined(__GNUC__) && defined(__x86_64__)
+#define SSE_VECTOR_BYTES 16
+
+/* Vectorized version of check_refname_format. */
+int check_refname_format(const char *refname, int flags)
+{
+ const char *cp = refname;
+
+ const __m128i dot = _mm_set1_epi8('.');
+ const __m128i at = _mm_set1_epi8('@');
+ const __m128i curly = _mm_set1_epi8('{');
+ const __m128i slash = _mm_set1_epi8('/');
+ const __m128i zero = _mm_set1_epi8('\000');
+ const __m128i el = _mm_set1_epi8('l');
+
+ /* below '*', all characters are forbidden or rare */
+ const __m128i star_ub = _mm_set1_epi8('*' + 1);
+
+ const __m128i colon = _mm_set1_epi8(':');
+ const __m128i question = _mm_set1_epi8('?');
+
+ /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
+ const __m128i bracket_lb = _mm_set1_epi8('[' - 1);
+ const __m128i caret_ub = _mm_set1_epi8('^' + 1);
+
+ /* '~' and above are forbidden */
+ const __m128i tilde_lb = _mm_set1_epi8('~' - 1);
+
+ if (refname[0] == 0 || refname[0] == '/') {
+ /* entirely empty ref or initial ref component */
+ return -1;
+ }
+
+ /*
+ * Initial ref component of '.'; below we look for /. so we'll
+ * miss this.
+ */
+ if (refname[0] == '.') {
+ if (refname[1] == '/' || refname[1] == '\0')
+ return -1;
+ if (!(flags & REFNAME_DOT_COMPONENT))
+ return -1;
+ }
+ while(1) {
+ __m128i tmp, tmp1, result;
+ uint64_t mask;
+
+ if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1)
+ /*
+ * End-of-page; fall back to slow method for
+ * this entire ref.
+ */
+ return check_refname_format_bytewise(refname, flags);
+
+ tmp = _mm_loadu_si128((__m128i *)cp);
+ tmp1 = _mm_loadu_si128((__m128i *)(cp + 1));
+
+ /*
+ * This range (note the lt) contains some
+ * permissible-but-rare characters (including all
+ * characters >= 128), which we handle later. It also
+ * includes \000.
+ */
+ result = _mm_cmplt_epi8(tmp, star_ub);
+
+ result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question));
+ result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon));
+
+ /* This range contains the permissible ] as bycatch */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpgt_epi8(tmp, bracket_lb),
+ _mm_cmplt_epi8(tmp, caret_ub)));
+
+ result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb));
+
+ /* .. */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, dot),
+ _mm_cmpeq_epi8(tmp1, dot)));
+ /* @{ */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, at),
+ _mm_cmpeq_epi8(tmp1, curly)));
+ /* // */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, slash)));
+ /* trailing / */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, zero)));
+ /* .l, beginning of .lock */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, dot),
+ _mm_cmpeq_epi8(tmp1, el)));
+ /*
+ * Even though /. is not necessarily an error, we flag
+ * it anyway. If we find it, we'll check if it's valid
+ * and if so we'll advance just past it.
+ */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, dot)));
+
+ mask = _mm_movemask_epi8(result);
+ if (mask) {
+ /*
+ * We've found either end-of-string, or some
+ * probably-bad character or substring.
+ */
+ int i = __builtin_ctz(mask);
+ switch (refname_disposition[cp[i] & 255]) {
+ case 0:
+ /*
+ * bycatch: a good character that's in
+ * one of the ranges of mostly-forbidden
+ * characters
+ */
+ cp += i + 1;
+ break;
+ case 1:
+ cp += i;
+ goto success;
+ case 2:
+ /*
+ * Even if leading dots are allowed, don't
+ * allow "." as a component (".." is
+ * prevented by case 3 below).
+ */
+ if (cp[i + 1] == '.') {
+ if (cp[i + 2] == '\0')
+ return -1;
+ if (flags & REFNAME_DOT_COMPONENT) {
+ /* skip to just after the /. */
+ cp += i + 2;
+ break;
+ }
+ return -1;
+ } else if (cp[i + 1] == '/' || cp[i + 1] == '\0')
+ return -1;
+ break;
+ case 3:
+ if (cp[i + 1] == '.' || cp[i + 1] == '\0')
+ return -1;
+ /* .lock as end-of-component or end-of-string */
+ if ((!strncmp(cp + i, ".lock", 5))
+ && (cp[i + 5] == '/' || cp[i + 5] == 0))
+ return -1;
+ cp += 1;
+ break;
+ case 4:
+ if (cp[i + 1] == '{')
+ return -1;
+ break;
+ case 5:
+ if (((cp == refname + i) || cp[i - 1] == '/')
+ && (cp[i + 1] == '/' || cp[i + 1] == 0))
+ if (flags & REFNAME_REFSPEC_PATTERN) {
+ flags &= ~REFNAME_REFSPEC_PATTERN;
+ /* restart after the * */
+ cp += i + 1;
+ continue;
+ }
+ /* fall-through */
+ case 6:
+ return -1;
+ }
+ } else
+ cp += SSE_VECTOR_BYTES;
+ }
+success:
+ return 0;
+}
+
+#else
+
+int check_refname_format (const char *refname, int flags)
+{
+ return check_refname_format_bytewise(refname, flags);
+}
+
+#endif
+
struct ref_entry;
/*
diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index de6db86..51d23b1 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -29,6 +29,10 @@ test_refspec push '+:'
test_refspec fetch ''
test_refspec fetch ':'
test_refspec fetch '::' invalid
+test_refspec fetch '.' invalid
+test_refspec fetch './frotz' invalid
+test_refspec fetch 'refs/.' invalid
+test_refspec push '*//:refs/remotes/frotz/*' invalid
test_refspec push 'refs/heads/*:refs/remotes/frotz/*'
test_refspec push 'refs/heads/*:refs/remotes/frotz' invalid
@@ -71,6 +75,9 @@ test_refspec fetch ':refs/remotes/frotz/HEAD-to-me'
test_refspec push ':refs/remotes/frotz/delete me' invalid
test_refspec fetch ':refs/remotes/frotz/HEAD to me' invalid
+test_refspec fetch ':refs/remotes/frotz/something.lock' invalid
+test_refspec fetch ':refs/remotes/frotz/a.lock/b' invalid
+
test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*-blah' invalid
test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*-blah' invalid
@@ -88,4 +95,17 @@ test_refspec fetch "refs/heads/${good}"
bad=$(printf '\011tab')
test_refspec fetch "refs/heads/${bad}" invalid
+test_refspec fetch 'refs/heads/a-very-long-refname'
+test_refspec fetch 'refs/heads/.a-very-long-refname' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234..' invalid
+test_refspec fetch 'refs/heads/abcdefgh012345..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123.a'
+test_refspec fetch 'refs/heads/abcdefgh01234.a'
+test_refspec fetch 'refs/heads/abcdefgh012345.a'
+test_refspec fetch 'refs/heads/abcdefgh0123456.a'
+test_refspec fetch 'refs/heads/abcdefgh01234567.a'
+
test_done
--
2.0.0.rc1.24.g0588c94.dirty
next prev parent reply other threads:[~2014-06-17 17:13 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-17 17:13 (unknown), David Turner
2014-06-17 17:13 ` David Turner [this message]
2014-06-17 18:03 ` [PATCH v10 1/1] refs.c: SSE2 optimizations for check_refname_component Junio C Hamano
2014-06-17 18:07 ` David Turner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1403025199-21360-2-git-send-email-dturner@twitter.com \
--to=dturner@twopensource.com \
--cc=dturner@twitter.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).