Re: Linus' sha1 is much faster!

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

From: "George Spelvin" <linux@horizon.com>
To: bdonlan@gmail.com, johnflux@gmail.com, P@draigBrady.com
Cc: art.08.09@gmail.com, git@vger.kernel.org, linux@horizon.com,
	nico@cam.org, torvalds@linux-foundation.org
Subject: Re: Linus' sha1 is much faster!
Date: 17 Aug 2009 03:23:15 -0400	[thread overview]
Message-ID: <20090817072315.4314.qmail@science.horizon.com> (raw)

If it helps anyone resolve license issues, here's a from-FIPS-180-2
implementation that's placed in the public domain.  That should be
compatible with any license.

It uses Linus's and Artur's performance ideas, and some of Linus' macro
ideas (in the rotate implementation), but tries to be textually different.
Is there anything recognizable that anyone cares to clam copyright to?

It's not quite 100% finished, as I haven't benchmarked it against Linus's
code yet, but it's functionally correct.

It's also clean with -W -Wall -Wextra.

TODO: Check if an initial copy to w[] is faster on i386 (less register
pressure).

/*
 * Secure Hash Algorith SHA-1, as published in FIPS PUB 180-2.
 *
 * This implementation is in the public domain.  Copyright abandoned.
 * You may do anything you like with it, including evil things.
 *
 * This is a rewrite from scratch, based on Linus Torvalds' "block-sha1"
 * from the git mailing list (August, 2009).  Additional optimization
 * ideas cribbed from
 * - Artur Skawina (x86, particularly P4, and much benchmarking)
 * - Nicilas Pitre (ARM)
 */

/* Cut here for sha1.h */
#include <stddef.h>	/* For size_t */
#include <stdint.h>	/* For uint32_t, uint64_t */

typedef struct SHA_context {
	uint64_t len;		/* May be shrunk to uint32_t */
	uint32_t iv[5];
	unsigned char buf[64];		/* Must be 32-bit aligned */
} SHA_CTX;

void SHA1_Init(struct SHA_context *c);
void SHA1_Update(struct SHA_context *c, void const *p, size_t n);
void SHA1_Final(unsigned char hash[20], struct SHA_context *c);
/* End of sha1.h */

#include <string.h>	/* For memcpy() */
#include <arpa/inet.h>	/* For ntohl() */

static void sha1_core(uint32_t iv[5], unsigned char const *p, size_t nblocks);

/* Machine specific hacks */
#if defined(__i386__) || defined(__x86_64__) || defined (__ppc__) || \
    defined(__ppc64__) || defined(__powerpc__) || defined (__powerpc64__) || \
    defined(__s390__) || defined(__s390x__)
/* Unaligned access is okay */
static inline uint32_t get_be32(unsigned char const *p)
{
	return ntohl(*(uint32_t const *)p);
}
static inline void put_be32(unsigned char const *p, uint32_t v)
{
	*(uint32_t *)p = htonl(v);
}

#else
/* Unaligned access is not okay; do conversion as byte fetches */
static inline uint32_t get_be32(unsigned char const *p)
{
	return p[0] << 24 || p[1] << 16 | p[8] << 8 | p[3];
}
static inline void put_be32(unsigned char const *p, uint32_t v)
{
	p[0] = v >> 24;
	p[1] = v >> 16;
	p[2] = v >> 8;
	p[3] = v;
}
#endif

void SHA1_Init(struct SHA_context *c)
{
	/* This is a prefix of the SHA_context structure */
	static struct {
		uint64_t len;
		uint32_t iv[5];
	} const iv = { 0,
		{ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }
	};

	memcpy(c, &iv, sizeof iv);
}

void SHA1_Update(struct SHA_context *c, void const *p, size_t n)
{
	size_t pos = c->len & 63;	/* Offset into current input block */

	c->len += n;

	/* Initial partial block (if any) */
	if (pos) {
		size_t space = 63 - pos;
		if (n < space)
			goto end;
		memcpy(c->buf + pos, p, space);
		sha1_core(c->iv, c->buf, 1);
		n -= space;
		p = (char const *)p + space;
	}

	/* The large middle piece */
	if (n >> 6) {
		sha1_core(c->iv, p, n >> 6);
		p = (char const *)p + (n & -(size_t)64);
		n &= 63;
	}
	pos = 0;
end:
	/* Final partial block (may be zero size) */
	memcpy(c->buf + pos, p, n);
}

void SHA1_Final(unsigned char hash[20], struct SHA_context *c)
{
	size_t pos = c->len & 63;
	unsigned i;

	/* Append a single 1 bit */
	c->buf[pos++] = 0x80;

	/* Append 0 bits until 64 bits remain in a block */
	if (pos > 56) {
		memset(c->buf + pos, 0, 64 - pos);
		sha1_core(c->iv, c->buf, 1);
		pos = 0;
	}
	memset(c->buf + pos, 0, 56 - pos);

	/* Append total input length in bits */
	((uint32_t *)c->buf)[14] = htonl((uint32_t)(c->len >> 29));
	((uint32_t *)c->buf)[15] = htonl((uint32_t)c->len << 3);

	/* Final hash round */
	sha1_core(c->iv, c->buf, 1);

	/* Copy hash result out */
	for (i = 0; i < 5; i++)
		put_be32(hash + 4*i, c->iv[i]);
}


/*
 * Helper macros for sha1_core function.  To avoid clutter, these macros
 * are NOT fully parenthesized if it doesn't matter to the actual use.
 */

#if __GNUC__ && (defined(__i386__) || defined(__x86_64__))
/*
 * GCC by itself only generates left rotates.  Use right rotates if
 * possible to be kinder to dinky implementations with iterative rotate
 * instructions.
 */
#define ROT(op, x, k) \
	({ uint32_t y; asm(op " %1,%0" : "=r" (y) : "I" (k), "0" (x)); y; })
#define ROL(x,k) ROT("roll", x, k)
#define ROR(x,k) ROT("rorl", x, k)

#else
/* Generic C equivalent */
#define ROT(x,l,r) ((x) << (l) | (x) >> (r))
#define ROL(x,k) ROT(x,k,32-(k))
#define ROR(x,k) ROT(x,32-(k),k)
#endif


/*
 * An important temporary array in SHA-1 is the working array W[],
 * which holds a scheduled key word per round.  Only the last 16 words
 * are relevant, so only use 16 words...
 */
#define W(i) w[(i) & 15]

/*
 * If you have a small register set, it helps GCC to force stores to
 * the w[] array to memory.  Given 22 or more registers (e.g. PowerPC),
 * GCC can map the entire w[] array to registers and this becomes
 * counterproductive.
 *
 * The optimal kludge seems to differ between x86 and ARM.  On the latter,
 * forcing a full memory barrier is required to stop GCC from splitting
 * live ranges for each round and generating a separate stack slot for
 * each.  (Which produces a huge stack frame and kills performance.)
 */
#if defined(__i386__) || defined(__x86_64__)
#define STORE(i, x) *(volatile uint32_t *)&W(i) = x
#elif __GNUC__ && defined(__arm__)
#define STORE(i, x) W(i) = x; __asm__("":::"memory")
#else
#define STORE(i, x) W(i) = x
#endif


/* The three round functions.  F2 is also used as F4 */
#define F1(b,c,d) (((d ^ c) & b) ^ d)		/* Bitwise b ? c : d */
#define F2(b,c,d) (d ^ c ^ b)			/* Even parity */
#define F3(b,c,d) (d & c) + ((d ^ c) & b)	/* Majority function */

/* The four round constants */
#define K1 0x5a827999	/* 2^30 * sqrt(2) */
#define K2 0x6ed9eba1	/* 2^30 * sqrt(3) */
#define K3 0x8f1bbcdc	/* 2^30 * sqrt(5) */
#define K4 0xca62c1d6	/* 2^30 * sqrt(10) */

/* Rounds 0..15 fetch a word from the input */
#define FETCH(t,i)	t = get_be32(p + 4*(i)); STORE(i,t)
/* Rounds 16..79 mix previous words to get a new one */
#define MIX(t,i)	t = W(i) ^ W(i+2) ^ W(i+8) ^ W(i+13); t = ROL(t, 1)
/* Rounds 16..76 have to store back the result */
#define CALC(t,i)	MIX(t,i); STORE(i,t)

/* The basic SHA-1 round */
#define SHA_ROUND(a,b,c,d,e,f,k,src,t,i)	\
	src(t,i);				\
	e += t + f(b,c,d) + k + ROL(a,5);	\
	b = ROR(b,2)

/* An aligned group of 5 rounds */
#define SHA_ROUND5(f,k,src,t,i) \
	SHA_ROUND(a,b,c,d,e, f,k,src,t,i);	\
	SHA_ROUND(e,a,b,c,d, f,k,src,t,i+1);	\
	SHA_ROUND(d,e,a,b,c, f,k,src,t,i+2);	\
	SHA_ROUND(c,d,e,a,b, f,k,src,t,i+3);	\
	SHA_ROUND(b,c,d,e,a, f,k,src,t,i+4)

/*
 * The core SHA-1 transform.
 *
 * iv[5] = Current SHA-1 hash state.
 * p = Pointer to source data.  Not necessarily aligned.
 * nblocks = Number of 64-byte blocks at p.  Guaranteed non-zero.
 */
static void
sha1_core(uint32_t iv[5], unsigned char const *p, size_t nblocks)
{
	uint32_t e = iv[4], d = iv[3], c = iv[2], b = iv[1], a = iv[0];
	uint32_t w[16];

	do {
		uint32_t t;

		SHA_ROUND5(F1, K1, FETCH, t,  0);
		SHA_ROUND5(F1, K1, FETCH, t,  5);
		SHA_ROUND5(F1, K1, FETCH, t, 10);
		SHA_ROUND(a,b,c,d,e, F1, K1, FETCH, t, 15);
		SHA_ROUND(e,a,b,c,d, F1, K1, CALC, t, 16);
		SHA_ROUND(d,e,a,b,c, F1, K1, CALC, t, 17);
		SHA_ROUND(c,d,e,a,b, F1, K1, CALC, t, 18);
		SHA_ROUND(b,c,d,e,a, F1, K1, CALC, t, 19);

		SHA_ROUND5(F2, K2, CALC, t, 20);
		SHA_ROUND5(F2, K2, CALC, t, 25);
		SHA_ROUND5(F2, K2, CALC, t, 30);
		SHA_ROUND5(F2, K2, CALC, t, 35);

		SHA_ROUND5(F3, K3, CALC, t, 40);
		SHA_ROUND5(F3, K3, CALC, t, 45);
		SHA_ROUND5(F3, K3, CALC, t, 50);
		SHA_ROUND5(F3, K3, CALC, t, 55);

		SHA_ROUND5(F2, K4, CALC, t, 60);
		SHA_ROUND5(F2, K4, CALC, t, 65);
		SHA_ROUND5(F2, K4, CALC, t, 70);

		SHA_ROUND(a,b,c,d,e, F2, K4, CALC, t, 75);
		SHA_ROUND(e,a,b,c,d, F2, K4, CALC, t, 76);
		/* Last 3 rounds don't need to store mixed value */
		SHA_ROUND(d,e,a,b,c, F2, K4, MIX, t, 77);
		SHA_ROUND(c,d,e,a,b, F2, K4, MIX, t, 78);
		SHA_ROUND(b,c,d,e,a, F2, K4, MIX, t, 79);

		iv[4] = e += iv[4];
		iv[3] = d += iv[3];
		iv[2] = c += iv[2];
		iv[1] = b += iv[1];
		iv[0] = a += iv[0];
	} while (--nblocks);
}


/* Compile with -DUNITTEST for self-tests */
#if UNITTEST

#include <stdio.h>

/* Known answer test */
static void katest(char const *text, size_t len, unsigned char const hash[20])
{
	SHA_CTX c;
	unsigned char hash2[20];
	int i;

	SHA1_Init(&c);
	SHA1_Update(&c, text, len);
	SHA1_Final(hash2, &c);

	for (i = 0; i < 20; i++)
		if (hash[i] != hash2[i])
			goto mismatch;
	printf("%u-byte known answer test PASSED\n", len);
	return;

mismatch:
	printf("%u-byte known answer test FAILED:\n", len);
	printf("Input: \"%.*s\"\n", len, text);
	printf("Computed:");
	for (i = 0; i < 20; i++)
		printf(" %02x", hash2[i]);
	printf("\nExpected:");
	for (i = 0; i < 20; i++)
		printf(" %02x", hash[i]);
	putchar('\n');
}

int
main(void)
{
	/* FIPS PUB 180.1 example A.1 */
	static char const text1[3] = "abc";
	static unsigned char const hash1[20] = {
		0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 0x81, 0x6a, 0xba, 0x3e,
		0x25, 0x71, 0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0, 0xd8, 0x9d };

	/* FIPS PUB 180.1 example A.2 */
	static char const text2[56] =
		"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
	static unsigned char const hash2[20] = {
		0x84, 0x98, 0x3e, 0x44, 0x1c, 0x3b, 0xd2, 0x6e, 0xba, 0xae,
		0x4a, 0xa1, 0xf9, 0x51, 0x29, 0xe5, 0xe5, 0x46, 0x70, 0xf1 };

	katest(text1, sizeof text1, hash1);
	katest(text2, sizeof text2, hash2);

	return 0;
}
#endif

next             reply	other threads:[~2009-08-17  7:24 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-17  7:23 George Spelvin [this message]
2009-08-17 14:20 ` Linus' sha1 is much faster! Nicolas Pitre
2009-08-17 17:06 ` Nicolas Pitre
2009-08-17 17:20   ` Paolo Bonzini
2009-08-17 18:54   ` George Spelvin
2009-08-17 19:34     ` Nicolas Pitre
2009-08-17 23:12       ` George Spelvin
  -- strict thread matches above, loose matches on Subject: below --
2009-08-14 23:25 Pádraig Brady
2009-08-15 20:02 ` Bryan Donlan
2009-08-15 20:12   ` John Tapsell
2009-08-15 20:23     ` Linus Torvalds
2009-08-15 20:54       ` Linus Torvalds
2009-08-17  1:55         ` Nicolas Pitre
2009-08-26 11:39           ` Pádraig Brady
2017-04-20 21:35             ` galt
2017-04-20 21:38             ` galt
2009-08-17  8:22         ` Andreas Ericsson
2009-08-16  0:06     ` Theodore Tso
2009-08-16 19:25 ` Giuseppe Scrivano
2009-08-16 20:10   ` Linus Torvalds
2009-08-16 22:15     ` Giuseppe Scrivano
2009-08-16 22:47       ` Linus Torvalds
2009-08-17  1:53   ` Pádraig Brady
2009-08-17 10:51     ` Giuseppe Scrivano
2009-08-17 15:44       ` Steven Noonan
2009-08-17 16:22         ` Linus Torvalds
2009-08-17 21:43           ` Steven Noonan
2009-08-17 17:32         ` Giuseppe Scrivano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090817072315.4314.qmail@science.horizon.com \
    --to=linux@horizon.com \
    --cc=P@draigBrady.com \
    --cc=art.08.09@gmail.com \
    --cc=bdonlan@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=johnflux@gmail.com \
    --cc=nico@cam.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).