author | Jens Alfke <jens@mooseyard.com> |
Thu Mar 20 09:05:58 2008 -0700 (2008-03-20) | |
changeset 1 | e55a17cdabd2 |
permissions | -rw-r--r-- |
jens@1 | 1 |
/* |
jens@1 | 2 |
* MurmurHash.c |
jens@1 | 3 |
* MYUtilities |
jens@1 | 4 |
* |
jens@1 | 5 |
* This file created by Jens Alfke on 3/17/08. |
jens@1 | 6 |
* Algorithm & source code by Austin Appleby, released to public domain. |
jens@1 | 7 |
* <http://murmurhash.googlepages.com/> |
jens@1 | 8 |
* Downloaded 3/16/2008. |
jens@1 | 9 |
* Modified slightly by Jens Alfke (use standard uint32_t and size_t types; |
jens@1 | 10 |
* change 'm' and 'r' to #defines for better C compatibility.) |
jens@1 | 11 |
* |
jens@1 | 12 |
*/ |
jens@1 | 13 |
|
jens@1 | 14 |
#include "MurmurHash.h" |
jens@1 | 15 |
|
jens@1 | 16 |
|
jens@1 | 17 |
//----------------------------------------------------------------------------- |
jens@1 | 18 |
// MurmurHash2, by Austin Appleby |
jens@1 | 19 |
|
jens@1 | 20 |
// Note - This code makes a few assumptions about how your machine behaves - |
jens@1 | 21 |
|
jens@1 | 22 |
// 1. We can read a 4-byte value from any address without crashing |
jens@1 | 23 |
// 2. sizeof(int) == 4 **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'** |
jens@1 | 24 |
|
jens@1 | 25 |
// And it has a few limitations - |
jens@1 | 26 |
|
jens@1 | 27 |
// 1. It will not work incrementally. |
jens@1 | 28 |
// 2. It will not produce the same results on little-endian and big-endian |
jens@1 | 29 |
// machines. |
jens@1 | 30 |
|
jens@1 | 31 |
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed ) |
jens@1 | 32 |
{ |
jens@1 | 33 |
// 'm' and 'r' are mixing constants generated offline. |
jens@1 | 34 |
// They're not really 'magic', they just happen to work well. |
jens@1 | 35 |
|
jens@1 | 36 |
#define m 0x5bd1e995 |
jens@1 | 37 |
#define r 24 |
jens@1 | 38 |
|
jens@1 | 39 |
// Initialize the hash to a 'random' value |
jens@1 | 40 |
|
jens@1 | 41 |
uint32_t h = seed ^ len; |
jens@1 | 42 |
|
jens@1 | 43 |
// Mix 4 bytes at a time into the hash |
jens@1 | 44 |
|
jens@1 | 45 |
const unsigned char * data = (const unsigned char *)key; |
jens@1 | 46 |
|
jens@1 | 47 |
while(len >= 4) |
jens@1 | 48 |
{ |
jens@1 | 49 |
uint32_t k = *(uint32_t *)data; |
jens@1 | 50 |
|
jens@1 | 51 |
k *= m; |
jens@1 | 52 |
k ^= k >> r; |
jens@1 | 53 |
k *= m; |
jens@1 | 54 |
|
jens@1 | 55 |
h *= m; |
jens@1 | 56 |
h ^= k; |
jens@1 | 57 |
|
jens@1 | 58 |
data += 4; |
jens@1 | 59 |
len -= 4; |
jens@1 | 60 |
} |
jens@1 | 61 |
|
jens@1 | 62 |
// Handle the last few bytes of the input array |
jens@1 | 63 |
|
jens@1 | 64 |
switch(len) |
jens@1 | 65 |
{ |
jens@1 | 66 |
case 3: h ^= data[2] << 16; |
jens@1 | 67 |
case 2: h ^= data[1] << 8; |
jens@1 | 68 |
case 1: h ^= data[0]; |
jens@1 | 69 |
h *= m; |
jens@1 | 70 |
}; |
jens@1 | 71 |
|
jens@1 | 72 |
// Do a few final mixes of the hash to ensure the last few |
jens@1 | 73 |
// bytes are well-incorporated. |
jens@1 | 74 |
|
jens@1 | 75 |
h ^= h >> 13; |
jens@1 | 76 |
h *= m; |
jens@1 | 77 |
h ^= h >> 15; |
jens@1 | 78 |
|
jens@1 | 79 |
return h; |
jens@1 | 80 |
} |