| author | snej@snej.local | 
| Tue Mar 10 22:34:39 2009 -0700 (2009-03-10) | |
| changeset 19 | 5ade3e09a827 | 
| permissions | -rw-r--r-- | 
| jens@1 | 1 | /* | 
| jens@1 | 2 | * MurmurHash.c | 
| jens@1 | 3 | * MYUtilities | 
| jens@1 | 4 | * | 
| jens@1 | 5 | * This file created by Jens Alfke on 3/17/08. | 
| jens@1 | 6 | * Algorithm & source code by Austin Appleby, released to public domain. | 
| jens@1 | 7 | * <http://murmurhash.googlepages.com/> | 
| jens@1 | 8 | * Downloaded 3/16/2008. | 
| jens@1 | 9 | * Modified slightly by Jens Alfke (use standard uint32_t and size_t types; | 
| jens@1 | 10 | * change 'm' and 'r' to #defines for better C compatibility.) | 
| jens@1 | 11 | * | 
| jens@1 | 12 | */ | 
| jens@1 | 13 | |
| jens@1 | 14 | #include "MurmurHash.h" | 
| jens@1 | 15 | |
| jens@1 | 16 | |
| jens@1 | 17 | //----------------------------------------------------------------------------- | 
| jens@1 | 18 | // MurmurHash2, by Austin Appleby | 
| jens@1 | 19 | |
| jens@1 | 20 | // Note - This code makes a few assumptions about how your machine behaves - | 
| jens@1 | 21 | |
| jens@1 | 22 | // 1. We can read a 4-byte value from any address without crashing | 
| jens@1 | 23 | // 2. sizeof(int) == 4 **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'** | 
| jens@1 | 24 | |
| jens@1 | 25 | // And it has a few limitations - | 
| jens@1 | 26 | |
| jens@1 | 27 | // 1. It will not work incrementally. | 
| jens@1 | 28 | // 2. It will not produce the same results on little-endian and big-endian | 
| jens@1 | 29 | // machines. | 
| jens@1 | 30 | |
| jens@1 | 31 | uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed ) | 
| jens@1 | 32 | {
 | 
| jens@1 | 33 | // 'm' and 'r' are mixing constants generated offline. | 
| jens@1 | 34 | // They're not really 'magic', they just happen to work well. | 
| jens@1 | 35 | |
| jens@1 | 36 | #define m 0x5bd1e995 | 
| jens@1 | 37 | #define r 24 | 
| jens@1 | 38 | |
| jens@1 | 39 | // Initialize the hash to a 'random' value | 
| jens@1 | 40 | |
| jens@1 | 41 | uint32_t h = seed ^ len; | 
| jens@1 | 42 | |
| jens@1 | 43 | // Mix 4 bytes at a time into the hash | 
| jens@1 | 44 | |
| jens@1 | 45 | const unsigned char * data = (const unsigned char *)key; | 
| jens@1 | 46 | |
| jens@1 | 47 | while(len >= 4) | 
| jens@1 | 48 |     {
 | 
| jens@1 | 49 | uint32_t k = *(uint32_t *)data; | 
| jens@1 | 50 | |
| jens@1 | 51 | k *= m; | 
| jens@1 | 52 | k ^= k >> r; | 
| jens@1 | 53 | k *= m; | 
| jens@1 | 54 | |
| jens@1 | 55 | h *= m; | 
| jens@1 | 56 | h ^= k; | 
| jens@1 | 57 | |
| jens@1 | 58 | data += 4; | 
| jens@1 | 59 | len -= 4; | 
| jens@1 | 60 | } | 
| jens@1 | 61 | |
| jens@1 | 62 | // Handle the last few bytes of the input array | 
| jens@1 | 63 | |
| jens@1 | 64 | switch(len) | 
| jens@1 | 65 |     {
 | 
| jens@1 | 66 | case 3: h ^= data[2] << 16; | 
| jens@1 | 67 | case 2: h ^= data[1] << 8; | 
| jens@1 | 68 | case 1: h ^= data[0]; | 
| jens@1 | 69 | h *= m; | 
| jens@1 | 70 | }; | 
| jens@1 | 71 | |
| jens@1 | 72 | // Do a few final mixes of the hash to ensure the last few | 
| jens@1 | 73 | // bytes are well-incorporated. | 
| jens@1 | 74 | |
| jens@1 | 75 | h ^= h >> 13; | 
| jens@1 | 76 | h *= m; | 
| jens@1 | 77 | h ^= h >> 15; | 
| jens@1 | 78 | |
| jens@1 | 79 | return h; | 
| jens@1 | 80 | } |