MurmurHash.c
changeset 1 e55a17cdabd2
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/MurmurHash.c	Thu Mar 20 09:05:58 2008 -0700
     1.3 @@ -0,0 +1,80 @@
     1.4 +/*
     1.5 + *  MurmurHash.c
     1.6 + *  MYUtilities
     1.7 + *
     1.8 + *  This file created by Jens Alfke on 3/17/08.
     1.9 + *  Algorithm & source code by Austin Appleby, released to public domain.
    1.10 + *  <http://murmurhash.googlepages.com/>
    1.11 + *  Downloaded 3/16/2008.
    1.12 + *  Modified slightly by Jens Alfke (use standard uint32_t and size_t types;
    1.13 + *  change 'm' and 'r' to #defines for better C compatibility.)
    1.14 + *
    1.15 + */
    1.16 +
    1.17 +#include "MurmurHash.h"
    1.18 +
    1.19 +
    1.20 +//-----------------------------------------------------------------------------
    1.21 +// MurmurHash2, by Austin Appleby
    1.22 +
    1.23 +// Note - This code makes a few assumptions about how your machine behaves -
    1.24 +
    1.25 +// 1. We can read a 4-byte value from any address without crashing
    1.26 +// 2. sizeof(int) == 4      **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'**
    1.27 +
    1.28 +// And it has a few limitations -
    1.29 +
    1.30 +// 1. It will not work incrementally.
    1.31 +// 2. It will not produce the same results on little-endian and big-endian
    1.32 +//    machines.
    1.33 +
    1.34 +uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
    1.35 +{
    1.36 +    // 'm' and 'r' are mixing constants generated offline.
    1.37 +    // They're not really 'magic', they just happen to work well.
    1.38 +    
    1.39 +    #define m 0x5bd1e995
    1.40 +    #define r 24
    1.41 +    
    1.42 +    // Initialize the hash to a 'random' value
    1.43 +    
    1.44 +    uint32_t h = seed ^ len;
    1.45 +    
    1.46 +    // Mix 4 bytes at a time into the hash
    1.47 +    
    1.48 +    const unsigned char * data = (const unsigned char *)key;
    1.49 +    
    1.50 +    while(len >= 4)
    1.51 +    {
    1.52 +        uint32_t k = *(uint32_t *)data;
    1.53 +        
    1.54 +        k *= m; 
    1.55 +        k ^= k >> r; 
    1.56 +        k *= m; 
    1.57 +        
    1.58 +        h *= m; 
    1.59 +        h ^= k;
    1.60 +        
    1.61 +        data += 4;
    1.62 +        len -= 4;
    1.63 +    }
    1.64 +    
    1.65 +    // Handle the last few bytes of the input array
    1.66 +    
    1.67 +    switch(len)
    1.68 +    {
    1.69 +	case 3: h ^= data[2] << 16;
    1.70 +	case 2: h ^= data[1] << 8;
    1.71 +	case 1: h ^= data[0];
    1.72 +                h *= m;
    1.73 +    };
    1.74 +    
    1.75 +    // Do a few final mixes of the hash to ensure the last few
    1.76 +    // bytes are well-incorporated.
    1.77 +    
    1.78 +    h ^= h >> 13;
    1.79 +    h *= m;
    1.80 +    h ^= h >> 15;
    1.81 +    
    1.82 +    return h;
    1.83 +}