MurmurHash.c
author Jens Alfke <jens@mooseyard.com>
Tue May 20 17:40:28 2008 -0700 (2008-05-20)
changeset 10 82a37ccf6b8c
permissions -rw-r--r--
Split ExceptionUtils out of Test.
     1 /*
     2  *  MurmurHash.c
     3  *  MYUtilities
     4  *
     5  *  This file created by Jens Alfke on 3/17/08.
     6  *  Algorithm & source code by Austin Appleby, released to public domain.
     7  *  <http://murmurhash.googlepages.com/>
     8  *  Downloaded 3/16/2008.
     9  *  Modified slightly by Jens Alfke (use standard uint32_t and size_t types;
    10  *  change 'm' and 'r' to #defines for better C compatibility.)
    11  *
    12  */
    13 
    14 #include "MurmurHash.h"
    15 
    16 
    17 //-----------------------------------------------------------------------------
    18 // MurmurHash2, by Austin Appleby
    19 
    20 // Note - This code makes a few assumptions about how your machine behaves -
    21 
    22 // 1. We can read a 4-byte value from any address without crashing
    23 // 2. sizeof(int) == 4      **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'**
    24 
    25 // And it has a few limitations -
    26 
    27 // 1. It will not work incrementally.
    28 // 2. It will not produce the same results on little-endian and big-endian
    29 //    machines.
    30 
    31 uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
    32 {
    33     // 'm' and 'r' are mixing constants generated offline.
    34     // They're not really 'magic', they just happen to work well.
    35     
    36     #define m 0x5bd1e995
    37     #define r 24
    38     
    39     // Initialize the hash to a 'random' value
    40     
    41     uint32_t h = seed ^ len;
    42     
    43     // Mix 4 bytes at a time into the hash
    44     
    45     const unsigned char * data = (const unsigned char *)key;
    46     
    47     while(len >= 4)
    48     {
    49         uint32_t k = *(uint32_t *)data;
    50         
    51         k *= m; 
    52         k ^= k >> r; 
    53         k *= m; 
    54         
    55         h *= m; 
    56         h ^= k;
    57         
    58         data += 4;
    59         len -= 4;
    60     }
    61     
    62     // Handle the last few bytes of the input array
    63     
    64     switch(len)
    65     {
    66 	case 3: h ^= data[2] << 16;
    67 	case 2: h ^= data[1] << 8;
    68 	case 1: h ^= data[0];
    69                 h *= m;
    70     };
    71     
    72     // Do a few final mixes of the hash to ensure the last few
    73     // bytes are well-incorporated.
    74     
    75     h ^= h >> 13;
    76     h *= m;
    77     h ^= h >> 15;
    78     
    79     return h;
    80 }