MurmurHash.c
author Jens Alfke <jens@mooseyard.com>
Tue May 20 17:40:28 2008 -0700 (2008-05-20)
changeset 10 82a37ccf6b8c
permissions -rw-r--r--
Split ExceptionUtils out of Test.
jens@1
     1
/*
jens@1
     2
 *  MurmurHash.c
jens@1
     3
 *  MYUtilities
jens@1
     4
 *
jens@1
     5
 *  This file created by Jens Alfke on 3/17/08.
jens@1
     6
 *  Algorithm & source code by Austin Appleby, released to public domain.
jens@1
     7
 *  <http://murmurhash.googlepages.com/>
jens@1
     8
 *  Downloaded 3/16/2008.
jens@1
     9
 *  Modified slightly by Jens Alfke (use standard uint32_t and size_t types;
jens@1
    10
 *  change 'm' and 'r' to #defines for better C compatibility.)
jens@1
    11
 *
jens@1
    12
 */
jens@1
    13
jens@1
    14
#include "MurmurHash.h"
jens@1
    15
jens@1
    16
jens@1
    17
//-----------------------------------------------------------------------------
jens@1
    18
// MurmurHash2, by Austin Appleby
jens@1
    19
jens@1
    20
// Note - This code makes a few assumptions about how your machine behaves -
jens@1
    21
jens@1
    22
// 1. We can read a 4-byte value from any address without crashing
jens@1
    23
// 2. sizeof(int) == 4      **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'**
jens@1
    24
jens@1
    25
// And it has a few limitations -
jens@1
    26
jens@1
    27
// 1. It will not work incrementally.
jens@1
    28
// 2. It will not produce the same results on little-endian and big-endian
jens@1
    29
//    machines.
jens@1
    30
jens@1
    31
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
jens@1
    32
{
jens@1
    33
    // 'm' and 'r' are mixing constants generated offline.
jens@1
    34
    // They're not really 'magic', they just happen to work well.
jens@1
    35
    
jens@1
    36
    #define m 0x5bd1e995
jens@1
    37
    #define r 24
jens@1
    38
    
jens@1
    39
    // Initialize the hash to a 'random' value
jens@1
    40
    
jens@1
    41
    uint32_t h = seed ^ len;
jens@1
    42
    
jens@1
    43
    // Mix 4 bytes at a time into the hash
jens@1
    44
    
jens@1
    45
    const unsigned char * data = (const unsigned char *)key;
jens@1
    46
    
jens@1
    47
    while(len >= 4)
jens@1
    48
    {
jens@1
    49
        uint32_t k = *(uint32_t *)data;
jens@1
    50
        
jens@1
    51
        k *= m; 
jens@1
    52
        k ^= k >> r; 
jens@1
    53
        k *= m; 
jens@1
    54
        
jens@1
    55
        h *= m; 
jens@1
    56
        h ^= k;
jens@1
    57
        
jens@1
    58
        data += 4;
jens@1
    59
        len -= 4;
jens@1
    60
    }
jens@1
    61
    
jens@1
    62
    // Handle the last few bytes of the input array
jens@1
    63
    
jens@1
    64
    switch(len)
jens@1
    65
    {
jens@1
    66
	case 3: h ^= data[2] << 16;
jens@1
    67
	case 2: h ^= data[1] << 8;
jens@1
    68
	case 1: h ^= data[0];
jens@1
    69
                h *= m;
jens@1
    70
    };
jens@1
    71
    
jens@1
    72
    // Do a few final mixes of the hash to ensure the last few
jens@1
    73
    // bytes are well-incorporated.
jens@1
    74
    
jens@1
    75
    h ^= h >> 13;
jens@1
    76
    h *= m;
jens@1
    77
    h ^= h >> 15;
jens@1
    78
    
jens@1
    79
    return h;
jens@1
    80
}