src/Dictionary.cpp
author Jens Alfke <jens@mooseyard.com>
Thu Sep 24 10:28:50 2009 -0700 (2009-09-24)
changeset 3 8e3ae153e2c9
parent 2 851de24ecb61
permissions -rw-r--r--
Fixed a nasty bug in HashDictionary that could cause heap corruption after removing a value. Added a test case to catch that bug. A few tweaks to Hash.
jens@0
     1
/*
jens@0
     2
 *  Dictionary.cpp
jens@0
     3
 *  Ottoman
jens@0
     4
 *
jens@0
     5
 *  Created by Jens Alfke on 8/23/09.
jens@0
     6
 *  Copyright 2009 Jens Alfke. All rights reserved.
jens@0
     7
 *  BSD-Licensed: See the file "LICENSE.txt" for details.
jens@0
     8
 */
jens@0
     9
jens@0
    10
#include "Dictionary.h"
jens@0
    11
#include "Hash.h"
jens@0
    12
#include <assert.h>
jens@0
    13
#include <algorithm>
jens@0
    14
#include <math.h>
jens@0
    15
jens@0
    16
namespace Mooseyard {
jens@0
    17
    
jens@0
    18
    Dictionary::Dictionary() {
jens@0
    19
    }
jens@0
    20
    
jens@0
    21
    Dictionary::~Dictionary() {
jens@0
    22
    }
jens@0
    23
    
jens@0
    24
    Dictionary::Iterator::~Iterator() {
jens@0
    25
    }
jens@0
    26
    
jens@0
    27
    
jens@0
    28
    const int Dictionary::kMinSize = 8;
jens@2
    29
    const float Dictionary::kMinLoadFactor = 0.25f;
jens@2
    30
    const float Dictionary::kMaxLoadFactor = 0.75f;
jens@0
    31
        
jens@0
    32
    // Choose the smallest power of two that's large enough to hold the given capacity
jens@0
    33
    // of entries with no greater than the given load (fraction full).
jens@0
    34
    int Dictionary::choosePowerOfTwoSize (int capacity, float load) {
jens@0
    35
        int idealSize = capacity / load;
jens@0
    36
        int size;
jens@0
    37
        for (size=kMinSize; size<idealSize; size *= 2)
jens@0
    38
            ;
jens@0
    39
        return size;
jens@0
    40
    }
jens@0
    41
    
jens@0
    42
    int Dictionary::chooseAnyOldSize (int capacity, float load) {
jens@0
    43
        return std::max((int)::ceil(capacity / load), 2);
jens@0
    44
    }
jens@0
    45
    
jens@0
    46
    
jens@0
    47
    //-----------------------------------------------------------------------------
jens@0
    48
    // MurmurHashNeutral2, by Austin Appleby
jens@0
    49
    // http://murmurhash.googlepages.com/MurmurHashNeutral2.cpp
jens@0
    50
    
jens@0
    51
    // Same as MurmurHash2, but endian- and alignment-neutral.
jens@0
    52
    // Half the speed though, alas.
jens@0
    53
    
jens@0
    54
    static uint32_t MurmurHashNeutral2 ( const void * key, int32_t len, uint32_t seed )
jens@0
    55
    {
jens@0
    56
        static const uint32_t m = 0x5bd1e995;
jens@0
    57
        static const int32_t r = 24;
jens@0
    58
        
jens@0
    59
        uint32_t h = seed ^ len;
jens@0
    60
        
jens@0
    61
        const unsigned char * data = (const unsigned char *)key;
jens@0
    62
        
jens@0
    63
        while(len >= 4)
jens@0
    64
        {
jens@0
    65
            uint32_t k;
jens@0
    66
            
jens@0
    67
            k  = data[0];
jens@0
    68
            k |= data[1] << 8;
jens@0
    69
            k |= data[2] << 16;
jens@0
    70
            k |= data[3] << 24;
jens@0
    71
            
jens@0
    72
            k *= m; 
jens@0
    73
            k ^= k >> r; 
jens@0
    74
            k *= m;
jens@0
    75
            
jens@0
    76
            h *= m;
jens@0
    77
            h ^= k;
jens@0
    78
            
jens@0
    79
            data += 4;
jens@0
    80
            len -= 4;
jens@0
    81
        }
jens@0
    82
        
jens@0
    83
        switch(len)
jens@0
    84
        {
jens@0
    85
            case 3: h ^= data[2] << 16;
jens@0
    86
            case 2: h ^= data[1] << 8;
jens@0
    87
            case 1: h ^= data[0];
jens@0
    88
                h *= m;
jens@0
    89
        };
jens@0
    90
        
jens@0
    91
        h ^= h >> 13;
jens@0
    92
        h *= m;
jens@0
    93
        h ^= h >> 15;
jens@0
    94
        
jens@0
    95
        return h;
jens@0
    96
    } 
jens@0
    97
    
jens@0
    98
    HashCode Key::computeHash (Blob key) {
jens@0
    99
        return MurmurHashNeutral2(key.bytes, key.length, 0);
jens@0
   100
    }
jens@0
   101
    
jens@0
   102
    
jens@0
   103
    
jens@0
   104
    void MutableDictionary::add (Dictionary* dict) {
jens@0
   105
        Iterator *it = dict->iterate();
jens@0
   106
        for (; *it; it->next())
jens@0
   107
            put(it->key(), it->value());
jens@0
   108
        delete it;
jens@0
   109
    }
jens@0
   110
    
jens@0
   111
    
jens@0
   112
        
jens@0
   113
    const Dictionary& Dictionary::kEmpty = * new HashDictionary();
jens@0
   114
    
jens@0
   115
jens@0
   116
#pragma mark -
jens@0
   117
#pragma mark KEY AND VALUE
jens@0
   118
    
jens@0
   119
    class KeyAndValue {
jens@0
   120
    public:
jens@0
   121
        static KeyAndValue* create (Blob key, Blob value) {
jens@0
   122
            size_t size = 2*sizeof(uint32_t) + key.length;
jens@0
   123
            size = (size + 0x3) & ~0x3; // 32-bit align start of value
jens@0
   124
            if (value!=kDeletedValue)
jens@0
   125
                size += value.length;
jens@0
   126
            KeyAndValue *kv = (KeyAndValue*) ::operator new(size);
jens@0
   127
            kv->_keyLength = key.length;
jens@0
   128
            memcpy(&kv->_keyLength + 1, key.bytes, key.length);
jens@0
   129
            uint32_t *valueLengthPtr = const_cast<uint32_t*>(kv->valueLengthPtr());
jens@0
   130
            *valueLengthPtr = value.length;
jens@0
   131
            if (value!=kDeletedValue)
jens@0
   132
                memcpy(valueLengthPtr + 1, value.bytes, value.length);
jens@0
   133
            return kv;
jens@0
   134
        }
jens@0
   135
        Blob key() const {
jens@0
   136
            return Blob(&_keyLength+1, _keyLength);
jens@0
   137
        }
jens@0
   138
        
jens@0
   139
        Blob value() const{
jens@0
   140
            const uint32_t *v = this->valueLengthPtr();
jens@0
   141
            if (*v != kDeletedValue.length)
jens@0
   142
                return Blob(v+1, *v);
jens@0
   143
            else
jens@0
   144
                return kDeletedValue;
jens@0
   145
        }
jens@0
   146
jens@0
   147
        /** Magic value OverlayDictionary stores in me to represent a deleted value. */
jens@0
   148
        static const Blob kDeletedValue;
jens@0
   149
    private:
jens@0
   150
        const uint32_t* valueLengthPtr() const {
jens@0
   151
            size_t ptr = (size_t)(&_keyLength + 1) + _keyLength;
jens@0
   152
            ptr = (ptr + 0x3) & ~0x3;       // 32-bit align
jens@0
   153
            return (uint32_t*)ptr;
jens@0
   154
        }
jens@0
   155
        
jens@0
   156
        uint32_t _keyLength;
jens@0
   157
        //...there follows the key, valueLength and value...
jens@0
   158
    };
jens@0
   159
jens@0
   160
    const Blob KeyAndValue::kDeletedValue(NULL, (size_t)-1);
jens@0
   161
jens@0
   162
    
jens@0
   163
jens@0
   164
    
jens@0
   165
#pragma mark -
jens@0
   166
#pragma mark HASH DICTIONARY:
jens@0
   167
    
jens@0
   168
    HashDictionary::~HashDictionary() {
jens@0
   169
        removeAll();
jens@0
   170
    }
jens@0
   171
    
jens@0
   172
    Blob HashDictionary::_convertValue (void *value) {
jens@0
   173
        return value ?((KeyAndValue*)value)->value() :Blob();
jens@0
   174
    }
jens@0
   175
    
jens@0
   176
    bool HashDictionary::contains (Key key) const {
jens@0
   177
        Blob value = get(key);
jens@0
   178
        return value.bytes || value.length;
jens@0
   179
    }
jens@0
   180
jens@0
   181
    
jens@0
   182
    void HashDictionary::put (Key key, Blob value) {
jens@0
   183
        // Allocate a block to store both the Blob struct and the data
jens@0
   184
        KeyAndValue *kv = KeyAndValue::create(key,value);
jens@0
   185
        _hash.put(Key(kv->key(),key.hash), kv);
jens@0
   186
    }
jens@0
   187
    
jens@0
   188
    bool HashDictionary::remove (Key key) {
jens@3
   189
        KeyAndValue *kv = (KeyAndValue*) _hash.remove(key);
jens@0
   190
        if (kv) {
jens@3
   191
            delete kv;
jens@0
   192
            return true;
jens@0
   193
        } else
jens@0
   194
            return false;
jens@0
   195
    }
jens@0
   196
    
jens@0
   197
    void HashDictionary::removeAll() {
jens@0
   198
        for (Hash::Iterator it(&_hash); it; ++it)
jens@3
   199
            delete (KeyAndValue*) it.value();
jens@0
   200
        _hash.removeAll();
jens@0
   201
    }
jens@0
   202
        
jens@0
   203
    
jens@0
   204
    
jens@0
   205
    
jens@0
   206
#pragma mark -
jens@0
   207
#pragma mark OVERLAY DICTIONARY:
jens@0
   208
    
jens@0
   209
    
jens@0
   210
    OverlayDictionary::OverlayDictionary (const Dictionary *base)
jens@0
   211
    :_base(base),
jens@0
   212
     _overlay(new HashDictionary()),
jens@0
   213
     _count(base->count()),
jens@0
   214
     _allRemoved(false)
jens@0
   215
    { }
jens@0
   216
    
jens@0
   217
    OverlayDictionary::~OverlayDictionary() {
jens@0
   218
        delete _overlay;
jens@0
   219
    }
jens@0
   220
    
jens@0
   221
    int OverlayDictionary::count() const {
jens@0
   222
        return _count;
jens@0
   223
    }
jens@0
   224
    
jens@0
   225
    bool OverlayDictionary::isChanged() const {
jens@0
   226
        return _overlay->count() > 0;
jens@0
   227
    }
jens@0
   228
    
jens@0
   229
    Blob OverlayDictionary::get (Key key) const {
jens@0
   230
        Blob result = _overlay->get(key);
jens@0
   231
        if (!result) {
jens@0
   232
            if (result==KeyAndValue::kDeletedValue)
jens@0
   233
                result = Blob();                // It's been marked as deleted
jens@0
   234
            else if (!_allRemoved)
jens@0
   235
                result = _base->get(key);
jens@0
   236
        }
jens@0
   237
        return result;
jens@0
   238
    }
jens@0
   239
    
jens@0
   240
    bool OverlayDictionary::contains (Key key) const {
jens@0
   241
        Blob result = _overlay->get(key);
jens@0
   242
        if (result)
jens@0
   243
            return true;
jens@0
   244
        else if (result==KeyAndValue::kDeletedValue)
jens@0
   245
            return false;
jens@0
   246
        else if (!_allRemoved)
jens@0
   247
            return _base->get(key);
jens@0
   248
        else 
jens@0
   249
            return false;
jens@0
   250
    }
jens@0
   251
    
jens@0
   252
    void OverlayDictionary::put (Key key, Blob value) {
jens@0
   253
        assert(value.bytes || value.length==0);    // make sure it doesn't look like magic kDeletedValue
jens@0
   254
        _overlay->put(key,value);
jens@0
   255
        if (_allRemoved || !_base->contains(key))
jens@0
   256
            _count++;
jens@0
   257
    }
jens@0
   258
    
jens@0
   259
    bool OverlayDictionary::remove (Key key) {
jens@0
   260
        if (!_allRemoved && _base->contains(key)) {
jens@0
   261
            _overlay->put(key,KeyAndValue::kDeletedValue);
jens@0
   262
            _count--;
jens@0
   263
            return true;
jens@0
   264
        } else if (_overlay->remove(key)) {
jens@0
   265
            _count--;
jens@0
   266
            return true;
jens@0
   267
        } else
jens@0
   268
            return false;
jens@0
   269
    }
jens@0
   270
    
jens@0
   271
    void OverlayDictionary::removeAll() {
jens@0
   272
        _allRemoved = true;
jens@0
   273
        _count = 0;
jens@0
   274
    }
jens@0
   275
    
jens@0
   276
    
jens@0
   277
    void OverlayDictionary::revert() {
jens@0
   278
        _overlay->removeAll();
jens@0
   279
        _count = _base->count();
jens@0
   280
        _allRemoved = false;
jens@0
   281
    }
jens@0
   282
    
jens@0
   283
    void OverlayDictionary::revertTo (const Dictionary* newBase) {
jens@0
   284
        _base = newBase;
jens@0
   285
        revert();
jens@0
   286
    }
jens@0
   287
    
jens@0
   288
    void OverlayDictionary::rebase (const Dictionary* newBase) {
jens@0
   289
        _base = newBase;
jens@0
   290
    }
jens@0
   291
jens@0
   292
    OverlayDictionary::Iterator::Iterator (const OverlayDictionary &dict)
jens@0
   293
        :_iter(dict.base()->iterate()),
jens@0
   294
         _overlay(dict._overlay)
jens@0
   295
    {
jens@0
   296
        if (skipCurrentState())
jens@0
   297
            next();
jens@0
   298
    }
jens@0
   299
    
jens@0
   300
    Key OverlayDictionary::Iterator::key() const        {return _iter ?_iter->key() :Key();}
jens@0
   301
    Blob OverlayDictionary::Iterator::value() const     {return _iter ?_iter->value() :Blob();}
jens@0
   302
    
jens@0
   303
    bool OverlayDictionary::Iterator::next() {
jens@0
   304
        if (_iter) {
jens@0
   305
            do {
jens@0
   306
                _iter->next();
jens@0
   307
            } while (skipCurrentState());
jens@0
   308
        }
jens@0
   309
        return _iter && *_iter;
jens@0
   310
    }
jens@0
   311
    
jens@0
   312
    bool OverlayDictionary::Iterator::skipCurrentState() {
jens@0
   313
        if (_iter) {
jens@0
   314
            if (*_iter) {
jens@0
   315
                if (_overlay) {
jens@0
   316
                    if (_overlay->contains(_iter->key()))
jens@0
   317
                        return true;        // Skip overridden value in base dict
jens@0
   318
                } else {
jens@0
   319
                    if (_iter->value() == KeyAndValue::kDeletedValue)
jens@0
   320
                        return true;        // Skip marked-deleted value in overlay dict
jens@0
   321
                }
jens@0
   322
            } else {
jens@0
   323
                delete _iter;
jens@0
   324
                if (_overlay) {
jens@0
   325
                    // end of base iterator; switch to overlay
jens@0
   326
                    _iter = _overlay->iterate();
jens@0
   327
                    _overlay = NULL;
jens@0
   328
                    return skipCurrentState();
jens@0
   329
                } else {
jens@0
   330
                    _iter = NULL;
jens@0
   331
                }
jens@0
   332
            }
jens@0
   333
        }
jens@0
   334
        return false;
jens@0
   335
    }
jens@0
   336
    
jens@0
   337
    
jens@0
   338
#pragma mark -
jens@0
   339
#pragma mark CHANGE ITERATOR:
jens@0
   340
    
jens@0
   341
    
jens@0
   342
    Dictionary::ChangeIterator::ChangeIterator (const Dictionary *dict1, const Dictionary *dict2) {
jens@0
   343
        _init(dict1, dict2);
jens@0
   344
    }
jens@0
   345
    
jens@0
   346
    Dictionary::ChangeIterator::ChangeIterator (const OverlayDictionary *overlay) {
jens@0
   347
        _init(overlay->_overlay, overlay->base());
jens@0
   348
    }
jens@0
   349
    
jens@0
   350
    void Dictionary::ChangeIterator::_init(const Dictionary *dict1, const Dictionary *dict2) {
jens@0
   351
        _dict2 = dict2;
jens@0
   352
        _iter = dict1->iterate();
jens@0
   353
        if (*_iter)
jens@0
   354
            _skipMatching();
jens@0
   355
    }
jens@0
   356
    
jens@0
   357
    Dictionary::ChangeIterator::~ChangeIterator() {
jens@0
   358
        delete _iter;
jens@0
   359
    }
jens@0
   360
    
jens@0
   361
    bool Dictionary::ChangeIterator::next() {
jens@0
   362
        return _iter->next() && this->_skipMatching();
jens@0
   363
    }
jens@0
   364
    
jens@0
   365
    bool Dictionary::ChangeIterator::_skipMatching() {
jens@0
   366
        do{
jens@0
   367
            _otherValue = _dict2->get(_iter->key());
jens@0
   368
            if (!_otherValue.equals(_iter->value()))
jens@0
   369
                return true;
jens@0
   370
        }while (_iter->next());
jens@0
   371
        return false;
jens@0
   372
    }
jens@0
   373
    
jens@0
   374
    
jens@0
   375
jens@0
   376
}