jbm: it compiles... but bombs in unit tests
5 * Created by Jens Alfke on 8/27/09.
6 * Copyright 2009 Jens Alfke. All rights reserved.
7 * BSD-Licensed: See the file "LICENSE.txt" for details.
13 #include "Dictionary.h"
19 #define __STDC_LIMIT_MACROS /* I <3 you, C99 */
20 # define UINT32_MAX (4294967295U)
27 // Quadratic probing results in fewer bucket tests on lookup, but makes the hash tables
28 // significantly larger, which increases the file size. In practice a smaller table size seems
29 // to be a bigger win since disk I/O is slow.
30 #define PROBE_QUADRATIC 0
32 // Similarly use a higher max load than for memory-resident tables, to save space.
33 static const float kMaxLoadFactor = 0.85f;
35 static const uint32_t kIndexMagicNumber = 0x54378543;
38 /** An index (hash-table) entry as stored in the memory-mapped file. */
41 explicit Entry() :_hash(0), _valuePosition(0) { }
42 explicit Entry (HashCode h, FilePosition p)
43 :_hash(h), _valuePosition(p) { }
44 LittleEndian<HashCode> hash() const {return _hash;}
45 bool hasHash (LittleEndian<HashCode> h) const {return _hash == h;}
47 const KeyValueChunk* value (const File *file) const {
48 return (const KeyValueChunk*) file->mappedPosition(_valuePosition);
50 FilePosition valuePosition() const {return _valuePosition;}
53 bool isAvailable() const {return _valuePosition == 0;} // false if deleted
54 bool hasValue() const {return _valuePosition > 1;} // false if deleted
56 void markDeleted() {_valuePosition = kDeletedPosition;}
57 void markEmpty() {_valuePosition = 0;}
60 static const FilePosition kDeletedPosition = 1;
63 LittleEndian<HashCode> _hash;
64 LittleEndian<FilePosition> _valuePosition;
69 #pragma mark CREATION:
71 Index::Index (uint32_t itsTableSize)
72 :Chunk(sizeof(Index) + itsTableSize*sizeof(Index::Entry), kChunkType),
73 _magicNumber(kIndexMagicNumber),
75 _tableSize(itsTableSize)
78 Index* Index::create (int capacity) {
80 int tableSize = Dictionary::choosePowerOfTwoSize(capacity);
82 int tableSize = Dictionary::chooseAnyOldSize(capacity, kMaxLoadFactor);
84 size_t size = sizeof(Index) + tableSize*sizeof(Index::Entry);
85 void* storage = ::operator new(size);
86 memset(storage,0,size);
87 return new(storage) Index(tableSize);
90 const Index* Index::indexMappedAt (const void* ptr) {
91 const Index *index = (const Index*)ptr;
96 void Index::validate() const throw(File::Error) {
97 if (_magicNumber != kIndexMagicNumber)
98 throw File::Error(ERANGE, "Bad Index (wrong magic number)");
99 if (_tableSize < 2 || _count > _tableSize)
100 throw File::Error(ERANGE, "Bad Index (wrong count or tableSize)");
102 if ((_tableSize & (_tableSize-1)) != 0)
103 throw File::Error(ERANGE, "Bad Index (size not power of 2)");
105 if (size() != sizeof(Index)+_tableSize*sizeof(Index::Entry))
106 throw File::Error(ERANGE, "Bad Index (wrong size)");
109 void Index::validateEntries (const File *file) const throw(File::Error) {
111 uint32_t size = _tableSize;
113 const Entry *entry = table(0);
114 for (uint32_t i=0; i<size; i++, entry++) {
115 if (entry->hasValue()) {
117 const KeyValueChunk *chunk = entry->value(file);
118 if ((void*)chunk->end() > (void*)this)
119 throw File::Error(ERANGE, "Bad Index entry (points beyond index)");
121 if (entry->hash() != Key::computeHash(chunk->key()))
122 throw File::Error(ERANGE, "Bad Index entry (wrong hash code)");
126 throw File::Error(ERANGE, "Bad Index (inconsistent count)");
133 const Index::Entry* Index::table (int index) const {
134 return &((const Index::Entry*)&_table)[index];
137 Index::Entry* Index::table (int index) {
138 return &((Index::Entry*)&_table)[index];
142 const KeyValueChunk* Index::_find (File *file, Key key) const {
143 int index = key.hash % _tableSize;
144 LittleEndian<HashCode> lhash = key.hash;
149 const Index::Entry *found = table(index);
150 if (found->isAvailable()) {
152 } else if (found->hasHash(lhash)) {
153 const KeyValueChunk *value = found->value(file);
154 if (value->hasKey(key)) {
159 index = (index + ++probe) % _tableSize;
161 index = (index + 1) % _tableSize;
166 Blob Index::get (File *file, Key key) const {
167 const KeyValueChunk *kv = _find(file, key);
168 return kv ?kv->value() :Blob();
172 // Common implementation of put and delete. (If deleting, valuePosition will be kDeletedPosition.)
173 // While searching, entries past maxPosition should be considered newly-added and not compared,
174 // since they're not in the memory-mapped part of the file yet.
175 // Returns true if the entry was added/removed.
176 bool Index::_put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) {
177 bool result = (valuePosition <= Entry::kDeletedPosition);
178 int index = key.hash % _tableSize;
179 Index::Entry entry(key.hash, valuePosition);
185 found = table(index);
186 if (!found->hasValue()) {
187 if (entry.hasValue()) {
189 break; // it's a put and we found an empty entry
190 } else if (found->isAvailable()) {
191 return false; // it's a delete but the key wasn't found
194 else if (found->hasHash(entry.hash())) {
196 break; // no file given, so assume no duplicate keys
197 else if (found->valuePosition() < maxPosition && found->value(file)->hasKey(key))
198 break; // found existing (not newly-inserted) entry with the matching key
200 printf("** hash collision! hash=%u\n", key.hash);*/
203 index = (index + ++probe) % _tableSize;
205 index = (index + 1) % _tableSize;
208 // Finally found where to put it:
213 bool Index::put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) {
214 bool added = _put(file, key, valuePosition, maxPosition);
220 bool Index::remove (File *file, Key key, FilePosition maxPosition) {
221 bool removed = _put(file, key, Index::Entry::kDeletedPosition, maxPosition);
229 void Index::removeAll () {
230 memset(table(0), 0, _tableSize*sizeof(Entry));
235 void Index::copyFrom (File *file, const Index *index) {
236 const Entry *src = index->table(0);
237 if (index->_tableSize == _tableSize) {
238 memcpy(table(0), src, _tableSize*sizeof(Entry));
241 for (uint32_t i=0; i<index->_tableSize; i++,src++)
243 _put(NULL, Key(NULL,0,src->hash()), src->valuePosition(), UINT32_MAX);
245 _count = index->count();
250 Index::Iterator::Iterator (File *file, const Index *index)
252 _current(index->table(-1)),
253 _end(index->table(index->_tableSize))
258 bool Index::Iterator::hasValue() const {return _current != NULL;}
259 Key Index::Iterator::key() const {return Key(_current->value(_file)->key(), _current->hash());}
260 Blob Index::Iterator::value() const {return _current->value(_file)->value();}
262 bool Index::Iterator::next() {
264 while (++_current < _end) {
265 if (_current->hasValue())
273 FilePosition Index::Iterator::valuePosition() {
274 return _current ?_current->valuePosition() :0;