First official checkin.
5 * Created by Jens Alfke on 8/27/09.
6 * Copyright 2009 Jens Alfke. All rights reserved.
7 * BSD-Licensed: See the file "LICENSE.txt" for details.
13 #include "Dictionary.h"
21 // Quadratic probing results in fewer bucket tests on lookup, but makes the hash tables
22 // significantly larger, which increases the file size. In practice a smaller table size seems
23 // to be a bigger win since disk I/O is slow.
24 #define PROBE_QUADRATIC 0
26 // Similarly use a higher max load than for memory-resident tables, to save space.
27 static const float kMaxLoadFactor = 0.85;
29 static const uint32_t kIndexMagicNumber = 0x54378543;
32 /** An index (hash-table) entry as stored in the memory-mapped file. */
35 explicit Entry() :_hash(0), _valuePosition(0) { }
36 explicit Entry (HashCode h, FilePosition p)
37 :_hash(h), _valuePosition(p) { }
38 LittleEndian<HashCode> hash() const {return _hash;}
39 bool hasHash (LittleEndian<HashCode> h) const {return _hash == h;}
41 const KeyValueChunk* value (const File *file) const {
42 return (const KeyValueChunk*) file->mappedPosition(_valuePosition);
44 FilePosition valuePosition() const {return _valuePosition;}
47 bool isAvailable() const {return _valuePosition == 0;} // false if deleted
48 bool hasValue() const {return _valuePosition > 1;} // false if deleted
50 void markDeleted() {_valuePosition = kDeletedPosition;}
51 void markEmpty() {_valuePosition = 0;}
54 static const FilePosition kDeletedPosition = 1;
57 LittleEndian<HashCode> _hash;
58 LittleEndian<FilePosition> _valuePosition;
63 #pragma mark CREATION:
65 Index::Index (uint32_t itsTableSize)
66 :Chunk(sizeof(Index) + itsTableSize*sizeof(Index::Entry), kChunkType),
67 _magicNumber(kIndexMagicNumber),
69 _tableSize(itsTableSize)
72 Index* Index::create (int capacity) {
74 int tableSize = Dictionary::choosePowerOfTwoSize(capacity);
76 int tableSize = Dictionary::chooseAnyOldSize(capacity, kMaxLoadFactor);
78 size_t size = sizeof(Index) + tableSize*sizeof(Index::Entry);
79 void* storage = ::operator new(size);
80 memset(storage,0,size);
81 return new(storage) Index(tableSize);
84 const Index* Index::indexMappedAt (const void* ptr) {
85 const Index *index = (const Index*)ptr;
90 void Index::validate() const throw(File::Error) {
91 if (_magicNumber != kIndexMagicNumber)
92 throw File::Error(ERANGE, "Bad Index (wrong magic number)");
93 if (_tableSize < 2 || _count > _tableSize)
94 throw File::Error(ERANGE, "Bad Index (wrong count or tableSize)");
96 if ((_tableSize & (_tableSize-1)) != 0)
97 throw File::Error(ERANGE, "Bad Index (size not power of 2)");
99 if (size() != sizeof(Index)+_tableSize*sizeof(Index::Entry))
100 throw File::Error(ERANGE, "Bad Index (wrong size)");
103 void Index::validateEntries (const File *file) const throw(File::Error) {
105 uint32_t size = _tableSize;
107 const Entry *entry = table(0);
108 for (uint32_t i=0; i<size; i++, entry++) {
109 if (entry->hasValue()) {
111 const KeyValueChunk *chunk = entry->value(file);
112 if ((void*)chunk->end() > (void*)this)
113 throw File::Error(ERANGE, "Bad Index entry (points beyond index)");
115 if (entry->hash() != Key::computeHash(chunk->key()))
116 throw File::Error(ERANGE, "Bad Index entry (wrong hash code)");
120 throw File::Error(ERANGE, "Bad Index (inconsistent count)");
127 const Index::Entry* Index::table (int index) const {
128 return &((const Index::Entry*)&_table)[index];
131 Index::Entry* Index::table (int index) {
132 return &((Index::Entry*)&_table)[index];
136 const KeyValueChunk* Index::_find (File *file, Key key) const {
137 int index = key.hash % _tableSize;
138 LittleEndian<HashCode> lhash = key.hash;
143 const Index::Entry *found = table(index);
144 if (found->isAvailable()) {
146 } else if (found->hasHash(lhash)) {
147 const KeyValueChunk *value = found->value(file);
148 if (value->hasKey(key)) {
153 index = (index + ++probe) % _tableSize;
155 index = (index + 1) % _tableSize;
160 Blob Index::get (File *file, Key key) const {
161 const KeyValueChunk *kv = _find(file, key);
162 return kv ?kv->value() :Blob();
166 // Common implementation of put and delete. (If deleting, valuePosition will be kDeletedPosition.)
167 // While searching, entries past maxPosition should be considered newly-added and not compared,
168 // since they're not in the memory-mapped part of the file yet.
169 // Returns true if the entry was added/removed.
170 bool Index::_put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) {
171 bool result = (valuePosition <= Entry::kDeletedPosition);
172 int index = key.hash % _tableSize;
173 Index::Entry entry(key.hash, valuePosition);
179 found = table(index);
180 if (!found->hasValue()) {
181 if (entry.hasValue()) {
183 break; // it's a put and we found an empty entry
184 } else if (found->isAvailable()) {
185 return false; // it's a delete but the key wasn't found
188 else if (found->hasHash(entry.hash())) {
190 break; // no file given, so assume no duplicate keys
191 else if (found->valuePosition() < maxPosition && found->value(file)->hasKey(key))
192 break; // found existing (not newly-inserted) entry with the matching key
194 printf("** hash collision! hash=%u\n", key.hash);*/
197 index = (index + ++probe) % _tableSize;
199 index = (index + 1) % _tableSize;
202 // Finally found where to put it:
207 bool Index::put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) {
208 bool added = _put(file, key, valuePosition, maxPosition);
214 bool Index::remove (File *file, Key key, FilePosition maxPosition) {
215 bool removed = _put(file, key, Index::Entry::kDeletedPosition, maxPosition);
223 void Index::removeAll () {
224 memset(table(0), 0, _tableSize*sizeof(Entry));
229 void Index::copyFrom (File *file, const Index *index) {
230 const Entry *src = index->table(0);
231 if (index->_tableSize == _tableSize) {
232 memcpy(table(0), src, _tableSize*sizeof(Entry));
235 for (uint32_t i=0; i<index->_tableSize; i++,src++)
237 _put(NULL, Key(NULL,0,src->hash()), src->valuePosition(), UINT32_MAX);
239 _count = index->count();
244 Index::Iterator::Iterator (File *file, const Index *index)
246 _current(index->table(-1)),
247 _end(index->table(index->_tableSize))
252 bool Index::Iterator::hasValue() const {return _current != NULL;}
253 Key Index::Iterator::key() const {return Key(_current->value(_file)->key(), _current->hash());}
254 Blob Index::Iterator::value() const {return _current->value(_file)->value();}
256 bool Index::Iterator::next() {
258 while (++_current < _end) {
259 if (_current->hasValue())
267 FilePosition Index::Iterator::valuePosition() {
268 return _current ?_current->valuePosition() :0;