* Merged in jbm's changes for Linux compatibility, fixing Mac compatibility in the process :)
* Fixed two regressions having to do with the _previousTrailerPosition in VersionDictionary.cpp.
* Made sure RTTI is enabled in the OttomanTest target because gtest requires it.
5 * Created by Jens Alfke on 8/27/09.
6 * Copyright 2009 Jens Alfke. All rights reserved.
7 * BSD-Licensed: See the file "LICENSE.txt" for details.
13 #include "Dictionary.h"
23 // Quadratic probing results in fewer bucket tests on lookup, but makes the hash tables
24 // significantly larger, which increases the file size. In practice a smaller table size seems
25 // to be a bigger win since disk I/O is slow.
26 #define PROBE_QUADRATIC 0
28 // Similarly use a higher max load than for memory-resident tables, to save space.
29 static const float kMaxLoadFactor = 0.85f;
31 static const uint32_t kIndexMagicNumber = 0x54378543;
34 /** An index (hash-table) entry as stored in the memory-mapped file. */
37 explicit Entry() :_hash(0), _valuePosition(0) { }
38 explicit Entry (HashCode h, FilePosition p)
39 :_hash(h), _valuePosition(p) { }
40 LittleEndian<HashCode> hash() const {return _hash;}
41 bool hasHash (LittleEndian<HashCode> h) const {return _hash == h;}
43 const KeyValueChunk* value (const File *file) const {
44 return (const KeyValueChunk*) file->mappedPosition(_valuePosition);
46 FilePosition valuePosition() const {return _valuePosition;}
49 bool isAvailable() const {return _valuePosition == 0;} // false if deleted
50 bool hasValue() const {return _valuePosition > 1;} // false if deleted
52 void markDeleted() {_valuePosition = kDeletedPosition;}
53 void markEmpty() {_valuePosition = 0;}
56 static const FilePosition kDeletedPosition = 1;
59 LittleEndian<HashCode> _hash;
60 LittleEndian<FilePosition> _valuePosition;
65 #pragma mark CREATION:
67 Index::Index (uint32_t itsTableSize)
68 :Chunk(sizeof(Index) + itsTableSize*sizeof(Index::Entry), kChunkType),
69 _magicNumber(kIndexMagicNumber),
71 _tableSize(itsTableSize)
74 Index* Index::create (int capacity) {
76 int tableSize = Dictionary::choosePowerOfTwoSize(capacity);
78 int tableSize = Dictionary::chooseAnyOldSize(capacity, kMaxLoadFactor);
80 size_t size = sizeof(Index) + tableSize*sizeof(Index::Entry);
81 void* storage = ::operator new(size);
82 memset(storage,0,size);
83 return new(storage) Index(tableSize);
86 const Index* Index::indexMappedAt (const void* ptr) {
87 const Index *index = (const Index*)ptr;
92 void Index::validate() const throw(File::Error) {
93 if (_magicNumber != kIndexMagicNumber)
94 throw File::Error(ERANGE, "Bad Index (wrong magic number)");
95 if (_tableSize < 2 || _count > _tableSize)
96 throw File::Error(ERANGE, "Bad Index (wrong count or tableSize)");
98 if ((_tableSize & (_tableSize-1)) != 0)
99 throw File::Error(ERANGE, "Bad Index (size not power of 2)");
101 if (size() != sizeof(Index)+_tableSize*sizeof(Index::Entry))
102 throw File::Error(ERANGE, "Bad Index (wrong size)");
105 void Index::validateEntries (const File *file) const throw(File::Error) {
107 uint32_t size = _tableSize;
109 const Entry *entry = table(0);
110 for (uint32_t i=0; i<size; i++, entry++) {
111 if (entry->hasValue()) {
113 const KeyValueChunk *chunk = entry->value(file);
114 if ((void*)chunk->end() > (void*)this)
115 throw File::Error(ERANGE, "Bad Index entry (points beyond index)");
117 if (entry->hash() != Key::computeHash(chunk->key()))
118 throw File::Error(ERANGE, "Bad Index entry (wrong hash code)");
122 throw File::Error(ERANGE, "Bad Index (inconsistent count)");
129 const Index::Entry* Index::table (int index) const {
130 return &((const Index::Entry*)&_table)[index];
133 Index::Entry* Index::table (int index) {
134 return &((Index::Entry*)&_table)[index];
138 const KeyValueChunk* Index::_find (File *file, Key key) const {
139 int index = key.hash % _tableSize;
140 LittleEndian<HashCode> lhash = key.hash;
145 const Index::Entry *found = table(index);
146 if (found->isAvailable()) {
148 } else if (found->hasHash(lhash)) {
149 const KeyValueChunk *value = found->value(file);
150 if (value->hasKey(key)) {
155 index = (index + ++probe) % _tableSize;
157 index = (index + 1) % _tableSize;
162 Blob Index::get (File *file, Key key) const {
163 const KeyValueChunk *kv = _find(file, key);
164 return kv ?kv->value() :Blob();
168 // Common implementation of put and delete. (If deleting, valuePosition will be kDeletedPosition.)
169 // While searching, entries past maxPosition should be considered newly-added and not compared,
170 // since they're not in the memory-mapped part of the file yet.
171 // Returns true if the entry was added/removed.
172 bool Index::_put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) {
173 bool result = (valuePosition <= Entry::kDeletedPosition);
174 int index = key.hash % _tableSize;
175 Index::Entry entry(key.hash, valuePosition);
181 found = table(index);
182 if (!found->hasValue()) {
183 if (entry.hasValue()) {
185 break; // it's a put and we found an empty entry
186 } else if (found->isAvailable()) {
187 return false; // it's a delete but the key wasn't found
190 else if (found->hasHash(entry.hash())) {
192 break; // no file given, so assume no duplicate keys
193 else if (found->valuePosition() < maxPosition && found->value(file)->hasKey(key))
194 break; // found existing (not newly-inserted) entry with the matching key
196 printf("** hash collision! hash=%u\n", key.hash);*/
199 index = (index + ++probe) % _tableSize;
201 index = (index + 1) % _tableSize;
204 // Finally found where to put it:
209 bool Index::put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) {
210 bool added = _put(file, key, valuePosition, maxPosition);
216 bool Index::remove (File *file, Key key, FilePosition maxPosition) {
217 bool removed = _put(file, key, Index::Entry::kDeletedPosition, maxPosition);
225 void Index::removeAll () {
226 memset(table(0), 0, _tableSize*sizeof(Entry));
231 void Index::copyFrom (File *file, const Index *index) {
232 const Entry *src = index->table(0);
233 if (index->_tableSize == _tableSize) {
234 memcpy(table(0), src, _tableSize*sizeof(Entry));
237 for (uint32_t i=0; i<index->_tableSize; i++,src++)
239 _put(NULL, Key(NULL,0,src->hash()), src->valuePosition(), UINT32_MAX);
241 _count = index->count();
246 Index::Iterator::Iterator (File *file, const Index *index)
248 _current(index->table(-1)),
249 _end(index->table(index->_tableSize))
254 bool Index::Iterator::hasValue() const {return _current != NULL;}
255 Key Index::Iterator::key() const {return Key(_current->value(_file)->key(), _current->hash());}
256 Blob Index::Iterator::value() const {return _current->value(_file)->value();}
258 bool Index::Iterator::next() {
260 while (++_current < _end) {
261 if (_current->hasValue())
269 FilePosition Index::Iterator::valuePosition() {
270 return _current ?_current->valuePosition() :0;