src/VersionDictionary.cpp
changeset 0 31a43d94cc26
child 2 851de24ecb61
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/VersionDictionary.cpp	Sun Sep 20 15:14:12 2009 -0700
     1.3 @@ -0,0 +1,429 @@
     1.4 +/*
     1.5 + *  VersionDictionary.cpp
     1.6 + *  Ottoman
     1.7 + *
     1.8 + *  Created by Jens Alfke on 8/21/09.
     1.9 + *  Copyright 2009 Jens Alfke. All rights reserved.
    1.10 + *  BSD-Licensed: See the file "LICENSE.txt" for details.
    1.11 + */
    1.12 +
    1.13 +#include "VersionDictionary.h"
    1.14 +#include "Chunk.h"
    1.15 +#include "Index.h"
    1.16 +#include "File.h"
    1.17 +#include "Chunk.h"
    1.18 +#include <assert.h>
    1.19 +#include <errno.h>
    1.20 +#include <algorithm>
    1.21 +
    1.22 +namespace Mooseyard {
    1.23 +            
    1.24 +    // Trailer is stored in file; all numbers are little-endian.
    1.25 +    class VersionDictionary::Trailer :public Chunk {
    1.26 +    public:
    1.27 +        LittleEndian<uint32_t>          magicNumber1;
    1.28 +        LittleEndian<uint32_t>          count;
    1.29 +        VersionDictionary::IndexPositions  indexPositions;
    1.30 +        LittleEndian<FilePosition>      previousTrailerPosition;
    1.31 +        LittleEndian<uint32_t>          generation;
    1.32 +        LittleEndianDouble              timestamp;
    1.33 +        LittleEndian<uint32_t>          magicNumber2;
    1.34 +                
    1.35 +        Trailer ()
    1.36 +            :Chunk(sizeof(Trailer), kChunkType)
    1.37 +        { }
    1.38 +        
    1.39 +        Trailer (int c,
    1.40 +                     VersionDictionary::IndexPositions indexPos, 
    1.41 +                     FilePosition previousTrailerPos,
    1.42 +                     uint32_t gen)
    1.43 +            :Chunk(sizeof(Trailer), kChunkType),
    1.44 +             magicNumber1(kMagicNumber1),
    1.45 +             count(c),
    1.46 +             indexPositions(indexPos),
    1.47 +             previousTrailerPosition(previousTrailerPos),
    1.48 +             generation(gen),
    1.49 +             timestamp(::time(NULL)),
    1.50 +             magicNumber2(kMagicNumber2)
    1.51 +        { }
    1.52 +        
    1.53 +        static const uint32_t kMagicNumber1 = 0xfe83b1cd;
    1.54 +        static const uint32_t kMagicNumber2 = 0xff84b2c1;
    1.55 +    };
    1.56 +    
    1.57 +    
    1.58 +    
    1.59 +    VersionDictionary::VersionDictionary (File *file)
    1.60 +        :_file(file),
    1.61 +         _trailerPosition(0),
    1.62 +         _previousTrailerPosition(0),
    1.63 +         _indexPositions(),
    1.64 +         _count(0),
    1.65 +         _previousVersion()
    1.66 +    {
    1.67 +        if (file->length() > sizeof(Trailer))
    1.68 +            _read();
    1.69 +    }
    1.70 +    
    1.71 +    VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
    1.72 +        :_file(file),
    1.73 +         _trailerPosition(trailerPosition),
    1.74 +         _previousTrailerPosition(0),
    1.75 +         _indexPositions(),
    1.76 +         _count(0),
    1.77 +         _previousVersion()
    1.78 +    {
    1.79 +        _read(trailerPosition);
    1.80 +    }
    1.81 +    
    1.82 +    int VersionDictionary::count() const {
    1.83 +        return _count;
    1.84 +    }
    1.85 +    
    1.86 +    const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
    1.87 +        if (_trailerPosition > 0)
    1.88 +            return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
    1.89 +        else 
    1.90 +            return NULL;      // Only happens in the empty-file case
    1.91 +    }
    1.92 +    
    1.93 +    const Index* VersionDictionary::_index (int i) const {
    1.94 +        if (_indexPositions[i] > 0)
    1.95 +            return (const Index*) _file->mappedPosition(_indexPositions[i]);
    1.96 +        else
    1.97 +            return NULL;
    1.98 +    }
    1.99 +    
   1.100 +    int VersionDictionary::generation() const {
   1.101 +        const VersionDictionary::Trailer *trailer = _trailer();
   1.102 +        return trailer ? (int)trailer->generation : -1;
   1.103 +    }
   1.104 +    
   1.105 +    time_t VersionDictionary::timestamp() const {
   1.106 +        const VersionDictionary::Trailer *trailer = _trailer();
   1.107 +        return trailer ? (time_t)trailer->timestamp : 0;
   1.108 +    }
   1.109 +    
   1.110 +    const VersionDictionary* VersionDictionary::previousVersion() const {
   1.111 +        if (!_previousVersion)
   1.112 +            if (_previousTrailerPosition > 0)
   1.113 +                _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
   1.114 +        return _previousVersion;
   1.115 +    }
   1.116 +        
   1.117 +    
   1.118 +    Blob VersionDictionary::get (Key key) const {
   1.119 +        const Index *index = _index(key.hash >> 24);
   1.120 +        return index ?index->get(_file, key) :Blob();
   1.121 +    }
   1.122 +    
   1.123 +    void VersionDictionary::_read (FilePosition trailerPos) {
   1.124 +        assert(_file);
   1.125 +        
   1.126 +        if (trailerPos > 0) {
   1.127 +            _file->setPosition(trailerPos);
   1.128 +        } else {
   1.129 +            // Determine position of trailer, at EOF:
   1.130 +            off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
   1.131 +            if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
   1.132 +                throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
   1.133 +            trailerPos = pos;
   1.134 +        }
   1.135 +        
   1.136 +        // Read & verify trailer:
   1.137 +        VersionDictionary::Trailer trailer;
   1.138 +        _file->read(trailer);
   1.139 +        _trailerPosition = trailerPos;
   1.140 +        _count = trailer.count;
   1.141 +        _indexPositions = trailer.indexPositions;
   1.142 +        
   1.143 +        if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
   1.144 +            || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
   1.145 +            throw File::Error(ERANGE, "No trailer found in file (invalid magic numbers)");\
   1.146 +
   1.147 +        
   1.148 +        // Map in the file:
   1.149 +        _file->mapRegion(0, _trailerPosition+sizeof(trailer));
   1.150 +
   1.151 +        // Verify Indexes:
   1.152 +        for (int i=0; i<256; i++) {
   1.153 +            const Index *index = _index(i);
   1.154 +            if (index)
   1.155 +                index->validate();
   1.156 +        }
   1.157 +}
   1.158 +    
   1.159 +    
   1.160 +    bool VersionDictionary::isLatestVersion() const {
   1.161 +        return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
   1.162 +    }
   1.163 +    
   1.164 +    
   1.165 +    /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
   1.166 +        as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
   1.167 +        Returns the position of the new trailer. */
   1.168 +    FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
   1.169 +                                          const Dictionary *addDict, 
   1.170 +                                          File *dstFile, 
   1.171 +                                          bool replace) 
   1.172 +    {
   1.173 +        File *srcFile = baseDict->_file;
   1.174 +        bool incremental = !replace && dstFile==srcFile;
   1.175 +        Index* newIndex[256];
   1.176 +
   1.177 +        {
   1.178 +            // Work out the needed capacity for each Index bucket:
   1.179 +            int newCounts[256] = {0};
   1.180 +            if (!replace) {
   1.181 +                for (int i=0; i<256; i++) {
   1.182 +                    const Index *oldIndex = baseDict->_index(i);
   1.183 +                    if (oldIndex)
   1.184 +                        newCounts[i] = oldIndex->count();
   1.185 +                }
   1.186 +            }
   1.187 +            Dictionary::Iterator *it = addDict->iterate();
   1.188 +            for (; *it; ++*it)
   1.189 +                newCounts[it->key().hash >> 24]++;
   1.190 +            delete it;
   1.191 +            
   1.192 +            // Allocate new Indexes, of sufficient capacity, for each growing bucket:
   1.193 +            for (int i=0; i<256; i++) {
   1.194 +                const Index *oldIndex = baseDict->_index(i);
   1.195 +                if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
   1.196 +                    newIndex[i] = Index::create(newCounts[i]);
   1.197 +                    if (incremental && oldIndex)
   1.198 +                        newIndex[i]->copyFrom(srcFile, oldIndex);
   1.199 +                } else
   1.200 +                    newIndex[i] = NULL;
   1.201 +            }
   1.202 +        }
   1.203 +        
   1.204 +        // Lock the file now, seek to the end, and make sure it's been prepared with a header,
   1.205 +        // since FilePositions of 0 and 1 are reserved.
   1.206 +        File::Lock lock(dstFile);
   1.207 +        const FilePosition startPos = dstFile->setPositionToEnd();
   1.208 +        if (startPos <= 1)
   1.209 +            throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
   1.210 +        
   1.211 +        // For safety's sake, make sure the old file hasn't been destroyed:
   1.212 +        FilePosition oldEOF = baseDict->_trailerPosition;
   1.213 +        if (oldEOF > 0)
   1.214 +            oldEOF += sizeof(VersionDictionary::Trailer);
   1.215 +        if (srcFile->length() < oldEOF)
   1.216 +            throw File::Error(ERANGE, "File has been truncated since being read");
   1.217 +            
   1.218 +        try{
   1.219 +            FilePosition pos = startPos;
   1.220 +            
   1.221 +            int newCount;
   1.222 +            if (replace)
   1.223 +                newCount = 0;
   1.224 +            else if (dstFile == srcFile)
   1.225 +                newCount = baseDict->_count;
   1.226 +            else {
   1.227 +                // Write out the surviving pre-existing entries from the old file:
   1.228 +                newCount = 0;
   1.229 +                for (VersionDictionary::Iterator it(baseDict); it; ++it) {
   1.230 +                    Key key = it.key();
   1.231 +                    if (!addDict->contains(key)) {
   1.232 +                        int bucket = key.hash >> 24;
   1.233 +                        int size = KeyValueChunk::write(dstFile,pos, key, it.value());
   1.234 +                        newIndex[bucket]->put(srcFile, key, pos, startPos);
   1.235 +                        pos += size;
   1.236 +                        newCount++;
   1.237 +                    }
   1.238 +                }
   1.239 +            }
   1.240 +            
   1.241 +            // Now write the items from the new dict:
   1.242 +            Dictionary::Iterator *it = addDict->iterate();
   1.243 +            for (; *it; ++*it) {
   1.244 +                Key key=it->key();
   1.245 +                Blob value=it->value();
   1.246 +                int bucket = key.hash >> 24;
   1.247 +                Index *index = newIndex[bucket];
   1.248 +                assert(index);
   1.249 +                                              
   1.250 +                if (value.bytes) {
   1.251 +                    int size = KeyValueChunk::write(dstFile,pos, key, value);
   1.252 +                    if (index->put(srcFile, key, pos, startPos))
   1.253 +                        newCount++;
   1.254 +                    pos += size;
   1.255 +                } else if (incremental) {
   1.256 +                    // NULL value is a deleted-entry marker used by OverlayDictionary
   1.257 +                    if (index->remove(srcFile, key, startPos))
   1.258 +                        newCount--;
   1.259 +                }
   1.260 +            }
   1.261 +            delete it;
   1.262 +            
   1.263 +            // Write out the new indexes:
   1.264 +            IndexPositions newIndexPositions;
   1.265 +            if (incremental)
   1.266 +                newIndexPositions = baseDict->_indexPositions;
   1.267 +            else
   1.268 +                memset(&newIndexPositions, 0, sizeof(newIndexPositions));
   1.269 +
   1.270 +            pos += Chunk::writePadding(dstFile);
   1.271 +            for (int i=0; i<256; i++) {
   1.272 +                if (newIndex[i]) {
   1.273 +                    Index *index = newIndex[i];
   1.274 +                    newIndexPositions[i] = pos;
   1.275 +                    pos += dstFile->write(index, index->size());
   1.276 +                    delete index;
   1.277 +                }
   1.278 +            }
   1.279 +            
   1.280 +            // Flush everything out to disk, with maximum paranoia, before writing the trailer.
   1.281 +            // Since scavenging corrupt files looks for trailers, we don't want to append a
   1.282 +            // trailer until we're certain that all of the real data is safely on-disk.
   1.283 +            dstFile->flushDisk();
   1.284 +            
   1.285 +            // Write the trailer:
   1.286 +            FilePosition newTrailerPosition = pos;
   1.287 +            VersionDictionary::Trailer trailer(newCount,
   1.288 +                                            newIndexPositions,
   1.289 +                                            baseDict->_trailerPosition,
   1.290 +                                            baseDict->generation() + 1);
   1.291 +            pos += dstFile->write(trailer);
   1.292 +            
   1.293 +            // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
   1.294 +            // real-time I/O in other apps, so it's bad to call it too often.
   1.295 +            dstFile->flush();
   1.296 +            assert(pos==dstFile->position());
   1.297 +            
   1.298 +            return newTrailerPosition;
   1.299 +            
   1.300 +        } catch (...) {
   1.301 +            // If something goes wrong, try to back out everything we wrote:
   1.302 +            try{
   1.303 +                dstFile->setLength(startPos);
   1.304 +            }catch(...) { }
   1.305 +            throw;
   1.306 +        }
   1.307 +    }
   1.308 + 
   1.309 +    
   1.310 +#pragma mark -
   1.311 +#pragma mark TESTING-ONLY:
   1.312 +    
   1.313 +    VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
   1.314 +                                                    File *dstFile, 
   1.315 +                                                    bool replace) const
   1.316 +    {
   1.317 +        FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
   1.318 +        VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
   1.319 +        nextVersion->_previousVersion = this;
   1.320 +        return nextVersion;
   1.321 +    }
   1.322 +    
   1.323 +    VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
   1.324 +        return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
   1.325 +    }
   1.326 +    
   1.327 +    
   1.328 +#pragma mark -
   1.329 +#pragma mark ITERATOR:
   1.330 +    
   1.331 +    Dictionary::Iterator* VersionDictionary::iterate() const {
   1.332 +        return new VersionDictionary::Iterator(this);
   1.333 +    }
   1.334 +    
   1.335 +    VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
   1.336 +    :_file(file),
   1.337 +     _bucket(-1),
   1.338 +     _iter(NULL)
   1.339 +    { 
   1.340 +        nextIndex();
   1.341 +    }
   1.342 +    
   1.343 +    VersionDictionary::Iterator::~Iterator() {
   1.344 +        delete _iter;
   1.345 +    }
   1.346 +    
   1.347 +    bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
   1.348 +    Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
   1.349 +    Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
   1.350 +    
   1.351 +    bool VersionDictionary::Iterator::next() {
   1.352 +        if (_iter->next())
   1.353 +            return true;
   1.354 +        else {
   1.355 +            delete _iter;
   1.356 +            return nextIndex();
   1.357 +        }
   1.358 +    }
   1.359 +    
   1.360 +    bool VersionDictionary::Iterator::nextIndex() {
   1.361 +        while (++_bucket < 256) {
   1.362 +            const Index *index = _file->_index(_bucket);
   1.363 +            if (index) {
   1.364 +                _iter = new Index::Iterator(_file->_file, index);
   1.365 +                if (*_iter)
   1.366 +                    return true;
   1.367 +                else
   1.368 +                    delete _iter;
   1.369 +            }
   1.370 +        }
   1.371 +        _iter = NULL;
   1.372 +        return false;
   1.373 +    }
   1.374 +    
   1.375 +    
   1.376 +#pragma mark -
   1.377 +#pragma mark CHANGE ITERATOR:
   1.378 +    
   1.379 +    VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
   1.380 +        return new VersionDictionary::ChangeIterator(this);
   1.381 +    }
   1.382 +    
   1.383 +    VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
   1.384 +    :_file(file),
   1.385 +     _bucket(-1),
   1.386 +     _iter(NULL)
   1.387 +    { 
   1.388 +        next();            
   1.389 +    }
   1.390 +    
   1.391 +    VersionDictionary::ChangeIterator::~ChangeIterator() {
   1.392 +        delete _iter;
   1.393 +    }
   1.394 +    
   1.395 +    bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
   1.396 +    Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
   1.397 +    Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
   1.398 +    
   1.399 +    bool VersionDictionary::ChangeIterator::next() {
   1.400 +        const VersionDictionary::Trailer *trailer = _file->_trailer();
   1.401 +        for (;;) {
   1.402 +            // Check if current iterator has a value that's from this version:
   1.403 +            if (_iter && _iter->hasValue()) {
   1.404 +                if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
   1.405 +                    return true;
   1.406 +                else if (_iter->next())
   1.407 +                    continue;
   1.408 +            }
   1.409 +            // If not, go to next Index:
   1.410 +            if (!nextIndex())
   1.411 +                return false;
   1.412 +        }
   1.413 +    }
   1.414 +    
   1.415 +    bool VersionDictionary::ChangeIterator::nextIndex() {
   1.416 +        delete _iter;
   1.417 +        const VersionDictionary::Trailer *trailer = _file->_trailer();
   1.418 +        while (++_bucket < 256) {
   1.419 +            const Index *index = _file->_index(_bucket);
   1.420 +            if (index) {
   1.421 +                // Skip indexes that weren't updated in this version:
   1.422 +                if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
   1.423 +                    _iter = new Index::Iterator(_file->_file, index);
   1.424 +                    return true;
   1.425 +                }
   1.426 +            }
   1.427 +        }
   1.428 +        _iter = NULL;
   1.429 +        return false;
   1.430 +    }
   1.431 +    
   1.432 +}