src/VersionDictionary.cpp
author Jens Alfke <jens@mooseyard.com>
Thu Sep 24 21:46:17 2009 -0700 (2009-09-24)
changeset 5 4c10b7956435
parent 2 851de24ecb61
child 8 21a6c17f4e3e
permissions -rw-r--r--
Oops, forgot to set _previousTrailerPosition, which made the previousVersion() accessor not work. Fixed. Also added some sanity-checking of its value.
jens@0
     1
/*
jens@0
     2
 *  VersionDictionary.cpp
jens@0
     3
 *  Ottoman
jens@0
     4
 *
jens@0
     5
 *  Created by Jens Alfke on 8/21/09.
jens@0
     6
 *  Copyright 2009 Jens Alfke. All rights reserved.
jens@0
     7
 *  BSD-Licensed: See the file "LICENSE.txt" for details.
jens@0
     8
 */
jens@0
     9
jens@0
    10
#include "VersionDictionary.h"
jens@0
    11
#include "Chunk.h"
jens@0
    12
#include "Index.h"
jens@0
    13
#include "File.h"
jens@0
    14
#include "Chunk.h"
jens@0
    15
#include <assert.h>
jens@0
    16
#include <errno.h>
jens@0
    17
#include <algorithm>
jens@0
    18
jens@0
    19
namespace Mooseyard {
jens@0
    20
            
jens@0
    21
    // Trailer is stored in file; all numbers are little-endian.
jens@0
    22
    class VersionDictionary::Trailer :public Chunk {
jens@0
    23
    public:
jens@0
    24
        LittleEndian<uint32_t>          magicNumber1;
jens@0
    25
        LittleEndian<uint32_t>          count;
jens@0
    26
        VersionDictionary::IndexPositions  indexPositions;
jens@0
    27
        LittleEndian<FilePosition>      previousTrailerPosition;
jens@0
    28
        LittleEndian<uint32_t>          generation;
jens@0
    29
        LittleEndianDouble              timestamp;
jens@0
    30
        LittleEndian<uint32_t>          magicNumber2;
jens@0
    31
                
jens@0
    32
        Trailer ()
jens@0
    33
            :Chunk(sizeof(Trailer), kChunkType)
jens@0
    34
        { }
jens@0
    35
        
jens@0
    36
        Trailer (int c,
jens@0
    37
                     VersionDictionary::IndexPositions indexPos, 
jens@0
    38
                     FilePosition previousTrailerPos,
jens@0
    39
                     uint32_t gen)
jens@0
    40
            :Chunk(sizeof(Trailer), kChunkType),
jens@0
    41
             magicNumber1(kMagicNumber1),
jens@0
    42
             count(c),
jens@0
    43
             indexPositions(indexPos),
jens@0
    44
             previousTrailerPosition(previousTrailerPos),
jens@0
    45
             generation(gen),
jens@0
    46
             timestamp(::time(NULL)),
jens@0
    47
             magicNumber2(kMagicNumber2)
jens@0
    48
        { }
jens@0
    49
        
jens@0
    50
        static const uint32_t kMagicNumber1 = 0xfe83b1cd;
jens@0
    51
        static const uint32_t kMagicNumber2 = 0xff84b2c1;
jens@0
    52
    };
jens@0
    53
    
jens@0
    54
    
jens@0
    55
    
jens@0
    56
    VersionDictionary::VersionDictionary (File *file)
jens@0
    57
        :_file(file),
jens@0
    58
         _trailerPosition(0),
jens@0
    59
         _previousTrailerPosition(0),
jens@0
    60
         _indexPositions(),
jens@0
    61
         _count(0),
jens@0
    62
         _previousVersion()
jens@0
    63
    {
jens@0
    64
        if (file->length() > sizeof(Trailer))
jens@0
    65
            _read();
jens@0
    66
    }
jens@0
    67
    
jens@0
    68
    VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
jens@0
    69
        :_file(file),
jens@0
    70
         _trailerPosition(trailerPosition),
jens@0
    71
         _previousTrailerPosition(0),
jens@0
    72
         _indexPositions(),
jens@0
    73
         _count(0),
jens@0
    74
         _previousVersion()
jens@0
    75
    {
jens@0
    76
        _read(trailerPosition);
jens@0
    77
    }
jens@0
    78
    
jens@0
    79
    int VersionDictionary::count() const {
jens@0
    80
        return _count;
jens@0
    81
    }
jens@0
    82
    
jens@0
    83
    const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
jens@0
    84
        if (_trailerPosition > 0)
jens@0
    85
            return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
jens@0
    86
        else 
jens@0
    87
            return NULL;      // Only happens in the empty-file case
jens@0
    88
    }
jens@0
    89
    
jens@0
    90
    const Index* VersionDictionary::_index (int i) const {
jens@0
    91
        if (_indexPositions[i] > 0)
jens@0
    92
            return (const Index*) _file->mappedPosition(_indexPositions[i]);
jens@0
    93
        else
jens@0
    94
            return NULL;
jens@0
    95
    }
jens@0
    96
    
jens@0
    97
    int VersionDictionary::generation() const {
jens@0
    98
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
    99
        return trailer ? (int)trailer->generation : -1;
jens@0
   100
    }
jens@0
   101
    
jens@0
   102
    time_t VersionDictionary::timestamp() const {
jens@0
   103
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
   104
        return trailer ? (time_t)trailer->timestamp : 0;
jens@0
   105
    }
jens@0
   106
    
jens@0
   107
    const VersionDictionary* VersionDictionary::previousVersion() const {
jens@0
   108
        if (!_previousVersion)
jens@0
   109
            if (_previousTrailerPosition > 0)
jens@0
   110
                _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
jens@0
   111
        return _previousVersion;
jens@0
   112
    }
jens@0
   113
        
jens@0
   114
    
jens@0
   115
    Blob VersionDictionary::get (Key key) const {
jens@0
   116
        const Index *index = _index(key.hash >> 24);
jens@0
   117
        return index ?index->get(_file, key) :Blob();
jens@0
   118
    }
jens@0
   119
    
jens@0
   120
    void VersionDictionary::_read (FilePosition trailerPos) {
jens@0
   121
        assert(_file);
jens@0
   122
        
jens@0
   123
        if (trailerPos > 0) {
jens@0
   124
            _file->setPosition(trailerPos);
jens@0
   125
        } else {
jens@0
   126
            // Determine position of trailer, at EOF:
jens@0
   127
            off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
jens@0
   128
            if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
jens@0
   129
                throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
jens@2
   130
            trailerPos = (FilePosition)pos;
jens@0
   131
        }
jens@0
   132
        
jens@0
   133
        // Read & verify trailer:
jens@0
   134
        VersionDictionary::Trailer trailer;
jens@0
   135
        _file->read(trailer);
jens@0
   136
        _trailerPosition = trailerPos;
jens@5
   137
        _previousTrailerPosition = trailer.previousTrailerPosition;
jens@0
   138
        _count = trailer.count;
jens@0
   139
        _indexPositions = trailer.indexPositions;
jens@0
   140
        
jens@0
   141
        if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
jens@0
   142
            || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
jens@5
   143
            throw File::Error("No trailer found in file (invalid magic numbers)");
jens@5
   144
        if (_previousTrailerPosition >= _trailerPosition)
jens@5
   145
            throw File::Error("Bad VersionDictionary trailer (illegal previousTrailerPosition)");
jens@0
   146
        
jens@0
   147
        // Map in the file:
jens@0
   148
        _file->mapRegion(0, _trailerPosition+sizeof(trailer));
jens@0
   149
jens@0
   150
        // Verify Indexes:
jens@0
   151
        for (int i=0; i<256; i++) {
jens@5
   152
            if (_indexPositions[i] > 0)
jens@5
   153
                if (_indexPositions[i] < _previousTrailerPosition || _indexPositions[i] >= _trailerPosition)
jens@5
   154
                    throw File::Error("Bad VersionDictionary trailer (illegal index position)");
jens@0
   155
            const Index *index = _index(i);
jens@0
   156
            if (index)
jens@0
   157
                index->validate();
jens@0
   158
        }
jens@0
   159
}
jens@0
   160
    
jens@0
   161
    
jens@0
   162
    bool VersionDictionary::isLatestVersion() const {
jens@0
   163
        return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
jens@0
   164
    }
jens@0
   165
    
jens@0
   166
    
jens@0
   167
    /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
jens@0
   168
        as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
jens@0
   169
        Returns the position of the new trailer. */
jens@0
   170
    FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
jens@0
   171
                                          const Dictionary *addDict, 
jens@0
   172
                                          File *dstFile, 
jens@0
   173
                                          bool replace) 
jens@0
   174
    {
jens@0
   175
        File *srcFile = baseDict->_file;
jens@0
   176
        bool incremental = !replace && dstFile==srcFile;
jens@0
   177
        Index* newIndex[256];
jens@0
   178
jens@0
   179
        {
jens@0
   180
            // Work out the needed capacity for each Index bucket:
jens@0
   181
            int newCounts[256] = {0};
jens@0
   182
            if (!replace) {
jens@0
   183
                for (int i=0; i<256; i++) {
jens@0
   184
                    const Index *oldIndex = baseDict->_index(i);
jens@0
   185
                    if (oldIndex)
jens@0
   186
                        newCounts[i] = oldIndex->count();
jens@0
   187
                }
jens@0
   188
            }
jens@0
   189
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   190
            for (; *it; ++*it)
jens@0
   191
                newCounts[it->key().hash >> 24]++;
jens@0
   192
            delete it;
jens@0
   193
            
jens@0
   194
            // Allocate new Indexes, of sufficient capacity, for each growing bucket:
jens@0
   195
            for (int i=0; i<256; i++) {
jens@0
   196
                const Index *oldIndex = baseDict->_index(i);
jens@0
   197
                if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
jens@0
   198
                    newIndex[i] = Index::create(newCounts[i]);
jens@0
   199
                    if (incremental && oldIndex)
jens@0
   200
                        newIndex[i]->copyFrom(srcFile, oldIndex);
jens@0
   201
                } else
jens@0
   202
                    newIndex[i] = NULL;
jens@0
   203
            }
jens@0
   204
        }
jens@0
   205
        
jens@0
   206
        // Lock the file now, seek to the end, and make sure it's been prepared with a header,
jens@0
   207
        // since FilePositions of 0 and 1 are reserved.
jens@0
   208
        File::Lock lock(dstFile);
jens@2
   209
        const FilePosition startPos = (FilePosition) dstFile->setPositionToEnd();
jens@0
   210
        if (startPos <= 1)
jens@0
   211
            throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
jens@0
   212
        
jens@0
   213
        // For safety's sake, make sure the old file hasn't been destroyed:
jens@0
   214
        FilePosition oldEOF = baseDict->_trailerPosition;
jens@0
   215
        if (oldEOF > 0)
jens@0
   216
            oldEOF += sizeof(VersionDictionary::Trailer);
jens@0
   217
        if (srcFile->length() < oldEOF)
jens@0
   218
            throw File::Error(ERANGE, "File has been truncated since being read");
jens@0
   219
            
jens@0
   220
        try{
jens@0
   221
            FilePosition pos = startPos;
jens@0
   222
            
jens@0
   223
            int newCount;
jens@0
   224
            if (replace)
jens@0
   225
                newCount = 0;
jens@0
   226
            else if (dstFile == srcFile)
jens@0
   227
                newCount = baseDict->_count;
jens@0
   228
            else {
jens@0
   229
                // Write out the surviving pre-existing entries from the old file:
jens@0
   230
                newCount = 0;
jens@0
   231
                for (VersionDictionary::Iterator it(baseDict); it; ++it) {
jens@0
   232
                    Key key = it.key();
jens@0
   233
                    if (!addDict->contains(key)) {
jens@0
   234
                        int bucket = key.hash >> 24;
jens@0
   235
                        int size = KeyValueChunk::write(dstFile,pos, key, it.value());
jens@0
   236
                        newIndex[bucket]->put(srcFile, key, pos, startPos);
jens@0
   237
                        pos += size;
jens@0
   238
                        newCount++;
jens@0
   239
                    }
jens@0
   240
                }
jens@0
   241
            }
jens@0
   242
            
jens@0
   243
            // Now write the items from the new dict:
jens@0
   244
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   245
            for (; *it; ++*it) {
jens@0
   246
                Key key=it->key();
jens@0
   247
                Blob value=it->value();
jens@0
   248
                int bucket = key.hash >> 24;
jens@0
   249
                Index *index = newIndex[bucket];
jens@0
   250
                assert(index);
jens@0
   251
                                              
jens@0
   252
                if (value.bytes) {
jens@0
   253
                    int size = KeyValueChunk::write(dstFile,pos, key, value);
jens@0
   254
                    if (index->put(srcFile, key, pos, startPos))
jens@0
   255
                        newCount++;
jens@0
   256
                    pos += size;
jens@0
   257
                } else if (incremental) {
jens@0
   258
                    // NULL value is a deleted-entry marker used by OverlayDictionary
jens@0
   259
                    if (index->remove(srcFile, key, startPos))
jens@0
   260
                        newCount--;
jens@0
   261
                }
jens@0
   262
            }
jens@0
   263
            delete it;
jens@0
   264
            
jens@0
   265
            // Write out the new indexes:
jens@0
   266
            IndexPositions newIndexPositions;
jens@0
   267
            if (incremental)
jens@0
   268
                newIndexPositions = baseDict->_indexPositions;
jens@0
   269
            else
jens@0
   270
                memset(&newIndexPositions, 0, sizeof(newIndexPositions));
jens@0
   271
jens@0
   272
            pos += Chunk::writePadding(dstFile);
jens@0
   273
            for (int i=0; i<256; i++) {
jens@0
   274
                if (newIndex[i]) {
jens@0
   275
                    Index *index = newIndex[i];
jens@0
   276
                    newIndexPositions[i] = pos;
jens@0
   277
                    pos += dstFile->write(index, index->size());
jens@0
   278
                    delete index;
jens@0
   279
                }
jens@0
   280
            }
jens@0
   281
            
jens@0
   282
            // Flush everything out to disk, with maximum paranoia, before writing the trailer.
jens@0
   283
            // Since scavenging corrupt files looks for trailers, we don't want to append a
jens@0
   284
            // trailer until we're certain that all of the real data is safely on-disk.
jens@0
   285
            dstFile->flushDisk();
jens@0
   286
            
jens@0
   287
            // Write the trailer:
jens@0
   288
            FilePosition newTrailerPosition = pos;
jens@0
   289
            VersionDictionary::Trailer trailer(newCount,
jens@0
   290
                                            newIndexPositions,
jens@0
   291
                                            baseDict->_trailerPosition,
jens@0
   292
                                            baseDict->generation() + 1);
jens@0
   293
            pos += dstFile->write(trailer);
jens@0
   294
            
jens@0
   295
            // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
jens@0
   296
            // real-time I/O in other apps, so it's bad to call it too often.
jens@0
   297
            dstFile->flush();
jens@0
   298
            assert(pos==dstFile->position());
jens@0
   299
            
jens@0
   300
            return newTrailerPosition;
jens@0
   301
            
jens@0
   302
        } catch (...) {
jens@0
   303
            // If something goes wrong, try to back out everything we wrote:
jens@0
   304
            try{
jens@0
   305
                dstFile->setLength(startPos);
jens@0
   306
            }catch(...) { }
jens@0
   307
            throw;
jens@0
   308
        }
jens@0
   309
    }
jens@0
   310
 
jens@0
   311
    
jens@0
   312
#pragma mark -
jens@0
   313
#pragma mark TESTING-ONLY:
jens@0
   314
    
jens@0
   315
    VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
jens@0
   316
                                                    File *dstFile, 
jens@0
   317
                                                    bool replace) const
jens@0
   318
    {
jens@0
   319
        FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
jens@0
   320
        VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
jens@0
   321
        nextVersion->_previousVersion = this;
jens@0
   322
        return nextVersion;
jens@0
   323
    }
jens@0
   324
    
jens@0
   325
    VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
jens@0
   326
        return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
jens@0
   327
    }
jens@0
   328
    
jens@0
   329
    
jens@0
   330
#pragma mark -
jens@0
   331
#pragma mark ITERATOR:
jens@0
   332
    
jens@0
   333
    Dictionary::Iterator* VersionDictionary::iterate() const {
jens@0
   334
        return new VersionDictionary::Iterator(this);
jens@0
   335
    }
jens@0
   336
    
jens@0
   337
    VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
jens@0
   338
    :_file(file),
jens@0
   339
     _bucket(-1),
jens@0
   340
     _iter(NULL)
jens@0
   341
    { 
jens@0
   342
        nextIndex();
jens@0
   343
    }
jens@0
   344
    
jens@0
   345
    VersionDictionary::Iterator::~Iterator() {
jens@0
   346
        delete _iter;
jens@0
   347
    }
jens@0
   348
    
jens@0
   349
    bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   350
    Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
jens@0
   351
    Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
jens@0
   352
    
jens@0
   353
    bool VersionDictionary::Iterator::next() {
jens@0
   354
        if (_iter->next())
jens@0
   355
            return true;
jens@0
   356
        else {
jens@0
   357
            delete _iter;
jens@0
   358
            return nextIndex();
jens@0
   359
        }
jens@0
   360
    }
jens@0
   361
    
jens@0
   362
    bool VersionDictionary::Iterator::nextIndex() {
jens@0
   363
        while (++_bucket < 256) {
jens@0
   364
            const Index *index = _file->_index(_bucket);
jens@0
   365
            if (index) {
jens@0
   366
                _iter = new Index::Iterator(_file->_file, index);
jens@0
   367
                if (*_iter)
jens@0
   368
                    return true;
jens@0
   369
                else
jens@0
   370
                    delete _iter;
jens@0
   371
            }
jens@0
   372
        }
jens@0
   373
        _iter = NULL;
jens@0
   374
        return false;
jens@0
   375
    }
jens@0
   376
    
jens@0
   377
    
jens@0
   378
#pragma mark -
jens@0
   379
#pragma mark CHANGE ITERATOR:
jens@0
   380
    
jens@0
   381
    VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
jens@0
   382
        return new VersionDictionary::ChangeIterator(this);
jens@0
   383
    }
jens@0
   384
    
jens@0
   385
    VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
jens@0
   386
    :_file(file),
jens@0
   387
     _bucket(-1),
jens@0
   388
     _iter(NULL)
jens@0
   389
    { 
jens@0
   390
        next();            
jens@0
   391
    }
jens@0
   392
    
jens@0
   393
    VersionDictionary::ChangeIterator::~ChangeIterator() {
jens@0
   394
        delete _iter;
jens@0
   395
    }
jens@0
   396
    
jens@0
   397
    bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   398
    Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
jens@0
   399
    Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
jens@0
   400
    
jens@0
   401
    bool VersionDictionary::ChangeIterator::next() {
jens@0
   402
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   403
        for (;;) {
jens@0
   404
            // Check if current iterator has a value that's from this version:
jens@0
   405
            if (_iter && _iter->hasValue()) {
jens@0
   406
                if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
jens@0
   407
                    return true;
jens@0
   408
                else if (_iter->next())
jens@0
   409
                    continue;
jens@0
   410
            }
jens@0
   411
            // If not, go to next Index:
jens@0
   412
            if (!nextIndex())
jens@0
   413
                return false;
jens@0
   414
        }
jens@0
   415
    }
jens@0
   416
    
jens@0
   417
    bool VersionDictionary::ChangeIterator::nextIndex() {
jens@0
   418
        delete _iter;
jens@0
   419
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   420
        while (++_bucket < 256) {
jens@0
   421
            const Index *index = _file->_index(_bucket);
jens@0
   422
            if (index) {
jens@0
   423
                // Skip indexes that weren't updated in this version:
jens@0
   424
                if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
jens@0
   425
                    _iter = new Index::Iterator(_file->_file, index);
jens@0
   426
                    return true;
jens@0
   427
                }
jens@0
   428
            }
jens@0
   429
        }
jens@0
   430
        _iter = NULL;
jens@0
   431
        return false;
jens@0
   432
    }
jens@0
   433
    
jens@0
   434
}