src/VersionDictionary.cpp
author Jens Alfke <jens@mooseyard.com>
Sun Sep 20 15:14:12 2009 -0700 (2009-09-20)
changeset 0 31a43d94cc26
child 2 851de24ecb61
permissions -rw-r--r--
First official checkin.
jens@0
     1
/*
jens@0
     2
 *  VersionDictionary.cpp
jens@0
     3
 *  Ottoman
jens@0
     4
 *
jens@0
     5
 *  Created by Jens Alfke on 8/21/09.
jens@0
     6
 *  Copyright 2009 Jens Alfke. All rights reserved.
jens@0
     7
 *  BSD-Licensed: See the file "LICENSE.txt" for details.
jens@0
     8
 */
jens@0
     9
jens@0
    10
#include "VersionDictionary.h"
jens@0
    11
#include "Chunk.h"
jens@0
    12
#include "Index.h"
jens@0
    13
#include "File.h"
jens@0
    14
#include "Chunk.h"
jens@0
    15
#include <assert.h>
jens@0
    16
#include <errno.h>
jens@0
    17
#include <algorithm>
jens@0
    18
jens@0
    19
namespace Mooseyard {
jens@0
    20
            
jens@0
    21
    // Trailer is stored in file; all numbers are little-endian.
jens@0
    22
    class VersionDictionary::Trailer :public Chunk {
jens@0
    23
    public:
jens@0
    24
        LittleEndian<uint32_t>          magicNumber1;
jens@0
    25
        LittleEndian<uint32_t>          count;
jens@0
    26
        VersionDictionary::IndexPositions  indexPositions;
jens@0
    27
        LittleEndian<FilePosition>      previousTrailerPosition;
jens@0
    28
        LittleEndian<uint32_t>          generation;
jens@0
    29
        LittleEndianDouble              timestamp;
jens@0
    30
        LittleEndian<uint32_t>          magicNumber2;
jens@0
    31
                
jens@0
    32
        Trailer ()
jens@0
    33
            :Chunk(sizeof(Trailer), kChunkType)
jens@0
    34
        { }
jens@0
    35
        
jens@0
    36
        Trailer (int c,
jens@0
    37
                     VersionDictionary::IndexPositions indexPos, 
jens@0
    38
                     FilePosition previousTrailerPos,
jens@0
    39
                     uint32_t gen)
jens@0
    40
            :Chunk(sizeof(Trailer), kChunkType),
jens@0
    41
             magicNumber1(kMagicNumber1),
jens@0
    42
             count(c),
jens@0
    43
             indexPositions(indexPos),
jens@0
    44
             previousTrailerPosition(previousTrailerPos),
jens@0
    45
             generation(gen),
jens@0
    46
             timestamp(::time(NULL)),
jens@0
    47
             magicNumber2(kMagicNumber2)
jens@0
    48
        { }
jens@0
    49
        
jens@0
    50
        static const uint32_t kMagicNumber1 = 0xfe83b1cd;
jens@0
    51
        static const uint32_t kMagicNumber2 = 0xff84b2c1;
jens@0
    52
    };
jens@0
    53
    
jens@0
    54
    
jens@0
    55
    
jens@0
    56
    VersionDictionary::VersionDictionary (File *file)
jens@0
    57
        :_file(file),
jens@0
    58
         _trailerPosition(0),
jens@0
    59
         _previousTrailerPosition(0),
jens@0
    60
         _indexPositions(),
jens@0
    61
         _count(0),
jens@0
    62
         _previousVersion()
jens@0
    63
    {
jens@0
    64
        if (file->length() > sizeof(Trailer))
jens@0
    65
            _read();
jens@0
    66
    }
jens@0
    67
    
jens@0
    68
    VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
jens@0
    69
        :_file(file),
jens@0
    70
         _trailerPosition(trailerPosition),
jens@0
    71
         _previousTrailerPosition(0),
jens@0
    72
         _indexPositions(),
jens@0
    73
         _count(0),
jens@0
    74
         _previousVersion()
jens@0
    75
    {
jens@0
    76
        _read(trailerPosition);
jens@0
    77
    }
jens@0
    78
    
jens@0
    79
    int VersionDictionary::count() const {
jens@0
    80
        return _count;
jens@0
    81
    }
jens@0
    82
    
jens@0
    83
    const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
jens@0
    84
        if (_trailerPosition > 0)
jens@0
    85
            return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
jens@0
    86
        else 
jens@0
    87
            return NULL;      // Only happens in the empty-file case
jens@0
    88
    }
jens@0
    89
    
jens@0
    90
    const Index* VersionDictionary::_index (int i) const {
jens@0
    91
        if (_indexPositions[i] > 0)
jens@0
    92
            return (const Index*) _file->mappedPosition(_indexPositions[i]);
jens@0
    93
        else
jens@0
    94
            return NULL;
jens@0
    95
    }
jens@0
    96
    
jens@0
    97
    int VersionDictionary::generation() const {
jens@0
    98
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
    99
        return trailer ? (int)trailer->generation : -1;
jens@0
   100
    }
jens@0
   101
    
jens@0
   102
    time_t VersionDictionary::timestamp() const {
jens@0
   103
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
   104
        return trailer ? (time_t)trailer->timestamp : 0;
jens@0
   105
    }
jens@0
   106
    
jens@0
   107
    const VersionDictionary* VersionDictionary::previousVersion() const {
jens@0
   108
        if (!_previousVersion)
jens@0
   109
            if (_previousTrailerPosition > 0)
jens@0
   110
                _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
jens@0
   111
        return _previousVersion;
jens@0
   112
    }
jens@0
   113
        
jens@0
   114
    
jens@0
   115
    Blob VersionDictionary::get (Key key) const {
jens@0
   116
        const Index *index = _index(key.hash >> 24);
jens@0
   117
        return index ?index->get(_file, key) :Blob();
jens@0
   118
    }
jens@0
   119
    
jens@0
   120
    void VersionDictionary::_read (FilePosition trailerPos) {
jens@0
   121
        assert(_file);
jens@0
   122
        
jens@0
   123
        if (trailerPos > 0) {
jens@0
   124
            _file->setPosition(trailerPos);
jens@0
   125
        } else {
jens@0
   126
            // Determine position of trailer, at EOF:
jens@0
   127
            off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
jens@0
   128
            if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
jens@0
   129
                throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
jens@0
   130
            trailerPos = pos;
jens@0
   131
        }
jens@0
   132
        
jens@0
   133
        // Read & verify trailer:
jens@0
   134
        VersionDictionary::Trailer trailer;
jens@0
   135
        _file->read(trailer);
jens@0
   136
        _trailerPosition = trailerPos;
jens@0
   137
        _count = trailer.count;
jens@0
   138
        _indexPositions = trailer.indexPositions;
jens@0
   139
        
jens@0
   140
        if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
jens@0
   141
            || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
jens@0
   142
            throw File::Error(ERANGE, "No trailer found in file (invalid magic numbers)");\
jens@0
   143
jens@0
   144
        
jens@0
   145
        // Map in the file:
jens@0
   146
        _file->mapRegion(0, _trailerPosition+sizeof(trailer));
jens@0
   147
jens@0
   148
        // Verify Indexes:
jens@0
   149
        for (int i=0; i<256; i++) {
jens@0
   150
            const Index *index = _index(i);
jens@0
   151
            if (index)
jens@0
   152
                index->validate();
jens@0
   153
        }
jens@0
   154
}
jens@0
   155
    
jens@0
   156
    
jens@0
   157
    bool VersionDictionary::isLatestVersion() const {
jens@0
   158
        return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
jens@0
   159
    }
jens@0
   160
    
jens@0
   161
    
jens@0
   162
    /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
jens@0
   163
        as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
jens@0
   164
        Returns the position of the new trailer. */
jens@0
   165
    FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
jens@0
   166
                                          const Dictionary *addDict, 
jens@0
   167
                                          File *dstFile, 
jens@0
   168
                                          bool replace) 
jens@0
   169
    {
jens@0
   170
        File *srcFile = baseDict->_file;
jens@0
   171
        bool incremental = !replace && dstFile==srcFile;
jens@0
   172
        Index* newIndex[256];
jens@0
   173
jens@0
   174
        {
jens@0
   175
            // Work out the needed capacity for each Index bucket:
jens@0
   176
            int newCounts[256] = {0};
jens@0
   177
            if (!replace) {
jens@0
   178
                for (int i=0; i<256; i++) {
jens@0
   179
                    const Index *oldIndex = baseDict->_index(i);
jens@0
   180
                    if (oldIndex)
jens@0
   181
                        newCounts[i] = oldIndex->count();
jens@0
   182
                }
jens@0
   183
            }
jens@0
   184
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   185
            for (; *it; ++*it)
jens@0
   186
                newCounts[it->key().hash >> 24]++;
jens@0
   187
            delete it;
jens@0
   188
            
jens@0
   189
            // Allocate new Indexes, of sufficient capacity, for each growing bucket:
jens@0
   190
            for (int i=0; i<256; i++) {
jens@0
   191
                const Index *oldIndex = baseDict->_index(i);
jens@0
   192
                if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
jens@0
   193
                    newIndex[i] = Index::create(newCounts[i]);
jens@0
   194
                    if (incremental && oldIndex)
jens@0
   195
                        newIndex[i]->copyFrom(srcFile, oldIndex);
jens@0
   196
                } else
jens@0
   197
                    newIndex[i] = NULL;
jens@0
   198
            }
jens@0
   199
        }
jens@0
   200
        
jens@0
   201
        // Lock the file now, seek to the end, and make sure it's been prepared with a header,
jens@0
   202
        // since FilePositions of 0 and 1 are reserved.
jens@0
   203
        File::Lock lock(dstFile);
jens@0
   204
        const FilePosition startPos = dstFile->setPositionToEnd();
jens@0
   205
        if (startPos <= 1)
jens@0
   206
            throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
jens@0
   207
        
jens@0
   208
        // For safety's sake, make sure the old file hasn't been destroyed:
jens@0
   209
        FilePosition oldEOF = baseDict->_trailerPosition;
jens@0
   210
        if (oldEOF > 0)
jens@0
   211
            oldEOF += sizeof(VersionDictionary::Trailer);
jens@0
   212
        if (srcFile->length() < oldEOF)
jens@0
   213
            throw File::Error(ERANGE, "File has been truncated since being read");
jens@0
   214
            
jens@0
   215
        try{
jens@0
   216
            FilePosition pos = startPos;
jens@0
   217
            
jens@0
   218
            int newCount;
jens@0
   219
            if (replace)
jens@0
   220
                newCount = 0;
jens@0
   221
            else if (dstFile == srcFile)
jens@0
   222
                newCount = baseDict->_count;
jens@0
   223
            else {
jens@0
   224
                // Write out the surviving pre-existing entries from the old file:
jens@0
   225
                newCount = 0;
jens@0
   226
                for (VersionDictionary::Iterator it(baseDict); it; ++it) {
jens@0
   227
                    Key key = it.key();
jens@0
   228
                    if (!addDict->contains(key)) {
jens@0
   229
                        int bucket = key.hash >> 24;
jens@0
   230
                        int size = KeyValueChunk::write(dstFile,pos, key, it.value());
jens@0
   231
                        newIndex[bucket]->put(srcFile, key, pos, startPos);
jens@0
   232
                        pos += size;
jens@0
   233
                        newCount++;
jens@0
   234
                    }
jens@0
   235
                }
jens@0
   236
            }
jens@0
   237
            
jens@0
   238
            // Now write the items from the new dict:
jens@0
   239
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   240
            for (; *it; ++*it) {
jens@0
   241
                Key key=it->key();
jens@0
   242
                Blob value=it->value();
jens@0
   243
                int bucket = key.hash >> 24;
jens@0
   244
                Index *index = newIndex[bucket];
jens@0
   245
                assert(index);
jens@0
   246
                                              
jens@0
   247
                if (value.bytes) {
jens@0
   248
                    int size = KeyValueChunk::write(dstFile,pos, key, value);
jens@0
   249
                    if (index->put(srcFile, key, pos, startPos))
jens@0
   250
                        newCount++;
jens@0
   251
                    pos += size;
jens@0
   252
                } else if (incremental) {
jens@0
   253
                    // NULL value is a deleted-entry marker used by OverlayDictionary
jens@0
   254
                    if (index->remove(srcFile, key, startPos))
jens@0
   255
                        newCount--;
jens@0
   256
                }
jens@0
   257
            }
jens@0
   258
            delete it;
jens@0
   259
            
jens@0
   260
            // Write out the new indexes:
jens@0
   261
            IndexPositions newIndexPositions;
jens@0
   262
            if (incremental)
jens@0
   263
                newIndexPositions = baseDict->_indexPositions;
jens@0
   264
            else
jens@0
   265
                memset(&newIndexPositions, 0, sizeof(newIndexPositions));
jens@0
   266
jens@0
   267
            pos += Chunk::writePadding(dstFile);
jens@0
   268
            for (int i=0; i<256; i++) {
jens@0
   269
                if (newIndex[i]) {
jens@0
   270
                    Index *index = newIndex[i];
jens@0
   271
                    newIndexPositions[i] = pos;
jens@0
   272
                    pos += dstFile->write(index, index->size());
jens@0
   273
                    delete index;
jens@0
   274
                }
jens@0
   275
            }
jens@0
   276
            
jens@0
   277
            // Flush everything out to disk, with maximum paranoia, before writing the trailer.
jens@0
   278
            // Since scavenging corrupt files looks for trailers, we don't want to append a
jens@0
   279
            // trailer until we're certain that all of the real data is safely on-disk.
jens@0
   280
            dstFile->flushDisk();
jens@0
   281
            
jens@0
   282
            // Write the trailer:
jens@0
   283
            FilePosition newTrailerPosition = pos;
jens@0
   284
            VersionDictionary::Trailer trailer(newCount,
jens@0
   285
                                            newIndexPositions,
jens@0
   286
                                            baseDict->_trailerPosition,
jens@0
   287
                                            baseDict->generation() + 1);
jens@0
   288
            pos += dstFile->write(trailer);
jens@0
   289
            
jens@0
   290
            // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
jens@0
   291
            // real-time I/O in other apps, so it's bad to call it too often.
jens@0
   292
            dstFile->flush();
jens@0
   293
            assert(pos==dstFile->position());
jens@0
   294
            
jens@0
   295
            return newTrailerPosition;
jens@0
   296
            
jens@0
   297
        } catch (...) {
jens@0
   298
            // If something goes wrong, try to back out everything we wrote:
jens@0
   299
            try{
jens@0
   300
                dstFile->setLength(startPos);
jens@0
   301
            }catch(...) { }
jens@0
   302
            throw;
jens@0
   303
        }
jens@0
   304
    }
jens@0
   305
 
jens@0
   306
    
jens@0
   307
#pragma mark -
jens@0
   308
#pragma mark TESTING-ONLY:
jens@0
   309
    
jens@0
   310
    VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
jens@0
   311
                                                    File *dstFile, 
jens@0
   312
                                                    bool replace) const
jens@0
   313
    {
jens@0
   314
        FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
jens@0
   315
        VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
jens@0
   316
        nextVersion->_previousVersion = this;
jens@0
   317
        return nextVersion;
jens@0
   318
    }
jens@0
   319
    
jens@0
   320
    VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
jens@0
   321
        return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
jens@0
   322
    }
jens@0
   323
    
jens@0
   324
    
jens@0
   325
#pragma mark -
jens@0
   326
#pragma mark ITERATOR:
jens@0
   327
    
jens@0
   328
    Dictionary::Iterator* VersionDictionary::iterate() const {
jens@0
   329
        return new VersionDictionary::Iterator(this);
jens@0
   330
    }
jens@0
   331
    
jens@0
   332
    VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
jens@0
   333
    :_file(file),
jens@0
   334
     _bucket(-1),
jens@0
   335
     _iter(NULL)
jens@0
   336
    { 
jens@0
   337
        nextIndex();
jens@0
   338
    }
jens@0
   339
    
jens@0
   340
    VersionDictionary::Iterator::~Iterator() {
jens@0
   341
        delete _iter;
jens@0
   342
    }
jens@0
   343
    
jens@0
   344
    bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   345
    Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
jens@0
   346
    Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
jens@0
   347
    
jens@0
   348
    bool VersionDictionary::Iterator::next() {
jens@0
   349
        if (_iter->next())
jens@0
   350
            return true;
jens@0
   351
        else {
jens@0
   352
            delete _iter;
jens@0
   353
            return nextIndex();
jens@0
   354
        }
jens@0
   355
    }
jens@0
   356
    
jens@0
   357
    bool VersionDictionary::Iterator::nextIndex() {
jens@0
   358
        while (++_bucket < 256) {
jens@0
   359
            const Index *index = _file->_index(_bucket);
jens@0
   360
            if (index) {
jens@0
   361
                _iter = new Index::Iterator(_file->_file, index);
jens@0
   362
                if (*_iter)
jens@0
   363
                    return true;
jens@0
   364
                else
jens@0
   365
                    delete _iter;
jens@0
   366
            }
jens@0
   367
        }
jens@0
   368
        _iter = NULL;
jens@0
   369
        return false;
jens@0
   370
    }
jens@0
   371
    
jens@0
   372
    
jens@0
   373
#pragma mark -
jens@0
   374
#pragma mark CHANGE ITERATOR:
jens@0
   375
    
jens@0
   376
    VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
jens@0
   377
        return new VersionDictionary::ChangeIterator(this);
jens@0
   378
    }
jens@0
   379
    
jens@0
   380
    VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
jens@0
   381
    :_file(file),
jens@0
   382
     _bucket(-1),
jens@0
   383
     _iter(NULL)
jens@0
   384
    { 
jens@0
   385
        next();            
jens@0
   386
    }
jens@0
   387
    
jens@0
   388
    VersionDictionary::ChangeIterator::~ChangeIterator() {
jens@0
   389
        delete _iter;
jens@0
   390
    }
jens@0
   391
    
jens@0
   392
    bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   393
    Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
jens@0
   394
    Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
jens@0
   395
    
jens@0
   396
    bool VersionDictionary::ChangeIterator::next() {
jens@0
   397
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   398
        for (;;) {
jens@0
   399
            // Check if current iterator has a value that's from this version:
jens@0
   400
            if (_iter && _iter->hasValue()) {
jens@0
   401
                if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
jens@0
   402
                    return true;
jens@0
   403
                else if (_iter->next())
jens@0
   404
                    continue;
jens@0
   405
            }
jens@0
   406
            // If not, go to next Index:
jens@0
   407
            if (!nextIndex())
jens@0
   408
                return false;
jens@0
   409
        }
jens@0
   410
    }
jens@0
   411
    
jens@0
   412
    bool VersionDictionary::ChangeIterator::nextIndex() {
jens@0
   413
        delete _iter;
jens@0
   414
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   415
        while (++_bucket < 256) {
jens@0
   416
            const Index *index = _file->_index(_bucket);
jens@0
   417
            if (index) {
jens@0
   418
                // Skip indexes that weren't updated in this version:
jens@0
   419
                if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
jens@0
   420
                    _iter = new Index::Iterator(_file->_file, index);
jens@0
   421
                    return true;
jens@0
   422
                }
jens@0
   423
            }
jens@0
   424
        }
jens@0
   425
        _iter = NULL;
jens@0
   426
        return false;
jens@0
   427
    }
jens@0
   428
    
jens@0
   429
}