src/VersionDictionary.cpp
author Jens Alfke <jens@mooseyard.com>
Tue Sep 29 15:46:42 2009 -0700 (2009-09-29)
changeset 9 629f61203db1
parent 8 21a6c17f4e3e
permissions -rw-r--r--
* Merged in jbm's changes for Linux compatibility, fixing Mac compatibility in the process :)
* Fixed two regressions having to do with the _previousTrailerPosition in VersionDictionary.cpp.
* Made sure RTTI is enabled in the OttomanTest target because gtest requires it.
jens@0
     1
/*
jens@0
     2
 *  VersionDictionary.cpp
jens@0
     3
 *  Ottoman
jens@0
     4
 *
jens@0
     5
 *  Created by Jens Alfke on 8/21/09.
jens@0
     6
 *  Copyright 2009 Jens Alfke. All rights reserved.
jens@0
     7
 *  BSD-Licensed: See the file "LICENSE.txt" for details.
jens@0
     8
 */
jens@0
     9
jens@0
    10
#include "VersionDictionary.h"
jens@0
    11
#include "Chunk.h"
jens@0
    12
#include "Index.h"
jens@0
    13
#include "File.h"
jens@0
    14
#include "Chunk.h"
jens@9
    15
#include <algorithm>
jens@0
    16
#include <assert.h>
jens@0
    17
#include <errno.h>
jens@9
    18
#include <stdint.h>
jbm@8
    19
#include <time.h>
jbm@8
    20
jens@0
    21
jens@0
    22
namespace Mooseyard {
jens@0
    23
            
jens@0
    24
    // Trailer is stored in file; all numbers are little-endian.
jens@0
    25
    class VersionDictionary::Trailer :public Chunk {
jens@0
    26
    public:
jens@0
    27
        LittleEndian<uint32_t>          magicNumber1;
jens@0
    28
        LittleEndian<uint32_t>          count;
jens@0
    29
        VersionDictionary::IndexPositions  indexPositions;
jens@0
    30
        LittleEndian<FilePosition>      previousTrailerPosition;
jens@0
    31
        LittleEndian<uint32_t>          generation;
jens@0
    32
        LittleEndianDouble              timestamp;
jens@0
    33
        LittleEndian<uint32_t>          magicNumber2;
jens@0
    34
                
jens@0
    35
        Trailer ()
jens@0
    36
            :Chunk(sizeof(Trailer), kChunkType)
jens@0
    37
        { }
jens@0
    38
        
jens@0
    39
        Trailer (int c,
jens@0
    40
                     VersionDictionary::IndexPositions indexPos, 
jens@0
    41
                     FilePosition previousTrailerPos,
jens@0
    42
                     uint32_t gen)
jens@0
    43
            :Chunk(sizeof(Trailer), kChunkType),
jens@0
    44
             magicNumber1(kMagicNumber1),
jens@0
    45
             count(c),
jens@0
    46
             indexPositions(indexPos),
jens@0
    47
             previousTrailerPosition(previousTrailerPos),
jens@0
    48
             generation(gen),
jens@0
    49
             timestamp(::time(NULL)),
jens@0
    50
             magicNumber2(kMagicNumber2)
jens@0
    51
        { }
jens@0
    52
        
jens@0
    53
        static const uint32_t kMagicNumber1 = 0xfe83b1cd;
jens@0
    54
        static const uint32_t kMagicNumber2 = 0xff84b2c1;
jens@0
    55
    };
jens@0
    56
    
jens@0
    57
    
jens@0
    58
    
jens@0
    59
    VersionDictionary::VersionDictionary (File *file)
jens@0
    60
        :_file(file),
jens@0
    61
         _trailerPosition(0),
jens@0
    62
         _previousTrailerPosition(0),
jens@0
    63
         _indexPositions(),
jens@0
    64
         _count(0),
jens@0
    65
         _previousVersion()
jens@0
    66
    {
jens@0
    67
        if (file->length() > sizeof(Trailer))
jens@0
    68
            _read();
jens@0
    69
    }
jens@0
    70
    
jens@0
    71
    VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
jens@0
    72
        :_file(file),
jens@0
    73
         _trailerPosition(trailerPosition),
jens@0
    74
         _previousTrailerPosition(0),
jens@0
    75
         _indexPositions(),
jens@0
    76
         _count(0),
jens@0
    77
         _previousVersion()
jens@0
    78
    {
jens@0
    79
        _read(trailerPosition);
jens@0
    80
    }
jens@0
    81
    
jens@0
    82
    int VersionDictionary::count() const {
jens@0
    83
        return _count;
jens@0
    84
    }
jens@0
    85
    
jens@0
    86
    const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
jens@0
    87
        if (_trailerPosition > 0)
jens@0
    88
            return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
jens@0
    89
        else 
jens@0
    90
            return NULL;      // Only happens in the empty-file case
jens@0
    91
    }
jens@0
    92
    
jens@0
    93
    const Index* VersionDictionary::_index (int i) const {
jens@0
    94
        if (_indexPositions[i] > 0)
jens@0
    95
            return (const Index*) _file->mappedPosition(_indexPositions[i]);
jens@0
    96
        else
jens@0
    97
            return NULL;
jens@0
    98
    }
jens@0
    99
    
jens@0
   100
    int VersionDictionary::generation() const {
jens@0
   101
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
   102
        return trailer ? (int)trailer->generation : -1;
jens@0
   103
    }
jens@0
   104
    
jens@0
   105
    time_t VersionDictionary::timestamp() const {
jens@0
   106
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
   107
        return trailer ? (time_t)trailer->timestamp : 0;
jens@0
   108
    }
jens@0
   109
    
jens@0
   110
    const VersionDictionary* VersionDictionary::previousVersion() const {
jens@0
   111
        if (!_previousVersion)
jens@0
   112
            if (_previousTrailerPosition > 0)
jens@0
   113
                _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
jens@0
   114
        return _previousVersion;
jens@0
   115
    }
jens@0
   116
        
jens@0
   117
    
jens@0
   118
    Blob VersionDictionary::get (Key key) const {
jens@0
   119
        const Index *index = _index(key.hash >> 24);
jens@0
   120
        return index ?index->get(_file, key) :Blob();
jens@0
   121
    }
jens@0
   122
    
jens@0
   123
    void VersionDictionary::_read (FilePosition trailerPos) {
jens@0
   124
        assert(_file);
jens@0
   125
        
jens@0
   126
        if (trailerPos > 0) {
jens@0
   127
            _file->setPosition(trailerPos);
jens@0
   128
        } else {
jens@0
   129
            // Determine position of trailer, at EOF:
jens@0
   130
            off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
jens@0
   131
            if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
jens@0
   132
                throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
jens@2
   133
            trailerPos = (FilePosition)pos;
jens@0
   134
        }
jens@0
   135
        
jens@0
   136
        // Read & verify trailer:
jens@0
   137
        VersionDictionary::Trailer trailer;
jens@0
   138
        _file->read(trailer);
jens@0
   139
        _trailerPosition = trailerPos;
jens@5
   140
        _previousTrailerPosition = trailer.previousTrailerPosition;
jens@0
   141
        _count = trailer.count;
jens@0
   142
        _indexPositions = trailer.indexPositions;
jens@0
   143
        
jens@0
   144
        if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
jens@0
   145
            || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
jens@5
   146
            throw File::Error("No trailer found in file (invalid magic numbers)");
jens@5
   147
        if (_previousTrailerPosition >= _trailerPosition)
jens@5
   148
            throw File::Error("Bad VersionDictionary trailer (illegal previousTrailerPosition)");
jens@0
   149
        
jens@0
   150
        // Map in the file:
jens@0
   151
        _file->mapRegion(0, _trailerPosition+sizeof(trailer));
jens@0
   152
jens@0
   153
        // Verify Indexes:
jens@0
   154
        for (int i=0; i<256; i++) {
jens@9
   155
            if (_indexPositions[i] >= _trailerPosition)
jens@9
   156
                throw File::Error("Bad VersionDictionary trailer (illegal index position)");
jens@0
   157
            const Index *index = _index(i);
jens@0
   158
            if (index)
jens@0
   159
                index->validate();
jens@0
   160
        }
jens@0
   161
}
jens@0
   162
    
jens@0
   163
    
jens@0
   164
    bool VersionDictionary::isLatestVersion() const {
jens@0
   165
        return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
jens@0
   166
    }
jens@0
   167
    
jens@0
   168
    
jens@0
   169
    /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
jens@0
   170
        as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
jens@0
   171
        Returns the position of the new trailer. */
jens@0
   172
    FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
jens@0
   173
                                          const Dictionary *addDict, 
jens@0
   174
                                          File *dstFile, 
jens@0
   175
                                          bool replace) 
jens@0
   176
    {
jens@0
   177
        File *srcFile = baseDict->_file;
jens@0
   178
        bool incremental = !replace && dstFile==srcFile;
jens@0
   179
        Index* newIndex[256];
jens@0
   180
jens@0
   181
        {
jens@0
   182
            // Work out the needed capacity for each Index bucket:
jens@0
   183
            int newCounts[256] = {0};
jens@0
   184
            if (!replace) {
jens@0
   185
                for (int i=0; i<256; i++) {
jens@0
   186
                    const Index *oldIndex = baseDict->_index(i);
jens@0
   187
                    if (oldIndex)
jens@0
   188
                        newCounts[i] = oldIndex->count();
jens@0
   189
                }
jens@0
   190
            }
jens@0
   191
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   192
            for (; *it; ++*it)
jens@0
   193
                newCounts[it->key().hash >> 24]++;
jens@0
   194
            delete it;
jens@0
   195
            
jens@0
   196
            // Allocate new Indexes, of sufficient capacity, for each growing bucket:
jens@0
   197
            for (int i=0; i<256; i++) {
jens@0
   198
                const Index *oldIndex = baseDict->_index(i);
jens@0
   199
                if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
jens@0
   200
                    newIndex[i] = Index::create(newCounts[i]);
jens@0
   201
                    if (incremental && oldIndex)
jens@0
   202
                        newIndex[i]->copyFrom(srcFile, oldIndex);
jens@0
   203
                } else
jens@0
   204
                    newIndex[i] = NULL;
jens@0
   205
            }
jens@0
   206
        }
jens@0
   207
        
jens@0
   208
        // Lock the file now, seek to the end, and make sure it's been prepared with a header,
jens@0
   209
        // since FilePositions of 0 and 1 are reserved.
jens@0
   210
        File::Lock lock(dstFile);
jens@2
   211
        const FilePosition startPos = (FilePosition) dstFile->setPositionToEnd();
jens@0
   212
        if (startPos <= 1)
jens@0
   213
            throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
jens@0
   214
        
jens@0
   215
        // For safety's sake, make sure the old file hasn't been destroyed:
jens@0
   216
        FilePosition oldEOF = baseDict->_trailerPosition;
jens@0
   217
        if (oldEOF > 0)
jens@0
   218
            oldEOF += sizeof(VersionDictionary::Trailer);
jens@0
   219
        if (srcFile->length() < oldEOF)
jens@0
   220
            throw File::Error(ERANGE, "File has been truncated since being read");
jens@0
   221
            
jens@0
   222
        try{
jens@0
   223
            FilePosition pos = startPos;
jens@0
   224
            
jens@0
   225
            int newCount;
jens@0
   226
            if (replace)
jens@0
   227
                newCount = 0;
jens@0
   228
            else if (dstFile == srcFile)
jens@0
   229
                newCount = baseDict->_count;
jens@0
   230
            else {
jens@0
   231
                // Write out the surviving pre-existing entries from the old file:
jens@0
   232
                newCount = 0;
jens@0
   233
                for (VersionDictionary::Iterator it(baseDict); it; ++it) {
jens@0
   234
                    Key key = it.key();
jens@0
   235
                    if (!addDict->contains(key)) {
jens@0
   236
                        int bucket = key.hash >> 24;
jens@0
   237
                        int size = KeyValueChunk::write(dstFile,pos, key, it.value());
jens@0
   238
                        newIndex[bucket]->put(srcFile, key, pos, startPos);
jens@0
   239
                        pos += size;
jens@0
   240
                        newCount++;
jens@0
   241
                    }
jens@0
   242
                }
jens@0
   243
            }
jens@0
   244
            
jens@0
   245
            // Now write the items from the new dict:
jens@0
   246
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   247
            for (; *it; ++*it) {
jens@0
   248
                Key key=it->key();
jens@0
   249
                Blob value=it->value();
jens@0
   250
                int bucket = key.hash >> 24;
jens@0
   251
                Index *index = newIndex[bucket];
jens@0
   252
                assert(index);
jens@0
   253
                                              
jens@0
   254
                if (value.bytes) {
jens@0
   255
                    int size = KeyValueChunk::write(dstFile,pos, key, value);
jens@0
   256
                    if (index->put(srcFile, key, pos, startPos))
jens@0
   257
                        newCount++;
jens@0
   258
                    pos += size;
jens@0
   259
                } else if (incremental) {
jens@0
   260
                    // NULL value is a deleted-entry marker used by OverlayDictionary
jens@0
   261
                    if (index->remove(srcFile, key, startPos))
jens@0
   262
                        newCount--;
jens@0
   263
                }
jens@0
   264
            }
jens@0
   265
            delete it;
jens@0
   266
            
jens@0
   267
            // Write out the new indexes:
jens@0
   268
            IndexPositions newIndexPositions;
jens@0
   269
            if (incremental)
jens@0
   270
                newIndexPositions = baseDict->_indexPositions;
jens@0
   271
            else
jens@0
   272
                memset(&newIndexPositions, 0, sizeof(newIndexPositions));
jens@0
   273
jens@0
   274
            pos += Chunk::writePadding(dstFile);
jens@0
   275
            for (int i=0; i<256; i++) {
jens@0
   276
                if (newIndex[i]) {
jens@0
   277
                    Index *index = newIndex[i];
jens@0
   278
                    newIndexPositions[i] = pos;
jens@0
   279
                    pos += dstFile->write(index, index->size());
jens@0
   280
                    delete index;
jens@0
   281
                }
jens@0
   282
            }
jens@0
   283
            
jens@0
   284
            // Flush everything out to disk, with maximum paranoia, before writing the trailer.
jens@0
   285
            // Since scavenging corrupt files looks for trailers, we don't want to append a
jens@0
   286
            // trailer until we're certain that all of the real data is safely on-disk.
jens@0
   287
            dstFile->flushDisk();
jens@0
   288
            
jens@0
   289
            // Write the trailer:
jens@9
   290
            FilePosition previousTrailerPosition = 0;
jens@9
   291
            if (dstFile==srcFile)
jens@9
   292
                previousTrailerPosition = baseDict->_trailerPosition;
jens@0
   293
            FilePosition newTrailerPosition = pos;
jens@0
   294
            VersionDictionary::Trailer trailer(newCount,
jens@0
   295
                                            newIndexPositions,
jens@9
   296
                                            previousTrailerPosition,
jens@0
   297
                                            baseDict->generation() + 1);
jens@0
   298
            pos += dstFile->write(trailer);
jens@0
   299
            
jens@0
   300
            // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
jens@0
   301
            // real-time I/O in other apps, so it's bad to call it too often.
jens@0
   302
            dstFile->flush();
jens@0
   303
            assert(pos==dstFile->position());
jens@0
   304
            
jens@0
   305
            return newTrailerPosition;
jens@0
   306
            
jens@0
   307
        } catch (...) {
jens@0
   308
            // If something goes wrong, try to back out everything we wrote:
jens@0
   309
            try{
jens@0
   310
                dstFile->setLength(startPos);
jens@0
   311
            }catch(...) { }
jens@0
   312
            throw;
jens@0
   313
        }
jens@0
   314
    }
jens@0
   315
 
jens@9
   316
jens@0
   317
    VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
jens@0
   318
                                                    File *dstFile, 
jens@0
   319
                                                    bool replace) const
jens@0
   320
    {
jens@0
   321
        FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
jens@0
   322
        VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
jens@0
   323
        nextVersion->_previousVersion = this;
jens@0
   324
        return nextVersion;
jens@0
   325
    }
jens@0
   326
    
jens@9
   327
    
jens@9
   328
#pragma mark -
jens@9
   329
#pragma mark TESTING-ONLY:
jens@9
   330
jens@0
   331
    VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
jens@0
   332
        return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
jens@0
   333
    }
jens@0
   334
    
jens@0
   335
    
jens@0
   336
#pragma mark -
jens@0
   337
#pragma mark ITERATOR:
jens@0
   338
    
jens@0
   339
    Dictionary::Iterator* VersionDictionary::iterate() const {
jens@0
   340
        return new VersionDictionary::Iterator(this);
jens@0
   341
    }
jens@0
   342
    
jens@0
   343
    VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
jens@0
   344
    :_file(file),
jens@0
   345
     _bucket(-1),
jens@0
   346
     _iter(NULL)
jens@0
   347
    { 
jens@0
   348
        nextIndex();
jens@0
   349
    }
jens@0
   350
    
jens@0
   351
    VersionDictionary::Iterator::~Iterator() {
jens@0
   352
        delete _iter;
jens@0
   353
    }
jens@0
   354
    
jens@0
   355
    bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   356
    Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
jens@0
   357
    Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
jens@0
   358
    
jens@0
   359
    bool VersionDictionary::Iterator::next() {
jens@0
   360
        if (_iter->next())
jens@0
   361
            return true;
jens@0
   362
        else {
jens@0
   363
            delete _iter;
jens@0
   364
            return nextIndex();
jens@0
   365
        }
jens@0
   366
    }
jens@0
   367
    
jens@0
   368
    bool VersionDictionary::Iterator::nextIndex() {
jens@0
   369
        while (++_bucket < 256) {
jens@0
   370
            const Index *index = _file->_index(_bucket);
jens@0
   371
            if (index) {
jens@0
   372
                _iter = new Index::Iterator(_file->_file, index);
jens@0
   373
                if (*_iter)
jens@0
   374
                    return true;
jens@0
   375
                else
jens@0
   376
                    delete _iter;
jens@0
   377
            }
jens@0
   378
        }
jens@0
   379
        _iter = NULL;
jens@0
   380
        return false;
jens@0
   381
    }
jens@0
   382
    
jens@0
   383
    
jens@0
   384
#pragma mark -
jens@0
   385
#pragma mark CHANGE ITERATOR:
jens@0
   386
    
jens@0
   387
    VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
jens@0
   388
        return new VersionDictionary::ChangeIterator(this);
jens@0
   389
    }
jens@0
   390
    
jens@0
   391
    VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
jens@0
   392
    :_file(file),
jens@0
   393
     _bucket(-1),
jens@0
   394
     _iter(NULL)
jens@0
   395
    { 
jens@0
   396
        next();            
jens@0
   397
    }
jens@0
   398
    
jens@0
   399
    VersionDictionary::ChangeIterator::~ChangeIterator() {
jens@0
   400
        delete _iter;
jens@0
   401
    }
jens@0
   402
    
jens@0
   403
    bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   404
    Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
jens@0
   405
    Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
jens@0
   406
    
jens@0
   407
    bool VersionDictionary::ChangeIterator::next() {
jens@0
   408
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   409
        for (;;) {
jens@0
   410
            // Check if current iterator has a value that's from this version:
jens@0
   411
            if (_iter && _iter->hasValue()) {
jens@0
   412
                if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
jens@0
   413
                    return true;
jens@0
   414
                else if (_iter->next())
jens@0
   415
                    continue;
jens@0
   416
            }
jens@0
   417
            // If not, go to next Index:
jens@0
   418
            if (!nextIndex())
jens@0
   419
                return false;
jens@0
   420
        }
jens@0
   421
    }
jens@0
   422
    
jens@0
   423
    bool VersionDictionary::ChangeIterator::nextIndex() {
jens@0
   424
        delete _iter;
jens@0
   425
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   426
        while (++_bucket < 256) {
jens@0
   427
            const Index *index = _file->_index(_bucket);
jens@0
   428
            if (index) {
jens@0
   429
                // Skip indexes that weren't updated in this version:
jens@0
   430
                if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
jens@0
   431
                    _iter = new Index::Iterator(_file->_file, index);
jens@0
   432
                    return true;
jens@0
   433
                }
jens@0
   434
            }
jens@0
   435
        }
jens@0
   436
        _iter = NULL;
jens@0
   437
        return false;
jens@0
   438
    }
jens@0
   439
    
jens@0
   440
}