src/VersionDictionary.cpp
author jbm@humility
Mon Sep 28 23:39:08 2009 -0700 (2009-09-28)
changeset 8 21a6c17f4e3e
parent 5 4c10b7956435
child 9 629f61203db1
permissions -rw-r--r--
jbm: it compiles... but bombs in unit tests
jens@0
     1
/*
jens@0
     2
 *  VersionDictionary.cpp
jens@0
     3
 *  Ottoman
jens@0
     4
 *
jens@0
     5
 *  Created by Jens Alfke on 8/21/09.
jens@0
     6
 *  Copyright 2009 Jens Alfke. All rights reserved.
jens@0
     7
 *  BSD-Licensed: See the file "LICENSE.txt" for details.
jens@0
     8
 */
jens@0
     9
jens@0
    10
#include "VersionDictionary.h"
jens@0
    11
#include "Chunk.h"
jens@0
    12
#include "Index.h"
jens@0
    13
#include "File.h"
jens@0
    14
#include "Chunk.h"
jens@0
    15
#include <assert.h>
jens@0
    16
#include <errno.h>
jens@0
    17
#include <algorithm>
jbm@8
    18
#include <time.h>
jbm@8
    19
jbm@8
    20
# define UINT32_MAX             (4294967295U)
jbm@8
    21
jens@0
    22
jens@0
    23
namespace Mooseyard {
jens@0
    24
            
jens@0
    25
    // Trailer is stored in file; all numbers are little-endian.
jens@0
    26
    class VersionDictionary::Trailer :public Chunk {
jens@0
    27
    public:
jens@0
    28
        LittleEndian<uint32_t>          magicNumber1;
jens@0
    29
        LittleEndian<uint32_t>          count;
jens@0
    30
        VersionDictionary::IndexPositions  indexPositions;
jens@0
    31
        LittleEndian<FilePosition>      previousTrailerPosition;
jens@0
    32
        LittleEndian<uint32_t>          generation;
jens@0
    33
        LittleEndianDouble              timestamp;
jens@0
    34
        LittleEndian<uint32_t>          magicNumber2;
jens@0
    35
                
jens@0
    36
        Trailer ()
jens@0
    37
            :Chunk(sizeof(Trailer), kChunkType)
jens@0
    38
        { }
jens@0
    39
        
jens@0
    40
        Trailer (int c,
jens@0
    41
                     VersionDictionary::IndexPositions indexPos, 
jens@0
    42
                     FilePosition previousTrailerPos,
jens@0
    43
                     uint32_t gen)
jens@0
    44
            :Chunk(sizeof(Trailer), kChunkType),
jens@0
    45
             magicNumber1(kMagicNumber1),
jens@0
    46
             count(c),
jens@0
    47
             indexPositions(indexPos),
jens@0
    48
             previousTrailerPosition(previousTrailerPos),
jens@0
    49
             generation(gen),
jens@0
    50
             timestamp(::time(NULL)),
jens@0
    51
             magicNumber2(kMagicNumber2)
jens@0
    52
        { }
jens@0
    53
        
jens@0
    54
        static const uint32_t kMagicNumber1 = 0xfe83b1cd;
jens@0
    55
        static const uint32_t kMagicNumber2 = 0xff84b2c1;
jens@0
    56
    };
jens@0
    57
    
jens@0
    58
    
jens@0
    59
    
jens@0
    60
    VersionDictionary::VersionDictionary (File *file)
jens@0
    61
        :_file(file),
jens@0
    62
         _trailerPosition(0),
jens@0
    63
         _previousTrailerPosition(0),
jens@0
    64
         _indexPositions(),
jens@0
    65
         _count(0),
jens@0
    66
         _previousVersion()
jens@0
    67
    {
jens@0
    68
        if (file->length() > sizeof(Trailer))
jens@0
    69
            _read();
jens@0
    70
    }
jens@0
    71
    
jens@0
    72
    VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
jens@0
    73
        :_file(file),
jens@0
    74
         _trailerPosition(trailerPosition),
jens@0
    75
         _previousTrailerPosition(0),
jens@0
    76
         _indexPositions(),
jens@0
    77
         _count(0),
jens@0
    78
         _previousVersion()
jens@0
    79
    {
jens@0
    80
        _read(trailerPosition);
jens@0
    81
    }
jens@0
    82
    
jens@0
    83
    int VersionDictionary::count() const {
jens@0
    84
        return _count;
jens@0
    85
    }
jens@0
    86
    
jens@0
    87
    const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
jens@0
    88
        if (_trailerPosition > 0)
jens@0
    89
            return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
jens@0
    90
        else 
jens@0
    91
            return NULL;      // Only happens in the empty-file case
jens@0
    92
    }
jens@0
    93
    
jens@0
    94
    const Index* VersionDictionary::_index (int i) const {
jens@0
    95
        if (_indexPositions[i] > 0)
jens@0
    96
            return (const Index*) _file->mappedPosition(_indexPositions[i]);
jens@0
    97
        else
jens@0
    98
            return NULL;
jens@0
    99
    }
jens@0
   100
    
jens@0
   101
    int VersionDictionary::generation() const {
jens@0
   102
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
   103
        return trailer ? (int)trailer->generation : -1;
jens@0
   104
    }
jens@0
   105
    
jens@0
   106
    time_t VersionDictionary::timestamp() const {
jens@0
   107
        const VersionDictionary::Trailer *trailer = _trailer();
jens@0
   108
        return trailer ? (time_t)trailer->timestamp : 0;
jens@0
   109
    }
jens@0
   110
    
jens@0
   111
    const VersionDictionary* VersionDictionary::previousVersion() const {
jens@0
   112
        if (!_previousVersion)
jens@0
   113
            if (_previousTrailerPosition > 0)
jens@0
   114
                _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
jens@0
   115
        return _previousVersion;
jens@0
   116
    }
jens@0
   117
        
jens@0
   118
    
jens@0
   119
    Blob VersionDictionary::get (Key key) const {
jens@0
   120
        const Index *index = _index(key.hash >> 24);
jens@0
   121
        return index ?index->get(_file, key) :Blob();
jens@0
   122
    }
jens@0
   123
    
jens@0
   124
    void VersionDictionary::_read (FilePosition trailerPos) {
jens@0
   125
        assert(_file);
jens@0
   126
        
jens@0
   127
        if (trailerPos > 0) {
jens@0
   128
            _file->setPosition(trailerPos);
jens@0
   129
        } else {
jens@0
   130
            // Determine position of trailer, at EOF:
jens@0
   131
            off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
jens@0
   132
            if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
jens@0
   133
                throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
jens@2
   134
            trailerPos = (FilePosition)pos;
jens@0
   135
        }
jens@0
   136
        
jens@0
   137
        // Read & verify trailer:
jens@0
   138
        VersionDictionary::Trailer trailer;
jens@0
   139
        _file->read(trailer);
jens@0
   140
        _trailerPosition = trailerPos;
jens@5
   141
        _previousTrailerPosition = trailer.previousTrailerPosition;
jens@0
   142
        _count = trailer.count;
jens@0
   143
        _indexPositions = trailer.indexPositions;
jens@0
   144
        
jens@0
   145
        if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
jens@0
   146
            || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
jens@5
   147
            throw File::Error("No trailer found in file (invalid magic numbers)");
jens@5
   148
        if (_previousTrailerPosition >= _trailerPosition)
jens@5
   149
            throw File::Error("Bad VersionDictionary trailer (illegal previousTrailerPosition)");
jens@0
   150
        
jens@0
   151
        // Map in the file:
jens@0
   152
        _file->mapRegion(0, _trailerPosition+sizeof(trailer));
jens@0
   153
jens@0
   154
        // Verify Indexes:
jens@0
   155
        for (int i=0; i<256; i++) {
jens@5
   156
            if (_indexPositions[i] > 0)
jens@5
   157
                if (_indexPositions[i] < _previousTrailerPosition || _indexPositions[i] >= _trailerPosition)
jens@5
   158
                    throw File::Error("Bad VersionDictionary trailer (illegal index position)");
jens@0
   159
            const Index *index = _index(i);
jens@0
   160
            if (index)
jens@0
   161
                index->validate();
jens@0
   162
        }
jens@0
   163
}
jens@0
   164
    
jens@0
   165
    
jens@0
   166
    bool VersionDictionary::isLatestVersion() const {
jens@0
   167
        return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
jens@0
   168
    }
jens@0
   169
    
jens@0
   170
    
jens@0
   171
    /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
jens@0
   172
        as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
jens@0
   173
        Returns the position of the new trailer. */
jens@0
   174
    FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
jens@0
   175
                                          const Dictionary *addDict, 
jens@0
   176
                                          File *dstFile, 
jens@0
   177
                                          bool replace) 
jens@0
   178
    {
jens@0
   179
        File *srcFile = baseDict->_file;
jens@0
   180
        bool incremental = !replace && dstFile==srcFile;
jens@0
   181
        Index* newIndex[256];
jens@0
   182
jens@0
   183
        {
jens@0
   184
            // Work out the needed capacity for each Index bucket:
jens@0
   185
            int newCounts[256] = {0};
jens@0
   186
            if (!replace) {
jens@0
   187
                for (int i=0; i<256; i++) {
jens@0
   188
                    const Index *oldIndex = baseDict->_index(i);
jens@0
   189
                    if (oldIndex)
jens@0
   190
                        newCounts[i] = oldIndex->count();
jens@0
   191
                }
jens@0
   192
            }
jens@0
   193
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   194
            for (; *it; ++*it)
jens@0
   195
                newCounts[it->key().hash >> 24]++;
jens@0
   196
            delete it;
jens@0
   197
            
jens@0
   198
            // Allocate new Indexes, of sufficient capacity, for each growing bucket:
jens@0
   199
            for (int i=0; i<256; i++) {
jens@0
   200
                const Index *oldIndex = baseDict->_index(i);
jens@0
   201
                if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
jens@0
   202
                    newIndex[i] = Index::create(newCounts[i]);
jens@0
   203
                    if (incremental && oldIndex)
jens@0
   204
                        newIndex[i]->copyFrom(srcFile, oldIndex);
jens@0
   205
                } else
jens@0
   206
                    newIndex[i] = NULL;
jens@0
   207
            }
jens@0
   208
        }
jens@0
   209
        
jens@0
   210
        // Lock the file now, seek to the end, and make sure it's been prepared with a header,
jens@0
   211
        // since FilePositions of 0 and 1 are reserved.
jens@0
   212
        File::Lock lock(dstFile);
jens@2
   213
        const FilePosition startPos = (FilePosition) dstFile->setPositionToEnd();
jens@0
   214
        if (startPos <= 1)
jens@0
   215
            throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
jens@0
   216
        
jens@0
   217
        // For safety's sake, make sure the old file hasn't been destroyed:
jens@0
   218
        FilePosition oldEOF = baseDict->_trailerPosition;
jens@0
   219
        if (oldEOF > 0)
jens@0
   220
            oldEOF += sizeof(VersionDictionary::Trailer);
jens@0
   221
        if (srcFile->length() < oldEOF)
jens@0
   222
            throw File::Error(ERANGE, "File has been truncated since being read");
jens@0
   223
            
jens@0
   224
        try{
jens@0
   225
            FilePosition pos = startPos;
jens@0
   226
            
jens@0
   227
            int newCount;
jens@0
   228
            if (replace)
jens@0
   229
                newCount = 0;
jens@0
   230
            else if (dstFile == srcFile)
jens@0
   231
                newCount = baseDict->_count;
jens@0
   232
            else {
jens@0
   233
                // Write out the surviving pre-existing entries from the old file:
jens@0
   234
                newCount = 0;
jens@0
   235
                for (VersionDictionary::Iterator it(baseDict); it; ++it) {
jens@0
   236
                    Key key = it.key();
jens@0
   237
                    if (!addDict->contains(key)) {
jens@0
   238
                        int bucket = key.hash >> 24;
jens@0
   239
                        int size = KeyValueChunk::write(dstFile,pos, key, it.value());
jens@0
   240
                        newIndex[bucket]->put(srcFile, key, pos, startPos);
jens@0
   241
                        pos += size;
jens@0
   242
                        newCount++;
jens@0
   243
                    }
jens@0
   244
                }
jens@0
   245
            }
jens@0
   246
            
jens@0
   247
            // Now write the items from the new dict:
jens@0
   248
            Dictionary::Iterator *it = addDict->iterate();
jens@0
   249
            for (; *it; ++*it) {
jens@0
   250
                Key key=it->key();
jens@0
   251
                Blob value=it->value();
jens@0
   252
                int bucket = key.hash >> 24;
jens@0
   253
                Index *index = newIndex[bucket];
jens@0
   254
                assert(index);
jens@0
   255
                                              
jens@0
   256
                if (value.bytes) {
jens@0
   257
                    int size = KeyValueChunk::write(dstFile,pos, key, value);
jens@0
   258
                    if (index->put(srcFile, key, pos, startPos))
jens@0
   259
                        newCount++;
jens@0
   260
                    pos += size;
jens@0
   261
                } else if (incremental) {
jens@0
   262
                    // NULL value is a deleted-entry marker used by OverlayDictionary
jens@0
   263
                    if (index->remove(srcFile, key, startPos))
jens@0
   264
                        newCount--;
jens@0
   265
                }
jens@0
   266
            }
jens@0
   267
            delete it;
jens@0
   268
            
jens@0
   269
            // Write out the new indexes:
jens@0
   270
            IndexPositions newIndexPositions;
jens@0
   271
            if (incremental)
jens@0
   272
                newIndexPositions = baseDict->_indexPositions;
jens@0
   273
            else
jens@0
   274
                memset(&newIndexPositions, 0, sizeof(newIndexPositions));
jens@0
   275
jens@0
   276
            pos += Chunk::writePadding(dstFile);
jens@0
   277
            for (int i=0; i<256; i++) {
jens@0
   278
                if (newIndex[i]) {
jens@0
   279
                    Index *index = newIndex[i];
jens@0
   280
                    newIndexPositions[i] = pos;
jens@0
   281
                    pos += dstFile->write(index, index->size());
jens@0
   282
                    delete index;
jens@0
   283
                }
jens@0
   284
            }
jens@0
   285
            
jens@0
   286
            // Flush everything out to disk, with maximum paranoia, before writing the trailer.
jens@0
   287
            // Since scavenging corrupt files looks for trailers, we don't want to append a
jens@0
   288
            // trailer until we're certain that all of the real data is safely on-disk.
jens@0
   289
            dstFile->flushDisk();
jens@0
   290
            
jens@0
   291
            // Write the trailer:
jens@0
   292
            FilePosition newTrailerPosition = pos;
jens@0
   293
            VersionDictionary::Trailer trailer(newCount,
jens@0
   294
                                            newIndexPositions,
jens@0
   295
                                            baseDict->_trailerPosition,
jens@0
   296
                                            baseDict->generation() + 1);
jens@0
   297
            pos += dstFile->write(trailer);
jens@0
   298
            
jens@0
   299
            // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
jens@0
   300
            // real-time I/O in other apps, so it's bad to call it too often.
jens@0
   301
            dstFile->flush();
jens@0
   302
            assert(pos==dstFile->position());
jens@0
   303
            
jens@0
   304
            return newTrailerPosition;
jens@0
   305
            
jens@0
   306
        } catch (...) {
jens@0
   307
            // If something goes wrong, try to back out everything we wrote:
jens@0
   308
            try{
jens@0
   309
                dstFile->setLength(startPos);
jens@0
   310
            }catch(...) { }
jens@0
   311
            throw;
jens@0
   312
        }
jens@0
   313
    }
jens@0
   314
 
jens@0
   315
    
jens@0
   316
#pragma mark -
jens@0
   317
#pragma mark TESTING-ONLY:
jens@0
   318
    
jens@0
   319
    VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
jens@0
   320
                                                    File *dstFile, 
jens@0
   321
                                                    bool replace) const
jens@0
   322
    {
jens@0
   323
        FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
jens@0
   324
        VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
jens@0
   325
        nextVersion->_previousVersion = this;
jens@0
   326
        return nextVersion;
jens@0
   327
    }
jens@0
   328
    
jens@0
   329
    VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
jens@0
   330
        return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
jens@0
   331
    }
jens@0
   332
    
jens@0
   333
    
jens@0
   334
#pragma mark -
jens@0
   335
#pragma mark ITERATOR:
jens@0
   336
    
jens@0
   337
    Dictionary::Iterator* VersionDictionary::iterate() const {
jens@0
   338
        return new VersionDictionary::Iterator(this);
jens@0
   339
    }
jens@0
   340
    
jens@0
   341
    VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
jens@0
   342
    :_file(file),
jens@0
   343
     _bucket(-1),
jens@0
   344
     _iter(NULL)
jens@0
   345
    { 
jens@0
   346
        nextIndex();
jens@0
   347
    }
jens@0
   348
    
jens@0
   349
    VersionDictionary::Iterator::~Iterator() {
jens@0
   350
        delete _iter;
jens@0
   351
    }
jens@0
   352
    
jens@0
   353
    bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   354
    Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
jens@0
   355
    Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
jens@0
   356
    
jens@0
   357
    bool VersionDictionary::Iterator::next() {
jens@0
   358
        if (_iter->next())
jens@0
   359
            return true;
jens@0
   360
        else {
jens@0
   361
            delete _iter;
jens@0
   362
            return nextIndex();
jens@0
   363
        }
jens@0
   364
    }
jens@0
   365
    
jens@0
   366
    bool VersionDictionary::Iterator::nextIndex() {
jens@0
   367
        while (++_bucket < 256) {
jens@0
   368
            const Index *index = _file->_index(_bucket);
jens@0
   369
            if (index) {
jens@0
   370
                _iter = new Index::Iterator(_file->_file, index);
jens@0
   371
                if (*_iter)
jens@0
   372
                    return true;
jens@0
   373
                else
jens@0
   374
                    delete _iter;
jens@0
   375
            }
jens@0
   376
        }
jens@0
   377
        _iter = NULL;
jens@0
   378
        return false;
jens@0
   379
    }
jens@0
   380
    
jens@0
   381
    
jens@0
   382
#pragma mark -
jens@0
   383
#pragma mark CHANGE ITERATOR:
jens@0
   384
    
jens@0
   385
    VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
jens@0
   386
        return new VersionDictionary::ChangeIterator(this);
jens@0
   387
    }
jens@0
   388
    
jens@0
   389
    VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
jens@0
   390
    :_file(file),
jens@0
   391
     _bucket(-1),
jens@0
   392
     _iter(NULL)
jens@0
   393
    { 
jens@0
   394
        next();            
jens@0
   395
    }
jens@0
   396
    
jens@0
   397
    VersionDictionary::ChangeIterator::~ChangeIterator() {
jens@0
   398
        delete _iter;
jens@0
   399
    }
jens@0
   400
    
jens@0
   401
    bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
jens@0
   402
    Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
jens@0
   403
    Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
jens@0
   404
    
jens@0
   405
    bool VersionDictionary::ChangeIterator::next() {
jens@0
   406
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   407
        for (;;) {
jens@0
   408
            // Check if current iterator has a value that's from this version:
jens@0
   409
            if (_iter && _iter->hasValue()) {
jens@0
   410
                if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
jens@0
   411
                    return true;
jens@0
   412
                else if (_iter->next())
jens@0
   413
                    continue;
jens@0
   414
            }
jens@0
   415
            // If not, go to next Index:
jens@0
   416
            if (!nextIndex())
jens@0
   417
                return false;
jens@0
   418
        }
jens@0
   419
    }
jens@0
   420
    
jens@0
   421
    bool VersionDictionary::ChangeIterator::nextIndex() {
jens@0
   422
        delete _iter;
jens@0
   423
        const VersionDictionary::Trailer *trailer = _file->_trailer();
jens@0
   424
        while (++_bucket < 256) {
jens@0
   425
            const Index *index = _file->_index(_bucket);
jens@0
   426
            if (index) {
jens@0
   427
                // Skip indexes that weren't updated in this version:
jens@0
   428
                if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
jens@0
   429
                    _iter = new Index::Iterator(_file->_file, index);
jens@0
   430
                    return true;
jens@0
   431
                }
jens@0
   432
            }
jens@0
   433
        }
jens@0
   434
        _iter = NULL;
jens@0
   435
        return false;
jens@0
   436
    }
jens@0
   437
    
jens@0
   438
}