src/VersionDictionary.cpp
author Jens Alfke <jens@mooseyard.com>
Thu Sep 24 21:46:17 2009 -0700 (2009-09-24)
changeset 5 4c10b7956435
parent 2 851de24ecb61
child 8 21a6c17f4e3e
permissions -rw-r--r--
Oops, forgot to set _previousTrailerPosition, which made the previousVersion() accessor not work. Fixed. Also added some sanity-checking of its value.
     1 /*
     2  *  VersionDictionary.cpp
     3  *  Ottoman
     4  *
     5  *  Created by Jens Alfke on 8/21/09.
     6  *  Copyright 2009 Jens Alfke. All rights reserved.
     7  *  BSD-Licensed: See the file "LICENSE.txt" for details.
     8  */
     9 
    10 #include "VersionDictionary.h"
    11 #include "Chunk.h"
    12 #include "Index.h"
    13 #include "File.h"
    14 #include "Chunk.h"
    15 #include <assert.h>
    16 #include <errno.h>
    17 #include <algorithm>
    18 
    19 namespace Mooseyard {
    20             
    21     // Trailer is stored in file; all numbers are little-endian.
    22     class VersionDictionary::Trailer :public Chunk {
    23     public:
    24         LittleEndian<uint32_t>          magicNumber1;
    25         LittleEndian<uint32_t>          count;
    26         VersionDictionary::IndexPositions  indexPositions;
    27         LittleEndian<FilePosition>      previousTrailerPosition;
    28         LittleEndian<uint32_t>          generation;
    29         LittleEndianDouble              timestamp;
    30         LittleEndian<uint32_t>          magicNumber2;
    31                 
    32         Trailer ()
    33             :Chunk(sizeof(Trailer), kChunkType)
    34         { }
    35         
    36         Trailer (int c,
    37                      VersionDictionary::IndexPositions indexPos, 
    38                      FilePosition previousTrailerPos,
    39                      uint32_t gen)
    40             :Chunk(sizeof(Trailer), kChunkType),
    41              magicNumber1(kMagicNumber1),
    42              count(c),
    43              indexPositions(indexPos),
    44              previousTrailerPosition(previousTrailerPos),
    45              generation(gen),
    46              timestamp(::time(NULL)),
    47              magicNumber2(kMagicNumber2)
    48         { }
    49         
    50         static const uint32_t kMagicNumber1 = 0xfe83b1cd;
    51         static const uint32_t kMagicNumber2 = 0xff84b2c1;
    52     };
    53     
    54     
    55     
    56     VersionDictionary::VersionDictionary (File *file)
    57         :_file(file),
    58          _trailerPosition(0),
    59          _previousTrailerPosition(0),
    60          _indexPositions(),
    61          _count(0),
    62          _previousVersion()
    63     {
    64         if (file->length() > sizeof(Trailer))
    65             _read();
    66     }
    67     
    68     VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
    69         :_file(file),
    70          _trailerPosition(trailerPosition),
    71          _previousTrailerPosition(0),
    72          _indexPositions(),
    73          _count(0),
    74          _previousVersion()
    75     {
    76         _read(trailerPosition);
    77     }
    78     
    79     int VersionDictionary::count() const {
    80         return _count;
    81     }
    82     
    83     const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
    84         if (_trailerPosition > 0)
    85             return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
    86         else 
    87             return NULL;      // Only happens in the empty-file case
    88     }
    89     
    90     const Index* VersionDictionary::_index (int i) const {
    91         if (_indexPositions[i] > 0)
    92             return (const Index*) _file->mappedPosition(_indexPositions[i]);
    93         else
    94             return NULL;
    95     }
    96     
    97     int VersionDictionary::generation() const {
    98         const VersionDictionary::Trailer *trailer = _trailer();
    99         return trailer ? (int)trailer->generation : -1;
   100     }
   101     
   102     time_t VersionDictionary::timestamp() const {
   103         const VersionDictionary::Trailer *trailer = _trailer();
   104         return trailer ? (time_t)trailer->timestamp : 0;
   105     }
   106     
   107     const VersionDictionary* VersionDictionary::previousVersion() const {
   108         if (!_previousVersion)
   109             if (_previousTrailerPosition > 0)
   110                 _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
   111         return _previousVersion;
   112     }
   113         
   114     
   115     Blob VersionDictionary::get (Key key) const {
   116         const Index *index = _index(key.hash >> 24);
   117         return index ?index->get(_file, key) :Blob();
   118     }
   119     
   120     void VersionDictionary::_read (FilePosition trailerPos) {
   121         assert(_file);
   122         
   123         if (trailerPos > 0) {
   124             _file->setPosition(trailerPos);
   125         } else {
   126             // Determine position of trailer, at EOF:
   127             off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
   128             if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
   129                 throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
   130             trailerPos = (FilePosition)pos;
   131         }
   132         
   133         // Read & verify trailer:
   134         VersionDictionary::Trailer trailer;
   135         _file->read(trailer);
   136         _trailerPosition = trailerPos;
   137         _previousTrailerPosition = trailer.previousTrailerPosition;
   138         _count = trailer.count;
   139         _indexPositions = trailer.indexPositions;
   140         
   141         if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
   142             || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
   143             throw File::Error("No trailer found in file (invalid magic numbers)");
   144         if (_previousTrailerPosition >= _trailerPosition)
   145             throw File::Error("Bad VersionDictionary trailer (illegal previousTrailerPosition)");
   146         
   147         // Map in the file:
   148         _file->mapRegion(0, _trailerPosition+sizeof(trailer));
   149 
   150         // Verify Indexes:
   151         for (int i=0; i<256; i++) {
   152             if (_indexPositions[i] > 0)
   153                 if (_indexPositions[i] < _previousTrailerPosition || _indexPositions[i] >= _trailerPosition)
   154                     throw File::Error("Bad VersionDictionary trailer (illegal index position)");
   155             const Index *index = _index(i);
   156             if (index)
   157                 index->validate();
   158         }
   159 }
   160     
   161     
   162     bool VersionDictionary::isLatestVersion() const {
   163         return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
   164     }
   165     
   166     
   167     /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
   168         as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
   169         Returns the position of the new trailer. */
   170     FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
   171                                           const Dictionary *addDict, 
   172                                           File *dstFile, 
   173                                           bool replace) 
   174     {
   175         File *srcFile = baseDict->_file;
   176         bool incremental = !replace && dstFile==srcFile;
   177         Index* newIndex[256];
   178 
   179         {
   180             // Work out the needed capacity for each Index bucket:
   181             int newCounts[256] = {0};
   182             if (!replace) {
   183                 for (int i=0; i<256; i++) {
   184                     const Index *oldIndex = baseDict->_index(i);
   185                     if (oldIndex)
   186                         newCounts[i] = oldIndex->count();
   187                 }
   188             }
   189             Dictionary::Iterator *it = addDict->iterate();
   190             for (; *it; ++*it)
   191                 newCounts[it->key().hash >> 24]++;
   192             delete it;
   193             
   194             // Allocate new Indexes, of sufficient capacity, for each growing bucket:
   195             for (int i=0; i<256; i++) {
   196                 const Index *oldIndex = baseDict->_index(i);
   197                 if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
   198                     newIndex[i] = Index::create(newCounts[i]);
   199                     if (incremental && oldIndex)
   200                         newIndex[i]->copyFrom(srcFile, oldIndex);
   201                 } else
   202                     newIndex[i] = NULL;
   203             }
   204         }
   205         
   206         // Lock the file now, seek to the end, and make sure it's been prepared with a header,
   207         // since FilePositions of 0 and 1 are reserved.
   208         File::Lock lock(dstFile);
   209         const FilePosition startPos = (FilePosition) dstFile->setPositionToEnd();
   210         if (startPos <= 1)
   211             throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
   212         
   213         // For safety's sake, make sure the old file hasn't been destroyed:
   214         FilePosition oldEOF = baseDict->_trailerPosition;
   215         if (oldEOF > 0)
   216             oldEOF += sizeof(VersionDictionary::Trailer);
   217         if (srcFile->length() < oldEOF)
   218             throw File::Error(ERANGE, "File has been truncated since being read");
   219             
   220         try{
   221             FilePosition pos = startPos;
   222             
   223             int newCount;
   224             if (replace)
   225                 newCount = 0;
   226             else if (dstFile == srcFile)
   227                 newCount = baseDict->_count;
   228             else {
   229                 // Write out the surviving pre-existing entries from the old file:
   230                 newCount = 0;
   231                 for (VersionDictionary::Iterator it(baseDict); it; ++it) {
   232                     Key key = it.key();
   233                     if (!addDict->contains(key)) {
   234                         int bucket = key.hash >> 24;
   235                         int size = KeyValueChunk::write(dstFile,pos, key, it.value());
   236                         newIndex[bucket]->put(srcFile, key, pos, startPos);
   237                         pos += size;
   238                         newCount++;
   239                     }
   240                 }
   241             }
   242             
   243             // Now write the items from the new dict:
   244             Dictionary::Iterator *it = addDict->iterate();
   245             for (; *it; ++*it) {
   246                 Key key=it->key();
   247                 Blob value=it->value();
   248                 int bucket = key.hash >> 24;
   249                 Index *index = newIndex[bucket];
   250                 assert(index);
   251                                               
   252                 if (value.bytes) {
   253                     int size = KeyValueChunk::write(dstFile,pos, key, value);
   254                     if (index->put(srcFile, key, pos, startPos))
   255                         newCount++;
   256                     pos += size;
   257                 } else if (incremental) {
   258                     // NULL value is a deleted-entry marker used by OverlayDictionary
   259                     if (index->remove(srcFile, key, startPos))
   260                         newCount--;
   261                 }
   262             }
   263             delete it;
   264             
   265             // Write out the new indexes:
   266             IndexPositions newIndexPositions;
   267             if (incremental)
   268                 newIndexPositions = baseDict->_indexPositions;
   269             else
   270                 memset(&newIndexPositions, 0, sizeof(newIndexPositions));
   271 
   272             pos += Chunk::writePadding(dstFile);
   273             for (int i=0; i<256; i++) {
   274                 if (newIndex[i]) {
   275                     Index *index = newIndex[i];
   276                     newIndexPositions[i] = pos;
   277                     pos += dstFile->write(index, index->size());
   278                     delete index;
   279                 }
   280             }
   281             
   282             // Flush everything out to disk, with maximum paranoia, before writing the trailer.
   283             // Since scavenging corrupt files looks for trailers, we don't want to append a
   284             // trailer until we're certain that all of the real data is safely on-disk.
   285             dstFile->flushDisk();
   286             
   287             // Write the trailer:
   288             FilePosition newTrailerPosition = pos;
   289             VersionDictionary::Trailer trailer(newCount,
   290                                             newIndexPositions,
   291                                             baseDict->_trailerPosition,
   292                                             baseDict->generation() + 1);
   293             pos += dstFile->write(trailer);
   294             
   295             // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
   296             // real-time I/O in other apps, so it's bad to call it too often.
   297             dstFile->flush();
   298             assert(pos==dstFile->position());
   299             
   300             return newTrailerPosition;
   301             
   302         } catch (...) {
   303             // If something goes wrong, try to back out everything we wrote:
   304             try{
   305                 dstFile->setLength(startPos);
   306             }catch(...) { }
   307             throw;
   308         }
   309     }
   310  
   311     
   312 #pragma mark -
   313 #pragma mark TESTING-ONLY:
   314     
   315     VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
   316                                                     File *dstFile, 
   317                                                     bool replace) const
   318     {
   319         FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
   320         VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
   321         nextVersion->_previousVersion = this;
   322         return nextVersion;
   323     }
   324     
   325     VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
   326         return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
   327     }
   328     
   329     
   330 #pragma mark -
   331 #pragma mark ITERATOR:
   332     
   333     Dictionary::Iterator* VersionDictionary::iterate() const {
   334         return new VersionDictionary::Iterator(this);
   335     }
   336     
   337     VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
   338     :_file(file),
   339      _bucket(-1),
   340      _iter(NULL)
   341     { 
   342         nextIndex();
   343     }
   344     
   345     VersionDictionary::Iterator::~Iterator() {
   346         delete _iter;
   347     }
   348     
   349     bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
   350     Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
   351     Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
   352     
   353     bool VersionDictionary::Iterator::next() {
   354         if (_iter->next())
   355             return true;
   356         else {
   357             delete _iter;
   358             return nextIndex();
   359         }
   360     }
   361     
   362     bool VersionDictionary::Iterator::nextIndex() {
   363         while (++_bucket < 256) {
   364             const Index *index = _file->_index(_bucket);
   365             if (index) {
   366                 _iter = new Index::Iterator(_file->_file, index);
   367                 if (*_iter)
   368                     return true;
   369                 else
   370                     delete _iter;
   371             }
   372         }
   373         _iter = NULL;
   374         return false;
   375     }
   376     
   377     
   378 #pragma mark -
   379 #pragma mark CHANGE ITERATOR:
   380     
   381     VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
   382         return new VersionDictionary::ChangeIterator(this);
   383     }
   384     
   385     VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
   386     :_file(file),
   387      _bucket(-1),
   388      _iter(NULL)
   389     { 
   390         next();            
   391     }
   392     
   393     VersionDictionary::ChangeIterator::~ChangeIterator() {
   394         delete _iter;
   395     }
   396     
   397     bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
   398     Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
   399     Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
   400     
   401     bool VersionDictionary::ChangeIterator::next() {
   402         const VersionDictionary::Trailer *trailer = _file->_trailer();
   403         for (;;) {
   404             // Check if current iterator has a value that's from this version:
   405             if (_iter && _iter->hasValue()) {
   406                 if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
   407                     return true;
   408                 else if (_iter->next())
   409                     continue;
   410             }
   411             // If not, go to next Index:
   412             if (!nextIndex())
   413                 return false;
   414         }
   415     }
   416     
   417     bool VersionDictionary::ChangeIterator::nextIndex() {
   418         delete _iter;
   419         const VersionDictionary::Trailer *trailer = _file->_trailer();
   420         while (++_bucket < 256) {
   421             const Index *index = _file->_index(_bucket);
   422             if (index) {
   423                 // Skip indexes that weren't updated in this version:
   424                 if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
   425                     _iter = new Index::Iterator(_file->_file, index);
   426                     return true;
   427                 }
   428             }
   429         }
   430         _iter = NULL;
   431         return false;
   432     }
   433     
   434 }