src/VersionDictionary.cpp
author Jens Alfke <jens@mooseyard.com>
Tue Sep 29 15:46:42 2009 -0700 (2009-09-29)
changeset 9 629f61203db1
parent 8 21a6c17f4e3e
permissions -rw-r--r--
* Merged in jbm's changes for Linux compatibility, fixing Mac compatibility in the process :)
* Fixed two regressions having to do with the _previousTrailerPosition in VersionDictionary.cpp.
* Made sure RTTI is enabled in the OttomanTest target because gtest requires it.
     1 /*
     2  *  VersionDictionary.cpp
     3  *  Ottoman
     4  *
     5  *  Created by Jens Alfke on 8/21/09.
     6  *  Copyright 2009 Jens Alfke. All rights reserved.
     7  *  BSD-Licensed: See the file "LICENSE.txt" for details.
     8  */
     9 
    10 #include "VersionDictionary.h"
    11 #include "Chunk.h"
    12 #include "Index.h"
    13 #include "File.h"
    14 #include "Chunk.h"
    15 #include <algorithm>
    16 #include <assert.h>
    17 #include <errno.h>
    18 #include <stdint.h>
    19 #include <time.h>
    20 
    21 
    22 namespace Mooseyard {
    23             
    24     // Trailer is stored in file; all numbers are little-endian.
    25     class VersionDictionary::Trailer :public Chunk {
    26     public:
    27         LittleEndian<uint32_t>          magicNumber1;
    28         LittleEndian<uint32_t>          count;
    29         VersionDictionary::IndexPositions  indexPositions;
    30         LittleEndian<FilePosition>      previousTrailerPosition;
    31         LittleEndian<uint32_t>          generation;
    32         LittleEndianDouble              timestamp;
    33         LittleEndian<uint32_t>          magicNumber2;
    34                 
    35         Trailer ()
    36             :Chunk(sizeof(Trailer), kChunkType)
    37         { }
    38         
    39         Trailer (int c,
    40                      VersionDictionary::IndexPositions indexPos, 
    41                      FilePosition previousTrailerPos,
    42                      uint32_t gen)
    43             :Chunk(sizeof(Trailer), kChunkType),
    44              magicNumber1(kMagicNumber1),
    45              count(c),
    46              indexPositions(indexPos),
    47              previousTrailerPosition(previousTrailerPos),
    48              generation(gen),
    49              timestamp(::time(NULL)),
    50              magicNumber2(kMagicNumber2)
    51         { }
    52         
    53         static const uint32_t kMagicNumber1 = 0xfe83b1cd;
    54         static const uint32_t kMagicNumber2 = 0xff84b2c1;
    55     };
    56     
    57     
    58     
    59     VersionDictionary::VersionDictionary (File *file)
    60         :_file(file),
    61          _trailerPosition(0),
    62          _previousTrailerPosition(0),
    63          _indexPositions(),
    64          _count(0),
    65          _previousVersion()
    66     {
    67         if (file->length() > sizeof(Trailer))
    68             _read();
    69     }
    70     
    71     VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition)
    72         :_file(file),
    73          _trailerPosition(trailerPosition),
    74          _previousTrailerPosition(0),
    75          _indexPositions(),
    76          _count(0),
    77          _previousVersion()
    78     {
    79         _read(trailerPosition);
    80     }
    81     
    82     int VersionDictionary::count() const {
    83         return _count;
    84     }
    85     
    86     const VersionDictionary::Trailer* VersionDictionary::_trailer() const {
    87         if (_trailerPosition > 0)
    88             return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition);
    89         else 
    90             return NULL;      // Only happens in the empty-file case
    91     }
    92     
    93     const Index* VersionDictionary::_index (int i) const {
    94         if (_indexPositions[i] > 0)
    95             return (const Index*) _file->mappedPosition(_indexPositions[i]);
    96         else
    97             return NULL;
    98     }
    99     
   100     int VersionDictionary::generation() const {
   101         const VersionDictionary::Trailer *trailer = _trailer();
   102         return trailer ? (int)trailer->generation : -1;
   103     }
   104     
   105     time_t VersionDictionary::timestamp() const {
   106         const VersionDictionary::Trailer *trailer = _trailer();
   107         return trailer ? (time_t)trailer->timestamp : 0;
   108     }
   109     
   110     const VersionDictionary* VersionDictionary::previousVersion() const {
   111         if (!_previousVersion)
   112             if (_previousTrailerPosition > 0)
   113                 _previousVersion = new VersionDictionary(_file, _previousTrailerPosition);
   114         return _previousVersion;
   115     }
   116         
   117     
   118     Blob VersionDictionary::get (Key key) const {
   119         const Index *index = _index(key.hash >> 24);
   120         return index ?index->get(_file, key) :Blob();
   121     }
   122     
   123     void VersionDictionary::_read (FilePosition trailerPos) {
   124         assert(_file);
   125         
   126         if (trailerPos > 0) {
   127             _file->setPosition(trailerPos);
   128         } else {
   129             // Determine position of trailer, at EOF:
   130             off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer));
   131             if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX)
   132                 throw File::Error(ERANGE, "No trailer found in file (wrong EOF)");
   133             trailerPos = (FilePosition)pos;
   134         }
   135         
   136         // Read & verify trailer:
   137         VersionDictionary::Trailer trailer;
   138         _file->read(trailer);
   139         _trailerPosition = trailerPos;
   140         _previousTrailerPosition = trailer.previousTrailerPosition;
   141         _count = trailer.count;
   142         _indexPositions = trailer.indexPositions;
   143         
   144         if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1
   145             || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2)
   146             throw File::Error("No trailer found in file (invalid magic numbers)");
   147         if (_previousTrailerPosition >= _trailerPosition)
   148             throw File::Error("Bad VersionDictionary trailer (illegal previousTrailerPosition)");
   149         
   150         // Map in the file:
   151         _file->mapRegion(0, _trailerPosition+sizeof(trailer));
   152 
   153         // Verify Indexes:
   154         for (int i=0; i<256; i++) {
   155             if (_indexPositions[i] >= _trailerPosition)
   156                 throw File::Error("Bad VersionDictionary trailer (illegal index position)");
   157             const Index *index = _index(i);
   158             if (index)
   159                 index->validate();
   160         }
   161 }
   162     
   163     
   164     bool VersionDictionary::isLatestVersion() const {
   165         return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer);
   166     }
   167     
   168     
   169     /*  Append addDict to baseDict, writing the results into dstFile (which is usually the same
   170         as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict.
   171         Returns the position of the new trailer. */
   172     FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, 
   173                                           const Dictionary *addDict, 
   174                                           File *dstFile, 
   175                                           bool replace) 
   176     {
   177         File *srcFile = baseDict->_file;
   178         bool incremental = !replace && dstFile==srcFile;
   179         Index* newIndex[256];
   180 
   181         {
   182             // Work out the needed capacity for each Index bucket:
   183             int newCounts[256] = {0};
   184             if (!replace) {
   185                 for (int i=0; i<256; i++) {
   186                     const Index *oldIndex = baseDict->_index(i);
   187                     if (oldIndex)
   188                         newCounts[i] = oldIndex->count();
   189                 }
   190             }
   191             Dictionary::Iterator *it = addDict->iterate();
   192             for (; *it; ++*it)
   193                 newCounts[it->key().hash >> 24]++;
   194             delete it;
   195             
   196             // Allocate new Indexes, of sufficient capacity, for each growing bucket:
   197             for (int i=0; i<256; i++) {
   198                 const Index *oldIndex = baseDict->_index(i);
   199                 if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) {
   200                     newIndex[i] = Index::create(newCounts[i]);
   201                     if (incremental && oldIndex)
   202                         newIndex[i]->copyFrom(srcFile, oldIndex);
   203                 } else
   204                     newIndex[i] = NULL;
   205             }
   206         }
   207         
   208         // Lock the file now, seek to the end, and make sure it's been prepared with a header,
   209         // since FilePositions of 0 and 1 are reserved.
   210         File::Lock lock(dstFile);
   211         const FilePosition startPos = (FilePosition) dstFile->setPositionToEnd();
   212         if (startPos <= 1)
   213             throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file");
   214         
   215         // For safety's sake, make sure the old file hasn't been destroyed:
   216         FilePosition oldEOF = baseDict->_trailerPosition;
   217         if (oldEOF > 0)
   218             oldEOF += sizeof(VersionDictionary::Trailer);
   219         if (srcFile->length() < oldEOF)
   220             throw File::Error(ERANGE, "File has been truncated since being read");
   221             
   222         try{
   223             FilePosition pos = startPos;
   224             
   225             int newCount;
   226             if (replace)
   227                 newCount = 0;
   228             else if (dstFile == srcFile)
   229                 newCount = baseDict->_count;
   230             else {
   231                 // Write out the surviving pre-existing entries from the old file:
   232                 newCount = 0;
   233                 for (VersionDictionary::Iterator it(baseDict); it; ++it) {
   234                     Key key = it.key();
   235                     if (!addDict->contains(key)) {
   236                         int bucket = key.hash >> 24;
   237                         int size = KeyValueChunk::write(dstFile,pos, key, it.value());
   238                         newIndex[bucket]->put(srcFile, key, pos, startPos);
   239                         pos += size;
   240                         newCount++;
   241                     }
   242                 }
   243             }
   244             
   245             // Now write the items from the new dict:
   246             Dictionary::Iterator *it = addDict->iterate();
   247             for (; *it; ++*it) {
   248                 Key key=it->key();
   249                 Blob value=it->value();
   250                 int bucket = key.hash >> 24;
   251                 Index *index = newIndex[bucket];
   252                 assert(index);
   253                                               
   254                 if (value.bytes) {
   255                     int size = KeyValueChunk::write(dstFile,pos, key, value);
   256                     if (index->put(srcFile, key, pos, startPos))
   257                         newCount++;
   258                     pos += size;
   259                 } else if (incremental) {
   260                     // NULL value is a deleted-entry marker used by OverlayDictionary
   261                     if (index->remove(srcFile, key, startPos))
   262                         newCount--;
   263                 }
   264             }
   265             delete it;
   266             
   267             // Write out the new indexes:
   268             IndexPositions newIndexPositions;
   269             if (incremental)
   270                 newIndexPositions = baseDict->_indexPositions;
   271             else
   272                 memset(&newIndexPositions, 0, sizeof(newIndexPositions));
   273 
   274             pos += Chunk::writePadding(dstFile);
   275             for (int i=0; i<256; i++) {
   276                 if (newIndex[i]) {
   277                     Index *index = newIndex[i];
   278                     newIndexPositions[i] = pos;
   279                     pos += dstFile->write(index, index->size());
   280                     delete index;
   281                 }
   282             }
   283             
   284             // Flush everything out to disk, with maximum paranoia, before writing the trailer.
   285             // Since scavenging corrupt files looks for trailers, we don't want to append a
   286             // trailer until we're certain that all of the real data is safely on-disk.
   287             dstFile->flushDisk();
   288             
   289             // Write the trailer:
   290             FilePosition previousTrailerPosition = 0;
   291             if (dstFile==srcFile)
   292                 previousTrailerPosition = baseDict->_trailerPosition;
   293             FilePosition newTrailerPosition = pos;
   294             VersionDictionary::Trailer trailer(newCount,
   295                                             newIndexPositions,
   296                                             previousTrailerPosition,
   297                                             baseDict->generation() + 1);
   298             pos += dstFile->write(trailer);
   299             
   300             // Just a mild flush here; flushDisk() is very expensive and can be disruptive to
   301             // real-time I/O in other apps, so it's bad to call it too often.
   302             dstFile->flush();
   303             assert(pos==dstFile->position());
   304             
   305             return newTrailerPosition;
   306             
   307         } catch (...) {
   308             // If something goes wrong, try to back out everything we wrote:
   309             try{
   310                 dstFile->setLength(startPos);
   311             }catch(...) { }
   312             throw;
   313         }
   314     }
   315  
   316 
   317     VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict,
   318                                                     File *dstFile, 
   319                                                     bool replace) const
   320     {
   321         FilePosition nextVersionPos = _append(this, addDict, dstFile, replace);
   322         VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos);
   323         nextVersion->_previousVersion = this;
   324         return nextVersion;
   325     }
   326     
   327     
   328 #pragma mark -
   329 #pragma mark TESTING-ONLY:
   330 
   331     VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) {
   332         return VersionDictionary(file)._appendAndOpen(srcDict, file, true);
   333     }
   334     
   335     
   336 #pragma mark -
   337 #pragma mark ITERATOR:
   338     
   339     Dictionary::Iterator* VersionDictionary::iterate() const {
   340         return new VersionDictionary::Iterator(this);
   341     }
   342     
   343     VersionDictionary::Iterator::Iterator (const VersionDictionary *file)
   344     :_file(file),
   345      _bucket(-1),
   346      _iter(NULL)
   347     { 
   348         nextIndex();
   349     }
   350     
   351     VersionDictionary::Iterator::~Iterator() {
   352         delete _iter;
   353     }
   354     
   355     bool VersionDictionary::Iterator::hasValue() const               {return _iter && _iter->hasValue();}
   356     Key VersionDictionary::Iterator::key() const                     {return _iter->key();}
   357     Blob VersionDictionary::Iterator::value() const                  {return _iter->value();}
   358     
   359     bool VersionDictionary::Iterator::next() {
   360         if (_iter->next())
   361             return true;
   362         else {
   363             delete _iter;
   364             return nextIndex();
   365         }
   366     }
   367     
   368     bool VersionDictionary::Iterator::nextIndex() {
   369         while (++_bucket < 256) {
   370             const Index *index = _file->_index(_bucket);
   371             if (index) {
   372                 _iter = new Index::Iterator(_file->_file, index);
   373                 if (*_iter)
   374                     return true;
   375                 else
   376                     delete _iter;
   377             }
   378         }
   379         _iter = NULL;
   380         return false;
   381     }
   382     
   383     
   384 #pragma mark -
   385 #pragma mark CHANGE ITERATOR:
   386     
   387     VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const {
   388         return new VersionDictionary::ChangeIterator(this);
   389     }
   390     
   391     VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file)
   392     :_file(file),
   393      _bucket(-1),
   394      _iter(NULL)
   395     { 
   396         next();            
   397     }
   398     
   399     VersionDictionary::ChangeIterator::~ChangeIterator() {
   400         delete _iter;
   401     }
   402     
   403     bool VersionDictionary::ChangeIterator::hasValue() const               {return _iter && _iter->hasValue();}
   404     Key VersionDictionary::ChangeIterator::key() const                     {return _iter->key();}
   405     Blob VersionDictionary::ChangeIterator::value() const                  {return _iter->value();}
   406     
   407     bool VersionDictionary::ChangeIterator::next() {
   408         const VersionDictionary::Trailer *trailer = _file->_trailer();
   409         for (;;) {
   410             // Check if current iterator has a value that's from this version:
   411             if (_iter && _iter->hasValue()) {
   412                 if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition)
   413                     return true;
   414                 else if (_iter->next())
   415                     continue;
   416             }
   417             // If not, go to next Index:
   418             if (!nextIndex())
   419                 return false;
   420         }
   421     }
   422     
   423     bool VersionDictionary::ChangeIterator::nextIndex() {
   424         delete _iter;
   425         const VersionDictionary::Trailer *trailer = _file->_trailer();
   426         while (++_bucket < 256) {
   427             const Index *index = _file->_index(_bucket);
   428             if (index) {
   429                 // Skip indexes that weren't updated in this version:
   430                 if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) {
   431                     _iter = new Index::Iterator(_file->_file, index);
   432                     return true;
   433                 }
   434             }
   435         }
   436         _iter = NULL;
   437         return false;
   438     }
   439     
   440 }