# HG changeset patch # User Jens Alfke # Date 1253484852 25200 # Node ID 31a43d94cc262eb9cfd598dd623f54e3cc4137dd First official checkin. diff -r 000000000000 -r 31a43d94cc26 LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE.txt Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,23 @@ +[This is a standard BSD license.] + +Copyright (c) 2009, Jens Alfke . All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff -r 000000000000 -r 31a43d94cc26 Ottoman.xcodeproj/project.pbxproj --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Ottoman.xcodeproj/project.pbxproj Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,322 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 27156CAA104C9C44009EBD39 /* gtest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 27156CA9104C9C44009EBD39 /* gtest.framework */; }; + 27603901105AC81200D931A7 /* CoreFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 27603900105AC81200D931A7 /* CoreFoundation.framework */; }; + 276E5BCD1066D13D008A2171 /* Base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BC41066D13D008A2171 /* Base.cpp */; }; + 276E5BCE1066D13D008A2171 /* Chunk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BC51066D13D008A2171 /* Chunk.cpp */; }; + 276E5BCF1066D13D008A2171 /* Dictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BC61066D13D008A2171 /* Dictionary.cpp */; }; + 276E5BD01066D13D008A2171 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BC71066D13D008A2171 /* File.cpp */; }; + 276E5BD11066D13D008A2171 /* Hash.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BC81066D13D008A2171 /* Hash.cpp */; }; + 276E5BD21066D13D008A2171 /* Index.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BC91066D13D008A2171 /* Index.cpp */; }; + 276E5BD31066D13D008A2171 /* MemoryMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BCA1066D13D008A2171 /* MemoryMap.cpp */; }; + 276E5BD41066D13D008A2171 /* Ottoman.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BCB1066D13D008A2171 /* Ottoman.cpp */; }; + 276E5BD51066D13D008A2171 /* VersionDictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BCC1066D13D008A2171 /* VersionDictionary.cpp */; }; + 276E5BDD1066D142008A2171 /* Dictionary_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BD71066D142008A2171 /* Dictionary_test.cpp */; }; + 276E5BDE1066D142008A2171 /* Hash_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BD81066D142008A2171 /* Hash_test.cpp */; }; + 276E5BDF1066D142008A2171 /* Ottoman_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BD91066D142008A2171 /* Ottoman_test.cpp */; }; + 276E5BE01066D142008A2171 /* TestUtils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BDA1066D142008A2171 /* TestUtils.cpp */; }; + 276E5BE11066D142008A2171 /* VersionDictionary_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 276E5BDC1066D142008A2171 /* VersionDictionary_test.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 27156CA9104C9C44009EBD39 /* gtest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = gtest.framework; path = /Library/Frameworks/gtest.framework; sourceTree = ""; }; + 27603900105AC81200D931A7 /* CoreFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreFoundation.framework; path = System/Library/Frameworks/CoreFoundation.framework; sourceTree = SDKROOT; }; + 276E5BBA1066D135008A2171 /* Base.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Base.h; sourceTree = ""; }; + 276E5BBB1066D135008A2171 /* Chunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Chunk.h; path = ../include/Chunk.h; sourceTree = ""; }; + 276E5BBC1066D135008A2171 /* Dictionary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Dictionary.h; sourceTree = ""; }; + 276E5BBD1066D135008A2171 /* File.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = File.h; sourceTree = ""; }; + 276E5BBE1066D135008A2171 /* Hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Hash.h; path = ../include/Hash.h; sourceTree = ""; }; + 276E5BBF1066D135008A2171 /* Index.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Index.h; path = ../include/Index.h; sourceTree = ""; }; + 276E5BC01066D135008A2171 /* MemoryMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MemoryMap.h; path = ../include/MemoryMap.h; sourceTree = ""; }; + 276E5BC11066D135008A2171 /* Ottoman.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Ottoman.h; sourceTree = ""; }; + 276E5BC21066D135008A2171 /* VersionDictionary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VersionDictionary.h; sourceTree = ""; }; + 276E5BC41066D13D008A2171 /* Base.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Base.cpp; sourceTree = ""; }; + 276E5BC51066D13D008A2171 /* Chunk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Chunk.cpp; sourceTree = ""; }; + 276E5BC61066D13D008A2171 /* Dictionary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Dictionary.cpp; sourceTree = ""; }; + 276E5BC71066D13D008A2171 /* File.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = File.cpp; sourceTree = ""; }; + 276E5BC81066D13D008A2171 /* Hash.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Hash.cpp; sourceTree = ""; }; + 276E5BC91066D13D008A2171 /* Index.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Index.cpp; sourceTree = ""; }; + 276E5BCA1066D13D008A2171 /* MemoryMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MemoryMap.cpp; sourceTree = ""; }; + 276E5BCB1066D13D008A2171 /* Ottoman.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Ottoman.cpp; sourceTree = ""; }; + 276E5BCC1066D13D008A2171 /* VersionDictionary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = VersionDictionary.cpp; sourceTree = ""; }; + 276E5BD71066D142008A2171 /* Dictionary_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Dictionary_test.cpp; sourceTree = ""; }; + 276E5BD81066D142008A2171 /* Hash_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Hash_test.cpp; sourceTree = ""; }; + 276E5BD91066D142008A2171 /* Ottoman_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Ottoman_test.cpp; sourceTree = ""; }; + 276E5BDA1066D142008A2171 /* TestUtils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TestUtils.cpp; sourceTree = ""; }; + 276E5BDB1066D142008A2171 /* TestUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TestUtils.h; sourceTree = ""; }; + 276E5BDC1066D142008A2171 /* VersionDictionary_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = VersionDictionary_test.cpp; sourceTree = ""; }; + 8DD76F6C0486A84900D96B5E /* SafeStorageTest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = SafeStorageTest; sourceTree = BUILT_PRODUCTS_DIR; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 27156CAA104C9C44009EBD39 /* gtest.framework in Frameworks */, + 27603901105AC81200D931A7 /* CoreFoundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* BPlusTree */ = { + isa = PBXGroup; + children = ( + 276E5BB91066D135008A2171 /* include */, + 276E5BC31066D13D008A2171 /* src */, + 276E5BD61066D142008A2171 /* test */, + 27603900105AC81200D931A7 /* CoreFoundation.framework */, + 27156CA9104C9C44009EBD39 /* gtest.framework */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = BPlusTree; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* SafeStorageTest */, + ); + name = Products; + sourceTree = ""; + }; + 276E5BB91066D135008A2171 /* include */ = { + isa = PBXGroup; + children = ( + 276E5BBA1066D135008A2171 /* Base.h */, + 276E5BBC1066D135008A2171 /* Dictionary.h */, + 276E5BBD1066D135008A2171 /* File.h */, + 276E5BC11066D135008A2171 /* Ottoman.h */, + 276E5BC21066D135008A2171 /* VersionDictionary.h */, + ); + path = include; + sourceTree = ""; + }; + 276E5BC31066D13D008A2171 /* src */ = { + isa = PBXGroup; + children = ( + 276E5BC41066D13D008A2171 /* Base.cpp */, + 276E5BBB1066D135008A2171 /* Chunk.h */, + 276E5BC51066D13D008A2171 /* Chunk.cpp */, + 276E5BC61066D13D008A2171 /* Dictionary.cpp */, + 276E5BC71066D13D008A2171 /* File.cpp */, + 276E5BBE1066D135008A2171 /* Hash.h */, + 276E5BC81066D13D008A2171 /* Hash.cpp */, + 276E5BBF1066D135008A2171 /* Index.h */, + 276E5BC91066D13D008A2171 /* Index.cpp */, + 276E5BC01066D135008A2171 /* MemoryMap.h */, + 276E5BCA1066D13D008A2171 /* MemoryMap.cpp */, + 276E5BCB1066D13D008A2171 /* Ottoman.cpp */, + 276E5BCC1066D13D008A2171 /* VersionDictionary.cpp */, + ); + path = src; + sourceTree = ""; + }; + 276E5BD61066D142008A2171 /* test */ = { + isa = PBXGroup; + children = ( + 276E5BD71066D142008A2171 /* Dictionary_test.cpp */, + 276E5BD81066D142008A2171 /* Hash_test.cpp */, + 276E5BD91066D142008A2171 /* Ottoman_test.cpp */, + 276E5BDC1066D142008A2171 /* VersionDictionary_test.cpp */, + 276E5BDB1066D142008A2171 /* TestUtils.h */, + 276E5BDA1066D142008A2171 /* TestUtils.cpp */, + ); + path = test; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76F620486A84900D96B5E /* OttomanTest */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "OttomanTest" */; + buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = OttomanTest; + productInstallPath = "$(HOME)/bin"; + productName = BPlusTree; + productReference = 8DD76F6C0486A84900D96B5E /* SafeStorageTest */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "Ottoman" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* BPlusTree */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8DD76F620486A84900D96B5E /* OttomanTest */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 276E5BCD1066D13D008A2171 /* Base.cpp in Sources */, + 276E5BCE1066D13D008A2171 /* Chunk.cpp in Sources */, + 276E5BCF1066D13D008A2171 /* Dictionary.cpp in Sources */, + 276E5BD01066D13D008A2171 /* File.cpp in Sources */, + 276E5BD11066D13D008A2171 /* Hash.cpp in Sources */, + 276E5BD21066D13D008A2171 /* Index.cpp in Sources */, + 276E5BD31066D13D008A2171 /* MemoryMap.cpp in Sources */, + 276E5BD41066D13D008A2171 /* Ottoman.cpp in Sources */, + 276E5BD51066D13D008A2171 /* VersionDictionary.cpp in Sources */, + 276E5BDD1066D142008A2171 /* Dictionary_test.cpp in Sources */, + 276E5BDE1066D142008A2171 /* Hash_test.cpp in Sources */, + 276E5BDF1066D142008A2171 /* Ottoman_test.cpp in Sources */, + 276E5BE01066D142008A2171 /* TestUtils.cpp in Sources */, + 276E5BE11066D142008A2171 /* VersionDictionary_test.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + /Code/googletest/xcode/build/Debug, + ); + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ""; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + /Code/googletest/xcode/build/Debug, + ); + PRODUCT_NAME = SafeStorageTest; + }; + name = Debug; + }; + 1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + /Code/googletest/xcode/build/Debug, + ); + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + /Code/googletest/xcode/build/Debug, + ); + PRODUCT_NAME = SafeStorageTest; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREPROCESSOR_DEFINITIONS = ""; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; + GCC_VERSION = com.apple.compilers.llvm.clang.1_0; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_EFFECTIVE_CPLUSPLUS_VIOLATIONS = NO; + GCC_WARN_SHADOW = NO; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.5; + WARNING_CFLAGS = "-Wall"; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + DEAD_CODE_STRIPPING = YES; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREPROCESSOR_DEFINITIONS = NDEBUG; + GCC_TREAT_WARNINGS_AS_ERRORS = YES; + GCC_VERSION = com.apple.compilers.llvm.clang.1_0; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_EFFECTIVE_CPLUSPLUS_VIOLATIONS = NO; + GCC_WARN_SHADOW = NO; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = macosx10.5; + WARNING_CFLAGS = "-Wall"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "OttomanTest" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "Ottoman" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} diff -r 000000000000 -r 31a43d94cc26 include/Base.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/Base.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,113 @@ +/* + * Base.h + * Ottoman + * + * Created by Jens Alfke on 8/18/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_BASE_ +#define _MOOSEYARD_BASE_ +#include +#include +#include +#include +#include + +namespace Mooseyard { + + /** A 32-bit file position/offset. Supports files of up to 4GB. */ + typedef uint32_t FilePosition; + const FilePosition kNoFilePosition = (FilePosition)-1L; + + + /** A simple structure representing a range of memory (a pointer plus a length). + It doesn't do any memory management; it just points. */ + struct Blob { + const void *bytes; + size_t length; + + Blob() :bytes(NULL), length(0) { } + Blob (const void *b, size_t len) :bytes(b), length(len) { } + Blob (const char *str); + const void* end() const {return (const uint8_t*)bytes+length;} + + operator bool() const {return bytes!=NULL;} + bool operator== (const Blob &b) {return bytes==b.bytes && length==b.length;} + bool equals (const Blob &b) const {return length==b.length && memcmp(bytes,b.bytes,length)==0;} + void clear() {bytes=NULL; length=0;} + }; + + + // Utility to offset a pointer by some number of bytes. + inline const void* offsetBy(const void *src, size_t off) {return (const uint8_t*)src + off;} + inline void* offsetBy(void *src, size_t off) {return (uint8_t*)src + off;} + + + #define UNCOPYABLE(CLASS) private: CLASS(const CLASS&); CLASS& operator=(const CLASS&) + + +#pragma mark - +#pragma mark LITTLE-ENDIAN: + + /** A template for numeric types that are always stored in little-endian form. + Useful for structures stored in files that need to be useable on all architectures. */ + template + class LittleEndian { + public: + inline LittleEndian (); + inline LittleEndian (INT); + inline operator INT() const; + inline LittleEndian& operator= (INT); + inline LittleEndian& operator= (const LittleEndian&); + inline LittleEndian& operator++(); + inline LittleEndian& operator--(); + private: + static inline REP makeLittle (INT); + static inline INT makeNative (REP); + REP _value; + }; + + typedef LittleEndian LittleEndianDouble; + + + // Implementation gunk: + template + inline LittleEndian::LittleEndian () :_value() { } + template + inline LittleEndian::LittleEndian (INT i) :_value(makeLittle(i)) { } + template + inline LittleEndian::operator INT() const {return makeNative(_value);} + template + inline LittleEndian& LittleEndian::operator++() {return *this = (INT)*this + 1;} + template + inline LittleEndian& LittleEndian::operator--() {return *this = (INT)*this - 1;} + + template + inline LittleEndian& LittleEndian::operator= (INT i) { + _value = makeLittle(i); + return *this; + } + template + inline LittleEndian& LittleEndian::operator= (const LittleEndian &p) { + _value = p._value; + return *this; + } + + template <> inline uint32_t LittleEndian::makeLittle (uint32_t i) + {return OSSwapHostToLittleInt32(i);} + template <> inline uint32_t LittleEndian::makeNative (uint32_t i) + {return OSSwapLittleToHostInt32(i);} + template <> inline uint16_t LittleEndian::makeLittle (uint16_t i) + {return OSSwapHostToLittleInt16(i);} + template <> inline uint16_t LittleEndian::makeNative (uint16_t i) + {return OSSwapLittleToHostInt16(i);} + template <> inline CFSwappedFloat64 LittleEndian::makeLittle (double d) + {return CFConvertDoubleHostToSwapped(d);} + template <> inline double LittleEndian::makeNative (CFSwappedFloat64 d) + {return CFConvertDoubleSwappedToHost(d);} + +} + +#endif _MOOSEYARD_BASE_ diff -r 000000000000 -r 31a43d94cc26 include/Chunk.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/Chunk.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,97 @@ +/* + * Chunk.h + * Ottoman + * + * Created by Jens Alfke on 8/27/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_CHUNK_ +#define _MOOSEYARD_CHUNK_ +#include "File.h" + +namespace Mooseyard { + + class KeyValueChunk; + +#pragma pack(2) + + /** An item stored in a file. It's prefixed with its length, so that the entire file + can be scanned for chunks easily. */ + class Chunk { + public: + uint32_t size() const {return _size;} + uint16_t type() const; + + /** If this is a KeyValueChunk, returns itself cast to that type, else NULL. */ + const KeyValueChunk* asKeyValue() const; + + /** Write a Chunk to the file, in pieces a la writev. */ + static size_t writeMultiple (File *file, uint16_t type, + Blob blobs[], int count) throw(File::Error); + + static size_t writePadding (File *file); + + static const uint16_t kChunkTypePadding = 0; + + const void* end() const {return offsetBy(this,_size);} + + protected: + Chunk (uint32_t size, uint16_t type) :_size(size), _keyLength(0xFFFF), _type(type) { } + + private: + // This is mapped to data in the file! Don't mess with it! + LittleEndian _size; + LittleEndian _keyLength; // Used by KeyValueChunk; else 0xFFFF + LittleEndian _type; // Not used by KeyValueChunk + friend class KeyValueChunk; + }; + + + /** A key/value pair as stored in the memory-mapped file. */ + class KeyValueChunk :public Chunk { + public: + Blob key() const; + Blob value() const; + + bool hasKey (Blob key) const; + + void validate() const throw(File::Error); + + /** Write a KeyValueChunk to a file. */ + static size_t write (File* file, FilePosition pos, Blob key, Blob value) throw(File::Error); + + static const uint16_t kChunkType = 1; + + private: + const char* valuePtr() const; + }; + +#pragma options align=reset + + + /** An iterator over all the Chunks in a file. */ + class ChunkIterator { + public: + ChunkIterator (File*, FilePosition start); + + const Chunk* chunk() const {return _chunk;} + FilePosition position() const {return _pos;} + bool atEOF() const; + bool next(); + + operator const Chunk*() const {return _chunk;} + virtual bool hasValue() const {return _chunk != NULL;} + operator bool() const {return hasValue();} + ChunkIterator& operator++() {next(); return *this;} + private: + void _loadChunk(); + File *_file; + FilePosition _pos, _length; + const Chunk* _chunk; + }; + +} + +#endif _MOOSEYARD_CHUNK_ diff -r 000000000000 -r 31a43d94cc26 include/Dictionary.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/Dictionary.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,208 @@ +/* + * Dictionary.h + * Ottoman + * + * Created by Jens Alfke on 8/23/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_DICTIONARY_ +#define _MOOSEYARD_DICTIONARY_ +#include "Base.h" + +namespace Mooseyard { + + typedef uint32_t HashCode; + + /** A Key is a data blob treated as a dictionary key. + It carries its hash-code around with it, to avoid redundant computation. */ + struct Key :public Blob { + HashCode hash; + + Key () :Blob(), hash(0) { } + explicit Key (const Blob &b) :Blob(b), hash(computeHash(b)) { } + Key (const Blob &b, HashCode h) :Blob(b), hash(h) { } + Key (const char *str) :Blob(str), hash(computeHash(*this)) { } + Key (const void *b, uint32_t len, HashCode h) :Blob(b,len), hash(h) { } + + static HashCode computeHash (Blob); + }; + + + /** An abstract interface for key/value dictionaries. + Dictionary itself is read-only; its subclass MutableDictionary allows changes. */ + class Dictionary { + public: + class Iterator; + class ChangeIterator; + + virtual ~Dictionary(); + + /** The number of items in the dictionary. */ + virtual int count() const =0; + + /** Returns the value associated with the given key. + Keys are compared by exact binary data equality. + The data pointed to by the value returned is valid at least until the next call to + this Dictionary; some subclasses may guarantee a longer lifetime. + If there is no value for the given key, a Blob of {NULL, 0} will be returned. */ + virtual Blob get (Key) const =0; + + /** Returns true iff there is a value associated with the given key. */ + virtual bool contains (Key key) const {return get(key).bytes != NULL;} + + /** Returns a pointer to a new Iterator object that can be used to iterate this Dictionary. + The iterator must be deleted when you're done with it. */ + virtual Iterator* iterate() const =0; + + /** A singleton empty Dictionary. */ + static const Dictionary& kEmpty; + + // utilities + /** Returns a suggested size for a hash table that can hold 'capacity' items without + being more than the 'load' fraction full; result is always a power of two. */ + static int choosePowerOfTwoSize (int capacity, float load = kMaxLoadFactor); + + static int chooseAnyOldSize (int capacity, float load = kMaxLoadFactor); + + static const int kMinSize; + static const float kMinLoadFactor; + static const float kMaxLoadFactor; + + protected: + Dictionary(); + }; + + + /** An abstract interface for mutable key/value dictionaries; extends Dictionary. */ + class MutableDictionary : public Dictionary { + public: + /** Stores the given value for the given key, replacing any prior value. + The memory for the key and value is copied, so you don't need to preserve it after + the call returns. */ + virtual void put (Key, Blob value) =0; + + /** Adds the contents of the given Dictionary to this one. + Effectively calls this->put() with every key/value pair in the source Dictionary. */ + virtual void add (Dictionary*); + + /** Removes the value for the given key, if any. + Returns true if the value was removed, false if there was no value. */ + virtual bool remove (Key) =0; + + /** Removes all keys and values, returning the dictionary to an empty state. */ + virtual void removeAll() = 0; + }; + + + /** Abstract iterator interface. Each Dictionary subclass defines its own. + The order in which items are returned is undefined, and in effect quite random, due to + the properties of hash tables. */ + class Dictionary::Iterator { + public: + virtual ~Iterator(); + + /** The current key the iterator is pointing to. */ + virtual Key key() const =0; + /** The current value the iterator is pointing to. */ + virtual Blob value() const =0; + /** Returns true if the iterator has a current item, or false if it's reached the end. */ + virtual bool hasValue() const {return key().bytes != 0;} + /** Advances the Iterator to the next item. Returns false if it's reached the end. + (Note that the order of items is undefined and in practice quite random.) */ + virtual bool next() =0; + + operator bool() const {return hasValue();} + Iterator& operator++() {next(); return *this;} + }; + + class OverlayDictionary; + + + /** An iterator that walks through the differences between two Dictionaries. */ + class Dictionary::ChangeIterator :public Dictionary::Iterator { + public: + ChangeIterator (const Dictionary *dict1, const Dictionary *dict2); + explicit ChangeIterator (const OverlayDictionary*); + virtual ~ChangeIterator(); + + /** Returns the current key, whose values in the two dictionaries differ. */ + virtual Key key() const {return _iter->key();} + /** Returns the corresponding value from dict1. + May be empty (NULL,0) if the key does not appear in dict1 (i.e. inserted into dict2.) */ + virtual Blob value() const {return _iter->value();} + /** Returns the corresponding value from dict2. + May be empty (NULL,0) if the key does not appear in dict2 (i.e. deleted from dict2.) */ + virtual Blob otherValue() const {return _otherValue;} + virtual bool hasValue() const {return _iter->hasValue();} + virtual bool next(); + + private: + void _init (const Dictionary *dict1, const Dictionary *dict2); + bool _skipMatching(); + const Dictionary *_dict2; + Iterator *_iter; + Blob _otherValue; + }; + + + /** Adapter class that allows a Dictionary to appear to be mutable, by using a separate + mutable dictionary to remember the changes. */ + class OverlayDictionary :public MutableDictionary { + public: + /** Creates a mutable dictionary that overlays base. Initially its contents will appear + identical to base. It can be changed, but the changes will not affect the base. */ + explicit OverlayDictionary (const Dictionary *base); + virtual ~OverlayDictionary(); + + /** The base dictionary this is overlaying. */ + const Dictionary* base() const {return _base;} + /** The dictionary that contains the changes made to the base. + Only keys/values that have been changed appear here. */ + const Dictionary* overlay() const {return _overlay;} + /** Returns true if changes have been made. */ + bool isChanged() const; + /** Returns true if the base has been completely replaced by calling removeAll. */ + bool baseReplaced() const {return _allRemoved;} + + /** Removes all changes, restoring the overlay to look identical to the base. */ + void revert(); + /** Changes the overlay to a new base, clearing all changes. */ + void revertTo (const Dictionary* newBase); + + /** Changes the base without clearing changes. */ + void rebase (const Dictionary* newBase); + + virtual int count() const; + virtual Blob get (Key) const; + virtual bool contains (Key) const; + virtual void put (Key, Blob value); + virtual bool remove (Key); + virtual void removeAll(); + virtual Iterator* iterate() const {return new Iterator(*this);} + + class Iterator :public Dictionary::Iterator { + public: + explicit Iterator (const OverlayDictionary&); + virtual Key key() const; + virtual Blob value() const; + virtual bool next(); + private: + bool skipCurrentState(); + Dictionary::Iterator *_iter; + const MutableDictionary *_overlay; + }; + + private: + const Dictionary *_base; + MutableDictionary *_overlay; + int _count; + bool _allRemoved; + friend class ChangeIterator; + }; + + +} + +#endif _MOOSEYARD_DICTIONARY_ diff -r 000000000000 -r 31a43d94cc26 include/File.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/File.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,107 @@ +/* + * File.h + * Ottoman + * + * Created by Jens Alfke on 8/20/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_FILE_ +#define _MOOSEYARD_FILE_ +#include "Base.h" + +namespace Mooseyard { + +struct Blob; +class MemoryMap; + +/** An open file on disk; a thin wrapper around a Unix file descriptor. */ +class File { +public: + + struct Error { + const int code; + const char* const message; + + Error(int c, const char *m) :code(c), message(m) { } + Error(const char *m); + }; + static const int kEOF = 10001; + + + File (const char *filename, bool writeable =false, bool create =false) throw(Error); + File (const char *filename, int oflag) throw(Error); + ~File(); + + off_t length() const throw(Error); + void setLength (off_t) throw(Error); + + off_t position() const throw(Error); + void setPosition (off_t) throw(Error); + off_t setPositionToEnd (off_t bytesBefore =0) throw(Error); + + void read (void *dst, size_t) throw(Error); + size_t write (const void *src, size_t) throw(Error); + + void readFrom (off_t where, void *dst, size_t) throw(Error); + void writeTo (off_t where, const void *src, size_t) throw(Error); + + size_t writeMultiple (Blob blobs[], int count) throw(Error); + size_t writePadding (int alignment); // alignment should be a power of 2 + + MemoryMap* map(); + const MemoryMap* map() const {return const_cast(this)->map();} + void mapRegion (off_t position, size_t length); + const void* mappedPosition (off_t position) const; + + void flush() throw(Error); // Regular fsync call + void flushDisk() throw(Error); // Expensive F_FULLFSYNC call + + template + void read (T& t) throw(Error) {read(&t,sizeof(t));} + template + size_t write (const T& t) throw(Error) {return write(&t,sizeof(t));} + template + void readFrom (off_t where, T &t) throw(Error) {readFrom(where,&t,sizeof(t));} + template + void writeTo (off_t where, const T &t) throw(Error) {writeTo(where,&t,sizeof(t));} + + bool hasPath (const char *path) const throw(Error); + + static void unlink (const char *filename) throw(Error); + static void rename (const char *srcFilename, const char *dstFilename) throw(Error); + + + class Lock { + public: + Lock (File*, bool block =true) throw(Error); + ~Lock(); + bool isLocked() const {return _locked;} + operator bool() const {return _locked;} + bool retry(); + private: + UNCOPYABLE(Lock); + File *_file; + int _mode; + bool _locked; + }; + +private: + static int _open (const char *filename, int posixMode) throw(Error); + static int _check (int result) throw(Error); + static void _checkRead (ssize_t result, size_t expectedSize) throw(Error); + bool _lock (bool block); + void _unlock(); + UNCOPYABLE(File); + + const int _fd; + mutable MemoryMap *_memoryMap; + int _locked; + friend class Lock; + friend class MemoryMap; +}; + +} + +#endif _MOOSEYARD_FILE_ diff -r 000000000000 -r 31a43d94cc26 include/Hash.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/Hash.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,119 @@ +/* + * Hash.h + * Ottoman + * + * Created by Jens Alfke on 8/20/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_HASH_ +#define _MOOSEYARD_HASH_ +#include "Base.h" +#include "Dictionary.h" + +namespace Mooseyard { + + /** A fairly low-level hash-table class whose keys and values are arbitrary data blobs. + This class does no memory management of the keys and values: the memory ranges + pointed to by the key and value parameters to put() must remain valid as long as + that key or value remains in the hash-table. */ + class Hash { + + public: + typedef void* Value; + + /** Creates a new, empty Hash. */ + explicit Hash (int capacity = 50); + + ~Hash(); + + /** Gets the value associated with the given key. */ + Value get(Key) const; + Value operator[] (Key key) const {return get(key);} + + /** Returns the number of values in the Hash. */ + int count() const {return _count;} + + /** Sets the given value for the given key, replacing any existing value. */ + void put(Key, Value); + + /** Removes the value with the given key. */ + bool remove(Key); + + /** Removes all values. */ + void removeAll(); + + void dump() const; + + class IndexEntry; + + class Iterator { + public: + Iterator (const Hash *h); + Iterator (const Hash &h); + operator bool() const {return hasValue();} + Value operator*() const {return value();} + Iterator& operator++() {next(); return *this;} + + Key key() const; + Value value() const; + bool hasValue() const {return _entry < _end;} + bool next(); + private: + Iterator (IndexEntry *start, IndexEntry*end); + IndexEntry* _entry; + IndexEntry* const _end; + UNCOPYABLE(Iterator); + }; + + private: + IndexEntry& _find (Key key) const; + IndexEntry& _findForPut (const IndexEntry &newEntry); + void _resize(int newSize); + void _read(); + UNCOPYABLE(Hash); + + int _count; + int _tableSize; + IndexEntry *_table; + + friend class Iterator; + }; + + + + /** A concrete Dictionary implemented with a Hash. */ + class HashDictionary :public MutableDictionary { + public: + explicit HashDictionary (int capacity =50) :_hash(capacity) { } + virtual ~HashDictionary(); + virtual int count() const {return _hash.count();} + virtual bool contains (Key) const; + virtual Blob get (Key key) const {return _convertValue(_hash.get(key));} + virtual void put (Key, Blob value); + virtual bool remove (Key); + virtual void removeAll(); + virtual Iterator* iterate() const {return new Iterator(*this);} + + class Iterator :public Dictionary::Iterator { + public: + explicit Iterator (const HashDictionary &d) :_it(d._hash) { } + virtual Key key() const {return _it.key();} + virtual Blob value() const {return _convertValue(_it.value());} + virtual bool hasValue() const {return _it.hasValue();} + virtual bool next() {return _it.next();} + private: + Hash::Iterator _it; + }; + + private: + static Blob _convertValue (void*); + Hash _hash; + friend class Iterator; + }; + + +} + +#endif _MOOSEYARD_HASH_ diff -r 000000000000 -r 31a43d94cc26 include/Index.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/Index.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,76 @@ +/* + * Index.h + * Ottoman + * + * Created by Jens Alfke on 8/27/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_INDEX_ +#define _MOOSEYARD_INDEX_ +#include "Chunk.h" +#include "Dictionary.h" + +namespace Mooseyard { + + class KeyValueChunk; + + /** An in-file hash table. This structure is stored directly in the file, memory-mapped. + Index is normally only used internally by VersionDictionary. */ + class Index :public Chunk { + public: + + class Entry; + + static const Index* indexMappedAt (const void*); + static Index* create (int capacity); + + int count() const {return _count;} + + Blob get (File *file, Key) const; + bool put (File *file, Key, FilePosition valuePosition, FilePosition maxPosition); + bool remove (File *file, Key, FilePosition maxPosition); + void removeAll(); + + void copyFrom (File *file, const Index *index); + + class Iterator; + + void validate() const throw(File::Error); + void validateEntries (const File *file) const throw(File::Error); + static const uint16_t kChunkType = 2; + + private: + Index (uint32_t tableSize); + const Entry* table (int index) const; + Entry* table (int index); + const KeyValueChunk* _find (File *file, Key) const; + bool _put (File *file, Key, FilePosition, FilePosition maxPosition); + + // This is mapped to data in the file! Don't mess with it! + LittleEndian _magicNumber; + LittleEndian _count; + LittleEndian _tableSize; + char _table[0]; // Actually Entry[] + }; + + + + class Index::Iterator :public Dictionary::Iterator { + public: + Iterator (File*, const Index*); + virtual Key key() const; + virtual Blob value() const; + virtual bool next(); + virtual bool hasValue() const; + + FilePosition valuePosition(); + private: + File* const _file; + const Index::Entry *_current, *_end; + }; + +} + +#endif //_MOOSEYARD_INDEX_ diff -r 000000000000 -r 31a43d94cc26 include/MemoryMap.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/MemoryMap.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,53 @@ +/* + * MemoryMap.h + * Ottoman + * + * Created by Jens Alfke on 9/17/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "File.h" + +namespace Mooseyard { + + /** A flexible memory-map on a file, which can handle multiple discontiguous mapped regions. + Clients usually access this functionality through File, which delegates to it. */ + class MemoryMap { + public: + MemoryMap (File* file) :_file(file), _nRegions(0), _regions(NULL) { } + ~MemoryMap(); + + void mapRegion (off_t position, size_t length); + const void* mappedPosition (off_t position) const; + + private: + UNCOPYABLE(MemoryMap); + class Region; + const void* _at (off_t) const; + + File *_file; + int _nRegions; + Region **_regions; + }; + + + class MemoryMap::Region { + public: + Region (File*, off_t position, size_t length); + ~Region(); + off_t position() const {return _position;} + size_t length() const {return _length;} + off_t end() const {return _position + _length;} + bool setLength (size_t); // Returns false if it failed + const void* mappedPosition(off_t); + private: + UNCOPYABLE(Region); + File* const _file; + const off_t _position; + size_t _length; + const uint8_t *_start; + }; + + +} diff -r 000000000000 -r 31a43d94cc26 include/Ottoman.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/Ottoman.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,99 @@ +/* + * Ottoman.h + * Ottoman + * + * Created by Jens Alfke on 8/31/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_OTTOMAN_ +#define _MOOSEYARD_OTTOMAN_ + +namespace Mooseyard { + + class ChunkIterator; + class Dictionary; + class File; + class VersionDictionary; + class OverlayDictionary; + + + /** A version-controlled persistent dictionary. + Each version is stored as a VersionDictionary, unsaved changes as an OverlayDictionary. */ + class Ottoman { + public: + /** Creates an "untitled" Ottoman with no file and no lastVersion. + After adding values to the currentVersion, use saveAs to save it. */ + Ottoman (); + + /** Opens an existing Ottoman file. */ + explicit Ottoman (const char *filename, bool writeable =true); + + /** Closes an Ottoman. */ + virtual ~Ottoman(); + + /** The current filename, or NULL if the receiver is still in the "untitled" state. */ + virtual const char* filename() const {return _filename;} + + /** The latest saved version of the dictionary. + Earlier versions can be accessed through its previousVersion() accessor. + This will be NULL if the receiver is still in the "untitled" state. */ + virtual VersionDictionary* lastVersion() const {return _lastVersion;} + + /** A mutable overlay representing the current state of the dictionary. + Changes are made in memory until save() is called. + This will be NULL if the receiver was opened read-only (writeable=false). */ + virtual OverlayDictionary* currentVersion() const {return _current;} + + /** Has the on-disk file been updated with newer revisions than what I know about? */ + virtual bool needsSync() const; + + /** Reads any newer versions from disk. + Returns true if new versions were read, false if there were none. + Afterwards, lastVersion() will return the latest version in the file. + (The old lastVersion dictionary is still around, so you can save a pointer to it + before the call and use it later to see what changed.) + Changes made to the currentVersion dictionary are not lost, but are now relative + to the new lastVersion. You may want to scan them and resolve conflicts. */ + virtual bool sync(); + + /** Saves the current version to the file, by appending. + Returns false if there is a version conflict; in that case you need to call sync(), + possibly resolve conflicts, and then call save() again. */ + virtual bool save(); + + /** Saves the current version to the file, by writing to a new temporary file and + then atomically replacing the original. + Older versions, and older copies of the data, are not preserved, so this + will typically shrink the file quite a bit. + Returns false if there is a version conflict. */ + virtual bool saveAndCompact(); + + /** Saves the current version to a new file, leaving the new file open, + so subsequent saves will be written to it. */ + virtual void saveAs (const char *newFilename, bool overwriteAllowed =true); + + /** Scans the file for damage. Returns true if the file is OK, false if problems are found. + If the 'repair' flag is set, will truncate the file to the last valid version. */ + bool scavenge (bool repair =false); + + ChunkIterator* chunkIterator() const; // for testing purposes + + private: + Ottoman(const Ottoman&); // forbid copying/assignment + Ottoman& operator=(const Ottoman&); + + void _open(); + void _append (File *dstFile); + File* _writeTo (const char *dstFileName, bool overwriteAllowed); + + bool _writeable; + File *_file; + char *_filename; + VersionDictionary *_lastVersion; + OverlayDictionary *_current; + }; + +} +#endif // _MOOSEYARD_OTTOMAN_ diff -r 000000000000 -r 31a43d94cc26 include/VersionDictionary.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/VersionDictionary.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,127 @@ +/* + * VersionDictionary.h + * Ottoman + * + * Created by Jens Alfke on 8/21/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#ifndef _MOOSEYARD_VERSIONDICTIONARY_ +#define _MOOSEYARD_VERSIONDICTIONARY_ +#include "Dictionary.h" + +namespace Mooseyard { + + class File; + class Index; + + /** A persistent Dictionary embedded in a memory-mapped file, + representing one complete version of an Ottoman dictionary. */ + class VersionDictionary :public Dictionary { + public: + + /** The generation number. The first version in a new file is zero. */ + int generation() const; + + /** The absolute time at which this version was written to the file. */ + time_t timestamp() const; + + /** The previous version, before this one was saved. */ + const VersionDictionary* previousVersion() const; + + /** Is this the latest version in the file? */ + bool isLatestVersion() const; + + virtual int count() const; + virtual Blob get (Key) const; + class Iterator; + virtual Dictionary::Iterator* iterate() const; + + class ChangeIterator; + /** Iterates over the changes between this dictionary and the previous version. */ + virtual ChangeIterator* iterateChanges() const; + + + // Stuff you probably won't need to use: + + File* file() const {return _file;} + explicit VersionDictionary (File*); + VersionDictionary (File *file, FilePosition trailerPosition); + + private: + /** The type of the trailer chunk that's used as the position of the dictionary. */ + static const uint16_t kChunkType = 3; + + struct IndexPositions { + LittleEndian position[256]; + LittleEndian& operator[] (int i) {return position[i];} + LittleEndian const& operator[] (int i) const {return position[i];} + }; + + class Trailer; + const Trailer* _trailer() const; + const Index* _index (int i) const; + void _read (FilePosition trailerPosition =0); + static FilePosition _append (const VersionDictionary *baseDict, + const Dictionary *addDict, + File *dstFile, + bool replace); + + File *_file; + FilePosition _trailerPosition, _previousTrailerPosition; + IndexPositions _indexPositions; + int _count; + mutable const VersionDictionary *_previousVersion; + +#if VERSIONDICTIONARY_TESTING + public: +#endif + static VersionDictionary* create (File *file, const Dictionary *srcDict); + VersionDictionary* byAppending (const Dictionary* d) const {return _appendAndOpen(d,_file,false);} + VersionDictionary* byReplacing (const Dictionary* d) const {return _appendAndOpen(d,_file,true);} + VersionDictionary* _appendAndOpen (const Dictionary *addDict, File *dstFile, bool replace) const; + + friend class Ottoman; + UNCOPYABLE(VersionDictionary); + }; + + + class VersionDictionary::Iterator :public Dictionary::Iterator { + public: + explicit Iterator (const VersionDictionary*); + virtual ~Iterator(); + virtual Key key() const; + virtual Blob value() const; + virtual bool next(); + virtual bool hasValue() const; + private: + bool nextIndex(); + + const VersionDictionary *_file; + int _bucket; + Dictionary::Iterator *_iter; + UNCOPYABLE(Iterator); + }; + + + class VersionDictionary::ChangeIterator :public Dictionary::Iterator { + public: + explicit ChangeIterator (const VersionDictionary*); + virtual ~ChangeIterator(); + virtual Key key() const; + virtual Blob value() const; + virtual bool next(); + virtual bool hasValue() const; + private: + bool nextIndex(); + + const VersionDictionary *_file; + int _bucket; + Dictionary::Iterator *_iter; + UNCOPYABLE(ChangeIterator); + }; + +} + +#endif //_MOOSEYARD_VERSIONDICTIONARY_ diff -r 000000000000 -r 31a43d94cc26 src/Base.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Base.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,66 @@ +/* + * Base.cpp + * Ottoman + * + * Created by Jens Alfke on 8/18/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "Base.h" +#include "File.h" +#include +#include +#include + +namespace Mooseyard { + +#pragma mark - +#pragma mark BLOB: + + Blob::Blob (const char *str) + :bytes(str), + length(str ?strlen(str) :0) + { } + + /* + struct MutableBlob :public Blob { + MutableBlob(void *b, size_t len) :Blob(b,len) { } + void* mutableBytes() {return (void*)bytes;} + void freeBytes() {::free(mutableBytes()); bytes=NULL; length=0;} + }; + MutableBlob Blob::copyBytes() const { + MutableBlob copy( malloc(length), length); + memcpy(copy.mutableBytes(), bytes, length); + return copy; + } + */ + +#pragma mark - +#pragma mark UUID: + +#if 0 + /** Typical 128-bit universally-unique identifier. */ + class UUID { + public: + UUID(); + const void *bytes() const {return _bytes;} + bool operator== (const UUID&) const; + private: + uint8_t _bytes[16]; + }; + + + UUID::UUID() { + CFUUIDRef u = CFUUIDCreate(NULL); + CFUUIDBytes bytes = CFUUIDGetUUIDBytes(u); + memcpy(&_bytes, &bytes, sizeof(_bytes)); + CFRelease(u); + } + + bool UUID::operator== (const UUID& u) const { + return memcmp(_bytes, u._bytes, sizeof(_bytes)) == 0; + } +#endif + +} diff -r 000000000000 -r 31a43d94cc26 src/Chunk.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Chunk.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,135 @@ +/* + * Chunk.cpp + * Ottoman + * + * Created by Jens Alfke on 8/27/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "Chunk.h" +#include +#include +#include + +namespace Mooseyard { + + uint16_t Chunk::type() const { + return _keyLength == 0xFFFF ?(uint16_t)_type :KeyValueChunk::kChunkType; + } + + const KeyValueChunk* Chunk::asKeyValue() const { + return _keyLength == 0xFFFF ?NULL :(const KeyValueChunk*)this; + } + + size_t Chunk::writeMultiple (File *file, uint16_t type, + Blob blobs[], int count) throw(File::Error) { + assert(type != KeyValueChunk::kChunkType); + Blob nuBlobs[count+1]; + uint32_t size = sizeof(Chunk); + for (int i=0; iwriteMultiple(nuBlobs, count+1); + } + + size_t Chunk::writePadding (File *file) { + int padding = file->position() & 0x03; + if (padding == 0) + return 0; + else { + padding = 4 - padding; + uint32_t zero = 0; + Blob pad(&zero, padding); + return writeMultiple(file, kChunkTypePadding, &pad, 1); + } + } + + + + void KeyValueChunk::validate() const throw(File::Error) { + if (_keyLength >= 0xFFFFU) + throw File::Error(ERANGE, "Invalid key length (0xFFFF)"); + if (valuePtr() > end()) + throw File::Error(ERANGE, "Bad KeyValueChunk (key-length too large to fit)"); + } + + Blob KeyValueChunk::key() const { + return Blob(&_keyLength +1, _keyLength); // KeyValueChunk doesn't have _type + } + + const char* KeyValueChunk::valuePtr() const { + size_t vptr = (size_t) key().end(); + vptr = (vptr+3) & ~3; // 4-byte align + return (const char*)vptr; + } + + Blob KeyValueChunk::value() const { + const char *vp = valuePtr(); + return Blob(vp, (const char*)end()-vp); + } + + bool KeyValueChunk::hasKey (Blob key) const { + return key.length==_keyLength + && memcmp(key.bytes, &_keyLength +1, key.length)==0; + } + + size_t KeyValueChunk::write (File* file, FilePosition pos, Blob key, Blob value) throw(File::Error) { + if (key.length >= 0xFFFFU) + throw File::Error(ERANGE, "Key too long (>=64k)"); + uint16_t keyLength = key.length; + uint32_t size = sizeof(uint32_t) + sizeof(uint16_t) + keyLength; + size_t pad = (pos + size) & 0x3; + if (pad) + pad = 4-pad; + uint32_t zeros = 0; + size += pad + value.length; + + Chunk chunk(size, KeyValueChunk::kChunkType); + chunk._keyLength = keyLength; + Blob b[4] = { + Blob(&chunk, sizeof(chunk._size)+sizeof(chunk._keyLength)), + key, + Blob(&zeros, pad), + value}; + return file->writeMultiple(b,4); + } + + + + ChunkIterator::ChunkIterator (File* file, FilePosition start) + :_file(file), + _pos(start), + _length(_file->length()), + _chunk(NULL) + { + _loadChunk(); + } + + void ChunkIterator::_loadChunk() { + if (_pos < _length) { + _chunk = (const Chunk*) _file->mappedPosition(_pos); + if (_chunk->size() < sizeof(Chunk) || _pos+_chunk->size() > _length) + _chunk = NULL; + } else { + _chunk = NULL; + } + + } + + bool ChunkIterator::atEOF() const { + return _pos == _length; + } + + bool ChunkIterator::next() { + if (_chunk) { + _pos += _chunk->size(); + _loadChunk(); + } + return _chunk != NULL; + } + +} diff -r 000000000000 -r 31a43d94cc26 src/Dictionary.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Dictionary.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,376 @@ +/* + * Dictionary.cpp + * Ottoman + * + * Created by Jens Alfke on 8/23/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "Dictionary.h" +#include "Hash.h" +#include +#include +#include + +namespace Mooseyard { + + Dictionary::Dictionary() { + } + + Dictionary::~Dictionary() { + } + + Dictionary::Iterator::~Iterator() { + } + + + const int Dictionary::kMinSize = 8; + const float Dictionary::kMinLoadFactor = 0.25; + const float Dictionary::kMaxLoadFactor = 0.75; + + // Choose the smallest power of two that's large enough to hold the given capacity + // of entries with no greater than the given load (fraction full). + int Dictionary::choosePowerOfTwoSize (int capacity, float load) { + int idealSize = capacity / load; + int size; + for (size=kMinSize; size= 4) + { + uint32_t k; + + k = data[0]; + k |= data[1] << 8; + k |= data[2] << 16; + k |= data[3] << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } + + HashCode Key::computeHash (Blob key) { + return MurmurHashNeutral2(key.bytes, key.length, 0); + } + + + + void MutableDictionary::add (Dictionary* dict) { + Iterator *it = dict->iterate(); + for (; *it; it->next()) + put(it->key(), it->value()); + delete it; + } + + + + const Dictionary& Dictionary::kEmpty = * new HashDictionary(); + + +#pragma mark - +#pragma mark KEY AND VALUE + + class KeyAndValue { + public: + static KeyAndValue* create (Blob key, Blob value) { + size_t size = 2*sizeof(uint32_t) + key.length; + size = (size + 0x3) & ~0x3; // 32-bit align start of value + if (value!=kDeletedValue) + size += value.length; + KeyAndValue *kv = (KeyAndValue*) ::operator new(size); + kv->_keyLength = key.length; + memcpy(&kv->_keyLength + 1, key.bytes, key.length); + uint32_t *valueLengthPtr = const_cast(kv->valueLengthPtr()); + *valueLengthPtr = value.length; + if (value!=kDeletedValue) + memcpy(valueLengthPtr + 1, value.bytes, value.length); + return kv; + } + Blob key() const { + return Blob(&_keyLength+1, _keyLength); + } + + Blob value() const{ + const uint32_t *v = this->valueLengthPtr(); + if (*v != kDeletedValue.length) + return Blob(v+1, *v); + else + return kDeletedValue; + } + + /** Magic value OverlayDictionary stores in me to represent a deleted value. */ + static const Blob kDeletedValue; + private: + const uint32_t* valueLengthPtr() const { + size_t ptr = (size_t)(&_keyLength + 1) + _keyLength; + ptr = (ptr + 0x3) & ~0x3; // 32-bit align + return (uint32_t*)ptr; + } + + uint32_t _keyLength; + //...there follows the key, valueLength and value... + }; + + const Blob KeyAndValue::kDeletedValue(NULL, (size_t)-1); + + + + +#pragma mark - +#pragma mark HASH DICTIONARY: + + HashDictionary::~HashDictionary() { + removeAll(); + } + + Blob HashDictionary::_convertValue (void *value) { + return value ?((KeyAndValue*)value)->value() :Blob(); + } + + bool HashDictionary::contains (Key key) const { + Blob value = get(key); + return value.bytes || value.length; + } + + + void HashDictionary::put (Key key, Blob value) { + // Allocate a block to store both the Blob struct and the data + KeyAndValue *kv = KeyAndValue::create(key,value); + _hash.put(Key(kv->key(),key.hash), kv); + } + + bool HashDictionary::remove (Key key) { + KeyAndValue *kv = (KeyAndValue*) _hash.get(key); + if (kv) { + free(kv); + return true; + } else + return false; + } + + void HashDictionary::removeAll() { + for (Hash::Iterator it(&_hash); it; ++it) + free(it.value()); + _hash.removeAll(); + } + + + + +#pragma mark - +#pragma mark OVERLAY DICTIONARY: + + + OverlayDictionary::OverlayDictionary (const Dictionary *base) + :_base(base), + _overlay(new HashDictionary()), + _count(base->count()), + _allRemoved(false) + { } + + OverlayDictionary::~OverlayDictionary() { + delete _overlay; + } + + int OverlayDictionary::count() const { + return _count; + } + + bool OverlayDictionary::isChanged() const { + return _overlay->count() > 0; + } + + Blob OverlayDictionary::get (Key key) const { + Blob result = _overlay->get(key); + if (!result) { + if (result==KeyAndValue::kDeletedValue) + result = Blob(); // It's been marked as deleted + else if (!_allRemoved) + result = _base->get(key); + } + return result; + } + + bool OverlayDictionary::contains (Key key) const { + Blob result = _overlay->get(key); + if (result) + return true; + else if (result==KeyAndValue::kDeletedValue) + return false; + else if (!_allRemoved) + return _base->get(key); + else + return false; + } + + void OverlayDictionary::put (Key key, Blob value) { + assert(value.bytes || value.length==0); // make sure it doesn't look like magic kDeletedValue + _overlay->put(key,value); + if (_allRemoved || !_base->contains(key)) + _count++; + } + + bool OverlayDictionary::remove (Key key) { + if (!_allRemoved && _base->contains(key)) { + _overlay->put(key,KeyAndValue::kDeletedValue); + _count--; + return true; + } else if (_overlay->remove(key)) { + _count--; + return true; + } else + return false; + } + + void OverlayDictionary::removeAll() { + _allRemoved = true; + _count = 0; + } + + + void OverlayDictionary::revert() { + _overlay->removeAll(); + _count = _base->count(); + _allRemoved = false; + } + + void OverlayDictionary::revertTo (const Dictionary* newBase) { + _base = newBase; + revert(); + } + + void OverlayDictionary::rebase (const Dictionary* newBase) { + _base = newBase; + } + + OverlayDictionary::Iterator::Iterator (const OverlayDictionary &dict) + :_iter(dict.base()->iterate()), + _overlay(dict._overlay) + { + if (skipCurrentState()) + next(); + } + + Key OverlayDictionary::Iterator::key() const {return _iter ?_iter->key() :Key();} + Blob OverlayDictionary::Iterator::value() const {return _iter ?_iter->value() :Blob();} + + bool OverlayDictionary::Iterator::next() { + if (_iter) { + do { + _iter->next(); + } while (skipCurrentState()); + } + return _iter && *_iter; + } + + bool OverlayDictionary::Iterator::skipCurrentState() { + if (_iter) { + if (*_iter) { + if (_overlay) { + if (_overlay->contains(_iter->key())) + return true; // Skip overridden value in base dict + } else { + if (_iter->value() == KeyAndValue::kDeletedValue) + return true; // Skip marked-deleted value in overlay dict + } + } else { + delete _iter; + if (_overlay) { + // end of base iterator; switch to overlay + _iter = _overlay->iterate(); + _overlay = NULL; + return skipCurrentState(); + } else { + _iter = NULL; + } + } + } + return false; + } + + +#pragma mark - +#pragma mark CHANGE ITERATOR: + + + Dictionary::ChangeIterator::ChangeIterator (const Dictionary *dict1, const Dictionary *dict2) { + _init(dict1, dict2); + } + + Dictionary::ChangeIterator::ChangeIterator (const OverlayDictionary *overlay) { + _init(overlay->_overlay, overlay->base()); + } + + void Dictionary::ChangeIterator::_init(const Dictionary *dict1, const Dictionary *dict2) { + _dict2 = dict2; + _iter = dict1->iterate(); + if (*_iter) + _skipMatching(); + } + + Dictionary::ChangeIterator::~ChangeIterator() { + delete _iter; + } + + bool Dictionary::ChangeIterator::next() { + return _iter->next() && this->_skipMatching(); + } + + bool Dictionary::ChangeIterator::_skipMatching() { + do{ + _otherValue = _dict2->get(_iter->key()); + if (!_otherValue.equals(_iter->value())) + return true; + }while (_iter->next()); + return false; + } + + + +} diff -r 000000000000 -r 31a43d94cc26 src/File.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/File.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,214 @@ +/* + * File.cpp + * Ottoman + * + * Created by Jens Alfke on 8/20/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "File.h" +#include "MemoryMap.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace Mooseyard { + + File::File (const char *filename, bool writeable, bool create) throw(Error) + :_fd(_check( ::open(filename, writeable ?(create ?O_RDWR|O_CREAT :O_RDWR) :O_RDONLY, 0644) )), + _memoryMap(NULL), + _locked(0) + { } + + File::File (const char *filename, int oflag) throw(Error) + :_fd(_check( ::open(filename, oflag, 0644) )), + _memoryMap(NULL), + _locked( (oflag | O_EXLOCK) ?1 :0) + { } + + File::~File() { + delete _memoryMap; + _unlock(); + ::close(_fd); + } + + + off_t File::length() const throw(Error) { + struct stat s; + _check( ::fstat(_fd,&s) ); + return s.st_size; + } + + void File::setLength (off_t length) throw(Error) { + _check( ftruncate(_fd,length) ); + } + + off_t File::position() const throw(Error) { + return _check( lseek(_fd,0,SEEK_CUR) ); + } + + void File::setPosition (off_t pos) throw(Error) { + _check( lseek(_fd,pos,SEEK_SET) ); + } + + off_t File::setPositionToEnd (off_t bytesBefore) throw(Error) { + return _check( lseek(_fd,-bytesBefore,SEEK_END) ); + } + + void File::read (void *dst, size_t size) throw(Error) { + _checkRead( ::read(_fd, dst, size), size ); + } + + size_t File::write (const void *src, size_t size) throw(Error) { + return _check( ::write(_fd, src, size) ); + } + + + void File::readFrom (off_t where, void *dst, size_t size) throw(Error) { + _checkRead( ::pread(_fd, dst, size, where), size ); + } + + void File::writeTo (off_t where, const void *src, size_t size) throw(Error) { + _check( ::pwrite(_fd, src, size, where) ); + } + + size_t File::writeMultiple (Blob blobs[], int count) throw(Error) { + struct iovec vec[count]; + for (int i=0; i= 0) + return result; + //printf("*** File::_check: Error %i: %s\n", errno, strerror(errno)); + throw Error(errno, strerror(errno)); + } + + void File::_checkRead (ssize_t result, size_t expectedSize) throw(Error) { + if ((size_t)result < expectedSize) { + if (result < 0) + throw Error(errno, strerror(errno)); + else + throw Error(kEOF, "unexpected EOF"); + } + } + + File::Error::Error(const char *m) + :code(ERANGE), + message(m) + { } + + +#pragma mark - +#pragma mark LOCKS: + + bool File::_lock (bool block) { + if (_locked) { + _locked++; + } else { + int mode = LOCK_EX; + if (block) + mode |= LOCK_NB; + if (::flock(_fd, mode) == 0) + _locked = 1; + else if (errno != EWOULDBLOCK) // may be returned in LOCK_UN mode + _check(-1); + } + return _locked > 0; + } + + void File::_unlock() { + if (_locked > 0) { + _locked--; + ::flock(_fd, LOCK_UN); + } + } + + File::Lock::Lock (File *file, bool block) throw(Error) + :_file(file), + _locked( _file->_lock(block) ) + { } + + File::Lock::~Lock() { + if (_locked) + _file->_unlock(); + } + + bool File::Lock::retry() { + if (!_locked) + _locked = _file->_lock(false); + return _locked; + } + + +#pragma mark - +#pragma mark MEMORY MAP: + + MemoryMap* File::map() { + if (!_memoryMap) + _memoryMap = new MemoryMap(this); + return _memoryMap; + } + + void File::mapRegion (off_t position, size_t length) { + return map()->mapRegion(position,length); + } + + const void* File::mappedPosition (off_t position) const { + return map()->mappedPosition(position); + } + +} diff -r 000000000000 -r 31a43d94cc26 src/Hash.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Hash.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,160 @@ +/* + * Hash.cpp + * Ottoman + * + * Created by Jens Alfke on 8/20/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "Hash.h" +#include "Dictionary.h" +#include + +namespace Mooseyard { + + class Hash::IndexEntry { + public: + IndexEntry (Key key, Hash::Value value =NULL) + :_key(key), + _value(value) + { } + + Key key() const {return _key;} + HashCode hash() const {return _key.hash;} + Hash::Value value() const {return _key.bytes ?_value :NULL;} + void setValue (Hash::Value value) {_value = value;} + + bool isAvailable() const {return _value == NULL;} // false if deleted + bool hasValue() const {return _key.bytes != NULL;}// false if deleted + bool operator==(const IndexEntry &e) const {return _key.hash==e._key.hash && _key.equals(e._key);} + bool operator!=(const IndexEntry &e) const {return ! (*this==e);} + + void markDeleted() {_key.bytes = NULL; _value = (Hash::Value)-1;} + void markEmpty() {_key.bytes = NULL; _value = NULL;} + + private: + Key _key; // bytes will be NULL if empty or deleted + Hash::Value _value; // NULL if empty, -1 if deleted + }; + + + Hash::Hash (int capacity) + :_count(0), + _tableSize( Dictionary::choosePowerOfTwoSize(capacity) ), + _table( (IndexEntry*) calloc(_tableSize,sizeof(IndexEntry)) ) + { } + + Hash::~Hash() { + free(_table); + } + + Hash::IndexEntry& Hash::_find (Key key) const { + Key k = Key(key); + IndexEntry newEntry(k); + int index = newEntry.hash() % _tableSize; + IndexEntry *found = &_table[index]; + int probe = 0; + while (!found->isAvailable() && (!found->hasValue() || *found != newEntry)) { + index = (index + ++probe) % _tableSize; + found = &_table[index]; + } + return *found; + } + + Hash::IndexEntry& Hash::_findForPut (const Hash::IndexEntry &newEntry) { + int index = newEntry.hash() % _tableSize; + IndexEntry *found = &_table[index]; + int probe = 0; + while (found->hasValue() && *found != newEntry) { + index = (index + ++probe) % _tableSize; + found = &_table[index]; + } + return *found; + } + + Hash::Value Hash::get(Key key) const { + return _find(key).value(); + } + + void Hash::put(Key key, Hash::Value value) { + IndexEntry newEntry = IndexEntry(Key(key),value); + IndexEntry &found = _findForPut(newEntry); + if (found.hasValue()) + found.setValue(value); + else { + found = newEntry; + _count++; + if (_count/(float)_tableSize > Dictionary::kMaxLoadFactor) + _resize(2*_tableSize); + } + } + + bool Hash::remove(Key key) { + IndexEntry &entry = _find(key); + if (!entry.value()) + return false; + entry.markDeleted(); + _count--; + if (_count/(float)_tableSize < Dictionary::kMinLoadFactor) + _resize(_tableSize/2); + return true; + } + + void Hash::removeAll() { + memset(_table, 0, _tableSize*sizeof(*_table)); + _count = 0; + } + + void Hash::dump() const { + for (int index=0; index<_tableSize; index++) + if (_table[index].hasValue()) { + Blob key = _table[index].key(); + printf("%3i: %*s -> %p\n", + index, (int)key.length,(const char*)key.bytes, _table[index].value()); + } + } + + + void Hash::_resize(int newSize) { + newSize = std::max(newSize, Dictionary::kMinSize); + if (newSize != _tableSize) { + IndexEntry* oldTable = _table; + int oldTableSize = _tableSize; + _tableSize = newSize; + _table = (IndexEntry*) calloc(_tableSize,sizeof(IndexEntry)); + + // Add existing entries to new table: + for (int index=0; index_table-1), _end(hash->_table+hash->_tableSize) + { + next(); + } + + Hash::Iterator::Iterator (const Hash &hash) + :_entry(hash._table-1), _end(hash._table+hash._tableSize) + { + next(); + } + + bool Hash::Iterator::next() { + while (++_entry < _end) { + if (_entry->hasValue()) + return true; + } + return false; + } + + Key Hash::Iterator::key() const {return _entry->key();} + Hash::Value Hash::Iterator::value() const {return _entry->value();} + +} diff -r 000000000000 -r 31a43d94cc26 src/Index.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Index.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,270 @@ +/* + * Index.cpp + * Ottoman + * + * Created by Jens Alfke on 8/27/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "Index.h" +#include "Chunk.h" +#include "File.h" +#include "Dictionary.h" +#include +#include +#include +#include + +namespace Mooseyard { + + // Quadratic probing results in fewer bucket tests on lookup, but makes the hash tables + // significantly larger, which increases the file size. In practice a smaller table size seems + // to be a bigger win since disk I/O is slow. + #define PROBE_QUADRATIC 0 + + // Similarly use a higher max load than for memory-resident tables, to save space. + static const float kMaxLoadFactor = 0.85; + + static const uint32_t kIndexMagicNumber = 0x54378543; + + + /** An index (hash-table) entry as stored in the memory-mapped file. */ + class Index::Entry { + public: + explicit Entry() :_hash(0), _valuePosition(0) { } + explicit Entry (HashCode h, FilePosition p) + :_hash(h), _valuePosition(p) { } + LittleEndian hash() const {return _hash;} + bool hasHash (LittleEndian h) const {return _hash == h;} + + const KeyValueChunk* value (const File *file) const { + return (const KeyValueChunk*) file->mappedPosition(_valuePosition); + } + FilePosition valuePosition() const {return _valuePosition;} + + + bool isAvailable() const {return _valuePosition == 0;} // false if deleted + bool hasValue() const {return _valuePosition > 1;} // false if deleted + + void markDeleted() {_valuePosition = kDeletedPosition;} + void markEmpty() {_valuePosition = 0;} + + + static const FilePosition kDeletedPosition = 1; + + private: + LittleEndian _hash; + LittleEndian _valuePosition; + }; + + +#pragma mark - +#pragma mark CREATION: + + Index::Index (uint32_t itsTableSize) + :Chunk(sizeof(Index) + itsTableSize*sizeof(Index::Entry), kChunkType), + _magicNumber(kIndexMagicNumber), + _count(0), + _tableSize(itsTableSize) + { } + + Index* Index::create (int capacity) { +#if PROBE_QUADRATIC + int tableSize = Dictionary::choosePowerOfTwoSize(capacity); +#else + int tableSize = Dictionary::chooseAnyOldSize(capacity, kMaxLoadFactor); +#endif + size_t size = sizeof(Index) + tableSize*sizeof(Index::Entry); + void* storage = ::operator new(size); + memset(storage,0,size); + return new(storage) Index(tableSize); + } + + const Index* Index::indexMappedAt (const void* ptr) { + const Index *index = (const Index*)ptr; + index->validate(); + return index; + } + + void Index::validate() const throw(File::Error) { + if (_magicNumber != kIndexMagicNumber) + throw File::Error(ERANGE, "Bad Index (wrong magic number)"); + if (_tableSize < 2 || _count > _tableSize) + throw File::Error(ERANGE, "Bad Index (wrong count or tableSize)"); +#if PROBE_QUADRATIC + if ((_tableSize & (_tableSize-1)) != 0) + throw File::Error(ERANGE, "Bad Index (size not power of 2)"); +#endif + if (size() != sizeof(Index)+_tableSize*sizeof(Index::Entry)) + throw File::Error(ERANGE, "Bad Index (wrong size)"); + } + + void Index::validateEntries (const File *file) const throw(File::Error) { + validate(); + uint32_t size = _tableSize; + uint32_t count = 0; + const Entry *entry = table(0); + for (uint32_t i=0; ihasValue()) { + count++; + const KeyValueChunk *chunk = entry->value(file); + if ((void*)chunk->end() > (void*)this) + throw File::Error(ERANGE, "Bad Index entry (points beyond index)"); + chunk->validate(); + if (entry->hash() != Key::computeHash(chunk->key())) + throw File::Error(ERANGE, "Bad Index entry (wrong hash code)"); + } + } + if (count != _count) + throw File::Error(ERANGE, "Bad Index (inconsistent count)"); + } + + +#pragma mark - +#pragma mark LOOKUP: + + const Index::Entry* Index::table (int index) const { + return &((const Index::Entry*)&_table)[index]; + } + + Index::Entry* Index::table (int index) { + return &((Index::Entry*)&_table)[index]; + } + + + const KeyValueChunk* Index::_find (File *file, Key key) const { + int index = key.hash % _tableSize; + LittleEndian lhash = key.hash; +#if PROBE_QUADRATIC + int probe = 0; +#endif + for(;;) { + const Index::Entry *found = table(index); + if (found->isAvailable()) { + return NULL; + } else if (found->hasHash(lhash)) { + const KeyValueChunk *value = found->value(file); + if (value->hasKey(key)) { + return value; + } + } +#if PROBE_QUADRATIC + index = (index + ++probe) % _tableSize; +#else + index = (index + 1) % _tableSize; +#endif + } + } + + Blob Index::get (File *file, Key key) const { + const KeyValueChunk *kv = _find(file, key); + return kv ?kv->value() :Blob(); + } + + + // Common implementation of put and delete. (If deleting, valuePosition will be kDeletedPosition.) + // While searching, entries past maxPosition should be considered newly-added and not compared, + // since they're not in the memory-mapped part of the file yet. + // Returns true if the entry was added/removed. + bool Index::_put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) { + bool result = (valuePosition <= Entry::kDeletedPosition); + int index = key.hash % _tableSize; + Index::Entry entry(key.hash, valuePosition); +#if PROBE_QUADRATIC + int probe = 0; +#endif + Index::Entry *found; + for(;;) { + found = table(index); + if (!found->hasValue()) { + if (entry.hasValue()) { + result = true; + break; // it's a put and we found an empty entry + } else if (found->isAvailable()) { + return false; // it's a delete but the key wasn't found + } + } + else if (found->hasHash(entry.hash())) { + if (!file) + break; // no file given, so assume no duplicate keys + else if (found->valuePosition() < maxPosition && found->value(file)->hasKey(key)) + break; // found existing (not newly-inserted) entry with the matching key + /*else + printf("** hash collision! hash=%u\n", key.hash);*/ + } +#if PROBE_QUADRATIC + index = (index + ++probe) % _tableSize; +#else + index = (index + 1) % _tableSize; +#endif + } + // Finally found where to put it: + *found = entry; + return result; + } + + bool Index::put (File *file, Key key, FilePosition valuePosition, FilePosition maxPosition) { + bool added = _put(file, key, valuePosition, maxPosition); + if (added) + ++_count; + return added; + } + + bool Index::remove (File *file, Key key, FilePosition maxPosition) { + bool removed = _put(file, key, Index::Entry::kDeletedPosition, maxPosition); + if (removed) { + assert(_count > 0); + --_count; + } + return removed; + } + + void Index::removeAll () { + memset(table(0), 0, _tableSize*sizeof(Entry)); + _count = 0; + } + + + void Index::copyFrom (File *file, const Index *index) { + const Entry *src = index->table(0); + if (index->_tableSize == _tableSize) { + memcpy(table(0), src, _tableSize*sizeof(Entry)); + } else { + removeAll(); + for (uint32_t i=0; i_tableSize; i++,src++) + if (src->hasValue()) + _put(NULL, Key(NULL,0,src->hash()), src->valuePosition(), UINT32_MAX); + } + _count = index->count(); + } + + + + Index::Iterator::Iterator (File *file, const Index *index) + :_file(file), + _current(index->table(-1)), + _end(index->table(index->_tableSize)) + { + next(); + } + + bool Index::Iterator::hasValue() const {return _current != NULL;} + Key Index::Iterator::key() const {return Key(_current->value(_file)->key(), _current->hash());} + Blob Index::Iterator::value() const {return _current->value(_file)->value();} + + bool Index::Iterator::next() { + if (_current) { + while (++_current < _end) { + if (_current->hasValue()) + return true; + } + _current = NULL; + } + return false; + } + + FilePosition Index::Iterator::valuePosition() { + return _current ?_current->valuePosition() :0; + } +} diff -r 000000000000 -r 31a43d94cc26 src/MemoryMap.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MemoryMap.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,103 @@ +/* + * MemoryMap.cpp + * Ottoman + * + * Created by Jens Alfke on 9/17/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "MemoryMap.h" + +#include +#include +#include + +namespace Mooseyard { + + MemoryMap::~MemoryMap() { + for (int i=0; i<_nRegions; i++) + delete _regions[i]; + free(_regions); + } + + void MemoryMap::mapRegion (off_t pos, size_t length) { + size_t end = pos+length; + for (int i=0; i<_nRegions; i++) { + Region *region = _regions[i]; + if (region->position() <= pos) { + if (end <= region->end()) + return; // found an existing region covering this range + else if (region->setLength(end - region->position())) + return; // able to grow the existing region + } + } + + // No existing region, so add a new one: + Region *region = new Region(_file, pos,length); + _regions = (Region**) realloc(_regions, (_nRegions+1)*sizeof(*_regions)); + _regions[_nRegions++] = region; + } + + const void* MemoryMap::_at (off_t pos) const { + for (int i=0; i<_nRegions; i++) { + const void *ptr = _regions[i]->mappedPosition(pos); + if (ptr) + return ptr; + } + return NULL; + } + + const void* MemoryMap::mappedPosition (off_t pos) const { + const void *result = _at(pos); + if (!result) + throw File::Error("No memory mapped at this position"); + return result; + } + + + + + MemoryMap::Region::Region (File* file, off_t position, size_t length) + :_file(file), + _position(position), + _length (length), + _start( (const uint8_t*) ::mmap(NULL, length, PROT_READ, MAP_PRIVATE, file->_fd, position) ) + { + printf("File#%i: Mapped (%6llu -- %6llu) --> %p\n", file->_fd, _position, end(), _start); + if (_start==NULL || _start == MAP_FAILED) { + _start = NULL; + throw File::Error(errno, strerror(errno)); + } + } + + MemoryMap::Region::~Region() { + if (_start) { + printf("File#%i: Unmapped (%6llu -- %6llu) from %p\n", _file->_fd, _position, end(), _start); + ::munmap((void*)_start,_length); + } + } + + bool MemoryMap::Region::setLength (size_t length) { + if (length != _length) { + printf("File#%i: Resiging (%6llu -- %6llu) from %lu to %lu ...", + _file->_fd, _position, end(), _length,length); + if (::mmap((void*)_start, length, PROT_READ, MAP_PRIVATE | MAP_FIXED, _file->_fd, _position) == MAP_FAILED) { + printf("failed! errno=%i\n", errno); + return false; + } + printf("OK\n"); + _length = length; + } + return true; + } + + const void* MemoryMap::Region::mappedPosition (off_t pos) { + if (pos >= _position && pos < end()) + return _start + (pos-_position); + else + return NULL; + } + + +} diff -r 000000000000 -r 31a43d94cc26 src/Ottoman.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Ottoman.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,261 @@ +/* + * Ottoman.cpp + * Ottoman + * + * Created by Jens Alfke on 8/31/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "Ottoman.h" +#include "VersionDictionary.h" +#include "Chunk.h" +#include "Index.h" +#include "File.h" +#include +#include +#include + +namespace Mooseyard { + + class DictionaryFileHeader :public Chunk { + public: + DictionaryFileHeader() + :Chunk(sizeof(DictionaryFileHeader),kChunkType) + { + memcpy(&magicNumber, &kMagicNumber, sizeof(magicNumber)); + } + + bool valid() const { + return type()==kChunkType && memcmp(&magicNumber,&kMagicNumber,sizeof(magicNumber))==0; + } + + uint8_t magicNumber[8]; + + static const uint16_t kChunkType = 4; + static const uint8_t kMagicNumber[8]; + }; + + const uint8_t DictionaryFileHeader::kMagicNumber[8] = {0x4A, 0x54, 0xF1, 0x1E, 'h', 'A', 's', 'H'}; + + + + Ottoman::Ottoman() + :_writeable(true), + _filename(NULL), + _lastVersion(NULL), + _current( new OverlayDictionary(&Dictionary::kEmpty) ) + { + } + + Ottoman::Ottoman (const char *filename, bool writeable) + :_writeable(writeable), + _filename(strdup(filename)), + _lastVersion(), + _current() + { + _file = new File(_filename, writeable ?O_RDWR :O_RDONLY); + _lastVersion = new VersionDictionary(_file); + if (writeable) + _current = new OverlayDictionary(_lastVersion); + } + + Ottoman::~Ottoman() { + delete _current; + delete _lastVersion; + delete _file; + free(_filename); + } + + + bool Ottoman::needsSync() const { + return _file && (!_lastVersion->isLatestVersion() || !_file->hasPath(_filename)); + } + + bool Ottoman::sync() { + if (!needsSync()) + return false; + File *curFile = _file; + if (!curFile->hasPath(_filename)) + curFile = new File(_filename, _writeable ?O_RDWR :O_RDONLY); + + VersionDictionary *newest = new VersionDictionary(curFile); + bool changed = (newest->generation() != _lastVersion->generation()); + if (changed) { + if (newest->generation() < _lastVersion->generation()) + throw File::Error("Versions got lost in file update"); + + if (_current) + _current->rebase(newest); + + delete _lastVersion; + _lastVersion = newest; + } + + if (_file != curFile) { + delete _file; + _file = curFile; + } + return changed; + } + + + ChunkIterator* Ottoman::chunkIterator() const { + return _file ?new ChunkIterator(_file, sizeof(DictionaryFileHeader)) :NULL; + } + + bool Ottoman::scavenge (bool repair) { + if (!_file) + return true; + + const off_t actualEOF = _file->length(); + fprintf(stderr, "Ottoman::scavenge: Scanning %s (%llu / 0x%llX bytes) ...\n", + _filename, actualEOF, actualEOF); + _file->setPosition(0); + ChunkIterator it(_file, 0); + const DictionaryFileHeader *header = (const DictionaryFileHeader *) it.chunk(); + if (!header || !header->valid()) { + fprintf(stderr, "Ottoman::scavenge: No valid file header at all!\n"); + return false; + } + + // Scan through all the chunks: + FilePosition lastValidTrailer = 0; + FilePosition validEOF = 0; + int lastType = -1; + int count = 0; + try{ + for (; it; ++it) { + const Chunk *chunk = it.chunk(); + + uint16_t type = it.chunk()->type(); + if (type != lastType) { + if (count > 0) + printf("%6d\n", count); + fprintf(stderr, "Ottoman::scavenge: at 0x%08X: type %u ... ", + it.position(), type); + lastType = type; + count = 0; + } + count++; + + switch (type) { + case KeyValueChunk::kChunkType: + ((const KeyValueChunk*)chunk)->validate(); + break; + case Index::kChunkType: + ((const Index*)chunk)->validateEntries(_file); + break; + case VersionDictionary::kChunkType: { + fprintf(stderr, "Found dictionary trailer at %u!\n", it.position()); + count = 0; + // Validate by instantiating the VersionDictionary: + VersionDictionary tempDict(_file, it.position()); + // If constructor succeeded, it's valid, so remember it: + lastValidTrailer = it.position(); + validEOF = lastValidTrailer + chunk->size(); + fprintf(stderr, "Ottoman::scavenge: Valid dictionary trailer at %X--%X\n", + lastValidTrailer, validEOF); + break; + } + } + } + if (count > 0) + fprintf(stderr, "%6d\n", count); + } catch (File::Error &err) { + fprintf(stderr, "Ottoman::scavenge caught File::Error(%i,\"%s\") at pos %llX\n", + err.code, err.message, _file->position()); + // Keep going; we can recover the dictionary(ies) before the bad one. + } + + if (lastValidTrailer == 0) { + fprintf(stderr, "Ottoman::scavenge: No valid dictionaries found!\n"); + return false; + } else if (validEOF < actualEOF) { + fprintf(stderr, "Ottoman::scavenge: Need to truncate to 0x%X (0x%llX bytes)\n", + lastValidTrailer, actualEOF-lastValidTrailer); + if (repair) { + _file->setLength(validEOF); + _file->flushDisk(); + } + return false; + } + + fprintf(stderr, "Ottoman::scavenge: File is OK!\n"); + return true; + } + + +#pragma mark - +#pragma mark SAVING: + + + // low-level write. Does not lock or check for conflicts! + void Ottoman::_append (File *dstFile) { + VersionDictionary *lastVersion = _lastVersion; + if (!lastVersion) + lastVersion = new VersionDictionary(dstFile); + VersionDictionary *saved = lastVersion->_appendAndOpen(_current->overlay(), + dstFile, + _current->baseReplaced()); + // (don't delete _lastVersion: saved->_previousVersion now points to it.) + _lastVersion = saved; + _current->revertTo(_lastVersion); + } + + bool Ottoman::save() { + if (!_file) + return false; + if (_current && _current->isChanged()) { + File::Lock lock(_file); + if (needsSync()) + return false; // conflict! + _append(_file); + } + return true; + } + + File* Ottoman::_writeTo (const char *dstFileName, bool overwriteAllowed) { + int mode = O_RDWR | O_CREAT | O_TRUNC | O_EXLOCK; + if (!overwriteAllowed) + mode |= O_EXCL; + File *dstFile = new File(dstFileName, mode); + try { + dstFile->write(DictionaryFileHeader()); + _append(dstFile); + } catch (...) { + delete dstFile; + File::unlink(dstFileName); + throw; + } + return dstFile; + } + + void Ottoman::saveAs (const char *dstFileName, bool overwriteAllowed) { + File *dstFile = _writeTo(dstFileName, overwriteAllowed); + free(_filename); + _filename = strdup(dstFileName); + _file = dstFile; + } + + bool Ottoman::saveAndCompact() { + if (!_file) + return false; + char tempFileName[1024]; + strlcpy(tempFileName, _filename, sizeof(tempFileName)-1); + strlcat(tempFileName, "~", sizeof(tempFileName)); + File *tempFile; + { + // Check for conflict in existing file: + File::Lock lock(_file); + if (needsSync()) + return false; + tempFile = _writeTo(tempFileName, false); + File::rename(tempFileName, _filename); + } + delete _file; + _file = tempFile; + return true; + } + +} diff -r 000000000000 -r 31a43d94cc26 src/VersionDictionary.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/VersionDictionary.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,429 @@ +/* + * VersionDictionary.cpp + * Ottoman + * + * Created by Jens Alfke on 8/21/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "VersionDictionary.h" +#include "Chunk.h" +#include "Index.h" +#include "File.h" +#include "Chunk.h" +#include +#include +#include + +namespace Mooseyard { + + // Trailer is stored in file; all numbers are little-endian. + class VersionDictionary::Trailer :public Chunk { + public: + LittleEndian magicNumber1; + LittleEndian count; + VersionDictionary::IndexPositions indexPositions; + LittleEndian previousTrailerPosition; + LittleEndian generation; + LittleEndianDouble timestamp; + LittleEndian magicNumber2; + + Trailer () + :Chunk(sizeof(Trailer), kChunkType) + { } + + Trailer (int c, + VersionDictionary::IndexPositions indexPos, + FilePosition previousTrailerPos, + uint32_t gen) + :Chunk(sizeof(Trailer), kChunkType), + magicNumber1(kMagicNumber1), + count(c), + indexPositions(indexPos), + previousTrailerPosition(previousTrailerPos), + generation(gen), + timestamp(::time(NULL)), + magicNumber2(kMagicNumber2) + { } + + static const uint32_t kMagicNumber1 = 0xfe83b1cd; + static const uint32_t kMagicNumber2 = 0xff84b2c1; + }; + + + + VersionDictionary::VersionDictionary (File *file) + :_file(file), + _trailerPosition(0), + _previousTrailerPosition(0), + _indexPositions(), + _count(0), + _previousVersion() + { + if (file->length() > sizeof(Trailer)) + _read(); + } + + VersionDictionary::VersionDictionary (File *file, FilePosition trailerPosition) + :_file(file), + _trailerPosition(trailerPosition), + _previousTrailerPosition(0), + _indexPositions(), + _count(0), + _previousVersion() + { + _read(trailerPosition); + } + + int VersionDictionary::count() const { + return _count; + } + + const VersionDictionary::Trailer* VersionDictionary::_trailer() const { + if (_trailerPosition > 0) + return (const VersionDictionary::Trailer*) _file->mappedPosition(_trailerPosition); + else + return NULL; // Only happens in the empty-file case + } + + const Index* VersionDictionary::_index (int i) const { + if (_indexPositions[i] > 0) + return (const Index*) _file->mappedPosition(_indexPositions[i]); + else + return NULL; + } + + int VersionDictionary::generation() const { + const VersionDictionary::Trailer *trailer = _trailer(); + return trailer ? (int)trailer->generation : -1; + } + + time_t VersionDictionary::timestamp() const { + const VersionDictionary::Trailer *trailer = _trailer(); + return trailer ? (time_t)trailer->timestamp : 0; + } + + const VersionDictionary* VersionDictionary::previousVersion() const { + if (!_previousVersion) + if (_previousTrailerPosition > 0) + _previousVersion = new VersionDictionary(_file, _previousTrailerPosition); + return _previousVersion; + } + + + Blob VersionDictionary::get (Key key) const { + const Index *index = _index(key.hash >> 24); + return index ?index->get(_file, key) :Blob(); + } + + void VersionDictionary::_read (FilePosition trailerPos) { + assert(_file); + + if (trailerPos > 0) { + _file->setPosition(trailerPos); + } else { + // Determine position of trailer, at EOF: + off_t pos = _file->setPositionToEnd(sizeof(VersionDictionary::Trailer)); + if (pos < 0 || (pos & 0x03) || pos > UINT32_MAX) + throw File::Error(ERANGE, "No trailer found in file (wrong EOF)"); + trailerPos = pos; + } + + // Read & verify trailer: + VersionDictionary::Trailer trailer; + _file->read(trailer); + _trailerPosition = trailerPos; + _count = trailer.count; + _indexPositions = trailer.indexPositions; + + if (trailer.magicNumber1 != VersionDictionary::Trailer::kMagicNumber1 + || trailer.magicNumber2 != VersionDictionary::Trailer::kMagicNumber2) + throw File::Error(ERANGE, "No trailer found in file (invalid magic numbers)");\ + + + // Map in the file: + _file->mapRegion(0, _trailerPosition+sizeof(trailer)); + + // Verify Indexes: + for (int i=0; i<256; i++) { + const Index *index = _index(i); + if (index) + index->validate(); + } +} + + + bool VersionDictionary::isLatestVersion() const { + return _file->length() <= _trailerPosition+sizeof(VersionDictionary::Trailer); + } + + + /* Append addDict to baseDict, writing the results into dstFile (which is usually the same + as baseDict->file().) If 'replace' is true, ignore the old contents of baseDict. + Returns the position of the new trailer. */ + FilePosition VersionDictionary::_append (const VersionDictionary *baseDict, + const Dictionary *addDict, + File *dstFile, + bool replace) + { + File *srcFile = baseDict->_file; + bool incremental = !replace && dstFile==srcFile; + Index* newIndex[256]; + + { + // Work out the needed capacity for each Index bucket: + int newCounts[256] = {0}; + if (!replace) { + for (int i=0; i<256; i++) { + const Index *oldIndex = baseDict->_index(i); + if (oldIndex) + newCounts[i] = oldIndex->count(); + } + } + Dictionary::Iterator *it = addDict->iterate(); + for (; *it; ++*it) + newCounts[it->key().hash >> 24]++; + delete it; + + // Allocate new Indexes, of sufficient capacity, for each growing bucket: + for (int i=0; i<256; i++) { + const Index *oldIndex = baseDict->_index(i); + if (newCounts[i] && (!incremental || !oldIndex || newCounts[i] > oldIndex->count())) { + newIndex[i] = Index::create(newCounts[i]); + if (incremental && oldIndex) + newIndex[i]->copyFrom(srcFile, oldIndex); + } else + newIndex[i] = NULL; + } + } + + // Lock the file now, seek to the end, and make sure it's been prepared with a header, + // since FilePositions of 0 and 1 are reserved. + File::Lock lock(dstFile); + const FilePosition startPos = dstFile->setPositionToEnd(); + if (startPos <= 1) + throw File::Error(ERANGE, "Cannot write VersionDictionary to empty file"); + + // For safety's sake, make sure the old file hasn't been destroyed: + FilePosition oldEOF = baseDict->_trailerPosition; + if (oldEOF > 0) + oldEOF += sizeof(VersionDictionary::Trailer); + if (srcFile->length() < oldEOF) + throw File::Error(ERANGE, "File has been truncated since being read"); + + try{ + FilePosition pos = startPos; + + int newCount; + if (replace) + newCount = 0; + else if (dstFile == srcFile) + newCount = baseDict->_count; + else { + // Write out the surviving pre-existing entries from the old file: + newCount = 0; + for (VersionDictionary::Iterator it(baseDict); it; ++it) { + Key key = it.key(); + if (!addDict->contains(key)) { + int bucket = key.hash >> 24; + int size = KeyValueChunk::write(dstFile,pos, key, it.value()); + newIndex[bucket]->put(srcFile, key, pos, startPos); + pos += size; + newCount++; + } + } + } + + // Now write the items from the new dict: + Dictionary::Iterator *it = addDict->iterate(); + for (; *it; ++*it) { + Key key=it->key(); + Blob value=it->value(); + int bucket = key.hash >> 24; + Index *index = newIndex[bucket]; + assert(index); + + if (value.bytes) { + int size = KeyValueChunk::write(dstFile,pos, key, value); + if (index->put(srcFile, key, pos, startPos)) + newCount++; + pos += size; + } else if (incremental) { + // NULL value is a deleted-entry marker used by OverlayDictionary + if (index->remove(srcFile, key, startPos)) + newCount--; + } + } + delete it; + + // Write out the new indexes: + IndexPositions newIndexPositions; + if (incremental) + newIndexPositions = baseDict->_indexPositions; + else + memset(&newIndexPositions, 0, sizeof(newIndexPositions)); + + pos += Chunk::writePadding(dstFile); + for (int i=0; i<256; i++) { + if (newIndex[i]) { + Index *index = newIndex[i]; + newIndexPositions[i] = pos; + pos += dstFile->write(index, index->size()); + delete index; + } + } + + // Flush everything out to disk, with maximum paranoia, before writing the trailer. + // Since scavenging corrupt files looks for trailers, we don't want to append a + // trailer until we're certain that all of the real data is safely on-disk. + dstFile->flushDisk(); + + // Write the trailer: + FilePosition newTrailerPosition = pos; + VersionDictionary::Trailer trailer(newCount, + newIndexPositions, + baseDict->_trailerPosition, + baseDict->generation() + 1); + pos += dstFile->write(trailer); + + // Just a mild flush here; flushDisk() is very expensive and can be disruptive to + // real-time I/O in other apps, so it's bad to call it too often. + dstFile->flush(); + assert(pos==dstFile->position()); + + return newTrailerPosition; + + } catch (...) { + // If something goes wrong, try to back out everything we wrote: + try{ + dstFile->setLength(startPos); + }catch(...) { } + throw; + } + } + + +#pragma mark - +#pragma mark TESTING-ONLY: + + VersionDictionary* VersionDictionary::_appendAndOpen (const Dictionary *addDict, + File *dstFile, + bool replace) const + { + FilePosition nextVersionPos = _append(this, addDict, dstFile, replace); + VersionDictionary *nextVersion = new VersionDictionary(dstFile, nextVersionPos); + nextVersion->_previousVersion = this; + return nextVersion; + } + + VersionDictionary* VersionDictionary::create (File *file, const Dictionary *srcDict) { + return VersionDictionary(file)._appendAndOpen(srcDict, file, true); + } + + +#pragma mark - +#pragma mark ITERATOR: + + Dictionary::Iterator* VersionDictionary::iterate() const { + return new VersionDictionary::Iterator(this); + } + + VersionDictionary::Iterator::Iterator (const VersionDictionary *file) + :_file(file), + _bucket(-1), + _iter(NULL) + { + nextIndex(); + } + + VersionDictionary::Iterator::~Iterator() { + delete _iter; + } + + bool VersionDictionary::Iterator::hasValue() const {return _iter && _iter->hasValue();} + Key VersionDictionary::Iterator::key() const {return _iter->key();} + Blob VersionDictionary::Iterator::value() const {return _iter->value();} + + bool VersionDictionary::Iterator::next() { + if (_iter->next()) + return true; + else { + delete _iter; + return nextIndex(); + } + } + + bool VersionDictionary::Iterator::nextIndex() { + while (++_bucket < 256) { + const Index *index = _file->_index(_bucket); + if (index) { + _iter = new Index::Iterator(_file->_file, index); + if (*_iter) + return true; + else + delete _iter; + } + } + _iter = NULL; + return false; + } + + +#pragma mark - +#pragma mark CHANGE ITERATOR: + + VersionDictionary::ChangeIterator* VersionDictionary::iterateChanges() const { + return new VersionDictionary::ChangeIterator(this); + } + + VersionDictionary::ChangeIterator::ChangeIterator (const VersionDictionary *file) + :_file(file), + _bucket(-1), + _iter(NULL) + { + next(); + } + + VersionDictionary::ChangeIterator::~ChangeIterator() { + delete _iter; + } + + bool VersionDictionary::ChangeIterator::hasValue() const {return _iter && _iter->hasValue();} + Key VersionDictionary::ChangeIterator::key() const {return _iter->key();} + Blob VersionDictionary::ChangeIterator::value() const {return _iter->value();} + + bool VersionDictionary::ChangeIterator::next() { + const VersionDictionary::Trailer *trailer = _file->_trailer(); + for (;;) { + // Check if current iterator has a value that's from this version: + if (_iter && _iter->hasValue()) { + if (((Index::Iterator*)_iter)->valuePosition() > trailer->previousTrailerPosition) + return true; + else if (_iter->next()) + continue; + } + // If not, go to next Index: + if (!nextIndex()) + return false; + } + } + + bool VersionDictionary::ChangeIterator::nextIndex() { + delete _iter; + const VersionDictionary::Trailer *trailer = _file->_trailer(); + while (++_bucket < 256) { + const Index *index = _file->_index(_bucket); + if (index) { + // Skip indexes that weren't updated in this version: + if (trailer->indexPositions[_bucket] > trailer->previousTrailerPosition) { + _iter = new Index::Iterator(_file->_file, index); + return true; + } + } + } + _iter = NULL; + return false; + } + +} diff -r 000000000000 -r 31a43d94cc26 test/Dictionary_test.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/Dictionary_test.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,111 @@ +/* + * Dictionary_test.cpp + * Ottoman + * + * Created by Jens Alfke on 9/4/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + + +#include +#include "TestUtils.h" +#include "File.h" +#include "Dictionary.h" +#include "Hash.h" +#include +#include + +using namespace Mooseyard; + +static const HashDictionary& getDict() { + static HashDictionary *sDict; + if (!sDict) { + printf("Building large HashDictionary...\n"); + sDict = new HashDictionary(); + readWords(); + for( int i=0; iput(Key(kv),kv); + } + } + return *sDict; +} + +TEST(Dictionary,GetAll) { + const Dictionary &dict = getDict(); + EXPECT_EQ( sNWords , dict.count() ); + for( int i=0; i 0 && it.key().length < 50); + EXPECT_TRUE(it.key().equals(it.value())); + EXPECT_EQ( 0, ((size_t)it.value().bytes & 0x3) ); // 4-byte aligned + } + EXPECT_EQ(sNWords, n); +} + +TEST(Dictionary,Overlay) { + const Dictionary &dict = getDict(); + OverlayDictionary overlay(&dict); + + EXPECT_EQ( sNWords , overlay.count() ); + EXPECT_TRUE(overlay.get("animal").equals("animal")); + EXPECT_TRUE(overlay.get("asparagus").equals("asparagus")); + EXPECT_FALSE(overlay.get("growf")); + + overlay.put("animal", "AMINAL"); + overlay.put("growf", "growf"); + EXPECT_TRUE(overlay.remove("asparagus")); + + EXPECT_EQ( sNWords, overlay.count() ); + EXPECT_TRUE(overlay.get("animal").equals("AMINAL")); + EXPECT_TRUE(overlay.get("growf").equals("growf")); + EXPECT_TRUE(overlay.contains("growf")); + EXPECT_FALSE(overlay.get("asparagus")); + EXPECT_FALSE(overlay.contains("asparagus")); + + int n=0; + for( OverlayDictionary::Iterator it(overlay); it; ++it) { + n++; + EXPECT_TRUE(!it.key().equals("asparagus")); + } + EXPECT_EQ(sNWords, n); + + printf("Testing ChangeIterator...\n"); + n=0; + int foundAsparagus=0, foundAnimal=0, foundGrowf=0; + for( Dictionary::ChangeIterator it(&overlay); it; ++it) { + n++; + if (it.key().equals("animal")) { + foundAnimal++; + EXPECT_TRUE(it.value().equals("AMINAL")); + EXPECT_TRUE(it.otherValue().equals("animal")); + } else if (it.key().equals("asparagus")) { + foundAsparagus++; + EXPECT_FALSE(it.value()); + EXPECT_TRUE(it.otherValue().equals("asparagus")); + } else if (it.key().equals("growf")) { + foundGrowf++; + EXPECT_TRUE(it.value().equals("growf")); + EXPECT_FALSE(it.otherValue()); + } else { + EXPECT_TRUE(false); + } + } + EXPECT_EQ(1, foundAnimal); + EXPECT_EQ(1, foundAsparagus); + EXPECT_EQ(1, foundGrowf); + EXPECT_EQ(3, n); +} diff -r 000000000000 -r 31a43d94cc26 test/Hash_test.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/Hash_test.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,193 @@ +/* + * Hash_test.cpp + * Ottoman + * + * Created by Jens Alfke on 9/2/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include +#include "TestUtils.h" +#include "File.h" +#include "Hash.h" +#include + +using namespace Mooseyard; + + +static bool verbose = false; +static int num = 100; + + +#pragma mark - +#pragma mark UTILITIES: + + +char* mkKey (int i) { + static char* sKeys[100]; + if (i>=0 && i<100 && sKeys[i] != NULL) + return sKeys[i]; + char str[10]; + sprintf(str, "%i",i); + char *key = strdup(str); + if (i>=0 && i<100) + sKeys[i] = key; + return key; +} + +static char* Insert(Hash &hash, const char* key) { + char string[100]; + sprintf(string, "value for %s", key); + if (verbose) + fprintf(stderr, "\nInserting %s -> '%s'\n", key,string); + int count = hash.count(); + char *leaf = strdup(string); + + hash.put(key,leaf); + + if (verbose) + hash.dump(); + EXPECT_EQ( count+1, hash.count() ); + EXPECT_EQ( leaf, hash.get(key) ); + return leaf; +} + +static char* Insert(Hash &hash, int i) { + return Insert(hash, mkKey(i)); +} + +static void Remove(Hash &hash, int i) { + char* key = mkKey(i); + char string[100]; + sprintf(string, "value for %s", key); + if (verbose) + fprintf(stderr, "\nRemoving %s\n", key); + int count = hash.count(); + char *value = (char*) hash.get(key); + EXPECT_TRUE(value!=NULL); + EXPECT_EQ(0, strcmp(value,string)); + + EXPECT_TRUE(hash.remove(key)); + + if (verbose) + hash.dump(); + EXPECT_EQ( count-1, hash.count() ); + EXPECT_EQ( NULL, hash.get(key) ); +} + + + +#pragma mark - +#pragma mark TESTS: + + +TEST(Hash,Simple) { + Hash hash; + hash.dump(); + EXPECT_EQ(0, hash.count()); + + char *seventeen = Insert(hash, "seventeen"); + + EXPECT_EQ(NULL, hash.get("zero")); + EXPECT_EQ(NULL, hash.get("eighteen")); + + char *four = Insert(hash, "four"); + EXPECT_EQ(seventeen, hash.get("seventeen")); + + char *zero = Insert(hash, "zero"); + char *hundred = Insert(hash, "one hundred"); + char *eight = Insert(hash, "eight"); + char *twenty = Insert(hash, "twenty"); + + EXPECT_EQ(zero, hash.get("zero")); + EXPECT_EQ(four, hash.get("four")); + EXPECT_EQ(eight, hash.get("eight")); + EXPECT_EQ(seventeen, hash.get("seventeen")); + EXPECT_EQ(twenty, hash.get("twenty")); + EXPECT_EQ(hundred, hash.get("one hundred")); + + int i=0; + for (Hash::Iterator iter(&hash); iter; ++iter, ++i) { + Blob key = iter.key(); + if (verbose) + fprintf(stderr, " %*s -> %s\n", + (int)key.length,(const char*)key.bytes, + ((char*)iter.value())); + } + EXPECT_EQ(6, i); +} + +TEST(Hash,InsertLotsAtRandom) { + int map[num]; + for (int i=0; i +#include "TestUtils.h" +#include "Chunk.h" +#include "Ottoman.h" +#include "VersionDictionary.h" +#include +#include + +using namespace Mooseyard; + + +TEST(Ottoman,Build) { + ASSERT_EQ(8, sizeof(Chunk)); + ASSERT_EQ(8, sizeof(KeyValueChunk)); + + time_t startTime = ::time(NULL); + time_t createTime; + { + Ottoman otto; + EXPECT_EQ((void*)NULL, otto.lastVersion()); + ASSERT_NE((void*)NULL, otto.currentVersion()); + readWords(); + for( int i=0; iput(Key(kv),kv); + } + Timer t("Saving Ottoman", sNWords); + otto.saveAs("/tmp/test.ottoman"); + + VersionDictionary *last = otto.lastVersion(); + ASSERT_NE((void*)NULL, last); + EXPECT_EQ(sNWords, last->count()); + ASSERT_NE((void*)NULL, otto.currentVersion()); + EXPECT_FALSE(otto.currentVersion()->isChanged()); + createTime = last->timestamp(); + EXPECT_GE(createTime, startTime); + EXPECT_LE(createTime, ::time(NULL)); + } + { + Ottoman otto("/tmp/test.ottoman"); + VersionDictionary *last = otto.lastVersion(); + ASSERT_NE((void*)NULL, last); + ASSERT_EQ(0, last->generation()); + ASSERT_EQ(createTime, last->timestamp()); + { + Timer t("Reading from Ottoman", sNWords); + EXPECT_EQ( sNWords , last->count() ); + for( int i=0; iget(key); + ASSERT_TRUE(value); + ASSERT_TRUE( value.equals(key) ) << "expected '" << key << "', got '" << value << "' (i=" << i <<")"; + } + } + + printf("Iterating through the Ottoman...\n"); + Timer t("Iterating Ottoman", sNWords); + int n=0; + for( VersionDictionary::Iterator it(last); it; ++it) { + n++; + ASSERT_TRUE(it.key().length > 0 && it.key().length < 50); + ASSERT_TRUE(it.key().equals(it.value())); + ASSERT_EQ( 0, ((size_t)it.value().bytes & 0x3) ); // 4-byte aligned + } + ASSERT_EQ(sNWords, n); + } + { + printf("Opening Ottoman...\n"); + Ottoman otto("/tmp/test.ottoman"); + OverlayDictionary *current = otto.currentVersion(); + ASSERT_TRUE(current != NULL); + EXPECT_EQ( sNWords , current->count() ); + EXPECT_TRUE(current->get("asparagus").equals("asparagus")); + + current->put("animal", "AMINAL"); + current->put("growf", "growf"); + EXPECT_TRUE(current->remove("asparagus")); + + EXPECT_EQ( sNWords, current->count() ); + EXPECT_TRUE(current->get("animal").equals("AMINAL")); + EXPECT_TRUE(current->get("growf").equals("growf")); + EXPECT_EQ( NULL, current->get("asparagus").bytes ); + EXPECT_TRUE(!current->contains("asparagus")); + + int n=0; + for( OverlayDictionary::Iterator it(*current); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("asparagus")); + } + ASSERT_EQ(sNWords, n); + + printf("Saving Ottoman...\n"); + { + Timer t("Saving Ottoman"); + otto.save(); + } + + EXPECT_EQ( sNWords, current->count() ); + EXPECT_TRUE(current->get("animal").equals("AMINAL")); + EXPECT_TRUE(current->get("growf").equals("growf")); + EXPECT_EQ( NULL, current->get("asparagus").bytes ); + EXPECT_TRUE(!current->contains("asparagus")); + + n=0; + for( OverlayDictionary::Iterator it(*current); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("asparagus")); + } + ASSERT_EQ(sNWords, n); + + EXPECT_EQ(1, otto.lastVersion()->generation()); + const VersionDictionary *prev = otto.lastVersion()->previousVersion(); + ASSERT_TRUE(prev != NULL); + EXPECT_EQ(0, prev->generation()); + EXPECT_TRUE(prev->get("asparagus").equals("asparagus")); + EXPECT_TRUE(prev->get("growf").equals(NULL)); + } + { + printf("Re-opening Ottoman...\n"); + Ottoman otto("/tmp/test.ottoman", false); + ASSERT_EQ(NULL, otto.currentVersion()); + VersionDictionary *last = otto.lastVersion(); + ASSERT_TRUE(last != NULL); + EXPECT_EQ(1, last->generation()); + EXPECT_GE(last->timestamp(), createTime); + EXPECT_LE(last->timestamp(), ::time(NULL)); + EXPECT_EQ( sNWords , last->count() ); + EXPECT_TRUE(last->get("animal").equals("AMINAL")); + EXPECT_TRUE(last->get("growf").equals("growf")); + EXPECT_EQ( NULL, last->get("asparagus").bytes ); + EXPECT_TRUE(!last->contains("asparagus")); + + int n=0; + for( VersionDictionary::Iterator it(last); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("asparagus")); + } + ASSERT_EQ(sNWords, n); + } + { + printf("Writing Ottoman to a new file...\n"); + Ottoman oldhf("/tmp/test.ottoman"); + + oldhf.saveAs("/tmp/test2.ottoman"); + } + { + printf("Opening new file...\n"); + Ottoman otto("/tmp/test2.ottoman"); + OverlayDictionary *current = otto.currentVersion(); + ASSERT_TRUE(current != NULL); + + EXPECT_EQ( sNWords , current->count() ); + EXPECT_TRUE(current->get("animal").equals("AMINAL")); + EXPECT_TRUE(current->get("growf").equals("growf")); + EXPECT_EQ( NULL, current->get("asparagus").bytes ); + EXPECT_TRUE(!current->contains("asparagus")); + + int n=0; + for( OverlayDictionary::Iterator it(*current); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("asparagus")); + } + ASSERT_EQ(sNWords, n); + + printf("Iterating the chunks...\n"); + int lastType = -1; + int count = 0; + n = 0; + ChunkIterator *it = otto.chunkIterator(); + for (; *it; it->next()) { + uint16_t type = it->chunk()->type(); + if (type != lastType) { + if (count > 0) + printf("%6d\n", count); + printf("type %u ... ", type); + lastType = type; + count = 0; + } + count++; + ASSERT_LE(type, 3); + if (type != 0) // don't count padding chunks + n++; + } + if (count > 0) + printf("%6d\n", count); + printf("Iterated over %i chunks\n",n); + EXPECT_EQ(sNWords+256+1, n); + EXPECT_TRUE(it->atEOF()); + + printf("Scavenging...\n"); + EXPECT_TRUE( otto.scavenge() ); + } +} + diff -r 000000000000 -r 31a43d94cc26 test/TestUtils.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/TestUtils.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,104 @@ +/* + * TestUtils.cpp + * Ottoman + * + * Created by Jens Alfke on 9/12/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include "TestUtils.h" +#include "File.h" +#include +#include + +namespace Mooseyard { + + std::ostream& operator<< (std::ostream &out, const Blob &blob) { + char str[blob.length+1]; + memcpy(str,blob.bytes,blob.length); + str[blob.length] = 0; + out << str; + return out; + } + + void shuffle(int a[], int n, unsigned seed) { + if (seed==0) { + srandomdev(); + seed = random(); + } + fprintf(stderr,"shuffle(n=%i, seed=%u)\n", n,seed); + srandom(seed); + for (int i=0; istring()); + sNWords++; + } + } + } + +#pragma mark - +#pragma mark TIMER: + + Timer::Timer (const char *operation, int divisor) { + _operation = operation; + _divisor = divisor; + _cpuTime = clock(); + _time = now(); + } + + Timer::~Timer() { + double elapsedCPU = (clock() - _cpuTime) / 1.0e6; + double elapsed = now() - _time; + printf("### %s took %.6lf sec (used %.6lf sec CPU)", _operation, elapsed, elapsedCPU); + if (_divisor > 1) + printf(" ... per item: %.3lf usec, %.3lf usec CPU", elapsed/_divisor*1e6, elapsedCPU/_divisor*1e6); + printf("\n"); + } + + double Timer::now() { + struct timeval t; + gettimeofday(&t,NULL); + return (double)t.tv_sec + t.tv_usec/1.0e6; + } + +} + + +using namespace Mooseyard; + +int main(int argc, char **argv) { + srandomdev(); + try { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); + } catch (const File::Error &err) { + fprintf(stderr, "\n*** File::Error thrown: %i/%s\n", err.code,err.message); + throw; + } +} diff -r 000000000000 -r 31a43d94cc26 test/TestUtils.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/TestUtils.h Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,37 @@ +/* + * TestUtils.h + * Ottoman + * + * Created by Jens Alfke on 9/2/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + +#include // Get gtest from +#include +#include "Base.h" + +namespace Mooseyard { + + std::ostream& operator<< (std::ostream &out, const Blob&); + + void shuffle(int a[], int n, unsigned seed =0); + + extern char **sWords; + extern int sNWords; + + void readWords(); + + class Timer { + public: + Timer (const char *operation, int divisor =1); + ~Timer(); + double elapsed() {return now() - _time;} + static double now(); + private: + const char *_operation; + int _divisor; + double _cpuTime, _time; + }; + +} diff -r 000000000000 -r 31a43d94cc26 test/VersionDictionary_test.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/VersionDictionary_test.cpp Sun Sep 20 15:14:12 2009 -0700 @@ -0,0 +1,265 @@ +/* + * VersionDictionary_test.cpp + * Ottoman + * + * Created by Jens Alfke on 9/2/09. + * Copyright 2009 Jens Alfke. All rights reserved. + * BSD-Licensed: See the file "LICENSE.txt" for details. + */ + + +#define VERSIONDICTIONARY_TESTING 1 + +#include +#include "TestUtils.h" +#include "Chunk.h" +#include "File.h" +#include "VersionDictionary.h" +#include "Hash.h" +#include +#include + +using namespace Mooseyard; + + +class OverlayVersionDictionary :public OverlayDictionary { +public: + OverlayVersionDictionary (File *file) + :OverlayDictionary(new VersionDictionary(file)) + { } + + int generation() const {return ((VersionDictionary*)base())->generation();} + time_t timestamp() const {return ((VersionDictionary*)base())->timestamp();} + + void save(){ + saveAs(((VersionDictionary*)base())->file()); + } + + void saveAs (File *dstFile) { + VersionDictionary* oldBase = (VersionDictionary*) base(); + revertTo( ((VersionDictionary*)base())->_appendAndOpen(overlay(), dstFile, baseReplaced()) ); + delete oldBase; + } +}; + + + +TEST(File,HasPath) { + { + File f("/tmp/jubba", O_RDWR | O_CREAT | O_TRUNC); + f.write("howdy"); + } + { + File f("/tmp/jubba", O_RDWR); + EXPECT_TRUE(f.hasPath("/tmp/jubba")); + File::unlink("/tmp/jubba"); + EXPECT_FALSE(f.hasPath("/tmp/jubba")); + + File f2("/tmp/jubba", O_RDWR | O_CREAT | O_TRUNC); + f2.write("howdy"); + f2.flush(); + + EXPECT_FALSE(f.hasPath("/tmp/jubba")); + EXPECT_TRUE(f2.hasPath("/tmp/jubba")); + } + File::unlink("/tmp/jubba"); +} + + +static int TestIterate (File *file) { + printf("Iterating the chunks...\n"); + int lastType = -1; + int count = 0; + int n = 0; + ChunkIterator it(file, 4); + for (; it; ++it) { + uint16_t type = it.chunk()->type(); + if (type != lastType) { + if (count > 0) + printf("%6d\n", count); + printf("type %u ... ", type); + lastType = type; + count = 0; + } + count++; + EXPECT_LE(type, 3); + if (type != 0) // don't count padding chunks + n++; + } + if (count > 0) + printf("%6d\n", count); + EXPECT_TRUE(it.atEOF()); + return n; +} + + +static void TestWithWords (const int nWords) { + ASSERT_EQ(8, sizeof(Chunk)); + ASSERT_EQ(8, sizeof(KeyValueChunk)); + + printf("Building dictionary of %i words...\n", nWords); + readWords(); + HashDictionary dict; + for( int i=0; igeneration()); + createTime = hf->timestamp(); + ASSERT_GE(createTime, startTime); + ASSERT_LE(createTime, ::time(NULL)); + delete hf; + } + { + File file("/tmp/hashfiletest"); + VersionDictionary hf(&file); + ASSERT_EQ(0, hf.generation()); + ASSERT_EQ(createTime, hf.timestamp()); + { + Timer t("Reading from VersionDictionary", nWords); + EXPECT_EQ( nWords , hf.count() ); + for( int i=0; i 0 && it.key().length < 50); + ASSERT_TRUE(it.key().equals(it.value())); + ASSERT_EQ( 0, ((size_t)it.value().bytes & 0x3) ); // 4-byte aligned + } + ASSERT_EQ(nWords, n); + } + { + printf("Opening OverlayVersionDictionary...\n"); + File file("/tmp/hashfiletest", O_RDWR); + OverlayVersionDictionary hf(&file); + EXPECT_EQ( nWords , hf.count() ); + EXPECT_TRUE(hf.get("abatement").equals("abatement")); + + hf.put("abaser", "DEBASER"); + hf.put("growf", "growf"); + EXPECT_TRUE(hf.remove("abatement")); + + EXPECT_EQ( nWords, hf.count() ); + EXPECT_TRUE(hf.get("abaser").equals("DEBASER")); + EXPECT_TRUE(hf.get("growf").equals("growf")); + EXPECT_EQ( NULL, hf.get("abatement").bytes ); + EXPECT_TRUE(!hf.contains("abatement")); + + int n=0; + for( OverlayVersionDictionary::Iterator it(hf); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("abatement")); + } + ASSERT_EQ(nWords, n); + + printf("Saving OverlayVersionDictionary...\n"); + { + Timer t("Saving OverlayVersionDictionary"); + hf.save(); + } + printf("File size: %llu bytes\n", file.length()); + + EXPECT_EQ( nWords, hf.count() ); + EXPECT_TRUE(hf.get("abaser").equals("DEBASER")); + EXPECT_TRUE(hf.get("growf").equals("growf")); + EXPECT_EQ( NULL, hf.get("abatement").bytes ); + EXPECT_TRUE(!hf.contains("abatement")); + + n=0; + for( OverlayVersionDictionary::Iterator it(hf); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("abatement")); + } + ASSERT_EQ(nWords, n); + } + { + printf("Re-opening OverlayVersionDictionary...\n"); + File file("/tmp/hashfiletest"); + OverlayVersionDictionary hf(&file); + + ASSERT_EQ(1, hf.generation()); + ASSERT_GE(hf.timestamp(), createTime); + ASSERT_LE(hf.timestamp(), ::time(NULL)); + EXPECT_EQ( nWords , hf.count() ); + EXPECT_TRUE(hf.get("abaser").equals("DEBASER")); + EXPECT_TRUE(hf.get("growf").equals("growf")); + EXPECT_EQ( NULL, hf.get("abatement").bytes ); + EXPECT_TRUE(!hf.contains("abatement")); + + int n=0; + for( OverlayVersionDictionary::Iterator it(hf); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("abatement")); + } + ASSERT_EQ(nWords, n); + + n = TestIterate(&file); + EXPECT_GE(n, nWords+1+4); + if (nWords > 1000) + EXPECT_GE(n, nWords+256+4); + } + { + printf("Writing VersionDictionary to a new file...\n"); + File oldFile("/tmp/hashfiletest"); + OverlayVersionDictionary oldhf(&oldFile); + + File newFile("/tmp/hashfiletest2", O_RDWR | O_CREAT | O_TRUNC); + newFile.write("Ha5h", 4); // VersionDictionary won't write to an empty file + oldhf.saveAs(&newFile); + printf("File size: %llu bytes\n", newFile.length()); + } + { + printf("Opening new file...\n"); + File file("/tmp/hashfiletest2"); + OverlayVersionDictionary hf(&file); + + EXPECT_EQ( nWords , hf.count() ); + EXPECT_TRUE(hf.get("abaser").equals("DEBASER")); + EXPECT_TRUE(hf.get("growf").equals("growf")); + EXPECT_EQ( NULL, hf.get("abatement").bytes ); + EXPECT_TRUE(!hf.contains("abatement")); + + int n=0; + for( OverlayVersionDictionary::Iterator it(hf); it; ++it) { + n++; + ASSERT_TRUE(!it.key().equals("abatement")); + } + ASSERT_EQ(nWords, n); + + n = TestIterate(&file); + EXPECT_GE(n, nWords+1+1); + if (nWords > 1000) + EXPECT_EQ(nWords+256+1, n); + } +} + + +TEST(VersionDictionary,BuildSmall) { + TestWithWords(100); +} + +TEST(VersionDictionary,BuildLarge) { + TestWithWords(sNWords); +}