diff --git a/keyvi/src/cpp/dictionary/fsa/automata.h b/keyvi/src/cpp/dictionary/fsa/automata.h index 7a866fe2..d4ca0cc9 100644 --- a/keyvi/src/cpp/dictionary/fsa/automata.h +++ b/keyvi/src/cpp/dictionary/fsa/automata.h @@ -26,6 +26,7 @@ #define AUTOMATA_H_ #include +#include #include #include #include @@ -70,14 +71,18 @@ final { sparse_array_properties_ = internal::SerializationUtils::ReadJsonRecord( in_stream); - compact_size_ = sparse_array_properties_.get("version", 1) == 2; + compact_size_ = boost::lexical_cast (sparse_array_properties_.get("version")) == 2; size_t bucket_size = compact_size_ ? sizeof(uint16_t) : sizeof(uint32_t); + // get start state and number of keys + start_state_ = boost::lexical_cast (automata_properties_.get("start_state")); + number_of_keys_ = boost::lexical_cast (automata_properties_.get("number_of_keys")); + size_t offset = in_stream.tellg(); file_mapping_ = new boost::interprocess::file_mapping( filename, boost::interprocess::read_only); - size_t array_size = sparse_array_properties_.get("size"); + size_t array_size = boost::lexical_cast(sparse_array_properties_.get("size")); in_stream.seekg(offset + array_size + bucket_size * array_size - 1); @@ -119,8 +124,9 @@ final { // initialize value store internal::value_store_t value_store_type = - static_cast(automata_properties_.get( - "value_store_type")); + static_cast( + boost::lexical_cast (automata_properties_.get( + "value_store_type"))); value_store_reader_ = internal::ValueStoreFactory::MakeReader(value_store_type, in_stream, file_mapping_); in_stream.close(); @@ -143,11 +149,11 @@ final { * @return index of root state. 
*/ uint64_t GetStartState() const { - return automata_properties_.get("start_state"); + return start_state_; } uint64_t GetNumberOfKeys() const { - return automata_properties_.get("number_of_keys"); + return number_of_keys_; } uint64_t TryWalkTransition(uint64_t starting_state, unsigned char c) const { @@ -469,6 +475,8 @@ final { uint32_t* transitions_; uint16_t* transitions_compact_; bool compact_size_; + uint64_t start_state_; + uint64_t number_of_keys_; inline uint64_t ResolvePointer(uint64_t starting_state, unsigned char c) const { if (!compact_size_) { diff --git a/keyvi/src/cpp/dictionary/fsa/internal/json_value_store.h b/keyvi/src/cpp/dictionary/fsa/internal/json_value_store.h index 0614bc91..8802340a 100644 --- a/keyvi/src/cpp/dictionary/fsa/internal/json_value_store.h +++ b/keyvi/src/cpp/dictionary/fsa/internal/json_value_store.h @@ -359,7 +359,7 @@ class JsonValueStoreReader final: public IValueStoreReader { internal::SerializationUtils::ReadJsonRecord(stream); size_t offset = stream.tellg(); - size_t strings_size = properties_.get("size"); + size_t strings_size = boost::lexical_cast (properties_.get("size")); // check for file truncation if (strings_size > 0) { diff --git a/keyvi/src/cpp/dictionary/fsa/internal/json_value_store_deprecated.h b/keyvi/src/cpp/dictionary/fsa/internal/json_value_store_deprecated.h index 0c56d503..300c4460 100644 --- a/keyvi/src/cpp/dictionary/fsa/internal/json_value_store_deprecated.h +++ b/keyvi/src/cpp/dictionary/fsa/internal/json_value_store_deprecated.h @@ -34,6 +34,8 @@ #define RAPIDJSON_SSE42 #endif +#include + #include "rapidjson/document.h" #include "rapidjson/writer.h" #include "rapidjson/stringbuffer.h" @@ -59,275 +61,6 @@ namespace dictionary { namespace fsa { namespace internal { -/** - * Value store where the value consists of a single integer. 
- */ -class JsonValueStoreDeprecated final : public IValueStoreWriter { - public: - - struct RawPointer - final { - public: - RawPointer() - : RawPointer(0, 0, 0) { - } - - RawPointer(uint64_t offset, int hashcode, size_t length) - : offset_(offset), - hashcode_(hashcode), - length_(length) { - - if (length > USHRT_MAX) { - length_ = USHRT_MAX; - } - - } - - int GetHashcode() const { - return hashcode_; - } - - uint64_t GetOffset() const { - return offset_; - } - - ushort GetLength() const { - return length_; - } - - int GetCookie() const { - return cookie_; - } - - void SetCookie(int value) { - cookie_ = static_cast(value); - } - - bool IsEmpty() const { - return offset_ == 0 && hashcode_ == 0 && length_ == 0; - } - - bool operator==(const RawPointer& l) { - return offset_ == l.offset_; - } - - static size_t GetMaxCookieSize() { - return MaxCookieSize; - } - - private: - static const size_t MaxCookieSize = 0xFFFF; - - uint64_t offset_; - int32_t hashcode_; - ushort length_; - ushort cookie_ = 0; - - }; - - template - struct RawPointerForCompare - final - { - public: - RawPointerForCompare(const std::string& value, - PersistenceT* persistence) - : value_(value), - persistence_(persistence) { - hashcode_ = std::hash()(value); - length_ = value.size(); - } - - int GetHashcode() const { - return hashcode_; - } - - bool operator==(const RawPointer& l) const { - // First filter - check if hash code is the same - if (l.GetHashcode() != hashcode_) { - return false; - } - TRACE("check"); - size_t length_l = l.GetLength(); - - if (length_l < USHRT_MAX && length_l != length_) { - return false; - } - - TRACE("Compare values at data level length: %d %d", l.GetOffset(), value_.size()); - - return persistence_->Compare(l.GetOffset(), (void*) value_.data(), value_.size()); - } - - private: - std::string value_; - PersistenceT* persistence_; - int32_t hashcode_; - size_t length_; - }; - - typedef std::string value_t; - static const uint64_t no_value = 0; - static const bool 
inner_weight = false; - - JsonValueStoreDeprecated(const vs_param_t& parameters, - size_t memory_limit = 104857600) - : IValueStoreWriter(parameters), hash_(memory_limit) { - temporary_directory_ = parameters_[TEMPORARY_PATH_KEY]; - temporary_directory_ /= boost::filesystem::unique_path( - "dictionary-fsa-json_value_store-%%%%-%%%%-%%%%-%%%%"); - boost::filesystem::create_directory(temporary_directory_); - - size_t external_memory_chunk_size = 1073741824; - - values_extern_ = new MemoryMapManager(external_memory_chunk_size, - temporary_directory_, - "json_values_filebuffer"); - } - - ~JsonValueStoreDeprecated() { - delete values_extern_; - boost::filesystem::remove_all(temporary_directory_); - } - - JsonValueStoreDeprecated() = delete; - JsonValueStoreDeprecated& operator=(JsonValueStoreDeprecated const&) = delete; - JsonValueStoreDeprecated(const JsonValueStoreDeprecated& that) = delete; - - /** - * Simple implementation of a value store for json values: - * todo: performance improvements? 
- */ - uint64_t GetValue(const value_t& value, bool& no_minimization) { - std::string packed_value; - msgpack_buffer_.clear(); - - ++number_of_values_; - - rapidjson::Document json_document; - json_document.Parse(value.c_str()); - - if (!json_document.HasParseError()) { - TRACE("Got json"); - msgpack::pack(&msgpack_buffer_, json_document); - } else { - TRACE("Got a normal string"); - msgpack::pack(&msgpack_buffer_, value); - } - - // zlib compression - if (msgpack_buffer_.size() > 32) { - packed_value = compress_string(msgpack_buffer_.data(), msgpack_buffer_.size()); - } else { - util::encodeVarint(msgpack_buffer_.size(), packed_value); - packed_value.append(msgpack_buffer_.data(), msgpack_buffer_.size()); - } - - TRACE("Packed value: %s", packed_value.c_str()); - - const RawPointerForCompare stp(packed_value, - values_extern_); - const RawPointer p = hash_.Get(stp); - - if (!p.IsEmpty()) { - // found the same value again, minimize - TRACE("Minimized value"); - return p.GetOffset(); - } // else persist string value - - no_minimization = true; - TRACE("New unique value"); - ++number_of_unique_values_; - - uint64_t pt = static_cast(values_buffer_size_); - - values_extern_->Append(values_buffer_size_, (void*)packed_value.data(), packed_value.size()); - values_buffer_size_ += packed_value.size(); - - TRACE("add value to hash at %d, length %d", pt, packed_value.size()); - hash_.Add(RawPointer(pt, stp.GetHashcode(), packed_value.size())); - - return pt; - } - - uint32_t GetWeightValue(value_t value) const { - return 0; - } - - value_store_t GetValueStoreType() const { - return JSON_VALUE_STORE_DEPRECATED; - } - - void Write(std::ostream& stream) { - - boost::property_tree::ptree pt; - pt.put("size", std::to_string(values_buffer_size_)); - pt.put("values", std::to_string(number_of_values_)); - pt.put("unique_values", std::to_string(number_of_unique_values_)); - - internal::SerializationUtils::WriteJsonRecord(stream, pt); - TRACE("Wrote JSON header, stream at %d", 
stream.tellp()); - - values_extern_->Write(stream, values_buffer_size_); - } - - private: - MemoryMapManager* values_extern_; - - LeastRecentlyUsedGenerationsCache hash_; - msgpack::sbuffer msgpack_buffer_; - char zlib_buffer_[32768]; - size_t number_of_values_ = 0; - size_t number_of_unique_values_ = 0; - size_t values_buffer_size_ = 0; - boost::filesystem::path temporary_directory_; - - /** Compress a STL string using zlib with given compression level and return - * the binary data. */ - std::string compress_string(const char* data, size_t data_size, - int compressionlevel = Z_BEST_COMPRESSION) - { - z_stream zs; // z_stream is zlib's control structure - memset(&zs, 0, sizeof(zs)); - - if (deflateInit(&zs, compressionlevel) != Z_OK) - throw(std::runtime_error("deflateInit failed while compressing.")); - - zs.next_in = (Bytef*)data; - zs.avail_in = data_size; // set the z_stream's input - - int ret; - std::string outstring = " "; - - // retrieve the compressed bytes blockwise - do { - zs.next_out = reinterpret_cast(zlib_buffer_); - zs.avail_out = sizeof(zlib_buffer_); - - ret = deflate(&zs, Z_FINISH); - - if (outstring.size() - 1 < zs.total_out) { - // append the block to the output string - outstring.append(zlib_buffer_, - zs.total_out - outstring.size() + 1); - } - } while (ret == Z_OK); - - deflateEnd(&zs); - - if (ret != Z_STREAM_END) { // an error occurred that was not EOF - std::ostringstream oss; - oss << "Exception during zlib compression: (" << ret << ") " << zs.msg; - throw(std::runtime_error(oss.str())); - } - - std::string size_prefix; - util::encodeVarint(outstring.size(), size_prefix); - - return size_prefix + outstring; - } - }; - class JsonValueStoreDeprecatedReader final: public IValueStoreReader { public: using IValueStoreReader::IValueStoreReader; @@ -343,7 +76,7 @@ class JsonValueStoreDeprecated final : public IValueStoreWriter { internal::SerializationUtils::ReadJsonRecord(stream); size_t offset = stream.tellg(); - size_t strings_size = 
properties_.get("size"); + size_t strings_size = boost::lexical_cast(properties_.get("size")); // check for file truncation if (strings_size > 0) { diff --git a/keyvi/src/cpp/dictionary/fsa/internal/memory_map_manager.h b/keyvi/src/cpp/dictionary/fsa/internal/memory_map_manager.h index 09370ad5..b5b42e73 100644 --- a/keyvi/src/cpp/dictionary/fsa/internal/memory_map_manager.h +++ b/keyvi/src/cpp/dictionary/fsa/internal/memory_map_manager.h @@ -46,8 +46,8 @@ namespace internal { class MemoryMapManager final { public: - MemoryMapManager(size_t chunk_size, boost::filesystem::path directory, - boost::filesystem::path filename_pattern) + MemoryMapManager(const size_t chunk_size, const boost::filesystem::path directory, + const boost::filesystem::path filename_pattern) : chunk_size_(chunk_size), directory_(directory), filename_pattern_(filename_pattern) { @@ -64,7 +64,7 @@ final { * * This API is to check first whether GetAdress is safe to use. */ - bool GetAddressQuickTestOk(size_t offset, size_t length){ + bool GetAddressQuickTestOk(size_t offset, size_t length) const { size_t chunk_offset = offset % chunk_size_; return (length <= (chunk_size_ - chunk_offset)); @@ -83,7 +83,7 @@ final { * * This API is to be used when GetAdress is not safe to use. */ - void GetBuffer(size_t offset, void* buffer, size_t buffer_length) { + void GetBuffer(const size_t offset, void* buffer, const size_t buffer_length) { size_t chunk_number = offset / chunk_size_; size_t chunk_offset = offset % chunk_size_; @@ -97,30 +97,6 @@ final { std::memcpy((char*) buffer + first_chunk_size, (char*) chunk_address_part2, second_chunk_size); } - /** - * DEPRECATED: Append to the buffer starting from the given offset. - * Note: Append(buffer, buffer_length) shall be used instead. - * - * @param offset offset at where to append. - * @param buffer the buffer to append. - * @param buffer_length the buffer length to append. 
- */ - void Append(size_t offset, void* buffer, size_t buffer_length){ - size_t chunk_number = offset / chunk_size_; - size_t chunk_offset = offset % chunk_size_; - - void* chunk_address = GetChunk(chunk_number); - size_t first_chunk_size = std::min(buffer_length, chunk_size_ - chunk_offset); - std::memcpy((char*)chunk_address + chunk_offset, buffer, first_chunk_size); - - // handle overflow - if (buffer_length != first_chunk_size) { - void* chunk_address_part2 = GetChunk(chunk_number + 1); - std::memcpy((char*)chunk_address_part2, (char*)buffer + first_chunk_size, buffer_length - first_chunk_size); - } - - tail_ = offset + buffer_length; - } /** * Append to the buffer at the current tail. @@ -128,21 +104,22 @@ final { * @param buffer the buffer to append * @param buffer_length the buffer length to append */ - void Append(void* buffer, size_t buffer_length){ - size_t chunk_number = tail_ / chunk_size_; - size_t chunk_offset = tail_ % chunk_size_; + void Append(const void* buffer, const size_t buffer_length){ + size_t remaining = buffer_length; + size_t buffer_offset = 0; - void* chunk_address = GetChunk(chunk_number); - size_t first_chunk_size = std::min(buffer_length, chunk_size_ - chunk_offset); - std::memcpy((char*)chunk_address + chunk_offset, buffer, first_chunk_size); + while (remaining > 0) { + size_t chunk_number = tail_ / chunk_size_; + size_t chunk_offset = tail_ % chunk_size_; - // handle overflow - if (buffer_length != first_chunk_size) { - void* chunk_address_part2 = GetChunk(chunk_number + 1); - std::memcpy((char*)chunk_address_part2, (char*)buffer + first_chunk_size, buffer_length - first_chunk_size); - } + void* chunk_address = GetChunk(chunk_number); + size_t copy_size = std::min(remaining, chunk_size_ - chunk_offset); // clamp to the bytes still left, not the full input length, so the last chunk never reads past the input or underflows remaining + std::memcpy((char*)chunk_address + chunk_offset, (const char*)buffer + buffer_offset, copy_size); - tail_ += buffer_length; + remaining -= copy_size; + tail_ += copy_size; + buffer_offset += copy_size; + } } void push_back(const char 
c){ @@ -154,7 +131,7 @@ final { ++tail_; } - bool Compare(size_t offset, void* buffer, size_t buffer_length){ + bool Compare(const size_t offset, const void* buffer, const size_t buffer_length){ size_t chunk_number = offset / chunk_size_; size_t chunk_offset = offset % chunk_size_; @@ -175,7 +152,7 @@ final { return (std::memcmp((char*)chunk_address_part2, (char*)buffer + first_chunk_size, buffer_length - first_chunk_size) == 0); } - void Write (std::ostream& stream, size_t end) const { + void Write (std::ostream& stream, const size_t end) const { if (persisted_) { for (size_t i =0; i < number_of_chunks_; i++) { @@ -242,7 +219,7 @@ final { return filename; } - void* GetChunk(size_t chunk_number) { + void* GetChunk(const size_t chunk_number) { while (chunk_number >= number_of_chunks_) { CreateMapping(); } diff --git a/keyvi/src/cpp/dictionary/fsa/internal/sparse_array_persistence.h b/keyvi/src/cpp/dictionary/fsa/internal/sparse_array_persistence.h index 17c6356e..d5444f08 100644 --- a/keyvi/src/cpp/dictionary/fsa/internal/sparse_array_persistence.h +++ b/keyvi/src/cpp/dictionary/fsa/internal/sparse_array_persistence.h @@ -197,15 +197,13 @@ final { if (labels_) { size_t highest_write_position = std::max(highest_state_begin_ + MAX_TRANSITIONS_OF_A_STATE, highest_raw_write_bucket_); - labels_extern_->Append(in_memory_buffer_offset_, - labels_, + labels_extern_->Append(labels_, (highest_write_position - in_memory_buffer_offset_)); // in place re-write HostOrderToPersistenceOrder(transitions_, highest_write_position - in_memory_buffer_offset_); - transitions_extern_->Append(in_memory_buffer_offset_ * sizeof(BucketT), - transitions_, + transitions_extern_->Append(transitions_, (highest_write_position - in_memory_buffer_offset_) * sizeof(BucketT)); delete[] labels_; @@ -251,8 +249,7 @@ final { size_t highest_raw_write_bucket_ = 0; inline void FlushBuffers() { - labels_extern_->Append(in_memory_buffer_offset_, - labels_, + labels_extern_->Append(labels_, flush_size_); TRACE 
("Write labels from %d to %d (flushsize %d)", in_memory_buffer_offset_, in_memory_buffer_offset_ + flush_size_, flush_size_); @@ -260,8 +257,7 @@ final { // in place re-write HostOrderToPersistenceOrder(transitions_, flush_size_); - transitions_extern_->Append(in_memory_buffer_offset_ * sizeof(BucketT), - transitions_, + transitions_extern_->Append(transitions_, flush_size_ * sizeof(BucketT)); size_t overlap = buffer_size_ - flush_size_; diff --git a/keyvi/src/cpp/dictionary/fsa/internal/string_value_store.h b/keyvi/src/cpp/dictionary/fsa/internal/string_value_store.h index fb89333a..0d0bafbd 100644 --- a/keyvi/src/cpp/dictionary/fsa/internal/string_value_store.h +++ b/keyvi/src/cpp/dictionary/fsa/internal/string_value_store.h @@ -26,6 +26,7 @@ #define STRING_VALUE_STORE_H_ #include +#include #include "dictionary/fsa/internal/ivalue_store.h" #include "dictionary/fsa/internal/serialization_utils.h" @@ -241,7 +242,7 @@ class StringValueStore final : public IValueStoreWriter { internal::SerializationUtils::ReadJsonRecord(stream); size_t offset = stream.tellg(); - size_t strings_size = properties.get("size"); + size_t strings_size = boost::lexical_cast(properties.get("size")); // check for file truncation if (strings_size > 0) { diff --git a/keyvi/src/cpp/keyvicompiler/keyvicompiler.cpp b/keyvi/src/cpp/keyvicompiler/keyvicompiler.cpp index 95835b01..4b8a66a7 100644 --- a/keyvi/src/cpp/keyvicompiler/keyvicompiler.cpp +++ b/keyvi/src/cpp/keyvicompiler/keyvicompiler.cpp @@ -287,7 +287,8 @@ int main(int argc, char** argv) { memory_limit = vm["memory-limit"].as(); } - bool compact = false; + // make compact the default + bool compact = true; if (vm.count("compact")) { compact = true; } diff --git a/keyvi/tests/cpp/dictionary/fsa/internal/json_value_store2_test.cpp b/keyvi/tests/cpp/dictionary/fsa/internal/json_value_store2_test.cpp deleted file mode 100644 index c821e782..00000000 --- a/keyvi/tests/cpp/dictionary/fsa/internal/json_value_store2_test.cpp +++ /dev/null @@ 
-1,71 +0,0 @@ -// -// keyvi - A key value store. -// -// Copyright 2015 Hendrik Muhs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -/* - * json_value_store_test.cpp - * - * Created on: October 14, 2015 - * Author: David Mark Nemeskey - */ - -#include -#include -#include "dictionary/fsa/internal/json_value_store_deprecated.h" -#include "dictionary/fsa/internal/constants.h" - -namespace keyvi { -namespace dictionary { -namespace fsa { -namespace internal { - -// The name of the suite must be a different name to your class -BOOST_AUTO_TEST_SUITE( JsonValueDeprecatedTest ) - -BOOST_AUTO_TEST_CASE( minimization ) -{ - JsonValueStoreDeprecated strings( - IValueStoreWriter::vs_param_t{{TEMPORARY_PATH_KEY, "/tmp"}}); - bool no_minimization = false; - uint32_t v = strings.GetValue("{\"mytestvalue\":25, \"mytestvalue2\":23}", no_minimization); - BOOST_CHECK_EQUAL(v,0); - uint32_t w = strings.GetValue("othervalue", no_minimization); - uint32_t x = strings.GetValue("{\"mytestvalue3\":55, \"mytestvalue4\":773}", no_minimization); - - BOOST_CHECK(w>0); - BOOST_CHECK_EQUAL(v,strings.GetValue("{\"mytestvalue\": 25, \"mytestvalue2\": 23}", no_minimization)); - BOOST_CHECK_EQUAL(x,strings.GetValue("{\"mytestvalue3\":55, \"mytestvalue4\":773}", no_minimization)); - BOOST_CHECK_EQUAL(w,strings.GetValue("othervalue", no_minimization)); -} - -BOOST_AUTO_TEST_CASE( minimization2 ) -{ - JsonValueStoreDeprecated strings( - IValueStoreWriter::vs_param_t{{TEMPORARY_PATH_KEY, 
"/tmp"}}); - bool no_minimization = false; - - uint64_t v = strings.GetValue("{\"f\": 5571575, \"df\": 1362790, \"uqf\": 2129086, \"tf1df\": 99838, \"tf2df\": 274586, \"tf3df\": 481278, \"tf5df\": 811157}", no_minimization); - uint64_t w = strings.GetValue("{\"f\": 3, \"df\": 1, \"uqf\": 1, \"tf1df\": 0, \"tf2df\": 0, \"tf3df\": 0, \"tf5df\": 0}", no_minimization); - BOOST_CHECK(v != w); -} - -BOOST_AUTO_TEST_SUITE_END() - -} /* namespace internal */ -} /* namespace fsa */ -} /* namespace dictionary */ -} /* namespace keyvi */ diff --git a/keyvi/tests/cpp/dictionary/fsa/internal/memory_map_manager_test.cpp b/keyvi/tests/cpp/dictionary/fsa/internal/memory_map_manager_test.cpp index b41ab90f..57e22fc7 100644 --- a/keyvi/tests/cpp/dictionary/fsa/internal/memory_map_manager_test.cpp +++ b/keyvi/tests/cpp/dictionary/fsa/internal/memory_map_manager_test.cpp @@ -117,6 +117,44 @@ BOOST_AUTO_TEST_CASE( GetBuffer ) { boost::filesystem::remove_all(path); } +BOOST_AUTO_TEST_CASE( AppendLargeChunk ) { + size_t chunkSize = 4096; + + boost::filesystem::path path = boost::filesystem::temp_directory_path(); + path /= boost::filesystem::unique_path( + "dictionary-fsa-unittest-%%%%-%%%%-%%%%-%%%%"); + boost::filesystem::create_directory(path); + MemoryMapManager m(chunkSize, path, "append large chunk test"); + + char buffer[16384]; + std::fill(buffer, buffer+4096, 'y'); + std::fill(buffer+4096, buffer+16384, 'x'); + buffer[8887] = 'w'; + buffer[8889] = 'y'; + + buffer[9503] = 'a'; + buffer[9504] = 'b'; + + buffer[12003] = 'c'; + buffer[12005] = 'd'; + + buffer[14000] = 'e'; + buffer[14001] = 'f'; + + buffer[16382] = 'g'; + buffer[16383] = 'h'; + + m.Append(buffer, 16384); + + BOOST_CHECK_EQUAL('x', ((char*)m.GetAddress(8888))[0]); + BOOST_CHECK_EQUAL('a', ((char*)m.GetAddress(9503))[0]); + BOOST_CHECK_EQUAL('x', ((char*)m.GetAddress(12004))[0]); + BOOST_CHECK_EQUAL('e', ((char*)m.GetAddress(14000))[0]); + BOOST_CHECK_EQUAL('h', ((char*)m.GetAddress(16383))[0]); + + 
boost::filesystem::remove_all(path); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/pykeyvi/tests/dictionary_compiler_test.py b/pykeyvi/tests/dictionary_compiler_test.py new file mode 100644 index 00000000..d9332bd1 --- /dev/null +++ b/pykeyvi/tests/dictionary_compiler_test.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# Usage: py.test tests + +import pykeyvi + +def test_compiler_no_compile_edge_case(): + c = pykeyvi.KeyOnlyDictionaryCompiler() + c.Add("abc") + c.Add("abd") + del c diff --git a/pykeyvi/tests/json/json_dictionary_test.py b/pykeyvi/tests/json/json_dictionary_test.py index 842c2dbc..3bac3047 100644 --- a/pykeyvi/tests/json/json_dictionary_test.py +++ b/pykeyvi/tests/json/json_dictionary_test.py @@ -18,6 +18,8 @@ def test_simple(): c = pykeyvi.JsonDictionaryCompiler() c.Add("abc", '{"a" : 2}') c.Add("abd", '{"a" : 3}') + # use python syntax ala __setitem__ + c["abd"] = '{"a" : 3}' with tmp_dictionary(c, 'simple_json.kv') as d: assert len(d) == 2 assert d["abc"].GetValueAsString() == '{"a":2}' diff --git a/pykeyvi/tests/statistics_test.py b/pykeyvi/tests/statistics_test.py index 1a43df78..e7ce420f 100644 --- a/pykeyvi/tests/statistics_test.py +++ b/pykeyvi/tests/statistics_test.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Usage: py.test tests +import os import pykeyvi from test_tools import tmp_dictionary @@ -22,6 +23,21 @@ def test_manifest(): m = d.GetManifest() assert m['author'] == "Zapp Brannigan" +def test_manifest_after_compile(): + c = pykeyvi.KeyOnlyDictionaryCompiler() + c.Add("Leela") + c.Add("Kif") + c.Compile() + c.SetManifest({"author": "Zapp Brannigan"}) + file_name = 'brannigan_manifest2.kv' + try: + c.WriteToFile(file_name) + d = pykeyvi.Dictionary(file_name) + m = d.GetManifest() + assert m['author'] == "Zapp Brannigan" + del d + finally: + os.remove(file_name) def test_statistics(): c = pykeyvi.KeyOnlyDictionaryCompiler()