Skip to content

Commit

Permalink
Merge pull request #93 from hendrik-cliqz/mmap-options
Browse files Browse the repository at this point in the history
implement different loading styles (mmap options)
  • Loading branch information
hendrikmuhs committed May 20, 2016
2 parents db0a113 + cd2ba46 commit d72b40c
Show file tree
Hide file tree
Showing 12 changed files with 3,218 additions and 2,458 deletions.
24 changes: 22 additions & 2 deletions keyvi/src/cpp/dictionary/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,28 @@ namespace dictionary {
class Dictionary
final {
public:
Dictionary(const char* filename, bool load_lazy = false)
: fsa_(new fsa::Automata(filename, load_lazy)) {

/**
* DEPRECATED: Initialize a dictionary in lazy or non-lazy mode.
*
* Use Dictionary(filename, loading_strategy) instead.
*
* The boolean is forwarded unchanged to fsa::Automata(filename, load_lazy),
* which translates it into a loading strategy.
*
* @param filename path of the keyvi file to load.
* @param load_lazy true to load lazily, false to load everything up front.
*/
Dictionary(const char* filename, bool load_lazy)
: fsa_(std::make_shared<fsa::Automata>(filename, load_lazy)) {
TRACE("Dictionary from file %s", filename);
}

/**
* Initialize a dictionary from a file.
*
* The automaton is created through fsa::Automata(filename, loading_strategy)
* and held in a shared pointer.
*
* @param filename filename to load keyvi file from.
* @param loading_strategy optional: loading strategy to use for memory-mapping
*        the file; defaults to loading_strategy_types::lazy.
*/
explicit Dictionary(const char* filename, loading_strategy_types loading_strategy = loading_strategy_types::lazy)
: fsa_(std::make_shared<fsa::Automata>(filename, loading_strategy)) {
TRACE("Dictionary from file %s", filename);
}

Expand Down
27 changes: 11 additions & 16 deletions keyvi/src/cpp/dictionary/fsa/automata.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "dictionary/fsa/internal/constants.h"
#include "dictionary/fsa/internal/value_store_factory.h"
#include "dictionary/fsa/internal/serialization_utils.h"
#include "dictionary/fsa/internal/memory_map_flags.h"
#include "dictionary/fsa/traversal/traversal_base.h"
#include "dictionary/fsa/traversal/weighted_traversal.h"
#include "dictionary/util/vint.h"
Expand All @@ -47,12 +48,14 @@ namespace keyvi {
namespace dictionary {
namespace fsa {


class Automata
final {

public:
Automata(const char * filename, bool load_lazy=false) {
/**
* Legacy constructor taking a boolean instead of a loading strategy.
*
* Delegates to Automata(filename, loading_strategy):
* load_lazy == true selects loading_strategy_types::default_os,
* load_lazy == false selects loading_strategy_types::populate.
*
* @param filename the file to load the automaton from.
* @param load_lazy whether to load lazily.
*/
Automata(const char * filename, bool load_lazy):
Automata(filename, load_lazy ? loading_strategy_types::default_os : loading_strategy_types::populate) {}

explicit Automata(const char * filename, loading_strategy_types loading_strategy = loading_strategy_types::lazy) {
std::ifstream in_stream(filename, std::ios::binary);

if (!in_stream.good()) {
Expand Down Expand Up @@ -92,17 +95,7 @@ final {
throw std::invalid_argument("file is corrupt(truncated)");
}

boost::interprocess::map_options_t map_options = boost::interprocess::default_map_options;

#ifdef MAP_HUGETLB
map_options |= MAP_HUGETLB;
#endif

if (!load_lazy) {
#ifdef MAP_POPULATE
map_options |= MAP_POPULATE;
#endif
}
const boost::interprocess::map_options_t map_options = internal::MemoryMapFlags::FSAGetMemoryMapOptions(loading_strategy);

TRACE("labels start offset: %d", offset);
labels_region_ = new boost::interprocess::mapped_region(
Expand All @@ -113,9 +106,10 @@ final {
*file_mapping_, boost::interprocess::read_only, offset + array_size,
bucket_size * array_size, 0, map_options);

// prevent pre-fetching pages by the OS which does not make sense for the FST structure
labels_region_->advise(boost::interprocess::mapped_region::advice_types::advice_random);
transitions_region_->advise(boost::interprocess::mapped_region::advice_types::advice_random);
const auto advise = internal::MemoryMapFlags::ValuesGetMemoryMapAdvices(loading_strategy);

labels_region_->advise(advise);
transitions_region_->advise(advise);

TRACE("full file size %zu", offset + array_size + bucket_size * array_size);

Expand Down Expand Up @@ -535,6 +529,7 @@ final {
TRACE("Compact Transition after resolve %d", resolved_ptr);
return resolved_ptr;
}

};

// shared pointer
Expand Down
20 changes: 6 additions & 14 deletions keyvi/src/cpp/dictionary/fsa/internal/json_value_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "rapidjson/stringbuffer.h"

#include "dictionary/fsa/internal/ivalue_store.h"
#include "dictionary/fsa/internal/memory_map_flags.h"
#include "dictionary/fsa/internal/serialization_utils.h"
#include "dictionary/fsa/internal/lru_generation_cache.h"
#include "dictionary/fsa/internal/memory_map_manager.h"
Expand Down Expand Up @@ -272,7 +273,7 @@ class JsonValueStoreReader final: public IValueStoreReader {

JsonValueStoreReader(std::istream& stream,
boost::interprocess::file_mapping* file_mapping,
bool load_lazy = false)
loading_strategy_types loading_strategy = loading_strategy_types::lazy)
: IValueStoreReader(stream, file_mapping) {
TRACE("JsonValueStoreReader construct");

Expand All @@ -290,24 +291,15 @@ class JsonValueStoreReader final: public IValueStoreReader {
}
}

boost::interprocess::map_options_t map_options = boost::interprocess::default_map_options;

#ifdef MAP_HUGETLB
map_options |= MAP_HUGETLB;
#endif

if (!load_lazy) {
#ifdef MAP_POPULATE
map_options |= MAP_POPULATE;
#endif
}
const boost::interprocess::map_options_t map_options = internal::MemoryMapFlags::ValuesGetMemoryMapOptions(loading_strategy);

strings_region_ = new boost::interprocess::mapped_region(
*file_mapping, boost::interprocess::read_only, offset,
strings_size, 0, map_options);

// prevent pre-fetching pages by the OS which does not make sense as values usually fit into few pages
strings_region_->advise(boost::interprocess::mapped_region::advice_types::advice_random);
const auto advise = internal::MemoryMapFlags::ValuesGetMemoryMapAdvices(loading_strategy);

strings_region_->advise(advise);

strings_ = (const char*) strings_region_->get_address();
}
Expand Down
188 changes: 188 additions & 0 deletions keyvi/src/cpp/dictionary/fsa/internal/memory_map_flags.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/* * keyvi - A key value store.
*
* Copyright 2015 Hendrik Muhs<hendrik.muhs@gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* memory_map_flags.h
*
* Created on: May 17, 2016
* Author: hendrik
*/

#ifndef KEYVI_MEMORY_MAP_FLAGS_H_
#define KEYVI_MEMORY_MAP_FLAGS_H_

#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/mapped_region.hpp>

namespace keyvi {
namespace dictionary {

/**
 * Strategies for memory-mapping a keyvi file.
 *
 * A keyvi file consists of a key part (the FSA) and a value part. A strategy is
 * translated by fsa::internal::MemoryMapFlags into mmap options and madvise
 * advice, separately for the key part and for the value part.
 */
enum class loading_strategy_types {
default_os, // no special treatment, use whatever the OS/Boost has as default
lazy, // load data as needed with some read-ahead
populate, // immediately load everything into memory (blocks until everything is fully read)
populate_key_part, // populate only the key part, load the value part lazily
populate_lazy, // load data lazily but ask the OS to read ahead if possible (does not block)
lazy_no_readahead, // disable any read-ahead (for cases when index > x * main memory)
lazy_no_readahead_value_part, // disable read-ahead only for the value part
populate_key_part_no_readahead_value_part // populate the key part, but disable read-ahead for the value part
};

namespace fsa {
namespace internal {

/**
 * Translates a loading strategy into the options passed to mmap and the advice
 * passed to madvise (both through boost::interprocess), separately for the
 * FSA (key) part and the value part of a keyvi file.
 *
 * The class has no state; it only groups static helper functions.
 */
class MemoryMapFlags final {
 public:
  /**
   * Translates the loading strategy into the according options for mmap. To be used for loading the FSA part.
   *
   * The FSA part is populated for the strategies populate, populate_key_part and
   * populate_key_part_no_readahead_value_part.
   *
   * @param strategy load strategy
   * @return flags to be used for mmap (via boost).
   */
  static int FSAGetMemoryMapOptions(const loading_strategy_types strategy) {
#ifdef _WIN32
    // there is no comparable fine-grained control on windows, so simply use the defaults
    static_cast<void>(strategy);
    return boost::interprocess::default_map_options;
#else  // not _WIN32
    if (strategy == loading_strategy_types::default_os) {
      return boost::interprocess::default_map_options;
    }

    switch (strategy) {
      case loading_strategy_types::populate:
      case loading_strategy_types::populate_key_part:
      case loading_strategy_types::populate_key_part_no_readahead_value_part:
        return WithPopulate(SharedMappingFlags());
      default:
        return SharedMappingFlags();
    }
#endif  // not _WIN32
  }

  /**
   * Translates the loading strategy into the according options for mmap. To be used for loading the Values part.
   *
   * The value part is populated only for the strategy populate.
   *
   * @param strategy load strategy
   * @return flags to be used for mmap (via boost).
   */
  static int ValuesGetMemoryMapOptions(const loading_strategy_types strategy) {
#ifdef _WIN32
    // there is no comparable fine-grained control on windows, so simply use the defaults
    static_cast<void>(strategy);
    return boost::interprocess::default_map_options;
#else  // not _WIN32
    if (strategy == loading_strategy_types::default_os) {
      return boost::interprocess::default_map_options;
    }

    if (strategy == loading_strategy_types::populate) {
      return WithPopulate(SharedMappingFlags());
    }

    return SharedMappingFlags();
#endif  // not _WIN32
  }

  /**
   * Translates the loading strategy into the according options for madvise. To be used for loading the FSA part.
   *
   * @param strategy load strategy
   * @return advise to be used for madvise (via boost)
   */
  static boost::interprocess::mapped_region::advice_types FSAGetMemoryMapAdvices(const loading_strategy_types strategy) {
#ifdef _WIN32
    // there is no madvise on windows, so simply use the default
    static_cast<void>(strategy);
    return boost::interprocess::mapped_region::advice_types::advice_normal;
#else  // not _WIN32
    switch (strategy) {
      case loading_strategy_types::lazy_no_readahead:
        return boost::interprocess::mapped_region::advice_types::advice_random;
      case loading_strategy_types::lazy_no_readahead_value_part:
      case loading_strategy_types::populate_key_part_no_readahead_value_part:
        // these strategies disable read-ahead for the value part only,
        // the FSA part keeps the normal advice
        break;
      case loading_strategy_types::populate_lazy:
        return boost::interprocess::mapped_region::advice_types::advice_willneed;
      default:
        break;
    }

    return boost::interprocess::mapped_region::advice_types::advice_normal;
#endif  // not _WIN32
  }

  /**
   * Translates the loading strategy into the according options for madvise. To be used for loading the Values part.
   *
   * @param strategy load strategy
   * @return advise to be used for madvise (via boost)
   */
  static boost::interprocess::mapped_region::advice_types ValuesGetMemoryMapAdvices(const loading_strategy_types strategy) {
#ifdef _WIN32
    // there is no madvise on windows, so simply use the default
    static_cast<void>(strategy);
    return boost::interprocess::mapped_region::advice_types::advice_normal;
#else  // not _WIN32
    switch (strategy) {
      case loading_strategy_types::lazy_no_readahead:
      case loading_strategy_types::lazy_no_readahead_value_part:
      case loading_strategy_types::populate_key_part_no_readahead_value_part:
        return boost::interprocess::mapped_region::advice_types::advice_random;
      case loading_strategy_types::populate_lazy:
        return boost::interprocess::mapped_region::advice_types::advice_willneed;
      default:
        break;
    }

    return boost::interprocess::mapped_region::advice_types::advice_normal;
#endif  // not _WIN32
  }

 private:
#ifndef _WIN32
  /**
   * Flags common to every explicit (non default_os) strategy: a shared mapping,
   * plus MAP_NOSYNC on platforms that define it.
   */
  static int SharedMappingFlags() {
    int flags = MAP_SHARED;

#ifdef MAP_NOSYNC
    flags |= MAP_NOSYNC;
#endif

    return flags;
  }

  /**
   * Adds MAP_POPULATE to the given flags where the platform defines it.
   * MAP_POPULATE is not available everywhere; without it the flags are
   * returned unchanged, so a populate strategy falls back to loading on demand.
   */
  static int WithPopulate(int flags) {
#ifdef MAP_POPULATE
    flags |= MAP_POPULATE;
#endif

    return flags;
  }
#endif  // not _WIN32
};

} /* namespace internal */
} /* namespace fsa */
} /* namespace dictionary */
} /* namespace keyvi */


#endif /* KEYVI_MEMORY_MAP_FLAGS_H_ */
20 changes: 6 additions & 14 deletions keyvi/src/cpp/dictionary/fsa/internal/string_value_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <boost/lexical_cast.hpp>

#include "dictionary/fsa/internal/ivalue_store.h"
#include "dictionary/fsa/internal/memory_map_flags.h"
#include "dictionary/fsa/internal/serialization_utils.h"
#include "dictionary/fsa/internal/minimization_hash.h"

Expand Down Expand Up @@ -247,7 +248,7 @@ class StringValueStore final : public IValueStoreWriter {
using IValueStoreReader::IValueStoreReader;

StringValueStoreReader(std::istream& stream,
boost::interprocess::file_mapping* file_mapping, bool load_lazy = false)
boost::interprocess::file_mapping* file_mapping, loading_strategy_types loading_strategy = loading_strategy_types::lazy)
: IValueStoreReader(stream, file_mapping) {

boost::property_tree::ptree properties =
Expand All @@ -264,24 +265,15 @@ class StringValueStore final : public IValueStoreWriter {
}
}

boost::interprocess::map_options_t map_options = boost::interprocess::default_map_options;

#ifdef MAP_HUGETLB
map_options |= MAP_HUGETLB;
#endif

if (!load_lazy) {
#ifdef MAP_POPULATE
map_options |= MAP_POPULATE;
#endif
}
const boost::interprocess::map_options_t map_options = internal::MemoryMapFlags::ValuesGetMemoryMapOptions(loading_strategy);

strings_region_ = new boost::interprocess::mapped_region(
*file_mapping, boost::interprocess::read_only, offset,
strings_size, 0, map_options);

// prevent pre-fetching pages by the OS which does not make sense as values usually fit into few pages
strings_region_->advise(boost::interprocess::mapped_region::advice_types::advice_random);
const auto advise = internal::MemoryMapFlags::ValuesGetMemoryMapAdvices(loading_strategy);

strings_region_->advise(advise);

strings_ = (const char*) strings_region_->get_address();
}
Expand Down
Loading

0 comments on commit d72b40c

Please sign in to comment.