Skip to content

Commit

Permalink
Merge pull request #55 from hendrik-cliqz/traver_optimize#27
Browse files Browse the repository at this point in the history
Traver optimize#27
  • Loading branch information
hendrikmuhs committed Nov 20, 2015
2 parents 7c233fd + 3a06ddb commit 1680ec6
Show file tree
Hide file tree
Showing 26 changed files with 4,803 additions and 4,584 deletions.
1 change: 0 additions & 1 deletion keyvi/src/cpp/compression/zlib_compression_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ struct ZlibCompressionStrategy final : public CompressionStrategy {
buffer[0] = static_cast<char>(ZLIB_COMPRESSION);

int ret;
size_t written = 0;

// compress bytes
zs.next_out = reinterpret_cast<Bytef*>(buffer.data() + 1);
Expand Down
3 changes: 1 addition & 2 deletions keyvi/src/cpp/dictionary/completion/multiword_completion.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
#include "dictionary/dictionary.h"
#include "dictionary/fsa/automata.h"
#include "dictionary/match_iterator.h"
#include "dictionary/fsa/state_traverser.h"
#include "dictionary/fsa/weighted_state_traverser.h"
#include "dictionary/fsa/traverser_types.h"
#include "dictionary/fsa/bounded_weighted_state_traverser.h"
#include "dictionary/fsa/codepoint_state_traverser.h"
#include "dictionary/util/transform.h"
Expand Down
5 changes: 2 additions & 3 deletions keyvi/src/cpp/dictionary/completion/prefix_completion.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
#include "dictionary/dictionary.h"
#include "dictionary/fsa/automata.h"
#include "dictionary/match_iterator.h"
#include "dictionary/fsa/state_traverser.h"
#include "dictionary/fsa/weighted_state_traverser.h"
#include "dictionary/fsa/traverser_types.h"
#include "dictionary/fsa/bounded_weighted_state_traverser.h"
#include "dictionary/fsa/codepoint_state_traverser.h"
#include "dictionary/util/trace.h"
Expand Down Expand Up @@ -120,7 +119,7 @@ final {
data->traverser.GetStateValue());

data->traverser++;
data->traverser.TryReduceResultQueue();
//data->traverser.TryReduceResultQueue();
return m;
}
data->traverser++;
Expand Down
97 changes: 93 additions & 4 deletions keyvi/src/cpp/dictionary/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <queue>
#include "dictionary/fsa/automata.h"
#include "dictionary/fsa/state_traverser.h"
#include "dictionary/fsa/traverser_types.h"
#include "dictionary/match.h"
#include "dictionary/match_iterator.h"

Expand Down Expand Up @@ -176,19 +177,19 @@ final {

// data which is required for the callback as well
struct delegate_payload {
delegate_payload(fsa::StateTraverser&& t,
delegate_payload(fsa::StateTraverser<>&& t,
std::vector<unsigned char>& stack)
: traverser(std::move(t)),
traversal_stack(std::move(stack)) {
}

fsa::StateTraverser traverser;
fsa::StateTraverser<> traverser;
std::vector<unsigned char> traversal_stack;
};

std::shared_ptr<delegate_payload> data(
new delegate_payload(
fsa::StateTraverser(fsa_, state),
fsa::StateTraverser<>(fsa_, state),
traversal_stack));

std::function<Match()> tfunc =
Expand All @@ -205,7 +206,7 @@ final {

if (data->traverser.IsFinalState()) {
std::string match_str = std::string(reinterpret_cast<char*> (&data->traversal_stack[0]),
data->traverser.GetDepth())
data->traverser.GetDepth());
TRACE("found final state at depth %d %s",
data->traverser.GetDepth(),
match_str.c_str());
Expand Down Expand Up @@ -330,6 +331,94 @@ final {
return MatchIterator::MakeIteratorPair(func);
}

/**
 * Match keys "near" the given key.
 *
 * The first minimum_prefix_length bytes of key are walked as an exact prefix
 * in the automaton. The remainder of the key is handed to a
 * fsa::NearStateTraverser, which drives the traversal below that prefix.
 * Every final state the traverser visits is returned as a Match whose
 * matched string is the exact prefix followed by the traversed labels.
 *
 * After a match has been produced, the traverser's exact_depth is remembered
 * and iteration stops as soon as the traverser yields a state that is not
 * deeper than that remembered depth (or has a zero label).
 *
 * @param key the key to look up
 * @param minimum_prefix_length number of leading bytes of key that must match exactly
 * @return an iterator pair over the matches; an empty pair if key is shorter
 *         than minimum_prefix_length or the exact prefix is not in the automaton
 */
MatchIterator::MatchIteratorPair GetNear(const std::string& key, size_t minimum_prefix_length) {
  // A key shorter than the required exact prefix cannot match. Bail out here:
  // otherwise key[i] below would read past the end of the string and
  // key.substr(minimum_prefix_length) would throw std::out_of_range.
  if (key.size() < minimum_prefix_length) {
    return MatchIterator::EmptyIteratorPair();
  }

  uint64_t state = fsa_->GetStartState();

  TRACE("GetNear %s, matching prefix first", key.substr(0, minimum_prefix_length).c_str());
  for (size_t i = 0; i < minimum_prefix_length; ++i) {
    state = fsa_->TryWalkTransition(state, key[i]);

    // a zero state means there is no such transition: the prefix is unknown
    if (!state) {
      return MatchIterator::EmptyIteratorPair();
    }
  }

  std::vector<unsigned char> traversal_stack;
  traversal_stack.reserve(20);

  TRACE("Match prefix, doing near traversal now start state: %ld, remaining key: %s", state, key.substr(minimum_prefix_length).c_str());

  // data which is required for the callback as well
  struct delegate_payload {
    delegate_payload(fsa::NearStateTraverser&& t,
                     std::vector<unsigned char>& stack)
        : traverser(std::move(t)),
          traversal_stack(std::move(stack)) {
    }

    fsa::NearStateTraverser traverser;
    std::vector<unsigned char> traversal_stack;
    // depth recorded at the last match; states at or above it end the iteration
    size_t matched_depth = 0;
  };

  auto payload = fsa::traversal::TraversalPayload<fsa::traversal::NearTransition>(key.substr(minimum_prefix_length));

  // shared ownership: the callback outlives this function, so it keeps the
  // traverser and the label stack alive
  auto data = std::make_shared<delegate_payload>(
      fsa::NearStateTraverser(fsa_, state, payload),
      traversal_stack);

  // key is captured by value because the callback is invoked after GetNear returned
  auto tfunc =
      [data, key, minimum_prefix_length] () {
        TRACE("prefix completion callback called");

        for (;;) {
          unsigned char label = data->traverser.GetStateLabel();

          // check minimum depth
          if (label && data->traverser.GetDepth() > data->matched_depth) {
            // drop labels of the previously visited branch, then append the current one
            data->traversal_stack.resize(data->traverser.GetDepth()-1);
            data->traversal_stack.push_back(label);
            TRACE("Current depth %d (%d)", minimum_prefix_length + data->traverser.GetDepth() -1, data->traversal_stack.size());
            if (data->traverser.IsFinalState()) {
              // optimize? fill vector upfront?
              std::string match_str = key.substr(0, minimum_prefix_length) + std::string(reinterpret_cast<char*> (&data->traversal_stack[0]), data->traverser.GetDepth());
              TRACE("found final state at depth %d %s", minimum_prefix_length + data->traverser.GetDepth(), match_str.c_str());
              Match m(0,
                      data->traverser.GetDepth() + key.size(),
                      match_str,
                      0,
                      data->traverser.GetFsa(),
                      data->traverser.GetStateValue());

              // advance before returning so the next call resumes behind this match
              data->traverser++;

              // remember the depth
              TRACE("found a match, remember depth, only allow matches with same depth %ld", data->traverser.GetTraversalPayload().exact_depth);
              data->matched_depth = data->traverser.GetTraversalPayload().exact_depth;
              return m;
            }
            data->traverser++;
          } else {
            TRACE("StateTraverser exhausted.");
            return Match();
          }
        }
      };

  return MatchIterator::MakeIteratorPair(tfunc);
}



std::string GetManifestAsString() const {
return fsa_->GetManifestAsString();
}
Expand Down
Loading

0 comments on commit 1680ec6

Please sign in to comment.