Skip to content

Commit

Permalink
some code cleanup and heuristic for packed
Browse files Browse the repository at this point in the history
  • Loading branch information
nicola-cab committed Mar 20, 2024
1 parent 28d4473 commit ea6be06
Show file tree
Hide file tree
Showing 6 changed files with 223 additions and 156 deletions.
2 changes: 1 addition & 1 deletion src/realm/array_direct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ class bf_iterator {
}
field_position = next_field_position;
}

inline void move(size_t index, size_t initial_offset = 0)
{
field_position = initial_offset + index * step_size;
Expand Down
2 changes: 1 addition & 1 deletion src/realm/array_encode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ bool ArrayEncode::always_encode(const Array& origin, Array& arr, bool packed) co
bool ArrayEncode::encode(const Array& origin, Array& arr) const
{
// return false;
// return always_encode(origin, arr, false); // true packed, false flex
// return always_encode(origin, arr, true); // true packed, false flex

std::vector<int64_t> values;
std::vector<size_t> indices;
Expand Down
42 changes: 0 additions & 42 deletions src/realm/array_flex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,6 @@

using namespace realm;

// Heuristic gate for the equal / not-equal searches: the parallel sub-word scan
// is selected only when the element width `w` is strictly below 32 and the
// scanned range holds at least 50 elements.
inline bool run_eq_neq_parallel_subscan(size_t w, size_t range)
{
    const bool width_ok = w < 32;
    const bool range_ok = range >= 50;
    return width_ok && range_ok;
}

// Heuristic gate for the less-than / greater-than searches: the parallel
// sub-word scan is selected only when the element width `w` is strictly below
// 16 and the scanned range holds at least 50 elements.
inline bool run_lt_gt_parallel_subscan(size_t w, size_t range)
{
    const bool width_ok = w < 16;
    const bool range_ok = range >= 50;
    return width_ok && range_ok;
}

void ArrayFlex::init_array(char* h, uint8_t flags, size_t v_width, size_t ndx_width, size_t v_size,
size_t ndx_size) const
{
Expand Down Expand Up @@ -136,38 +126,6 @@ void ArrayFlex::get_chunk(const Array& arr, size_t ndx, int64_t res[8]) const
}
}

bool ArrayFlex::find_eq(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
if(!run_eq_neq_parallel_subscan(arr.m_width, end-start))
return find_linear<Equal>(arr, value, start, end, baseindex, state);
return find_parallel<Equal, Equal>(arr, value, start, end, baseindex, state);
}

bool ArrayFlex::find_neq(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
if(!run_eq_neq_parallel_subscan(arr.m_width, end-start))
return find_linear<NotEqual>(arr, value, start, end, baseindex, state);
return find_parallel<NotEqual, LessEqual>(arr, value, start, end, baseindex, state);
}

bool ArrayFlex::find_lt(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
if(!run_lt_gt_parallel_subscan(arr.m_width, end-start))
return find_linear<Less>(arr, value, start, end, baseindex, state);
return find_parallel<GreaterEqual, Less>(arr, value, start, end, baseindex, state);
}

bool ArrayFlex::find_gt(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
if(!run_lt_gt_parallel_subscan(arr.m_width, end-start))
return find_linear<Greater>(arr, value, start, end, baseindex, state);
return find_parallel<Greater, GreaterEqual>(arr, value, start, end, baseindex, state);
}

int64_t ArrayFlex::sum(const Array& arr, size_t start, size_t end) const
{
const auto& encoder = arr.m_encoder;
Expand Down
138 changes: 102 additions & 36 deletions src/realm/array_flex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,26 @@ class ArrayFlex {
private:
int64_t do_get(uint64_t*, size_t, size_t, size_t, size_t, size_t, uint64_t) const;
bool find_all_match(size_t, size_t, size_t, QueryStateBase*) const;
template<typename Cond>

template <typename Cond>
inline bool find_linear(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
template<typename CondVal, typename CondIndex>

template <typename CondVal, typename CondIndex>
inline bool find_parallel(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;

bool find_eq(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
bool find_neq(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
bool find_lt(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
bool find_gt(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;

inline bool find_eq(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
inline bool find_neq(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
inline bool find_lt(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
inline bool find_gt(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;

inline bool run_eq_neq_parallel_subscan(size_t, size_t, size_t, size_t) const;
inline bool run_lt_gt_parallel_subscan(size_t, size_t, size_t, size_t) const;
};


template<typename Cond>
inline bool ArrayFlex::find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const
template <typename Cond>
inline bool ArrayFlex::find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
REALM_ASSERT_DEBUG(start <= arr.m_size && (end <= arr.m_size || end == size_t(-1)) && start <= end);
Cond c;
Expand All @@ -91,7 +95,7 @@ inline bool ArrayFlex::find_all(const Array& arr, int64_t value, size_t start, s
}

REALM_ASSERT_3(arr.m_width, !=, 0);

if constexpr (std::is_same_v<Equal, Cond>) {
return find_eq(arr, value, start, end, baseindex, state);
}
Expand All @@ -107,35 +111,36 @@ inline bool ArrayFlex::find_all(const Array& arr, int64_t value, size_t start, s
return true;
}

template<typename Cond>
inline bool ArrayFlex::find_linear(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const
template <typename Cond>
inline bool ArrayFlex::find_linear(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
const auto cmp = [](int64_t item, int64_t key){
if constexpr(std::is_same_v<Cond, Equal>)
const auto cmp = [](int64_t item, int64_t key) {
if constexpr (std::is_same_v<Cond, Equal>)
return item == key;
if constexpr(std::is_same_v<Cond, NotEqual>) {
if constexpr (std::is_same_v<Cond, NotEqual>) {
return item != key;
}
if constexpr(std::is_same_v<Cond, Less>) {
if constexpr (std::is_same_v<Cond, Less>) {
return item < key;
}
if constexpr(std::is_same_v<Cond, Greater>) {
if constexpr (std::is_same_v<Cond, Greater>) {
return item > key;
}
REALM_UNREACHABLE();
};

auto data = (uint64_t*)arr.m_data;
const auto& encoder = arr.get_encoder();
const auto offset = encoder.width() * encoder.v_size();
const auto v_width = encoder.width();
const auto ndx_width = encoder.ndx_width();

bf_iterator ndx_it((uint64_t*)data, offset, ndx_width, ndx_width, start);
bf_iterator val_it((uint64_t*)data, 0, v_width, v_width, *ndx_it);
while(start < end) {
while (start < end) {
const auto sv = sign_extend_field_by_mask(encoder.width_mask(), *val_it);
if(cmp(sv, value) && !state->match(start + baseindex))
if (cmp(sv, value) && !state->match(start + baseindex))
return false;
++start;
++ndx_it;
Expand All @@ -151,12 +156,12 @@ inline uint64_t vector_compare(uint64_t MSBs, uint64_t a, uint64_t b)
return find_all_fields_EQ(MSBs, a, b);
if constexpr (std::is_same_v<Cond, NotEqual>)
return find_all_fields_NE(MSBs, a, b);
if constexpr (std::is_same_v<Cond, Greater>){
if(std::is_same_v<Type, WordTypeValue>)

if constexpr (std::is_same_v<Cond, Greater>) {
if (std::is_same_v<Type, WordTypeValue>)
return find_all_fields_signed_GT(MSBs, a, b);
if(std::is_same_v<Type, WordTypeIndex>)
return find_all_fields_unsigned_GT(MSBs,a, b);
if (std::is_same_v<Type, WordTypeIndex>)
return find_all_fields_unsigned_GT(MSBs, a, b);
REALM_UNREACHABLE();
}
if constexpr (std::is_same_v<Cond, GreaterEqual>) {
Expand All @@ -180,38 +185,99 @@ inline uint64_t vector_compare(uint64_t MSBs, uint64_t a, uint64_t b)
return find_all_fields_unsigned_LE(MSBs, a, b);
REALM_UNREACHABLE();
}

}

// Two-stage search using parallel_subword_find:
//   1. scan the first v_size fields of width v_width for a position at which
//      CondVal holds against `value` (result: v_start);
//   2. scan the fields of width ndx_width in [start, end), located `offset`
//      past the beginning of the data, for entries satisfying CondIndex against
//      v_start, reporting each hit as state->match(position + baseindex).
// Returns false as soon as state->match() returns false, true otherwise
// (including when stage 1 finds nothing).
template<typename CondVal, typename CondIndex>
inline bool ArrayFlex::find_parallel(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex, QueryStateBase* state) const
template <typename CondVal, typename CondIndex>
inline bool ArrayFlex::find_parallel(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
QueryStateBase* state) const
{
const auto& encoder = arr.m_encoder;
const auto v_width = encoder.width();
const auto v_size = encoder.v_size();
const auto ndx_width = encoder.ndx_width();
// The index fields are addressed starting after v_size values of v_width each
// (presumably a bit offset -- confirm against parallel_subword_find).
const auto offset = v_size * v_width;
uint64_t* data = (uint64_t*)arr.m_data;

// Stage 1: search the value fields [0, v_size) for `value`.
auto MSBs = encoder.msb();
auto search_vector = populate(v_width, value);
auto v_start = parallel_subword_find(vector_compare<CondVal>, data, 0, v_width, MSBs, search_vector, 0, v_size);
// v_start == v_size is treated as "no value field satisfied CondVal": nothing to report.
if (v_start == v_size)
return true;

// Stage 2: search the index fields in [start, end) against v_start.
MSBs = encoder.ndx_msb();
search_vector = populate(ndx_width, v_start);
while (start < end) {
start =
parallel_subword_find(vector_compare<CondIndex, WordTypeIndex>, data, offset, ndx_width, MSBs, search_vector, start, end);
start = parallel_subword_find(vector_compare<CondIndex, WordTypeIndex>, data, offset, ndx_width, MSBs,
search_vector, start, end);
// A result below `end` is a hit; stop early if the query state rejects further matches.
if (start < end)
if (!state->match(start + baseindex))
return false;

// Resume the scan right after the position just examined.
++start;
}
return true;
}

// Equality search over [start, end). The heuristic is fed the array width, the
// encoder's value count and index width, and the length of the requested range;
// when it accepts, find_parallel is used, otherwise find_linear.
inline bool ArrayFlex::find_eq(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
                               QueryStateBase* state) const
{
    const auto value_width = arr.m_width;
    const auto value_count = arr.get_encoder().v_size();
    const auto index_width = arr.get_encoder().ndx_width();
    const auto index_count = end - start;
    if (run_eq_neq_parallel_subscan(value_width, value_count, index_width, index_count))
        return find_parallel<Equal, Equal>(arr, value, start, end, baseindex, state);
    return find_linear<Equal>(arr, value, start, end, baseindex, state);
}

// Not-equal search over [start, end). The heuristic is fed the array width, the
// encoder's value count and index width, and the length of the requested range;
// when it accepts, find_parallel is used, otherwise find_linear.
inline bool ArrayFlex::find_neq(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
                                QueryStateBase* state) const
{
    const auto value_width = arr.m_width;
    const auto value_count = arr.get_encoder().v_size();
    const auto index_width = arr.get_encoder().ndx_width();
    const auto index_count = end - start;
    if (run_eq_neq_parallel_subscan(value_width, value_count, index_width, index_count))
        return find_parallel<NotEqual, LessEqual>(arr, value, start, end, baseindex, state);
    return find_linear<NotEqual>(arr, value, start, end, baseindex, state);
}

// Less-than search over [start, end). The heuristic is fed the array width, the
// encoder's value count and index width, and the length of the requested range;
// when it accepts, find_parallel is used, otherwise find_linear.
inline bool ArrayFlex::find_lt(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
                               QueryStateBase* state) const
{
    const auto value_width = arr.m_width;
    const auto value_count = arr.get_encoder().v_size();
    const auto index_width = arr.get_encoder().ndx_width();
    const auto index_count = end - start;
    if (run_lt_gt_parallel_subscan(value_width, value_count, index_width, index_count))
        return find_parallel<GreaterEqual, Less>(arr, value, start, end, baseindex, state);
    return find_linear<Less>(arr, value, start, end, baseindex, state);
}

// Greater-than search over [start, end). The heuristic is fed the array width,
// the encoder's value count and index width, and the length of the requested
// range; when it accepts, find_parallel is used, otherwise find_linear.
inline bool ArrayFlex::find_gt(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
                               QueryStateBase* state) const
{
    const auto value_width = arr.m_width;
    const auto value_count = arr.get_encoder().v_size();
    const auto index_width = arr.get_encoder().ndx_width();
    const auto index_count = end - start;
    if (run_lt_gt_parallel_subscan(value_width, value_count, index_width, index_count))
        return find_parallel<Greater, GreaterEqual>(arr, value, start, end, baseindex, state);
    return find_linear<Greater>(arr, value, start, end, baseindex, state);
}

// Heuristic gate for the equal / not-equal searches: accepts only when both
// widths are strictly below 32 and both ranges hold at least 16 entries.
inline bool ArrayFlex::run_eq_neq_parallel_subscan(size_t v_width, size_t v_range, size_t ndx_width,
                                                   size_t ndx_range) const
{
    const bool widths_ok = v_width < 32 && ndx_width < 32;
    const bool ranges_ok = v_range >= 16 && ndx_range >= 16;
    return widths_ok && ranges_ok;
}

// Heuristic gate for the less-than / greater-than searches: accepts only when
// both widths are strictly below 16 and both ranges hold at least 16 entries.
inline bool ArrayFlex::run_lt_gt_parallel_subscan(size_t v_width, size_t v_range, size_t ndx_width,
                                                  size_t ndx_range) const
{
    const bool widths_ok = v_width < 16 && ndx_width < 16;
    const bool ranges_ok = v_range >= 16 && ndx_range >= 16;
    return widths_ok && ranges_ok;
}


} // namespace realm
#endif // REALM_ARRAY_COMPRESS_HPP
72 changes: 0 additions & 72 deletions src/realm/array_packed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@

using namespace realm;

template bool ArrayPacked::find_all<Equal>(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
template bool ArrayPacked::find_all<NotEqual>(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
template bool ArrayPacked::find_all<Greater>(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;
template bool ArrayPacked::find_all<Less>(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*) const;


void ArrayPacked::init_array(char* h, uint8_t flags, size_t v_width, size_t v_size) const
{
using Encoding = NodeHeader::Encoding;
Expand Down Expand Up @@ -112,72 +106,6 @@ void ArrayPacked::get_chunk(const Array& arr, size_t ndx, int64_t res[8]) const
}
}

// Compile-time dispatch from a query condition type to the matching
// find_all_fields_* word comparison. `MSBs`, `a` and `b` are forwarded
// unchanged; Greater and Less use the signed variants.
//
// Only Equal, NotEqual, Greater and Less are supported. Any other condition is
// rejected at compile time instead of silently flowing off the end of a
// non-void function (undefined behaviour).
template <typename Cond>
uint64_t vector_compare(uint64_t MSBs, uint64_t a, uint64_t b)
{
    static_assert(std::is_same_v<Cond, Equal> || std::is_same_v<Cond, NotEqual> ||
                      std::is_same_v<Cond, Greater> || std::is_same_v<Cond, Less>,
                  "vector_compare: unsupported condition type");

    if constexpr (std::is_same_v<Cond, Equal>)
        return find_all_fields_EQ(MSBs, a, b);
    else if constexpr (std::is_same_v<Cond, NotEqual>)
        return find_all_fields_NE(MSBs, a, b);
    else if constexpr (std::is_same_v<Cond, Greater>)
        return find_all_fields_signed_GT(MSBs, a, b);
    else
        return find_all_fields_signed_LT(MSBs, a, b); // Cond is Less (guaranteed by the static_assert)
}

// Reports every position in [start, end) whose element satisfies Cond against
// `value`, by calling state->match(position + baseindex). An `end` equal to
// npos means "up to arr.m_size". Returns false as soon as state->match()
// returns false, true otherwise.
template <typename Cond>
bool ArrayPacked::find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
                           QueryStateBase* state) const
{
    REALM_ASSERT_DEBUG(start <= arr.m_size && (end <= arr.m_size || end == size_t(-1)) && start <= end);
    Cond cond;

    if (end == npos)
        end = arr.m_size;

    // Nothing to scan: the range is empty or begins at/after the last element.
    if (start >= arr.m_size || start >= end)
        return true;

    const auto lower = arr.m_lbound;
    const auto upper = arr.m_ubound;

    // The array bounds already tell us that no element can satisfy the condition.
    if (!cond.can_match(value, lower, upper))
        return true;

    // The array bounds already tell us that every element satisfies the condition.
    if (cond.will_match(value, lower, upper))
        return find_all_match(start, end, baseindex, state);

    REALM_ASSERT_3(arr.m_width, !=, 0);

    // NOTE: this routine is performance critical, since it largely determines how fast queries run.
    //
    // Rather than extracting one element at a time (mask + shift + compare), the search value is
    // replicated across a 64-bit word and compared against all the elements stored in a word of the
    // array in one step. For example, with a width of 4 bits (the value 6 is stored as 0110: three
    // value bits plus a sign bit) a single 64-bit word holds 16 elements that are all checked together.
    //
    // In the packed format this parallel sub-word search is used for every width; according to the
    // original author it pays off for widths >= 32 as well.
    const auto MSBs = arr.get_encoder().msb();
    const auto search_vector = populate(arr.m_width, value);
    for (size_t pos = start; pos < end; ++pos) {
        // Jump to the next candidate; a result at or past `end` means there are no more matches.
        pos = parallel_subword_find(vector_compare<Cond>, (const uint64_t*)arr.m_data, 0, arr.m_width, MSBs,
                                    search_vector, pos, end);
        if (pos < end && !state->match(pos + baseindex))
            return false;
    }
    return true;
}

bool ArrayPacked::find_all_match(size_t start, size_t end, size_t baseindex, QueryStateBase* state) const
{
REALM_ASSERT_DEBUG(state->match_count() < state->limit());
Expand Down
Loading

0 comments on commit ea6be06

Please sign in to comment.