Skip to content

Commit

Permalink
bcachefs: Eytzinger accumulation for accounting keys
Browse files Browse the repository at this point in the history
The btree write buffer takes as input keys from the journal, sorts them,
deduplicates them, and flushes them back to the btree in sorted order.

The disk space accounting rewrite is moving accounting to normal btree
keys, with update (in this case deltas) accumulated in the write buffer
and then flushed to the btree; but this is going to increase the number
of keys handled by the write buffer by perhaps as much as a factor of
3x-5x.

The overhead from copying around and sorting this many keys would cause
a significant performance regression, but: there is huge locality in
updates to accounting keys that we can take advantage of.

Instead of appending accounting keys to the list of keys to be sorted,
this patch adds an eytzinger search tree of recently seen accounting
keys. We look up the accounting key in the eytzinger search tree and
apply the delta directly, adding it if it doesn't exist, and
periodically prune the eytzinger tree of unused entries.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
  • Loading branch information
Kent Overstreet committed May 29, 2024
1 parent 8841e64 commit 03af59e
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 8 deletions.
54 changes: 52 additions & 2 deletions fs/bcachefs/btree_write_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,29 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
}

static void wb_accounting_sort(struct btree_write_buffer *wb)
{
eytzinger0_sort(wb->accounting.data, wb->accounting.nr,
sizeof(wb->accounting.data[0]),
wb_key_cmp, NULL);
}

int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree,
struct bkey_i_accounting *k)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_write_buffered_key new = { .btree = btree };

bkey_copy(&new.k, &k->k_i);

int ret = darray_push(&wb->accounting, new);
if (ret)
return ret;

wb_accounting_sort(wb);
return 0;
}

int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
Expand Down Expand Up @@ -642,11 +665,35 @@ void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_ke

bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
bch2_btree_write_buffer_journal_flush);

darray_for_each(wb->accounting, i)
memset(&i->k.v, 0, bkey_val_bytes(&i->k.k));
}

void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
unsigned live_accounting_keys = 0;
int ret = 0;

darray_for_each(wb->accounting, i)
if (!bch2_accounting_key_is_zero(bkey_i_to_s_c_accounting(&i->k))) {
i->journal_seq = dst->seq;
live_accounting_keys++;
ret = __bch2_journal_key_to_wb(c, dst, i->btree, &i->k);
if (ret)
break;
}

if (live_accounting_keys * 2 < wb->accounting.nr) {
struct btree_write_buffered_key *dst = wb->accounting.data;

darray_for_each(wb->accounting, src)
if (!bch2_accounting_key_is_zero(bkey_i_to_s_c_accounting(&src->k)))
*dst++ = *src;
wb->accounting.nr = dst - wb->accounting.data;
wb_accounting_sort(wb);
}

if (!dst->wb->keys.nr)
bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
Expand All @@ -659,6 +706,8 @@ void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys
if (dst->wb == &wb->flushing)
mutex_unlock(&wb->flushing.lock);
mutex_unlock(&wb->inc.lock);

return ret;
}

static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
Expand All @@ -682,7 +731,7 @@ static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_bu
buf->need_flush_to_write_buffer = false;
spin_unlock(&c->journal.lock);
out:
bch2_journal_keys_to_write_buffer_end(c, &dst);
ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret;
return ret;
}

Expand Down Expand Up @@ -714,6 +763,7 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
!bch2_journal_error(&c->journal));

darray_exit(&wb->accounting);
darray_exit(&wb->sorted);
darray_exit(&wb->flushing.keys);
darray_exit(&wb->inc.keys);
Expand Down
49 changes: 45 additions & 4 deletions fs/bcachefs/btree_write_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define _BCACHEFS_BTREE_WRITE_BUFFER_H

#include "bkey.h"
#include "disk_accounting.h"

static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
{
Expand All @@ -29,16 +30,45 @@ struct journal_keys_to_wb {
u64 seq;
};

static inline int wb_key_cmp(const void *_l, const void *_r)
{
const struct btree_write_buffered_key *l = _l;
const struct btree_write_buffered_key *r = _r;

return cmp_int(l->btree, r->btree) ?: bpos_cmp(l->k.k.p, r->k.k.p);
}

int bch2_accounting_key_to_wb_slowpath(struct bch_fs *,
enum btree_id, struct bkey_i_accounting *);

static inline int bch2_accounting_key_to_wb(struct bch_fs *c,
enum btree_id btree, struct bkey_i_accounting *k)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_write_buffered_key search;
search.btree = btree;
search.k.k.p = k->k.p;

unsigned idx = eytzinger0_find(wb->accounting.data, wb->accounting.nr,
sizeof(wb->accounting.data[0]),
wb_key_cmp, &search);

if (idx >= wb->accounting.nr)
return bch2_accounting_key_to_wb_slowpath(c, btree, k);

struct bkey_i_accounting *dst = bkey_i_to_accounting(&wb->accounting.data[idx].k);
bch2_accounting_accumulate(dst, accounting_i_to_s_c(k));
return 0;
}

int bch2_journal_key_to_wb_slowpath(struct bch_fs *,
struct journal_keys_to_wb *,
enum btree_id, struct bkey_i *);

static inline int bch2_journal_key_to_wb(struct bch_fs *c,
static inline int __bch2_journal_key_to_wb(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
EBUG_ON(!dst->seq);

if (unlikely(!dst->room))
return bch2_journal_key_to_wb_slowpath(c, dst, btree, k);

Expand All @@ -51,8 +81,19 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c,
return 0;
}

static inline int bch2_journal_key_to_wb(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
EBUG_ON(!dst->seq);

return k->k.type == KEY_TYPE_accounting
? bch2_accounting_key_to_wb(c, btree, bkey_i_to_accounting(k))
: __bch2_journal_key_to_wb(c, dst, btree, k);
}

void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64);
void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);
int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);

int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
Expand Down
2 changes: 2 additions & 0 deletions fs/bcachefs/btree_write_buffer_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct btree_write_buffer {
struct btree_write_buffer_keys inc;
struct btree_write_buffer_keys flushing;
struct work_struct flush_work;

DARRAY(struct btree_write_buffered_key) accounting;
};

#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
10 changes: 8 additions & 2 deletions fs/bcachefs/journal_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1848,8 +1848,14 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
}
}

if (wb.wb)
bch2_journal_keys_to_write_buffer_end(c, &wb);
if (wb.wb) {
ret = bch2_journal_keys_to_write_buffer_end(c, &wb);
if (ret) {
bch2_fs_fatal_error(c, "error flushing journal keys to btree write buffer: %s",
bch2_err_str(ret));
return ret;
}
}

spin_lock(&c->journal.lock);
w->need_flush_to_write_buffer = false;
Expand Down

0 comments on commit 03af59e

Please sign in to comment.