From cdc5c0b55d62b4a086d5d17965db69203311a11f Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Thu, 5 Sep 2024 19:37:56 -0400
Subject: [PATCH] bcachefs: split up btree cache counters for live, freeable

this is prep for introducing a second live list and shrinker for pinned
nodes

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/btree_cache.c           | 58 ++++++++++++++++++-----------
 fs/bcachefs/btree_gc.c              |  3 +-
 fs/bcachefs/btree_io.c              |  4 +-
 fs/bcachefs/btree_types.h           |  4 +-
 fs/bcachefs/btree_update_interior.c |  6 ++-
 fs/bcachefs/journal_reclaim.c       |  4 +-
 6 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 1d7d4ca84e3f..2c2dec0c1b1b 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -49,7 +49,7 @@ void bch2_recalc_btree_reserve(struct bch_fs *c)
 
 static inline size_t btree_cache_can_free(struct btree_cache *bc)
 {
-	return max_t(int, 0, bc->nr_used - bc->nr_reserve);
+	return max_t(int, 0, bc->nr_live + bc->nr_freeable - bc->nr_reserve);
 }
 
 static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
@@ -64,6 +64,8 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 {
 	struct btree_cache *bc = &c->btree_cache;
 
+	BUG_ON(btree_node_hashed(b));
+
 	/*
 	 * This should really be done in slub/vmalloc, but we're using the
 	 * kmalloc_large() path, so we're working around a slub bug by doing
@@ -87,7 +89,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 #endif
 	b->aux_data = NULL;
 
-	bc->nr_used--;
+	bc->nr_freeable--;
 
 	btree_node_to_freedlist(bc, b);
 }
@@ -167,7 +169,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
 
 	bch2_btree_lock_init(&b->c, 0);
 
-	bc->nr_used++;
+	bc->nr_freeable++;
 	list_add(&b->list, &bc->freeable);
 	return b;
 }
@@ -186,6 +188,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
 
 void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
 {
+	lockdep_assert_held(&bc->lock);
 	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
 	BUG_ON(ret);
 
@@ -195,6 +198,10 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
 
 	if (b->c.btree_id < BTREE_ID_NR)
 		--bc->nr_by_btree[b->c.btree_id];
+
+	bc->nr_live--;
+	bc->nr_freeable++;
+	list_move(&b->list, &bc->freeable);
 }
 
 int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
@@ -204,23 +211,25 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
 	int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params);
-	if (!ret && b->c.btree_id < BTREE_ID_NR)
+	if (ret)
+		return ret;
+
+	if (b->c.btree_id < BTREE_ID_NR)
 		bc->nr_by_btree[b->c.btree_id]++;
-	return ret;
+
+	bc->nr_live++;
+	bc->nr_freeable--;
+	list_move_tail(&b->list, &bc->live);
+	return 0;
 }
 
 int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
 				unsigned level, enum btree_id id)
 {
-	int ret;
-
 	b->c.level	= level;
 	b->c.btree_id	= id;
 
 	mutex_lock(&bc->lock);
-	ret = __bch2_btree_node_hash_insert(bc, b);
-	if (!ret)
-		list_add_tail(&b->list, &bc->live);
+	int ret = __bch2_btree_node_hash_insert(bc, b);
 	mutex_unlock(&bc->lock);
 
 	return ret;
@@ -402,7 +411,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 	unsigned i, flags;
 	unsigned long ret = SHRINK_STOP;
 	bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >=
-		bc->nr_used * 3 / 4;
+		(bc->nr_live + bc->nr_freeable) * 3 / 4;
 
 	if (bch2_btree_shrinker_disabled)
 		return SHRINK_STOP;
@@ -451,11 +460,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
 			bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
 			--touched;;
 		} else if (!btree_node_reclaim(c, b, true)) {
+			bch2_btree_node_hash_remove(bc, b);
+
 			freed++;
 			btree_node_data_free(c, b);
 			bc->nr_freed++;
 
-			bch2_btree_node_hash_remove(bc, b);
 			six_unlock_write(&b->c.lock);
 			six_unlock_intent(&b->c.lock);
@@ -506,7 +516,7 @@ static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
 void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
 	struct btree_cache *bc = &c->btree_cache;
-	struct btree *b;
+	struct btree *b, *t;
 	unsigned i, flags;
 
 	shrinker_free(bc->shrink);
@@ -527,11 +537,10 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 			list_add(&r->b->list, &bc->live);
 	}
 
-	list_splice(&bc->freeable, &bc->live);
-
-	while (!list_empty(&bc->live)) {
-		b = list_first_entry(&bc->live, struct btree, list);
+	list_for_each_entry_safe(b, t, &bc->live, list)
+		bch2_btree_node_hash_remove(bc, b);
 
+	list_for_each_entry_safe(b, t, &bc->freeable, list) {
 		BUG_ON(btree_node_read_in_flight(b) ||
 		       btree_node_write_in_flight(b));
@@ -543,8 +552,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
 	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
 
-	while (!list_empty(&bc->freed_nonpcpu)) {
-		b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
+	list_for_each_entry_safe(b, t, &bc->freed_nonpcpu, list) {
 		list_del(&b->list);
 		six_lock_exit(&b->c.lock);
 		kfree(b);
@@ -553,6 +561,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 	mutex_unlock(&bc->lock);
 	memalloc_nofs_restore(flags);
 
+	for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
+		BUG_ON(bc->nr_by_btree[i]);
+	BUG_ON(bc->nr_live);
+	BUG_ON(bc->nr_freeable);
+
 	if (bc->table_init_done)
 		rhashtable_destroy(&bc->table);
@@ -740,7 +753,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
 	}
 
 	mutex_lock(&bc->lock);
-	bc->nr_used++;
+	bc->nr_freeable++;
 got_mem:
 	mutex_unlock(&bc->lock);
@@ -1281,8 +1294,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
 	BUG_ON(btree_node_dirty(b));
 
 	mutex_lock(&bc->lock);
-	btree_node_data_free(c, b);
 	bch2_btree_node_hash_remove(bc, b);
+	btree_node_data_free(c, b);
 	mutex_unlock(&bc->lock);
 out:
 	six_unlock_write(&b->c.lock);
@@ -1375,7 +1388,8 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc
 	if (!out->nr_tabstops)
 		printbuf_tabstop_push(out, 32);
 
-	prt_btree_cache_line(out, c, "total:", bc->nr_used);
+	prt_btree_cache_line(out, c, "nr_live:", bc->nr_live);
+	prt_btree_cache_line(out, c, "nr_freeable:", bc->nr_freeable);
 	prt_btree_cache_line(out, c, "nr dirty:", atomic_long_read(&bc->nr_dirty));
 	prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
 	prt_newline(out);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 120ffd68ab0a..b5e0692f03c6 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -549,9 +549,8 @@ int bch2_check_topology(struct bch_fs *c)
 		six_unlock_read(&b->c.lock);
 
 		if (ret == DROP_THIS_NODE) {
-			bch2_btree_node_hash_remove(&c->btree_cache, b);
 			mutex_lock(&c->btree_cache.lock);
-			list_move(&b->list, &c->btree_cache.freeable);
+			bch2_btree_node_hash_remove(&c->btree_cache, b);
 			mutex_unlock(&c->btree_cache.lock);
 
 			r->b = NULL;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index aad89ba16b9b..cb48a9477514 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1749,10 +1749,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
 	bch2_btree_node_read(trans, b, true);
 
 	if (btree_node_read_error(b)) {
-		bch2_btree_node_hash_remove(&c->btree_cache, b);
-
 		mutex_lock(&c->btree_cache.lock);
-		list_move(&b->list, &c->btree_cache.freeable);
+		bch2_btree_node_hash_remove(&c->btree_cache, b);
 		mutex_unlock(&c->btree_cache.lock);
 
 		ret = -BCH_ERR_btree_node_read_error;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 806d27b7f41b..ee3df2a486cc 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -179,8 +179,8 @@ struct btree_cache {
 	struct list_head	freed_pcpu;
 	struct list_head	freed_nonpcpu;
 
-	/* Number of elements in live + freeable lists */
-	size_t			nr_used;
+	size_t			nr_live;
+	size_t			nr_freeable;
 	size_t			nr_reserve;
 	size_t			nr_by_btree[BTREE_ID_NR];
 	atomic_long_t		nr_dirty;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 24e445574ab8..18494a662e0a 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -251,8 +251,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
 	unsigned i, level = b->c.level;
 
 	bch2_btree_node_lock_write_nofail(trans, path, &b->c);
+
+	mutex_lock(&c->btree_cache.lock);
 	bch2_btree_node_hash_remove(&c->btree_cache, b);
+	mutex_unlock(&c->btree_cache.lock);
+
 	__btree_node_free(trans, b);
+
 	six_unlock_write(&b->c.lock);
 
 	mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
@@ -284,7 +289,6 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
 	clear_btree_node_need_write(b);
 
 	mutex_lock(&c->btree_cache.lock);
-	list_del_init(&b->list);
 	bch2_btree_node_hash_remove(&c->btree_cache, b);
 	mutex_unlock(&c->btree_cache.lock);
 
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 9794b6d214cd..f8e045982753 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -681,7 +681,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
 		if (j->watermark != BCH_WATERMARK_stripe)
 			min_nr = 1;
 
-		if (atomic_long_read(&c->btree_cache.nr_dirty) * 2 > c->btree_cache.nr_used)
+		if (atomic_long_read(&c->btree_cache.nr_dirty) * 2 > c->btree_cache.nr_live)
 			min_nr = 1;
 
 		min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
@@ -690,7 +690,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
 		direct, kicked,
 		min_nr, min_key_cache,
 		atomic_long_read(&c->btree_cache.nr_dirty),
-		c->btree_cache.nr_used,
+		c->btree_cache.nr_live,
 		atomic_long_read(&c->btree_key_cache.nr_dirty),
 		atomic_long_read(&c->btree_key_cache.nr_keys));
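
A note for readers following the bookkeeping change: the patch replaces the single nr_used counter with nr_live/nr_freeable and moves the list updates into __bch2_btree_node_hash_insert() / bch2_btree_node_hash_remove(), so a node is always on exactly one of the two lists and the counters track which one. Below is a minimal userspace sketch of that pattern, not bcachefs code; the struct and function names are hypothetical stand-ins, and the hash table is modeled by a single flag.

```c
/*
 * Toy model of the live/freeable split (hypothetical names, userspace C).
 * Build: cc -o cache_model cache_model.c && ./cache_model
 */
#include <assert.h>
#include <stdio.h>

struct node {
	struct node *next;
	int hashed;		/* stands in for membership in the hash table */
};

struct cache {
	struct node *live;	/* nodes visible through the hash table */
	struct node *freeable;	/* allocated but unhashed nodes */
	size_t nr_live;
	size_t nr_freeable;
};

static void push(struct node **list, struct node *n)
{
	n->next = *list;
	*list = n;
}

static void remove_from(struct node **list, struct node *n)
{
	for (struct node **p = list; *p; p = &(*p)->next)
		if (*p == n) {
			*p = n->next;
			return;
		}
	assert(0 && "node not on expected list");
}

/* Analogue of __bch2_btree_node_hash_insert(): freeable -> live */
static void cache_insert(struct cache *c, struct node *n)
{
	assert(!n->hashed);
	remove_from(&c->freeable, n);
	push(&c->live, n);
	n->hashed = 1;
	c->nr_live++;
	c->nr_freeable--;
}

/* Analogue of bch2_btree_node_hash_remove(): live -> freeable */
static void cache_remove(struct cache *c, struct node *n)
{
	assert(n->hashed);
	remove_from(&c->live, n);
	push(&c->freeable, n);
	n->hashed = 0;
	c->nr_live--;
	c->nr_freeable++;
}

int main(void)
{
	struct cache c = { 0 };
	struct node n = { 0 };

	/* A fresh allocation starts on the freeable list, like __bch2_btree_node_mem_alloc(). */
	push(&c.freeable, &n);
	c.nr_freeable++;

	cache_insert(&c, &n);	/* hashed: now counted in nr_live */
	cache_remove(&c, &n);	/* unhashed: back to nr_freeable  */

	/* The invariant the shrinker and btree_cache_can_free() rely on. */
	assert(c.nr_live + c.nr_freeable == 1);
	printf("nr_live=%zu nr_freeable=%zu\n", c.nr_live, c.nr_freeable);
	return 0;
}
```

The reordering in bch2_btree_node_evict(), the shrinker scan, and the exit path follows from the same model: with the new BUG_ON(btree_node_hashed(b)) in btree_node_data_free(), a node must be removed from the hash table (and thereby moved back to the freeable list) before its data is freed.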