From 87a3e08121cbb59a314fc727b4f1202782ed56b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 28 Aug 2024 10:00:56 -0400 Subject: [PATCH] bcachefs: Switch to memalloc_flags_do() for vmalloc allocations vmalloc doesn't correctly respect gfp flags - gfp flags aren't used for pte allocation, so doing vmalloc/kvmalloc allocations with reclaim unsafe locks is a potential deadlock. Note that we also want to use PF_MEMALLOC_NORECLAIM, not PF_MEMALLOC_NOFS, because when we're doing allocations with btree locks held we have a fallback available - drop locks and do a normal GFP_KERNEL allocation. We don't want to be invoking reclaim with btree locks held at all, since these are big shared locks and overalll system performance is sensitive to hold times. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 5 ++-- fs/bcachefs/btree_cache.c | 3 ++- fs/bcachefs/btree_iter.h | 48 ++++++++++++++++++++--------------- fs/bcachefs/btree_key_cache.c | 10 ++++---- fs/bcachefs/ec.c | 12 ++++----- fs/bcachefs/fs.c | 8 ------ 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 87f1be9d4db46..1def61875a6fd 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -137,7 +137,7 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, return NULL; acl = allocate_dropping_locks(trans, ret, - posix_acl_alloc(count, _gfp)); + posix_acl_alloc(count, GFP_KERNEL)); if (!acl) return ERR_PTR(-ENOMEM); if (ret) { @@ -427,7 +427,8 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, if (ret) goto err; - ret = allocate_dropping_locks_errcode(trans, __posix_acl_chmod(&acl, _gfp, mode)); + ret = allocate_dropping_locks_errcode(trans, + __posix_acl_chmod(&acl, GFP_KERNEL, mode)); if (ret) goto err; diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 6e4afb2b54413..7b951b2733068 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -804,7 +804,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea mutex_unlock(&bc->lock); - if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) { + if (memalloc_flags_do(PF_MEMALLOC_NORECLAIM, + btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))) { bch2_trans_unlock(trans); if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) goto err; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 78e63ad7d380e..aec89e001ddc7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -6,6 +6,8 @@ #include "btree_types.h" #include "trace.h" +#include + void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); @@ -871,29 +873,33 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); (_do) ?: bch2_trans_relock(_trans); \ }) -#define allocate_dropping_locks_errcode(_trans, _do) \ -({ \ - gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ - int _ret = _do; \ - \ - if (bch2_err_matches(_ret, ENOMEM)) { \ - _gfp = GFP_KERNEL; \ - _ret = drop_locks_do(_trans, _do); \ - } \ - _ret; \ +#define memalloc_flags_do(_flags, _do) \ +({ \ + unsigned _saved_flags = memalloc_flags_save(_flags); \ + typeof(_do) _ret = _do; \ + memalloc_noreclaim_restore(_saved_flags); \ + _ret; \ }) -#define allocate_dropping_locks(_trans, _ret, _do) \ -({ \ - gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ - typeof(_do) _p = _do; \ - \ - _ret = 0; \ - if (unlikely(!_p)) { \ - _gfp = GFP_KERNEL; \ - _ret = drop_locks_do(_trans, ((_p = _do), 0)); \ - } \ - _p; \ +#define allocate_dropping_locks_errcode(_trans, _do) \ +({ \ + int _ret = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, _do);\ + \ + if (bch2_err_matches(_ret, ENOMEM)) { \ + _ret = drop_locks_do(_trans, _do); \ + } \ + _ret; \ +}) + +#define allocate_dropping_locks(_trans, _ret, _do) \ +({ \ + typeof(_do) _p = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, _do);\ + \ + _ret = 0; \ + if (unlikely(!_p)) { \ + _ret = drop_locks_do(_trans, ((_p = _do), 0)); \ + } \ + _p; \ }) #define bch2_trans_run(_c, _do) \ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 244610b1d0b59..d91f249735585 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -116,14 +116,14 @@ static void bkey_cached_free(struct btree_key_cache *bc, this_cpu_inc(*bc->nr_pending); } -static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp) +static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s) { - gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE; + gfp_t gfp = GFP_KERNEL|__GFP_ACCOUNT|__GFP_RECLAIMABLE; struct bkey_cached *ck = kmem_cache_zalloc(bch2_key_cache, gfp); if (unlikely(!ck)) return NULL; - ck->k = kmalloc(key_u64s * sizeof(u64), gfp); + ck->k = kmalloc(key_u64s * sizeof(u64), GFP_KERNEL); if (unlikely(!ck->k)) { kmem_cache_free(bch2_key_cache, ck); return NULL; @@ -147,7 +147,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k goto lock; ck = allocate_dropping_locks(trans, ret, - __bkey_cached_alloc(key_u64s, _gfp)); + __bkey_cached_alloc(key_u64s)); if (ret) { if (ck) kfree(ck->k); @@ -241,7 +241,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path * mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED); struct bkey_i *new_k = allocate_dropping_locks(trans, ret, - kmalloc(key_u64s * sizeof(u64), _gfp)); + kmalloc(key_u64s * sizeof(u64), GFP_KERNEL)); if (unlikely(!new_k)) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(ck->key.btree_id), key_u64s); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 1587c6e1866ae..c26319475876a 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -907,12 +907,12 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, /* stripe bucket accounting: */ -static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) +static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx) { ec_stripes_heap n, *h = &c->ec_stripes_heap; if (idx >= h->size) { - if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp)) + if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), GFP_KERNEL)) return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; mutex_lock(&c->ec_stripes_heap_lock); @@ -926,11 +926,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) free_heap(&n); } - if (!genradix_ptr_alloc(&c->stripes, idx, gfp)) + if (!genradix_ptr_alloc(&c->stripes, idx, GFP_KERNEL)) return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; if (c->gc_pos.phase != GC_PHASE_not_running && - !genradix_ptr_alloc(&c->gc_stripes, idx, gfp)) + !genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL)) return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; return 0; @@ -940,7 +940,7 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans, struct btree_iter *iter) { return allocate_dropping_locks_errcode(trans, - __ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp)); + __ec_stripe_mem_alloc(trans->c, iter->pos.offset)); } /* @@ -2333,7 +2333,7 @@ int bch2_stripes_read(struct bch_fs *c) if (k.k->type != KEY_TYPE_stripe) continue; - ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); + ret = __ec_stripe_mem_alloc(c, k.k->p.offset); if (ret) break; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 1aee5bafaae54..e774cb3e6b1ae 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -278,14 +278,6 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, } } -#define memalloc_flags_do(_flags, _do) \ -({ \ - unsigned _saved_flags = memalloc_flags_save(_flags); \ - typeof(_do) _ret = _do; \ - memalloc_noreclaim_restore(_saved_flags); \ - _ret; \ -}) - static struct inode *bch2_alloc_inode(struct super_block *sb) { BUG();