From: Vitaly Wool <vitaly.wool@konsulko.com>
To: Mike Galbraith <efault@gmx.de>
Cc: Linux-MM <linux-mm@kvack.org>,
Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH] z3fold: stricter locking and more careful reclaim
Date: Sat, 5 Dec 2020 12:38:51 +0100 [thread overview]
Message-ID: <CAM4kBB++pMOF75zcaqX+_Fb9KptR_OWCe8dGkMMxS-jP4M6-Mg@mail.gmail.com> (raw)
In-Reply-To: <3b8e33c3ba44e7ec4c090d68972015ef52ba1e7e.camel@gmx.de>
On Fri, Dec 4, 2020 at 10:47 PM Mike Galbraith <efault@gmx.de> wrote:
>
> On Fri, 2020-12-04 at 22:04 +0200, Vitaly Wool wrote:
> > Use temporary slots in reclaim function to avoid possible race when
> > freeing those.
> >
> > While at it, make sure we check CLAIMED flag under page lock in the
> > reclaim function to make sure we are not racing with z3fold_alloc().
> >
> > Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
>
> FYI, with this applied, ltp::mm still crashes the RT kernel, with the
> same rwlock corruption being what takes it down.
So sad it does, but thanks a lot for the quick response.
~Vitaly
> -Mike
> > ---
> > mm/z3fold.c | 133 +++++++++++++++++++++++++++++-----------------------
> > 1 file changed, 74 insertions(+), 59 deletions(-)
> >
> > diff --git a/mm/z3fold.c b/mm/z3fold.c
> > index 18feaa0bc537..47f05228abdf 100644
> > --- a/mm/z3fold.c
> > +++ b/mm/z3fold.c
> > @@ -186,6 +186,7 @@ enum z3fold_page_flags {
> > */
> > enum z3fold_handle_flags {
> > HANDLES_ORPHANED = 0,
> > + HANDLES_NOFREE = 1,
> > };
> >
> > /*
> > @@ -320,7 +321,7 @@ static inline void free_handle(unsigned long handle)
> > slots = handle_to_slots(handle);
> > write_lock(&slots->lock);
> > *(unsigned long *)handle = 0;
> > - if (zhdr->slots == slots) {
> > + if (zhdr->slots == slots || test_bit(HANDLES_NOFREE, &slots->pool)) {
> > write_unlock(&slots->lock);
> > return; /* simple case, nothing else to do */
> > }
> > @@ -653,6 +654,28 @@ static inline void add_to_unbuddied(struct z3fold_pool *pool,
> > }
> > }
> >
> > +static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
> > +{
> > + enum buddy bud = HEADLESS;
> > +
> > + if (zhdr->middle_chunks) {
> > + if (!zhdr->first_chunks &&
> > + chunks <= zhdr->start_middle - ZHDR_CHUNKS)
> > + bud = FIRST;
> > + else if (!zhdr->last_chunks)
> > + bud = LAST;
> > + } else {
> > + if (!zhdr->first_chunks)
> > + bud = FIRST;
> > + else if (!zhdr->last_chunks)
> > + bud = LAST;
> > + else
> > + bud = MIDDLE;
> > + }
> > +
> > + return bud;
> > +}
> > +
> > static inline void *mchunk_memmove(struct z3fold_header *zhdr,
> > unsigned short dst_chunk)
> > {
> > @@ -714,18 +737,7 @@ static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
> > if (WARN_ON(new_zhdr == zhdr))
> > goto out_fail;
> >
> > - if (new_zhdr->first_chunks == 0) {
> > - if (new_zhdr->middle_chunks != 0 &&
> > - chunks >= new_zhdr->start_middle) {
> > - new_bud = LAST;
> > - } else {
> > - new_bud = FIRST;
> > - }
> > - } else if (new_zhdr->last_chunks == 0) {
> > - new_bud = LAST;
> > - } else if (new_zhdr->middle_chunks == 0) {
> > - new_bud = MIDDLE;
> > - }
> > + new_bud = get_free_buddy(new_zhdr, chunks);
> > q = new_zhdr;
> > switch (new_bud) {
> > case FIRST:
> > @@ -847,9 +859,8 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
> > return;
> > }
> >
> > - if (unlikely(PageIsolated(page) ||
> > - test_bit(PAGE_CLAIMED, &page->private) ||
> > - test_bit(PAGE_STALE, &page->private))) {
> > + if (test_bit(PAGE_STALE, &page->private) ||
> > + test_and_set_bit(PAGE_CLAIMED, &page->private)) {
> > z3fold_page_unlock(zhdr);
> > return;
> > }
> > @@ -858,13 +869,16 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
> > zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
> > if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
> > atomic64_dec(&pool->pages_nr);
> > - else
> > + else {
> > + clear_bit(PAGE_CLAIMED, &page->private);
> > z3fold_page_unlock(zhdr);
> > + }
> > return;
> > }
> >
> > z3fold_compact_page(zhdr);
> > add_to_unbuddied(pool, zhdr);
> > + clear_bit(PAGE_CLAIMED, &page->private);
> > z3fold_page_unlock(zhdr);
> > }
> >
> > @@ -1109,17 +1123,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
> > retry:
> > zhdr = __z3fold_alloc(pool, size, can_sleep);
> > if (zhdr) {
> > - if (zhdr->first_chunks == 0) {
> > - if (zhdr->middle_chunks != 0 &&
> > - chunks >= zhdr->start_middle)
> > - bud = LAST;
> > - else
> > - bud = FIRST;
> > - } else if (zhdr->last_chunks == 0)
> > - bud = LAST;
> > - else if (zhdr->middle_chunks == 0)
> > - bud = MIDDLE;
> > - else {
> > + bud = get_free_buddy(zhdr, chunks);
> > + if (bud == HEADLESS) {
> > if (kref_put(&zhdr->refcount,
> > release_z3fold_page_locked))
> > atomic64_dec(&pool->pages_nr);
> > @@ -1265,7 +1270,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> > pr_err("%s: unknown bud %d\n", __func__, bud);
> > WARN_ON(1);
> > put_z3fold_header(zhdr);
> > - clear_bit(PAGE_CLAIMED, &page->private);
> > return;
> > }
> >
> > @@ -1280,8 +1284,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
> > z3fold_page_unlock(zhdr);
> > return;
> > }
> > - if (unlikely(PageIsolated(page)) ||
> > - test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
> > + if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
> > put_z3fold_header(zhdr);
> > clear_bit(PAGE_CLAIMED, &page->private);
> > return;
> > @@ -1345,6 +1348,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> > struct page *page = NULL;
> > struct list_head *pos;
> > unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
> > + struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));
> > +
> > + rwlock_init(&slots.lock);
> > + slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);
> >
> > spin_lock(&pool->lock);
> > if (!pool->ops || !pool->ops->evict || retries == 0) {
> > @@ -1359,35 +1366,36 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> > list_for_each_prev(pos, &pool->lru) {
> > page = list_entry(pos, struct page, lru);
> >
> > - /* this bit could have been set by free, in which case
> > - * we pass over to the next page in the pool.
> > - */
> > - if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
> > - page = NULL;
> > - continue;
> > - }
> > -
> > - if (unlikely(PageIsolated(page))) {
> > - clear_bit(PAGE_CLAIMED, &page->private);
> > - page = NULL;
> > - continue;
> > - }
> > zhdr = page_address(page);
> > if (test_bit(PAGE_HEADLESS, &page->private))
> > break;
> >
> > + if (kref_get_unless_zero(&zhdr->refcount) == 0) {
> > + zhdr = NULL;
> > + break;
> > + }
> > if (!z3fold_page_trylock(zhdr)) {
> > - clear_bit(PAGE_CLAIMED, &page->private);
> > + if (kref_put(&zhdr->refcount,
> > + release_z3fold_page))
> > + atomic64_dec(&pool->pages_nr);
> > zhdr = NULL;
> > continue; /* can't evict at this point */
> > }
> > - if (zhdr->foreign_handles) {
> > - clear_bit(PAGE_CLAIMED, &page->private);
> > - z3fold_page_unlock(zhdr);
> > +
> > + /* test_and_set_bit is of course atomic, but we still
> > + * need to do it under page lock, otherwise checking
> > + * that bit in __z3fold_alloc wouldn't make sense
> > + */
> > + if (zhdr->foreign_handles ||
> > + test_and_set_bit(PAGE_CLAIMED, &page->private)) {
> > + if (kref_put(&zhdr->refcount,
> > + release_z3fold_page))
> > + atomic64_dec(&pool->pages_nr);
> > + else
> > + z3fold_page_unlock(zhdr);
> > zhdr = NULL;
> > continue; /* can't evict such page */
> > }
> > - kref_get(&zhdr->refcount);
> > list_del_init(&zhdr->buddy);
> > zhdr->cpu = -1;
> > break;
> > @@ -1409,12 +1417,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> > first_handle = 0;
> > last_handle = 0;
> > middle_handle = 0;
> > + memset(slots.slot, 0, sizeof(slots.slot));
> > if (zhdr->first_chunks)
> > - first_handle = encode_handle(zhdr, FIRST);
> > + first_handle = __encode_handle(zhdr, &slots,
> > + FIRST);
> > if (zhdr->middle_chunks)
> > - middle_handle = encode_handle(zhdr, MIDDLE);
> > + middle_handle = __encode_handle(zhdr, &slots,
> > + MIDDLE);
> > if (zhdr->last_chunks)
> > - last_handle = encode_handle(zhdr, LAST);
> > + last_handle = __encode_handle(zhdr, &slots,
> > + LAST);
> > /*
> > * it's safe to unlock here because we hold a
> > * reference to this page
> > @@ -1429,19 +1441,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> > ret = pool->ops->evict(pool, middle_handle);
> > if (ret)
> > goto next;
> > - free_handle(middle_handle);
> > }
> > if (first_handle) {
> > ret = pool->ops->evict(pool, first_handle);
> > if (ret)
> > goto next;
> > - free_handle(first_handle);
> > }
> > if (last_handle) {
> > ret = pool->ops->evict(pool, last_handle);
> > if (ret)
> > goto next;
> > - free_handle(last_handle);
> > }
> > next:
> > if (test_bit(PAGE_HEADLESS, &page->private)) {
> > @@ -1455,9 +1464,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
> > spin_unlock(&pool->lock);
> > clear_bit(PAGE_CLAIMED, &page->private);
> > } else {
> > + struct z3fold_buddy_slots *slots = zhdr->slots;
> > z3fold_page_lock(zhdr);
> > if (kref_put(&zhdr->refcount,
> > release_z3fold_page_locked)) {
> > + kmem_cache_free(pool->c_handle, slots);
> > atomic64_dec(&pool->pages_nr);
> > return 0;
> > }
> > @@ -1573,8 +1584,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
> > VM_BUG_ON_PAGE(!PageMovable(page), page);
> > VM_BUG_ON_PAGE(PageIsolated(page), page);
> >
> > - if (test_bit(PAGE_HEADLESS, &page->private) ||
> > - test_bit(PAGE_CLAIMED, &page->private))
> > + if (test_bit(PAGE_HEADLESS, &page->private))
> > return false;
> >
> > zhdr = page_address(page);
> > @@ -1586,6 +1596,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
> > if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
> > goto out;
> >
> > + if (test_and_set_bit(PAGE_CLAIMED, &page->private))
> > + goto out;
> > pool = zhdr_to_pool(zhdr);
> > spin_lock(&pool->lock);
> > if (!list_empty(&zhdr->buddy))
> > @@ -1612,16 +1624,17 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
> >
> > VM_BUG_ON_PAGE(!PageMovable(page), page);
> > VM_BUG_ON_PAGE(!PageIsolated(page), page);
> > + VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
> > VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
> >
> > zhdr = page_address(page);
> > pool = zhdr_to_pool(zhdr);
> >
> > - if (!z3fold_page_trylock(zhdr)) {
> > + if (!z3fold_page_trylock(zhdr))
> > return -EAGAIN;
> > - }
> > if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
> > z3fold_page_unlock(zhdr);
> > + clear_bit(PAGE_CLAIMED, &page->private);
> > return -EBUSY;
> > }
> > if (work_pending(&zhdr->work)) {
> > @@ -1663,6 +1676,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
> > queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
> >
> > page_mapcount_reset(page);
> > + clear_bit(PAGE_CLAIMED, &page->private);
> > put_page(page);
> > return 0;
> > }
> > @@ -1686,6 +1700,7 @@ static void z3fold_page_putback(struct page *page)
> > spin_lock(&pool->lock);
> > list_add(&page->lru, &pool->lru);
> > spin_unlock(&pool->lock);
> > + clear_bit(PAGE_CLAIMED, &page->private);
> > z3fold_page_unlock(zhdr);
> > }
> >
>
next prev parent reply other threads:[~2020-12-05 11:39 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-04 20:04 [PATCH] z3fold: stricter locking and more careful reclaim Vitaly Wool
2020-12-04 21:47 ` Mike Galbraith
2020-12-05 11:38 ` Vitaly Wool [this message]
2020-12-05 18:09 ` Mike Galbraith
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAM4kBB++pMOF75zcaqX+_Fb9KptR_OWCe8dGkMMxS-jP4M6-Mg@mail.gmail.com \
--to=vitaly.wool@konsulko.com \
--cc=akpm@linux-foundation.org \
--cc=bigeasy@linutronix.de \
--cc=efault@gmx.de \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).