All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gang Li <ligang.bdlg@bytedance.com>
To: Hugh Dickins <hughd@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: songmuchun@bytedance.com, Gang Li <ligang.bdlg@bytedance.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH v2] shmem: move spinlock to the front of iput
Date: Wed, 24 Nov 2021 11:08:39 +0800	[thread overview]
Message-ID: <20211124030840.88455-1-ligang.bdlg@bytedance.com> (raw)

This patch fixes a data race in commit 779750d20b93 ("shmem: split huge pages
beyond i_size under memory pressure").

Call Trace 1:
 shmem_unused_huge_shrink+0x3ae/0x410
 ? __list_lru_walk_one.isra.5+0x33/0x160
 super_cache_scan+0x17c/0x190
 shrink_slab.part.55+0x1ef/0x3f0
 shrink_node+0x10e/0x330
 kswapd+0x380/0x740
 kthread+0xfc/0x130
 ? mem_cgroup_shrink_node+0x170/0x170
 ? kthread_create_on_node+0x70/0x70
 ret_from_fork+0x1f/0x30

Call Trace 2:
 shmem_evict_inode+0xd8/0x190
 evict+0xbe/0x1c0
 do_unlinkat+0x137/0x330
 do_syscall_64+0x76/0x120
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2

iput out of sbinfo->shrinklist_lock will let shmem_evict_inode grab
and delete the inode, which will berak the consistency between
shrinklist_len and shrinklist. The simultaneous deletion of adjacent
elements in the local list "list" by shmem_unused_huge_shrink and
shmem_evict_inode will also break the list.

Fix it by moving shrinklist_lock to the front of iput.

Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
---

Changes in v2:
- Move spinlock to the front of iput instead of changing lock type
  since iput will call evict which may cause deadlock by requesting
  shrinklist_lock.
- Add call trace in commit message.

v1: https://lore.kernel.org/lkml/20211122064126.76734-1-ligang.bdlg@bytedance.com/

---
 mm/shmem.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 9023103ee7d8..2f70a16fc588 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -569,7 +569,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		/* inode is about to be evicted */
 		if (!inode) {
 			list_del_init(&info->shrinklist);
-			removed++;
 			goto next;
 		}
 
@@ -577,15 +576,16 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		if (round_up(inode->i_size, PAGE_SIZE) ==
 				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
 			list_move(&info->shrinklist, &to_remove);
-			removed++;
 			goto next;
 		}
 
 		list_move(&info->shrinklist, &list);
 next:
+		removed++;
 		if (!--batch)
 			break;
 	}
+	sbinfo->shrinklist_len -= removed;
 	spin_unlock(&sbinfo->shrinklist_lock);
 
 	list_for_each_safe(pos, next, &to_remove) {
@@ -602,7 +602,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		inode = &info->vfs_inode;
 
 		if (nr_to_split && split >= nr_to_split)
-			goto leave;
+			goto move_back;
 
 		page = find_get_page(inode->i_mapping,
 				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
@@ -616,38 +616,38 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		}
 
 		/*
-		 * Leave the inode on the list if we failed to lock
-		 * the page at this time.
+		 * Move the inode on the list back to shrinklist if we failed
+		 * to lock the page at this time.
 		 *
 		 * Waiting for the lock may lead to deadlock in the
 		 * reclaim path.
 		 */
 		if (!trylock_page(page)) {
 			put_page(page);
-			goto leave;
+			goto move_back;
 		}
 
 		ret = split_huge_page(page);
 		unlock_page(page);
 		put_page(page);
 
-		/* If split failed leave the inode on the list */
+		/* If split failed move the inode on the list back to shrinklist */
 		if (ret)
-			goto leave;
+			goto move_back;
 
 		split++;
 drop:
 		list_del_init(&info->shrinklist);
-		removed++;
-leave:
+		goto put;
+move_back:
+		spin_lock(&sbinfo->shrinklist_lock);
+		list_move(pos, &sbinfo->shrinklist);
+		sbinfo->shrinklist_len++;
+		spin_unlock(&sbinfo->shrinklist_lock);
+put:
 		iput(inode);
 	}
 
-	spin_lock(&sbinfo->shrinklist_lock);
-	list_splice_tail(&list, &sbinfo->shrinklist);
-	sbinfo->shrinklist_len -= removed;
-	spin_unlock(&sbinfo->shrinklist_lock);
-
 	return split;
 }
 
-- 
2.20.1


             reply	other threads:[~2021-11-24  3:08 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-24  3:08 Gang Li [this message]
2021-11-24  6:08 ` [PATCH v2] shmem: move spinlock to the front of iput Muchun Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211124030840.88455-1-ligang.bdlg@bytedance.com \
    --to=ligang.bdlg@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=songmuchun@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.