All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
	Jonathan Corbet <corbet@lwn.net>,
	Luis Chamberlain <mcgrof@kernel.org>,
	Kees Cook <keescook@chromium.org>,
	Iurii Zaikin <yzaikin@google.com>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-doc@vger.kernel.org,
	Mauro Carvalho Chehab <mchehab+samsung@kernel.org>,
	Eric Biggers <ebiggers@google.com>,
	Dave Chinner <david@fromorbit.com>,
	Eric Sandeen <sandeen@redhat.com>,
	Waiman Long <longman@redhat.com>
Subject: [PATCH 05/11] fs/dcache: Reclaim excessive negative dentries in directories
Date: Wed, 26 Feb 2020 11:13:58 -0500	[thread overview]
Message-ID: <20200226161404.14136-6-longman@redhat.com> (raw)
In-Reply-To: <20200226161404.14136-1-longman@redhat.com>

When the "dentry-dir-max" sysctl parameter is set, it will enable the
checking of dentry count in the parent directory when a negative dentry
is being retained. If the count exceeds the given limit, it will schedule
a work function to scan the children of that parent directory to find
negative dentries to be reclaimed. Positive dentries will not be touched.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 fs/dcache.c            | 207 +++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |   2 +
 2 files changed, 209 insertions(+)

diff --git a/fs/dcache.c b/fs/dcache.c
index 8f3ac31a582b..01c6d7277244 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -133,6 +133,11 @@ static DEFINE_PER_CPU(long, nr_dentry_negative);
 int dcache_dentry_dir_max_sysctl __read_mostly;
 EXPORT_SYMBOL_GPL(dcache_dentry_dir_max_sysctl);
 
+static LLIST_HEAD(negative_reclaim_list);
+static void negative_reclaim_check(struct dentry *parent);
+static void negative_reclaim_workfn(struct work_struct *work);
+static DECLARE_WORK(negative_reclaim_work, negative_reclaim_workfn);
+
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 
 /*
@@ -869,7 +874,20 @@ void dput(struct dentry *dentry)
 		rcu_read_unlock();
 
 		if (likely(retain_dentry(dentry))) {
+			struct dentry *neg_parent = NULL;
+
+			if (d_is_negative(dentry)) {
+				neg_parent = dentry->d_parent;
+				rcu_read_lock();
+			}
 			spin_unlock(&dentry->d_lock);
+
+			/*
+			 * Negative dentry reclaim check is only done when
+			 * a negative dentry is being put into a LRU list.
+			 */
+			if (neg_parent)
+				negative_reclaim_check(neg_parent);
 			return;
 		}
 
@@ -1261,6 +1279,195 @@ void shrink_dcache_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
+/*
+ * Return true if reclaiming negative dentry can happen.
+ */
+static inline bool can_reclaim_dentry(unsigned int flags)
+{
+	return !(flags & (DCACHE_SHRINK_LIST | DCACHE_GENOCIDE |
+			  DCACHE_DENTRY_KILLED));
+}
+
+struct reclaim_dentry
+{
+	struct llist_node reclaim_node;
+	struct dentry *parent_dir;
+};
+
+/*
+ * Reclaim excess negative dentries in a directory
+ */
+static void reclaim_negative_dentry(struct dentry *parent,
+				    struct list_head *dispose)
+{
+	struct dentry *child;
+	int limit = READ_ONCE(dcache_dentry_dir_max_sysctl);
+	int cnt;
+
+	lockdep_assert_held(&parent->d_lock);
+
+	cnt = parent->d_nchildren;
+
+	/*
+	 * Compute # of negative dentries to be reclaimed
+	 * An extra 1/8 of dcache_dentry_dir_max_sysctl is added.
+	 */
+	if (cnt <= limit)
+		return;
+	cnt -= limit;
+	cnt += (limit >> 3);
+
+	/*
+	 * The d_subdirs is treated like a kind of LRU where
+	 * non-negative dentries are skipped. Negative dentries
+	 * with DCACHE_REFERENCED bit set are also skipped but
+	 * with DCACHE_REFERENCED cleared.
+	 */
+	list_for_each_entry(child, &parent->d_subdirs, d_child) {
+		/*
+		 * This check is racy and so it may not be accurate.
+		 */
+		if (!d_is_negative(child))
+			continue;
+
+		if (!spin_trylock(&child->d_lock))
+			continue;
+
+		/*
+		 * Only reclaim zero-refcnt negative dentries in the
+		 * LRU, but not in shrink list.
+		 */
+		if (can_reclaim_dentry(child->d_flags) &&
+		    d_is_negative(child) && d_in_lru(child) &&
+		    !child->d_lockref.count) {
+			if (child->d_flags & DCACHE_REFERENCED) {
+				child->d_flags &= ~DCACHE_REFERENCED;
+			} else {
+				cnt--;
+				d_lru_del(child);
+				d_shrink_add(child, dispose);
+			}
+		}
+		spin_unlock(&child->d_lock);
+		if (!cnt) {
+			child = list_next_entry(child, d_child);
+			break;
+		}
+	}
+
+	if (&child->d_child != &parent->d_subdirs) {
+		/*
+		 * Split out the childs from the head up to just
+		 * before the current entry into a separate list and
+		 * then splice it to the end of the child list so
+		 * that the unscanned entries will be in the front.
+		 * This simulates LRU.
+		 */
+		struct list_head list;
+
+		list_cut_before(&list, &parent->d_subdirs,
+				&child->d_child);
+		list_splice_tail(&list, &parent->d_subdirs);
+	}
+}
+
+/*
+ * Excess negative dentry reclaim work function.
+ */
+static void negative_reclaim_workfn(struct work_struct *work)
+{
+	struct llist_node *nodes, *next;
+	struct dentry *parent;
+	struct reclaim_dentry *dentry_node;
+
+	/*
+	 * Collect excess negative dentries in dispose list & shrink them.
+	 */
+	for (nodes = llist_del_all(&negative_reclaim_list);
+	     nodes; nodes = next) {
+		LIST_HEAD(dispose);
+
+		next = llist_next(nodes);
+		dentry_node = container_of(nodes, struct reclaim_dentry,
+					   reclaim_node);
+		parent = dentry_node->parent_dir;
+		spin_lock(&parent->d_lock);
+
+		if (d_is_dir(parent) &&
+		    can_reclaim_dentry(parent->d_flags) &&
+		    (parent->d_flags & DCACHE_RECLAIMING))
+			reclaim_negative_dentry(parent, &dispose);
+
+		if (!list_empty(&dispose)) {
+			spin_unlock(&parent->d_lock);
+			shrink_dentry_list(&dispose);
+			spin_lock(&parent->d_lock);
+		}
+
+		parent->d_flags &= ~DCACHE_RECLAIMING;
+		spin_unlock(&parent->d_lock);
+		dput(parent);
+		kfree(dentry_node);
+		cond_resched();
+	}
+}
+
+/*
+ * Check the parent to see if it has too many negative dentries and queue
+ * it up for the negative dentry reclaim work function to handle it.
+ */
+static void negative_reclaim_check(struct dentry *parent)
+	__releases(rcu)
+{
+	int limit = dcache_dentry_dir_max_sysctl;
+	struct reclaim_dentry *dentry_node;
+
+	if (!limit)
+		goto rcu_unlock_out;
+
+	/*
+	 * These checks are racy before spin_lock().
+	 */
+	if (!can_reclaim_dentry(parent->d_flags) ||
+	    (parent->d_flags & DCACHE_RECLAIMING) ||
+	    (READ_ONCE(parent->d_nchildren) <= limit))
+		goto rcu_unlock_out;
+
+	spin_lock(&parent->d_lock);
+	if (!can_reclaim_dentry(parent->d_flags) ||
+	    (parent->d_flags & DCACHE_RECLAIMING) ||
+	    (READ_ONCE(parent->d_nchildren) <= limit))
+		goto unlock_out;
+
+	if (!d_is_dir(parent))
+		goto unlock_out;
+
+	dentry_node = kzalloc(sizeof(*dentry_node), GFP_NOWAIT);
+	if (!dentry_node)
+		goto unlock_out;
+
+	rcu_read_unlock();
+	__dget_dlock(parent);
+	dentry_node->parent_dir = parent;
+	parent->d_flags |= DCACHE_RECLAIMING;
+	spin_unlock(&parent->d_lock);
+
+	/*
+	 * Only call schedule_work() if negative_reclaim_list is previously
+	 * empty. Otherwise, schedule_work() had been called but the workfn
+	 * workfn hasn't retrieved the list yet.
+	 */
+	if (llist_add(&dentry_node->reclaim_node, &negative_reclaim_list))
+		schedule_work(&negative_reclaim_work);
+	return;
+
+unlock_out:
+	spin_unlock(&parent->d_lock);
+rcu_unlock_out:
+	rcu_read_unlock();
+	return;
+}
+
 /**
  * enum d_walk_ret - action to talke during tree walk
  * @D_WALK_CONTINUE:	contrinue walk
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e9e66eb50d1a..f14d72738903 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -13,6 +13,7 @@
 #include <linux/lockref.h>
 #include <linux/stringhash.h>
 #include <linux/wait.h>
+#include <linux/llist.h>
 
 struct path;
 struct vfsmount;
@@ -214,6 +215,7 @@ struct dentry_operations {
 #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */
 #define DCACHE_ENCRYPTED_NAME		0x02000000 /* Encrypted name (dir key was unavailable) */
 #define DCACHE_OP_REAL			0x04000000
+#define DCACHE_RECLAIMING		0x08000000 /* Reclaiming negative dentries */
 
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
 #define DCACHE_DENTRY_CURSOR		0x20000000
-- 
2.18.1


  parent reply	other threads:[~2020-02-26 16:15 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-26 16:13 [PATCH 00/11] fs/dcache: Limit # of negative dentries Waiman Long
2020-02-26 16:13 ` [PATCH 01/11] fs/dcache: Fix incorrect accounting " Waiman Long
2020-02-26 16:13 ` [PATCH 02/11] fs/dcache: Simplify __dentry_kill() Waiman Long
2020-02-26 16:13 ` [PATCH 03/11] fs/dcache: Add a counter to track number of children Waiman Long
2020-02-26 16:13 ` [PATCH 04/11] fs/dcache: Add sysctl parameter dentry-dir-max Waiman Long
2020-02-26 16:13 ` Waiman Long [this message]
2020-02-26 16:13 ` [PATCH 06/11] fs/dcache: directory opportunistically stores # of positive dentries Waiman Long
2020-02-26 16:14 ` [PATCH 07/11] fs/dcache: Add static key negative_reclaim_enable Waiman Long
2020-02-26 16:14 ` [PATCH 08/11] fs/dcache: Limit dentry reclaim count in negative_reclaim_workfn() Waiman Long
2020-02-26 16:14 ` [PATCH 09/11] fs/dcache: Don't allow small values for dentry-dir-max Waiman Long
2020-02-26 16:14 ` [PATCH 10/11] fs/dcache: Kill off dentry as last resort Waiman Long
2020-02-26 16:14 ` [PATCH 11/11] fs/dcache: Track # of negative dentries reclaimed & killed Waiman Long
2020-02-26 16:29 ` [PATCH 00/11] fs/dcache: Limit # of negative dentries Matthew Wilcox
2020-02-26 19:19   ` Waiman Long
2020-02-26 21:28     ` Matthew Wilcox
2020-02-26 21:28   ` Andreas Dilger
2020-02-26 21:45     ` Matthew Wilcox
2020-02-27  8:07       ` Dave Chinner
2020-02-27  9:55     ` Ian Kent
2020-02-28  3:34       ` Matthew Wilcox
2020-02-28  4:16         ` Ian Kent
2020-02-28  4:36           ` Ian Kent
2020-02-28  4:52             ` Al Viro
2020-02-28  4:22         ` Al Viro
2020-02-28  4:52           ` Ian Kent
2020-02-28 15:32         ` Waiman Long
2020-02-28 15:39           ` Matthew Wilcox
2020-02-28 19:32         ` Theodore Y. Ts'o
2020-02-27 19:04   ` Eric Sandeen
2020-02-27 22:39     ` Dave Chinner
2020-02-27  8:30 ` Dave Chinner
2020-02-28 15:47   ` Waiman Long
2020-03-15  3:46 ` Matthew Wilcox
2020-03-21 10:17   ` Konstantin Khlebnikov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200226161404.14136-6-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=corbet@lwn.net \
    --cc=david@fromorbit.com \
    --cc=ebiggers@google.com \
    --cc=keescook@chromium.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mcgrof@kernel.org \
    --cc=mchehab+samsung@kernel.org \
    --cc=sandeen@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.