linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org
Cc: viro@zeniv.linux.org.uk, matthew.wilcox@oracle.com,
	khlebnikov@yandex-team.ru, gautham.ananthakrishna@oracle.com
Subject: [PATCH RFC 1/6] dcache: sweep cached negative dentries to the end of list of siblings
Date: Thu, 21 Jan 2021 18:49:40 +0530	[thread overview]
Message-ID: <1611235185-1685-2-git-send-email-gautham.ananthakrishna@oracle.com> (raw)
In-Reply-To: <1611235185-1685-1-git-send-email-gautham.ananthakrishna@oracle.com>

From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

For disk filesystems the result of every negative lookup is cached, and the
content of directories is usually cached too. Production of negative dentries
isn't limited by disk speed, so it's really easy to generate millions of them
if the system has enough memory. Negative dentries are linked into the
siblings list along with normal positive dentries. Some operations walk the
dcache tree but look only for positive dentries: the most important is
fsnotify/inotify.

This patch moves negative dentries to the end of the siblings list at the
final dput() and marks them with a flag which indicates that all following
dentries are negative too. The reverse operation is required before
instantiating a negative dentry.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
---
 fs/dcache.c            | 59 +++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/dcache.h |  6 +++++
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index ea04858..a506169 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -632,6 +632,48 @@ static inline struct dentry *lock_parent(struct dentry *dentry)
 	return __lock_parent(dentry);
 }
 
+/*
+ * Move cached negative dentry to the tail of parent->d_subdirs.
+ * This lets walkers skip them all together at first sight.
+ * Must be called at dput of negative dentry.
+ */
+static void sweep_negative(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	if (!d_is_tail_negative(dentry)) {
+		parent = lock_parent(dentry);
+		if (!parent)
+			return;
+
+		if (!d_count(dentry) && d_is_negative(dentry) &&
+		    !d_is_tail_negative(dentry)) {
+			dentry->d_flags |= DCACHE_TAIL_NEGATIVE;
+			list_move_tail(&dentry->d_child, &parent->d_subdirs);
+		}
+
+		spin_unlock(&parent->d_lock);
+	}
+}
+
+/*
+ * Undo sweep_negative() and move to the head of parent->d_subdirs.
+ * Must be called before converting negative dentry into positive.
+ */
+static void recycle_negative(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	spin_lock(&dentry->d_lock);
+	parent = lock_parent(dentry);
+	dentry->d_flags &= ~DCACHE_TAIL_NEGATIVE;
+	if (parent) {
+		list_move(&dentry->d_child, &parent->d_subdirs);
+		spin_unlock(&parent->d_lock);
+	}
+	spin_unlock(&dentry->d_lock);
+}
+
 static inline bool retain_dentry(struct dentry *dentry)
 {
 	WARN_ON(d_in_lookup(dentry));
@@ -737,7 +779,7 @@ static struct dentry *dentry_kill(struct dentry *dentry)
 static inline bool fast_dput(struct dentry *dentry)
 {
 	int ret;
-	unsigned int d_flags;
+	unsigned int d_flags, required;
 
 	/*
 	 * If we have a d_op->d_delete() operation, we sould not
@@ -785,6 +827,8 @@ static inline bool fast_dput(struct dentry *dentry)
 	 * a 'delete' op, and it's referenced and already on
 	 * the LRU list.
 	 *
+	 * Cached negative dentry must be swept to the tail.
+	 *
 	 * NOTE! Since we aren't locked, these values are
 	 * not "stable". However, it is sufficient that at
 	 * some point after we dropped the reference the
@@ -796,10 +840,15 @@ static inline bool fast_dput(struct dentry *dentry)
 	 */
 	smp_rmb();
 	d_flags = READ_ONCE(dentry->d_flags);
-	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
+
+	required = DCACHE_REFERENCED | DCACHE_LRU_LIST |
+		(d_flags_negative(d_flags) ? DCACHE_TAIL_NEGATIVE : 0);
+
+	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST |
+		DCACHE_DISCONNECTED | DCACHE_TAIL_NEGATIVE;
 
 	/* Nothing to do? Dropping the reference was all we needed? */
-	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
+	if (d_flags == required && !d_unhashed(dentry))
 		return true;
 
 	/*
@@ -871,6 +920,8 @@ void dput(struct dentry *dentry)
 		rcu_read_unlock();
 
 		if (likely(retain_dentry(dentry))) {
+			if (d_is_negative(dentry))
+				sweep_negative(dentry);
 			spin_unlock(&dentry->d_lock);
 			return;
 		}
@@ -1970,6 +2021,8 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 {
 	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
 	if (inode) {
+		if (d_is_tail_negative(entry))
+			recycle_negative(entry);
 		security_d_instantiate(entry, inode);
 		spin_lock(&inode->i_lock);
 		__d_instantiate(entry, inode);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 6f95c33..5f4ce3a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -219,6 +219,7 @@ struct dentry_operations {
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
 #define DCACHE_DENTRY_CURSOR		0x20000000
 #define DCACHE_NORCU			0x40000000 /* No RCU delay for freeing */
+#define DCACHE_TAIL_NEGATIVE		0x80000000 /* All following siblings are negative */
 
 extern seqlock_t rename_lock;
 
@@ -495,6 +496,11 @@ static inline int simple_positive(const struct dentry *dentry)
 	return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
+static inline bool d_is_tail_negative(const struct dentry *dentry)
+{
+	return unlikely(dentry->d_flags & DCACHE_TAIL_NEGATIVE);
+}
+
 extern void d_set_fallthru(struct dentry *dentry);
 
 static inline bool d_is_fallthru(const struct dentry *dentry)
-- 
1.8.3.1


  reply	other threads:[~2021-01-21 17:53 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-21 13:19 [PATCH RFC 0/6] fix the negative dentres bloating system memory usage Gautham Ananthakrishna
2021-01-21 13:19 ` Gautham Ananthakrishna [this message]
2021-04-14  3:00   ` [PATCH RFC 1/6] dcache: sweep cached negative dentries to the end of list of siblings Al Viro
2021-04-15 16:50     ` Al Viro
2021-04-14  3:41   ` Al Viro
2021-04-15 16:25     ` Al Viro
2021-01-21 13:19 ` [PATCH RFC 2/6] fsnotify: stop walking child dentries if remaining tail is negative Gautham Ananthakrishna
2021-01-21 13:19 ` [PATCH RFC 3/6] dcache: add action D_WALK_SKIP_SIBLINGS to d_walk() Gautham Ananthakrishna
2021-01-21 13:19 ` [PATCH RFC 4/6] dcache: stop walking siblings if remaining dentries all negative Gautham Ananthakrishna
2021-01-21 13:19 ` [PATCH RFC 5/6] dcache: push releasing dentry lock into sweep_negative Gautham Ananthakrishna
2021-01-21 13:19 ` [PATCH RFC 6/6] dcache: prevent flooding with negative dentries Gautham Ananthakrishna
2021-04-14  3:56   ` Al Viro
2021-03-31 14:23 ` [PATCH RFC 0/6] fix the negative dentres bloating system memory usage Matthew Wilcox
2021-04-14  2:40 ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1611235185-1685-2-git-send-email-gautham.ananthakrishna@oracle.com \
    --to=gautham.ananthakrishna@oracle.com \
    --cc=khlebnikov@yandex-team.ru \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=matthew.wilcox@oracle.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).