linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Neil Brown <neilb@suse.de>
To: linux-kernel@vger.kernel.org, Andrew Morton <akpm@osdl.org>,
	Olaf Hering <olh@suse.de>, Jan Blunck <jblunck@suse.de>,
	Kirill Korotaev <dev@openvz.org>, Al Viro <viro@ftp.linux.org.uk>
Subject: Re: [PATCH] Busy inodes after unmount, be more verbose in generic_shutdown_super
Date: Mon, 6 Mar 2006 17:09:05 +1100	[thread overview]
Message-ID: <17419.53761.295044.78549@cse.unsw.edu.au> (raw)
In-Reply-To: message from Neil Brown on Thursday March 2

On Thursday March 2, neilb@suse.de wrote:
> 
> 
> Hi,
>  This mail relates to the thread with the same subject which can be
>  found at
> 
>     http://lkml.org/lkml/2006/1/16/279
> 
>  I would like to propose an alternate patch for the problem.
....
> 
> Comments?  Please :-?

Somewhere among the comments (thanks), I realised that I was only
closing half the race.  I had tried to make sure there were no stray
references to any dentries, but there is still the inode which is
being iput, which can cause problems.

The following patch takes a totally different approach, is based on an
idea from Jan Kara, and is much less intrusive.

We:
  - keep track of "who" is calling prune_dcache, and when a filesystem
    is being unmounted (s_root == NULL) we only allow the unmount thread
    to prune dentries.
  - keep track of how many dentries are in the process of having
    dentry_iput called on them for pruning
  - don't allow umount to proceed until that count hits zero
  - bias the count this way and that to make sure we get a wake_up at
    the right time
  - reuse 's_wait_unfrozen' to wait on the iput to complete.

Again, I'm very keen on feedback.  This race is very hard to trigger,
so code review is the only real way to evaluate this patch.

Thanks,
NeilBrown


Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./fs/dcache.c        |   17 +++++++++++++----
 ./fs/super.c         |   11 +++++++++++
 ./include/linux/fs.h |    2 ++
 3 files changed, 26 insertions(+), 4 deletions(-)

diff ./fs/dcache.c~current~ ./fs/dcache.c
--- ./fs/dcache.c~current~	2006-03-06 16:54:59.000000000 +1100
+++ ./fs/dcache.c	2006-03-06 16:55:33.000000000 +1100
@@ -366,6 +366,7 @@ static inline void prune_one_dentry(stru
 {
 	struct dentry * parent;
 
+	dentry->d_sb->s_pending_iputs ++;
 	__d_drop(dentry);
 	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
@@ -375,6 +376,9 @@ static inline void prune_one_dentry(stru
 	if (parent != dentry)
 		dput(parent);
 	spin_lock(&dcache_lock);
+	dentry->d_sb->s_pending_iputs --;
+	if (dentry->d_sb->s_pending_iputs < 0)
+		wake_up(&dentry->d_sb->s_wait_unfrozen);
 }
 
 /**
@@ -390,7 +394,7 @@ static inline void prune_one_dentry(stru
  * all the dentries are in use.
  */
  
-static void prune_dcache(int count)
+static void prune_dcache(int count, struct dentry *parent)
 {
 	spin_lock(&dcache_lock);
 	for (; count ; count--) {
@@ -407,6 +411,11 @@ static void prune_dcache(int count)
  		dentry_stat.nr_unused--;
 		dentry = list_entry(tmp, struct dentry, d_lru);
 
+		if (dentry->d_sb->s_root == NULL &&
+		    (parent == NULL ||
+		     parent->d_sb != dentry->d_sb))
+			continue;
+
  		spin_lock(&dentry->d_lock);
 		/*
 		 * We found an inuse dentry which was not removed from
@@ -635,7 +644,7 @@ void shrink_dcache_parent(struct dentry 
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found);
+		prune_dcache(found, parent);
 }
 
 /**
@@ -673,7 +682,7 @@ void shrink_dcache_anon(struct hlist_hea
 			}
 		}
 		spin_unlock(&dcache_lock);
-		prune_dcache(found);
+		prune_dcache(found, NULL);
 	} while(found);
 }
 
@@ -694,7 +703,7 @@ static int shrink_dcache_memory(int nr, 
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
-		prune_dcache(nr);
+		prune_dcache(nr, NULL);
 	}
 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }

diff ./fs/super.c~current~ ./fs/super.c
--- ./fs/super.c~current~	2006-03-06 16:54:59.000000000 +1100
+++ ./fs/super.c	2006-03-06 16:57:19.000000000 +1100
@@ -230,7 +230,18 @@ void generic_shutdown_super(struct super
 	struct super_operations *sop = sb->s_op;
 
 	if (root) {
+		spin_lock(&dcache_lock);
+		/* disable stray dputs */
 		sb->s_root = NULL;
+
+		/* trigger a wake_up */
+		sb->s_pending_iputs --;
+		spin_unlock(&dcache_lock);
+		wait_event(sb->s_wait_unfrozen,
+			   sb->s_pending_iputs < 0);
+		/* avoid further wakeups */
+		sb->s_pending_iputs = 65000;
+
 		shrink_dcache_parent(root);
 		shrink_dcache_anon(&sb->s_anon);
 		dput(root);

diff ./include/linux/fs.h~current~ ./include/linux/fs.h
--- ./include/linux/fs.h~current~	2006-03-06 16:54:59.000000000 +1100
+++ ./include/linux/fs.h	2006-03-06 12:49:55.000000000 +1100
@@ -833,6 +833,8 @@ struct super_block {
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
 
+	int			s_pending_iputs;
+
 	struct block_device	*s_bdev;
 	struct list_head	s_instances;
 	struct quota_info	s_dquot;	/* Diskquota specific options */

  parent reply	other threads:[~2006-03-06  6:10 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-02  6:57 [PATCH] Busy inodes after unmount, be more verbose in generic_shutdown_super Neil Brown
2006-03-02 10:48 ` Jan Blunck
2006-03-03 11:42 ` Jan Blunck
2006-03-06  6:09 ` Neil Brown [this message]
2006-03-06  7:32   ` Balbir Singh
2006-03-07  1:58     ` Neil Brown
2006-03-07  2:49       ` Balbir Singh
2006-03-07  6:22         ` Kirill Korotaev
2006-03-07  6:16       ` Kirill Korotaev
2006-03-07  7:03         ` Balbir Singh
2006-03-07  7:21           ` Kirill Korotaev
2006-03-07 11:05             ` Balbir Singh
2006-03-08  0:29         ` Neil Brown
2006-03-08  2:17           ` Balbir Singh
2006-03-08  2:39             ` Neil Brown
2006-03-08  3:05               ` Balbir Singh
2006-03-08 11:01                 ` Jan Blunck
2006-03-06 11:56   ` Jan Blunck
2006-03-07  2:15     ` Neil Brown
2006-03-06 11:56   ` Kirill Korotaev
2006-03-07  2:01     ` Neil Brown
2006-03-07  6:20       ` Kirill Korotaev
2006-03-07 23:20         ` Neil Brown
2006-03-09 12:03           ` Kirill Korotaev
  -- strict thread matches above, loose matches on Subject: below --
2006-01-16 22:34 Olaf Hering
2006-01-16 23:23 ` Kirill Korotaev
2006-01-16 23:29   ` Olaf Hering
2006-01-17  2:05     ` Andrew Morton
2006-01-17  7:03       ` Kirill Korotaev
2006-01-18 22:49   ` Jan Blunck
2006-01-18 23:10     ` Andrew Morton
2006-01-19 10:08       ` Kirill Korotaev
2006-01-19  9:52     ` Kirill Korotaev
2006-01-19 10:04       ` Jan Blunck
2006-01-19 10:26         ` Kirill Korotaev
2006-01-20 19:06           ` Jan Blunck
2006-01-23  8:14             ` Kirill Korotaev
2006-01-30 11:54               ` Jan Blunck
2006-01-30 14:05                 ` Kirill Korotaev
2006-01-30 14:21                   ` Jan Blunck
2006-01-30 14:34                     ` Kirill Korotaev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=17419.53761.295044.78549@cse.unsw.edu.au \
    --to=neilb@suse.de \
    --cc=akpm@osdl.org \
    --cc=dev@openvz.org \
    --cc=jblunck@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=olh@suse.de \
    --cc=viro@ftp.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).