On Wed, Apr 30, 2014 at 12:20:13AM +0100, Al Viro wrote: > On Tue, Apr 29, 2014 at 04:04:11PM -0700, Linus Torvalds wrote: > > But at a minimum, we have "d_op->d_prune()" that would now be possibly > > be called for the old dentry *after* a new dentry has been allocated. > > Not to mention the inode not having been dropped. So it looks like a > > disaster where the filesystem now ends up having concurrent "live" > > dentries for the same entry. Even if one of them is on its way out, > > it's still visible to the filesystem. That makes me very > > uncomfortable. > > > > I'm starting to think that Miklos' original patch (perhaps with the > > spinlock split to at least be one per superblock) is the most > > straightforward approach after all. It's annoying having to add locks > > here, but the whole pruning path should not be a hotpath, so maybe > > it's not actually a big deal. > > FWIW, my solution is more or less working; I'll give more local beating > and post it... OK, aggregate diff follows, more readable splitup (3 commits) attached. It seems to survive beating here; testing, review and comments are welcome. diff --git a/fs/dcache.c b/fs/dcache.c index 494a9def..a83b933 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -246,16 +246,8 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } -/* - * no locks, please. - */ -static void d_free(struct dentry *dentry) +static void dentry_free(struct dentry *dentry) { - BUG_ON((int)dentry->d_lockref.count > 0); - this_cpu_dec(nr_dentry); - if (dentry->d_op && dentry->d_op->d_release) - dentry->d_op->d_release(dentry); - /* if dentry was never visible to RCU, immediate free is OK */ if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); @@ -420,40 +412,6 @@ static void dentry_lru_del(struct dentry *dentry) } /** - * d_kill - kill dentry and return parent - * @dentry: dentry to kill - * @parent: parent dentry - * - * The dentry must already be unhashed and removed from the LRU. - * - * If this is the root of the dentry tree, return NULL. - * - * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by - * d_kill. - */ -static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) - __releases(dentry->d_lock) - __releases(parent->d_lock) - __releases(dentry->d_inode->i_lock) -{ - list_del(&dentry->d_u.d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; - if (parent) - spin_unlock(&parent->d_lock); - dentry_iput(dentry); - /* - * dentry_iput drops the locks, at which point nobody (except - * transient RCU lookups) can reach this dentry. - */ - d_free(dentry); - return parent; -} - -/** * d_drop - drop a dentry * @dentry: dentry to drop * @@ -499,6 +457,8 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static DECLARE_WAIT_QUEUE_HEAD(free_wq); + /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. @@ -506,16 +466,17 @@ EXPORT_SYMBOL(d_drop); * Returns dentry requiring refcount drop, or NULL if we're done. */ static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) +dentry_kill(struct dentry *dentry, struct list_head *morgue) __releases(dentry->d_lock) { struct inode *inode; struct dentry *parent; + bool can_free = true; inode = dentry->d_inode; if (inode && !spin_trylock(&inode->i_lock)) { relock: - if (unlock_on_failure) { + if (!morgue) { spin_unlock(&dentry->d_lock); cpu_relax(); } @@ -531,6 +492,15 @@ relock: goto relock; } + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + /* morgue must be non-NULL */ + list_move(&dentry->d_lru, morgue); + if (parent) + spin_unlock(&parent->d_lock); + /* inode must be NULL */ + spin_unlock(&dentry->d_lock); + return parent; + } /* * The dentry is now unrecoverably dead to the world. */ @@ -543,10 +513,43 @@ relock: if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) dentry->d_op->d_prune(dentry); - dentry_lru_del(dentry); + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) + d_lru_del(dentry); + else if (morgue) + d_shrink_del(dentry); + else + can_free = false; + } /* if it was on the hash then remove it */ __d_drop(dentry); - return d_kill(dentry, parent); + list_del(&dentry->d_u.d_child); + /* + * Inform d_walk() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (parent) + spin_unlock(&parent->d_lock); + dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + BUG_ON((int)dentry->d_lockref.count > 0); + this_cpu_dec(nr_dentry); + if (dentry->d_op && dentry->d_op->d_release) + dentry->d_op->d_release(dentry); + + if (likely(can_free)) { + dentry_free(dentry); + } else { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + wake_up(&free_wq); + } + return parent; } /* @@ -602,7 +605,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry, NULL); if (dentry) goto repeat; } @@ -815,47 +818,10 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); -/* - * Try to throw away a dentry - free the inode, dput the parent. - * Requires dentry->d_lock is held, and dentry->d_count == 0. - * Releases dentry->d_lock. - * - * This may fail if locks cannot be acquired no problem, just try again. - */ -static struct dentry * try_prune_one_dentry(struct dentry *dentry) - __releases(dentry->d_lock) -{ - struct dentry *parent; - - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - * if it returns the same dentry, trylocks failed. In either - * case, just loop again. - * - * Otherwise, we need to prune ancestors too. This is necessary - * to prevent quadratic behavior of shrink_dcache_parent(), but - * is also expected to be beneficial in reducing dentry cache - * fragmentation. - */ - if (!parent) - return NULL; - if (parent == dentry) - return dentry; - - /* Prune ancestors. */ - dentry = parent; - while (dentry) { - if (lockref_put_or_lock(&dentry->d_lockref)) - return NULL; - dentry = dentry_kill(dentry, 1); - } - return NULL; -} - static void shrink_dentry_list(struct list_head *list) { - struct dentry *dentry; + struct dentry *dentry, *parent; + LIST_HEAD(morgue); rcu_read_lock(); for (;;) { @@ -891,24 +857,43 @@ static void shrink_dentry_list(struct list_head *list) } rcu_read_unlock(); + parent = dentry_kill(dentry, &morgue); /* - * If 'try_to_prune()' returns a dentry, it will - * be the same one we passed in, and d_lock will - * have been held the whole time, so it will not - * have been added to any other lists. We failed - * to get the inode lock. - * - * We just add it back to the shrink list. + * If dentry_kill returns NULL, we have nothing more to do. */ - dentry = try_prune_one_dentry(dentry); - - rcu_read_lock(); - if (dentry) { + if (!parent) { + rcu_read_lock(); + continue; + } + if (unlikely(parent == dentry)) { + /* + * trylocks have failed and d_lock has been held the + * whole time, so it could not have been added to any + * other lists. Just add it back to the shrink list. + */ + rcu_read_lock(); d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + continue; } + /* + * We need to prune ancestors too. This is necessary to prevent + * quadratic behavior of shrink_dcache_parent(), but is also + * expected to be beneficial in reducing dentry cache + * fragmentation. + */ + dentry = parent; + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) + dentry = dentry_kill(dentry, NULL); + rcu_read_lock(); } rcu_read_unlock(); + while (unlikely(!list_empty(&morgue))) { + dentry = list_first_entry(&morgue, struct dentry, d_lru); + list_del_init(&dentry->d_lru); + wait_event(free_wq, dentry->d_flags & DCACHE_MAY_FREE); + dentry_free(dentry); + } } static enum lru_status diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3b9bfdb..3c7ec32 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -221,6 +221,8 @@ struct dentry_operations { #define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */ #define DCACHE_FILE_TYPE 0x00400000 /* Other file type */ +#define DCACHE_MAY_FREE 0x00800000 + extern seqlock_t rename_lock; static inline int dname_external(const struct dentry *dentry)