From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756952AbdDRJgs (ORCPT ); Tue, 18 Apr 2017 05:36:48 -0400 Received: from s3.sipsolutions.net ([5.9.151.49]:46840 "EHLO sipsolutions.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756047AbdDRJgq (ORCPT ); Tue, 18 Apr 2017 05:36:46 -0400 Message-ID: <1492508204.2472.7.camel@sipsolutions.net> Subject: Re: [RFC PATCH 8/9] debugfs: defer debugfs_fsdata allocation to first usage From: Johannes Berg To: Nicolai Stange , Greg Kroah-Hartman Cc: "Paul E.McKenney" , linux-kernel@vger.kernel.org Date: Tue, 18 Apr 2017 11:36:44 +0200 In-Reply-To: <20170416095137.2784-9-nicstange@gmail.com> (sfid-20170416_115200_841095_0D158B2F) References: <871stdyg0u.fsf@gmail.com> <20170416095137.2784-1-nicstange@gmail.com> <20170416095137.2784-9-nicstange@gmail.com> (sfid-20170416_115200_841095_0D158B2F) Content-Type: text/plain; charset="UTF-8" X-Mailer: Evolution 3.22.4-1 Mime-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Sun, 2017-04-16 at 11:51 +0200, Nicolai Stange wrote: > > +++ b/fs/debugfs/file.c > @@ -53,6 +53,7 @@ const struct file_operations > *debugfs_real_fops(const struct file *filp) >  { >   struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata; >   > + WARN_ON((unsigned long)fsd & > DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); >   return fsd->real_fops; I'm not a fan of BUG_ON(), but consider this case: if you have a completely bogus pointer here, you then return fsd->real_fops, which will be even more bogus, and *then* you call a function from within it... that seems like a recipe for disaster. So you could either return some valid ops (perhaps debugfs_noop_file_operations, although those don't have .name or .poll, so it doesn't cover everything), or just BUG_ON() here directly, saving the incomprehensible crash later. 
johannes >  EXPORT_SYMBOL_GPL(debugfs_real_fops); > @@ -74,9 +75,35 @@ EXPORT_SYMBOL_GPL(debugfs_real_fops); >   */ >  int debugfs_file_get(struct dentry *dentry) >  { > - struct debugfs_fsdata *fsd = dentry->d_fsdata; > + struct debugfs_fsdata *fsd; > + void *d_fsd; > + > + d_fsd = READ_ONCE(dentry->d_fsdata); > + if (!((unsigned long)d_fsd & > DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { > + fsd = d_fsd; > + } else { > + fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); > + if (!fsd) > + return -ENOMEM; > + > + fsd->real_fops = (void *)((unsigned long)d_fsd & > + ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); > + refcount_set(&fsd->active_users, 1); > + init_completion(&fsd->active_users_drained); > + if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) { > + kfree(fsd); > + fsd = READ_ONCE(dentry->d_fsdata); > + } > + } >   > - /* Avoid starvation of removers. */ > + /* > +  * In case of a successful cmpxchg() above, this check is > +  * strictly necessary and must follow it, see the comment in > +  * __debugfs_remove_file(). > +  * OTOH, if the cmpxchg() hasn't been executed or wasn't > +  * successful, this serves the purpose of not starving > +  * removers. > +  */ >   if (d_unlinked(dentry)) >   return -EIO; >   > @@ -98,7 +125,7 @@ EXPORT_SYMBOL_GPL(debugfs_file_get); >   */ >  void debugfs_file_put(struct dentry *dentry) >  { > - struct debugfs_fsdata *fsd = dentry->d_fsdata; > + struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); >   >   if (refcount_dec_and_test(&fsd->active_users)) >   complete(&fsd->active_users_drained); > @@ -109,10 +136,11 @@ static int open_proxy_open(struct inode *inode, > struct file *filp) >  { >   struct dentry *dentry = F_DENTRY(filp); >   const struct file_operations *real_fops = NULL; > - int r = 0; > + int r; >   > - if (debugfs_file_get(dentry)) > - return -ENOENT; > + r = debugfs_file_get(dentry); > + if (r) > + return r == -EIO ? 
-ENOENT : r; >   >   real_fops = debugfs_real_fops(filp); >   real_fops = fops_get(real_fops); > @@ -233,10 +261,11 @@ static int full_proxy_open(struct inode *inode, > struct file *filp) >   struct dentry *dentry = F_DENTRY(filp); >   const struct file_operations *real_fops = NULL; >   struct file_operations *proxy_fops = NULL; > - int r = 0; > + int r; >   > - if (debugfs_file_get(dentry)) > - return -ENOENT; > + r = debugfs_file_get(dentry); > + if (r) > + return r == -EIO ? -ENOENT : r; >   >   real_fops = debugfs_real_fops(filp); >   real_fops = fops_get(real_fops); > diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c > index 5550f11d60bd..2360c17ec00a 100644 > --- a/fs/debugfs/inode.c > +++ b/fs/debugfs/inode.c > @@ -184,7 +184,10 @@ static const struct super_operations > debugfs_super_operations = { >   >  static void debugfs_release_dentry(struct dentry *dentry) >  { > - kfree(dentry->d_fsdata); > + void *fsd = dentry->d_fsdata; > + > + if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) > + kfree(dentry->d_fsdata); >  } >   >  static struct vfsmount *debugfs_automount(struct path *path) > @@ -346,35 +349,25 @@ static struct dentry > *__debugfs_create_file(const char *name, umode_t mode, >  { >   struct dentry *dentry; >   struct inode *inode; > - struct debugfs_fsdata *fsd; > - > - fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); > - if (!fsd) > - return NULL; >   >   if (!(mode & S_IFMT)) >   mode |= S_IFREG; >   BUG_ON(!S_ISREG(mode)); >   dentry = start_creating(name, parent); >   > - if (IS_ERR(dentry)) { > - kfree(fsd); > + if (IS_ERR(dentry)) >   return NULL; > - } >   >   inode = debugfs_get_inode(dentry->d_sb); > - if (unlikely(!inode)) { > - kfree(fsd); > + if (unlikely(!inode)) >   return failed_creating(dentry); > - } >   >   inode->i_mode = mode; >   inode->i_private = data; >   >   inode->i_fop = proxy_fops; > - fsd->real_fops = real_fops; > - refcount_set(&fsd->active_users, 1); > - dentry->d_fsdata = fsd; > + dentry->d_fsdata = (void 
*)((unsigned long)real_fops | > + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); >   >   d_instantiate(dentry, inode); >   fsnotify_create(d_inode(dentry->d_parent), dentry); > @@ -637,8 +630,17 @@ static void __debugfs_remove_file(struct dentry > *dentry, struct dentry *parent) >   >   simple_unlink(d_inode(parent), dentry); >   d_delete(dentry); > - fsd = dentry->d_fsdata; > - init_completion(&fsd->active_users_drained); > + > + /* > +  * Paired with the closing smp_mb() implied by a successful > +  * cmpxchg() in debugfs_file_get(): either > +  * debugfs_file_get() must see a dead dentry or we must see a > +  * debugfs_fsdata instance at ->d_fsdata here (or both). > +  */ > + smp_mb(); > + fsd = READ_ONCE(dentry->d_fsdata); > + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) > + return; >   if (!refcount_dec_and_test(&fsd->active_users)) >   wait_for_completion(&fsd->active_users_drained); >  } > diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h > index 0eea99432840..cb1e8139c398 100644 > --- a/fs/debugfs/internal.h > +++ b/fs/debugfs/internal.h > @@ -25,4 +25,12 @@ struct debugfs_fsdata { >   struct completion active_users_drained; >  }; >   > +/* > + * A dentry's ->d_fsdata either points to the real fops or to a > + * dynamically allocated debugfs_fsdata instance. > + * In order to distinguish between these two cases, a real fops > + * pointer gets its lowest bit set. > + */ > +#define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0) > + >  #endif /* _DEBUGFS_INTERNAL_H_ */