All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ceph: trim deleted inode
@ 2013-07-21  2:21 Yan, Zheng
  2013-07-21  2:21 ` [PATCH 1/2] mds: notify clients about " Yan, Zheng
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Yan, Zheng @ 2013-07-21  2:21 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

The MDS uses caps message to notify clients about deleted inode.
when receiving a such message, invalidate any alias of the inode.
This makes the kernel release the inode ASAP.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 25442b4..b446fdd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2334,6 +2334,28 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 }
 
 /*
+ * Invalidate unlinked inode's aliases, so we can drop the inode
+ * from the cache ASAP.
+ */
+static void invalidate_aliases(struct inode *inode)
+{
+	struct dentry *dn;
+
+	dout("invalidate_aliases inode %p\n", inode);
+	d_prune_aliases(inode);
+	while ((dn = d_find_alias(inode))) {
+		d_delete(dn);
+		dput(dn);
+		/*
+		 * for dir inode, d_find_alias() can return
+		 * disconnected dentry
+		 */
+		if (S_ISDIR(inode->i_mode))
+			break;
+	}
+}
+
+/*
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
  *
@@ -2363,6 +2385,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	int writeback = 0;
 	int revoked_rdcache = 0;
 	int queue_invalidate = 0;
+	int deleted_inode = 0;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2407,8 +2430,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
+		if (inode->i_nlink == 0 &&
+		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+			deleted_inode = 1;
+	}
 
 	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
 		int len = le32_to_cpu(grant->xattr_len);
@@ -2517,6 +2544,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		ceph_queue_writeback(inode);
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
+	if (deleted_inode)
+		invalidate_aliases(inode);
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 1/2] mds: notify clients about deleted inode
  2013-07-21  2:21 [PATCH] ceph: trim deleted inode Yan, Zheng
@ 2013-07-21  2:21 ` Yan, Zheng
  2013-07-21  2:21 ` [PATCH 2/2] client: trim " Yan, Zheng
  2013-07-23  1:41 ` [PATCH] ceph: " Sage Weil
  2 siblings, 0 replies; 9+ messages in thread
From: Yan, Zheng @ 2013-07-21  2:21 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

To make sure clients trim the deleted inode from the their cache
ASAP. After all clients release the inode, we can reclaim space.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 src/mds/Locker.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index 30e014a..7bbb749 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -1776,6 +1776,10 @@ bool Locker::issue_caps(CInode *in, Capability *only_cap)
       continue;
     }
 
+    // notify clients about deleted inode, to make sure they release caps ASAP.
+    if (in->inode.nlink == 0)
+      wanted |= CEPH_CAP_LINK_SHARED;
+
     // are there caps that the client _wants_ and can have, but aren't pending?
     // or do we need to revoke?
     if (((wanted & allowed) & ~pending) ||  // missing wanted+allowed caps
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] client: trim deleted inode
  2013-07-21  2:21 [PATCH] ceph: trim deleted inode Yan, Zheng
  2013-07-21  2:21 ` [PATCH 1/2] mds: notify clients about " Yan, Zheng
@ 2013-07-21  2:21 ` Yan, Zheng
  2013-08-23 20:19   ` Gregory Farnum
  2013-07-23  1:41 ` [PATCH] ceph: " Sage Weil
  2 siblings, 1 reply; 9+ messages in thread
From: Yan, Zheng @ 2013-07-21  2:21 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

previous patch makes MDS send notification to clients when an inode
is deleted. When receiving a such notification, we invalidate any
dentry link to the deleted inode. If there is no other reference to
the inode, the inode gets trimmed.

For cephfs fuse client, we use fuse_lowlevel_notify_inval_entry() or
fuse_lowlevel_notify_delete() to notify the kernel to trim the deleted
inode. (this is not completely reliable because we play unlink/link
tricks when  handle MDS replies. it's difficult to keep the user space
cache and kernel dcache in sync)

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 src/client/Client.cc  | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/client/Client.h   | 14 +++++++++++
 src/client/fuse_ll.cc | 19 ++++++++++++--
 3 files changed, 99 insertions(+), 3 deletions(-)

diff --git a/src/client/Client.cc b/src/client/Client.cc
index ae7ddf6..f9c4f2b 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -141,9 +141,12 @@ Client::Client(Messenger *m, MonClient *mc)
     timer(m->cct, client_lock),
     ino_invalidate_cb(NULL),
     ino_invalidate_cb_handle(NULL),
+    dentry_invalidate_cb(NULL),
+    dentry_invalidate_cb_handle(NULL),
     getgroups_cb(NULL),
     getgroups_cb_handle(NULL),
     async_ino_invalidator(m->cct),
+    async_dentry_invalidator(m->cct),
     tick_event(NULL),
     monclient(mc), messenger(m), whoami(m->get_myname().num()),
     initialized(false), mounted(false), unmounting(false),
@@ -403,11 +406,17 @@ void Client::shutdown()
   admin_socket->unregister_command("dump_cache");
 
   if (ino_invalidate_cb) {
-    ldout(cct, 10) << "shutdown stopping invalidator finisher" << dendl;
+    ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
     async_ino_invalidator.wait_for_empty();
     async_ino_invalidator.stop();
   }
 
+  if (dentry_invalidate_cb) {
+    ldout(cct, 10) << "shutdown stopping dentry invalidator finisher" << dendl;
+    async_dentry_invalidator.wait_for_empty();
+    async_dentry_invalidator.stop();
+  }
+
   objectcacher->stop();  // outside of client_lock! this does a join.
 
   client_lock.Lock();
@@ -3526,6 +3535,45 @@ void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, MClientCa
   m->put();
 }
 
+class C_Client_DentryInvalidate : public Context  {
+private:
+  Client *client;
+  vinodeno_t dirino;
+  vinodeno_t ino;
+  string name;
+public:
+  C_Client_DentryInvalidate(Client *c, Dentry *dn) :
+			    client(c), dirino(dn->dir->parent_inode->vino()),
+			    ino(dn->inode->vino()), name(dn->name) { }
+  void finish(int r) {
+    client->_async_dentry_invalidate(dirino, ino, name);
+  }
+};
+
+void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name)
+{
+  ldout(cct, 10) << "_async_dentry_invalidate '" << name << "' ino " << ino
+		 << " in dir " << dirino << dendl;
+  dentry_invalidate_cb(dentry_invalidate_cb_handle, dirino, ino, name);
+}
+
+void Client::_schedule_invalidate_dentry_callback(Dentry *dn)
+{
+  if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
+    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn));
+}
+
+void Client::_invalidate_inode_parents(Inode *in)
+{
+  set<Dentry*>::iterator q = in->dn_set.begin();
+  while (q != in->dn_set.end()) {
+    Dentry *dn = *q++;
+    // FIXME: we play lots of unlink/link tricks when handling MDS replies,
+    //        so in->dn_set doesn't always reflect the state of kernel's dcache.
+    _schedule_invalidate_dentry_callback(dn);
+    unlink(dn, false);
+  }
+}
 
 void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m)
 {
@@ -3553,8 +3601,12 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
     in->uid = m->head.uid;
     in->gid = m->head.gid;
   }
+  bool deleted_inode = false;
   if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
     in->nlink = m->head.nlink;
+    if (in->nlink == 0 &&
+	(new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+      deleted_inode = true;
   }
   if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
       m->xattrbl.length() &&
@@ -3608,6 +3660,10 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
   if (new_caps)
     signal_cond_list(in->waitfor_caps);
 
+  // may drop inode's last ref
+  if (deleted_inode)
+    _invalidate_inode_parents(in);
+
   m->put();
 }
 
@@ -6294,6 +6350,17 @@ void Client::ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handl
   async_ino_invalidator.start();
 }
 
+void Client::ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle)
+{
+  Mutex::Locker l(client_lock);
+  ldout(cct, 10) << "ll_register_dentry_invalidate_cb cb " << (void*)cb << " p " << (void*)handle << dendl;
+  if (cb == NULL)
+    return;
+  dentry_invalidate_cb = cb;
+  dentry_invalidate_cb_handle = handle;
+  async_dentry_invalidator.start();
+}
+
 void Client::ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle)
 {
   Mutex::Locker l(client_lock);
diff --git a/src/client/Client.h b/src/client/Client.h
index 96e8937..9579711 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -119,6 +119,9 @@ class MetaRequest;
 
 typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len);
 
+typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino,
+					 vinodeno_t ino, string& name);
+
 typedef int (*client_getgroups_callback_t)(void *handle, uid_t uid, gid_t **sgids);
 
 // ========================================================
@@ -209,10 +212,14 @@ class Client : public Dispatcher {
   client_ino_callback_t ino_invalidate_cb;
   void *ino_invalidate_cb_handle;
 
+  client_dentry_callback_t dentry_invalidate_cb;
+  void *dentry_invalidate_cb_handle;
+
   client_getgroups_callback_t getgroups_cb;
   void *getgroups_cb_handle;
 
   Finisher async_ino_invalidator;
+  Finisher async_dentry_invalidator;
 
   Context *tick_event;
   utime_t last_cap_renew;
@@ -352,6 +359,7 @@ protected:
 
   friend class C_Client_PutInode; // calls put_inode()
   friend class C_Client_CacheInvalidate;  // calls ino_invalidate_cb
+  friend class C_Client_DentryInvalidate;  // calls dentry_invalidate_cb
 
   //int get_cache_size() { return lru.lru_get_size(); }
   //void set_cache_size(int m) { lru.lru_set_max(m); }
@@ -454,6 +462,10 @@ protected:
   void finish_cap_snap(Inode *in, CapSnap *capsnap, int used);
   void _flushed_cap_snap(Inode *in, snapid_t seq);
 
+  void _schedule_invalidate_dentry_callback(Dentry *dn);
+  void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
+  void _invalidate_inode_parents(Inode *in);
+
   void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps);
   void _invalidate_inode_cache(Inode *in, bool keep_caps);
   void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps);
@@ -727,6 +739,8 @@ public:
 
   void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle);
 
+  void ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle);
+
   void ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle);
 };
 
diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
index 8339553..82761b9 100644
--- a/src/client/fuse_ll.cc
+++ b/src/client/fuse_ll.cc
@@ -534,7 +534,7 @@ static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
   return 0;
 }
 
-static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
+static void ino_invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
 {
   CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
   fuse_ino_t fino = cfuse->make_fake_ino(vino.ino, vino.snapid);
@@ -543,6 +543,19 @@ static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t le
 #endif
 }
 
+static void dentry_invalidate_cb(void *handle, vinodeno_t dirino,
+				 vinodeno_t ino, string& name)
+{
+  CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
+  fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid);
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
+  fuse_ino_t fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
+  fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name.c_str(), name.length());
+#elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
+  fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name.c_str(), name.length());
+#endif
+}
+
 static void do_init(void *data, fuse_conn_info *bar)
 {
   CephFuse::Handle *cfuse = (CephFuse::Handle *)data;
@@ -703,8 +716,10 @@ int CephFuse::Handle::init(int argc, const char *argv[])
 
   client->ll_register_getgroups_cb(getgroups_cb, this);
 
+  client->ll_register_dentry_invalidate_cb(dentry_invalidate_cb, this);
+
   if (g_conf->fuse_use_invalidate_cb)
-    client->ll_register_ino_invalidate_cb(invalidate_cb, this);
+    client->ll_register_ino_invalidate_cb(ino_invalidate_cb, this);
 
 done:
   fuse_opt_free_args(&args);
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] ceph: trim deleted inode
  2013-07-21  2:21 [PATCH] ceph: trim deleted inode Yan, Zheng
  2013-07-21  2:21 ` [PATCH 1/2] mds: notify clients about " Yan, Zheng
  2013-07-21  2:21 ` [PATCH 2/2] client: trim " Yan, Zheng
@ 2013-07-23  1:41 ` Sage Weil
  2013-07-23  2:01   ` Yan, Zheng
  2 siblings, 1 reply; 9+ messages in thread
From: Sage Weil @ 2013-07-23  1:41 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel

On Sun, 21 Jul 2013, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> The MDS uses caps message to notify clients about deleted inode.
> when receiving a such message, invalidate any alias of the inode.
> This makes the kernel release the inode ASAP.
> 
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  fs/ceph/caps.c | 31 ++++++++++++++++++++++++++++++-
>  1 file changed, 30 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 25442b4..b446fdd 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2334,6 +2334,28 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>  }
>  
>  /*
> + * Invalidate unlinked inode's aliases, so we can drop the inode
> + * from the cache ASAP.
> + */
> +static void invalidate_aliases(struct inode *inode)
> +{
> +	struct dentry *dn;
> +
> +	dout("invalidate_aliases inode %p\n", inode);
> +	d_prune_aliases(inode);
> +	while ((dn = d_find_alias(inode))) {
> +		d_delete(dn);
> +		dput(dn);

I don't think this loop is safe.  d_delete() won't unlink the inode if 
there are other refs to the dentry, which would make this get stuck in a 
loop.  Maybe simple checking if we get the same dentry back from 
d_find_alias() as the previous call?

> +		/*
> +		 * for dir inode, d_find_alias() can return
> +		 * disconnected dentry
> +		 */
> +		if (S_ISDIR(inode->i_mode))
> +			break;
> +	}

If we handle the more general case, I'm not sure we need any special case 
here for the directory... although I confess I'm not sure why disconnected 
dentries should be treated specially at all.

> +}
> +
> +/*
>   * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
>   * actually be a revocation if it specifies a smaller cap set.)
>   *
> @@ -2363,6 +2385,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
>  	int writeback = 0;
>  	int revoked_rdcache = 0;
>  	int queue_invalidate = 0;
> +	int deleted_inode = 0;
>  
>  	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
>  	     inode, cap, mds, seq, ceph_cap_string(newcaps));
> @@ -2407,8 +2430,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
>  		     from_kgid(&init_user_ns, inode->i_gid));
>  	}
>  
> -	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
> +	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
>  		set_nlink(inode, le32_to_cpu(grant->nlink));
> +		if (inode->i_nlink == 0 &&
> +		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
> +			deleted_inode = 1;
> +	}
>  
>  	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
>  		int len = le32_to_cpu(grant->xattr_len);
> @@ -2517,6 +2544,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
>  		ceph_queue_writeback(inode);
>  	if (queue_invalidate)
>  		ceph_queue_invalidate(inode);
> +	if (deleted_inode)
> +		invalidate_aliases(inode);
>  	if (wake)
>  		wake_up_all(&ci->i_cap_wq);

This looks good, though.

Thanks!
sage


>  
> -- 
> 1.8.1.4
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] ceph: trim deleted inode
  2013-07-23  1:41 ` [PATCH] ceph: " Sage Weil
@ 2013-07-23  2:01   ` Yan, Zheng
  2013-07-23  5:25     ` Sage Weil
  0 siblings, 1 reply; 9+ messages in thread
From: Yan, Zheng @ 2013-07-23  2:01 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel

On 07/23/2013 09:41 AM, Sage Weil wrote:
> On Sun, 21 Jul 2013, Yan, Zheng wrote:
>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>
>> The MDS uses caps message to notify clients about deleted inode.
>> when receiving a such message, invalidate any alias of the inode.
>> This makes the kernel release the inode ASAP.
>>
>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
>> ---
>>  fs/ceph/caps.c | 31 ++++++++++++++++++++++++++++++-
>>  1 file changed, 30 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 25442b4..b446fdd 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -2334,6 +2334,28 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>>  }
>>  
>>  /*
>> + * Invalidate unlinked inode's aliases, so we can drop the inode
>> + * from the cache ASAP.
>> + */
>> +static void invalidate_aliases(struct inode *inode)
>> +{
>> +	struct dentry *dn;
>> +
>> +	dout("invalidate_aliases inode %p\n", inode);
>> +	d_prune_aliases(inode);
>> +	while ((dn = d_find_alias(inode))) {
>> +		d_delete(dn);
>> +		dput(dn);
> 
> I don't think this loop is safe.  d_delete() won't unlink the inode if 
> there are other refs to the dentry, which would make this get stuck in a 
> loop.  Maybe simple checking if we get the same dentry back from 
> d_find_alias() as the previous call?
> 

For non-dir inode, d_find_alias() only return connected dentry. After calling
d_delete, the dentry become disconnected. so the loop is safe.

>> +		/*
>> +		 * for dir inode, d_find_alias() can return
>> +		 * disconnected dentry
>> +		 */
>> +		if (S_ISDIR(inode->i_mode))
>> +			break;
>> +	}
> 
> If we handle the more general case, I'm not sure we need any special case 
> here for the directory... although I confess I'm not sure why disconnected 
> dentries should be treated specially at all.
> 

For dir inode, d_find_alias() may disconnected dentry, that's why the special case
is needed. how about below code.

static void invalidate_aliases(struct inode *inode)
{
	struct dentry *dn, prev = NULL;

	dout("invalidate_aliases inode %p\n", inode);
	d_prune_aliases(inode);
	while ((dn = d_find_alias(inode))) {
		if (dn == prev) {
			dput(dn);
			break;
		}
		d_delete(dn);
		if (prev)
			dput(prev);
		prev = dn;
	}
	if (prev)
		dput(prev);
}



>> +}
>> +
>> +/*
>>   * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
>>   * actually be a revocation if it specifies a smaller cap set.)
>>   *
>> @@ -2363,6 +2385,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
>>  	int writeback = 0;
>>  	int revoked_rdcache = 0;
>>  	int queue_invalidate = 0;
>> +	int deleted_inode = 0;
>>  
>>  	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
>>  	     inode, cap, mds, seq, ceph_cap_string(newcaps));
>> @@ -2407,8 +2430,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
>>  		     from_kgid(&init_user_ns, inode->i_gid));
>>  	}
>>  
>> -	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
>> +	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
>>  		set_nlink(inode, le32_to_cpu(grant->nlink));
>> +		if (inode->i_nlink == 0 &&
>> +		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
>> +			deleted_inode = 1;
>> +	}
>>  
>>  	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
>>  		int len = le32_to_cpu(grant->xattr_len);
>> @@ -2517,6 +2544,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
>>  		ceph_queue_writeback(inode);
>>  	if (queue_invalidate)
>>  		ceph_queue_invalidate(inode);
>> +	if (deleted_inode)
>> +		invalidate_aliases(inode);
>>  	if (wake)
>>  		wake_up_all(&ci->i_cap_wq);
> 
> This looks good, though.
> 
> Thanks!
> sage
> 
> 
>>  
>> -- 
>> 1.8.1.4
>>
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] ceph: trim deleted inode
  2013-07-23  2:01   ` Yan, Zheng
@ 2013-07-23  5:25     ` Sage Weil
  2013-07-23  5:33       ` Yan, Zheng
  0 siblings, 1 reply; 9+ messages in thread
From: Sage Weil @ 2013-07-23  5:25 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel

On Tue, 23 Jul 2013, Yan, Zheng wrote:
> On 07/23/2013 09:41 AM, Sage Weil wrote:
> > On Sun, 21 Jul 2013, Yan, Zheng wrote:
> >> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> >>
> >> The MDS uses caps message to notify clients about deleted inode.
> >> when receiving a such message, invalidate any alias of the inode.
> >> This makes the kernel release the inode ASAP.
> >>
> >> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> >> ---
> >>  fs/ceph/caps.c | 31 ++++++++++++++++++++++++++++++-
> >>  1 file changed, 30 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >> index 25442b4..b446fdd 100644
> >> --- a/fs/ceph/caps.c
> >> +++ b/fs/ceph/caps.c
> >> @@ -2334,6 +2334,28 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
> >>  }
> >>  
> >>  /*
> >> + * Invalidate unlinked inode's aliases, so we can drop the inode
> >> + * from the cache ASAP.
> >> + */
> >> +static void invalidate_aliases(struct inode *inode)
> >> +{
> >> +	struct dentry *dn;
> >> +
> >> +	dout("invalidate_aliases inode %p\n", inode);
> >> +	d_prune_aliases(inode);
> >> +	while ((dn = d_find_alias(inode))) {
> >> +		d_delete(dn);
> >> +		dput(dn);
> > 
> > I don't think this loop is safe.  d_delete() won't unlink the inode if 
> > there are other refs to the dentry, which would make this get stuck in a 
> > loop.  Maybe simple checking if we get the same dentry back from 
> > d_find_alias() as the previous call?
> > 
> 
> For non-dir inode, d_find_alias() only return connected dentry. After calling
> d_delete, the dentry become disconnected. so the loop is safe.

Oh, right.  Totally misread that.

> >> +		/*
> >> +		 * for dir inode, d_find_alias() can return
> >> +		 * disconnected dentry
> >> +		 */
> >> +		if (S_ISDIR(inode->i_mode))
> >> +			break;
> >> +	}
> > 
> > If we handle the more general case, I'm not sure we need any special case 
> > here for the directory... although I confess I'm not sure why disconnected 
> > dentries should be treated specially at all.
> > 
> 
> For dir inode, d_find_alias() may disconnected dentry, that's why the special case
> is needed. how about below code.

Right.  I think either variation is okay.  I'll pull in the original for 
now, unless you prefer the below.  Sorry for the runaround!

sage

> 
> static void invalidate_aliases(struct inode *inode)
> {
> 	struct dentry *dn, prev = NULL;
> 
> 	dout("invalidate_aliases inode %p\n", inode);
> 	d_prune_aliases(inode);
> 	while ((dn = d_find_alias(inode))) {
> 		if (dn == prev) {
> 			dput(dn);
> 			break;
> 		}
> 		d_delete(dn);
> 		if (prev)
> 			dput(prev);
> 		prev = dn;
> 	}
> 	if (prev)
> 		dput(prev);
> }
> 
> 
> 
> >> +}
> >> +
> >> +/*
> >>   * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
> >>   * actually be a revocation if it specifies a smaller cap set.)
> >>   *
> >> @@ -2363,6 +2385,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> >>  	int writeback = 0;
> >>  	int revoked_rdcache = 0;
> >>  	int queue_invalidate = 0;
> >> +	int deleted_inode = 0;
> >>  
> >>  	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
> >>  	     inode, cap, mds, seq, ceph_cap_string(newcaps));
> >> @@ -2407,8 +2430,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> >>  		     from_kgid(&init_user_ns, inode->i_gid));
> >>  	}
> >>  
> >> -	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
> >> +	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
> >>  		set_nlink(inode, le32_to_cpu(grant->nlink));
> >> +		if (inode->i_nlink == 0 &&
> >> +		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
> >> +			deleted_inode = 1;
> >> +	}
> >>  
> >>  	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
> >>  		int len = le32_to_cpu(grant->xattr_len);
> >> @@ -2517,6 +2544,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> >>  		ceph_queue_writeback(inode);
> >>  	if (queue_invalidate)
> >>  		ceph_queue_invalidate(inode);
> >> +	if (deleted_inode)
> >> +		invalidate_aliases(inode);
> >>  	if (wake)
> >>  		wake_up_all(&ci->i_cap_wq);
> > 
> > This looks good, though.
> > 
> > Thanks!
> > sage
> > 
> > 
> >>  
> >> -- 
> >> 1.8.1.4
> >>
> >>
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] ceph: trim deleted inode
  2013-07-23  5:25     ` Sage Weil
@ 2013-07-23  5:33       ` Yan, Zheng
  0 siblings, 0 replies; 9+ messages in thread
From: Yan, Zheng @ 2013-07-23  5:33 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel

[-- Attachment #1: Type: text/plain, Size: 2419 bytes --]

On 07/23/2013 01:25 PM, Sage Weil wrote:
> On Tue, 23 Jul 2013, Yan, Zheng wrote:
>> On 07/23/2013 09:41 AM, Sage Weil wrote:
>>> On Sun, 21 Jul 2013, Yan, Zheng wrote:
>>>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>>>
>>>> The MDS uses caps message to notify clients about deleted inode.
>>>> when receiving a such message, invalidate any alias of the inode.
>>>> This makes the kernel release the inode ASAP.
>>>>
>>>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
>>>> ---
>>>>  fs/ceph/caps.c | 31 ++++++++++++++++++++++++++++++-
>>>>  1 file changed, 30 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index 25442b4..b446fdd 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -2334,6 +2334,28 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>>>>  }
>>>>  
>>>>  /*
>>>> + * Invalidate unlinked inode's aliases, so we can drop the inode
>>>> + * from the cache ASAP.
>>>> + */
>>>> +static void invalidate_aliases(struct inode *inode)
>>>> +{
>>>> +	struct dentry *dn;
>>>> +
>>>> +	dout("invalidate_aliases inode %p\n", inode);
>>>> +	d_prune_aliases(inode);
>>>> +	while ((dn = d_find_alias(inode))) {
>>>> +		d_delete(dn);
>>>> +		dput(dn);
>>>
>>> I don't think this loop is safe.  d_delete() won't unlink the inode if 
>>> there are other refs to the dentry, which would make this get stuck in a 
>>> loop.  Maybe simple checking if we get the same dentry back from 
>>> d_find_alias() as the previous call?
>>>
>>
>> For non-dir inode, d_find_alias() only return connected dentry. After calling
>> d_delete, the dentry become disconnected. so the loop is safe.
> 
> Oh, right.  Totally misread that.
> 
>>>> +		/*
>>>> +		 * for dir inode, d_find_alias() can return
>>>> +		 * disconnected dentry
>>>> +		 */
>>>> +		if (S_ISDIR(inode->i_mode))
>>>> +			break;
>>>> +	}
>>>
>>> If we handle the more general case, I'm not sure we need any special case 
>>> here for the directory... although I confess I'm not sure why disconnected 
>>> dentries should be treated specially at all.
>>>
>>
>> For dir inode, d_find_alias() may disconnected dentry, that's why the special case
>> is needed. how about below code.
> 
> Right.  I think either variation is okay.  I'll pull in the original for 
> now, unless you prefer the below.  Sorry for the runaround!
> 
> sage
> 

please apply the attached version, thanks.

Yan, Zheng


[-- Attachment #2: 0001-ceph-trim-deleted-inode.patch --]
[-- Type: text/x-patch, Size: 2771 bytes --]

From 04e85337b3384e7be52a0aefb6ae63491986be7d Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Sun, 21 Jul 2013 10:07:51 +0800
Subject: [PATCH] ceph: trim deleted inode

The MDS uses caps message to notify clients about deleted inode.
when receiving a such message, invalidate any alias of the inode.
This makes the kernel release the inode ASAP.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 25442b4..430121a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2334,6 +2334,38 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 }
 
 /*
+ * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
+ */
+static void invalidate_aliases(struct inode *inode)
+{
+	struct dentry *dn, *prev = NULL;
+
+	dout("invalidate_aliases inode %p\n", inode);
+	d_prune_aliases(inode);
+	/*
+	 * For non-directory inode, d_find_alias() only returns
+	 * connected dentry. After calling d_delete(), the dentry
+	 * become disconnected.
+	 *
+	 * For directory inode, d_find_alias() only can return
+	 * disconnected dentry. But directory inode should have
+	 * one alias at most.
+	 */
+	while ((dn = d_find_alias(inode))) {
+		if (dn == prev) {
+			dput(dn);
+			break;
+		}
+		d_delete(dn);
+		if (prev)
+			dput(prev);
+		prev = dn;
+	}
+	if (prev)
+		dput(prev);
+}
+
+/*
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
  *
@@ -2363,6 +2395,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	int writeback = 0;
 	int revoked_rdcache = 0;
 	int queue_invalidate = 0;
+	int deleted_inode = 0;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2407,8 +2440,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
+		if (inode->i_nlink == 0 &&
+		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+			deleted_inode = 1;
+	}
 
 	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
 		int len = le32_to_cpu(grant->xattr_len);
@@ -2517,6 +2554,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		ceph_queue_writeback(inode);
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
+	if (deleted_inode)
+		invalidate_aliases(inode);
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] client: trim deleted inode
  2013-07-21  2:21 ` [PATCH 2/2] client: trim " Yan, Zheng
@ 2013-08-23 20:19   ` Gregory Farnum
  2013-08-23 20:36     ` Sage Weil
  0 siblings, 1 reply; 9+ messages in thread
From: Gregory Farnum @ 2013-08-23 20:19 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, Sage Weil

Looks like this patch hasn't been merged in yet, although its partner
to make the MDS notify about deleted inodes was. Any particular
reason, or just still waiting for review? :)
-Greg
Software Engineer #42 @ http://inktank.com | http://ceph.com


On Sat, Jul 20, 2013 at 7:21 PM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> previous patch makes MDS send notification to clients when an inode
> is deleted. When receiving a such notification, we invalidate any
> dentry link to the deleted inode. If there is no other reference to
> the inode, the inode gets trimmed.
>
> For cephfs fuse client, we use fuse_lowlevel_notify_inval_entry() or
> fuse_lowlevel_notify_delete() to notify the kernel to trim the deleted
> inode. (this is not completely reliable because we play unlink/link
> tricks when  handle MDS replies. it's difficult to keep the user space
> cache and kernel dcache in sync)
>
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  src/client/Client.cc  | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  src/client/Client.h   | 14 +++++++++++
>  src/client/fuse_ll.cc | 19 ++++++++++++--
>  3 files changed, 99 insertions(+), 3 deletions(-)
>
> diff --git a/src/client/Client.cc b/src/client/Client.cc
> index ae7ddf6..f9c4f2b 100644
> --- a/src/client/Client.cc
> +++ b/src/client/Client.cc
> @@ -141,9 +141,12 @@ Client::Client(Messenger *m, MonClient *mc)
>      timer(m->cct, client_lock),
>      ino_invalidate_cb(NULL),
>      ino_invalidate_cb_handle(NULL),
> +    dentry_invalidate_cb(NULL),
> +    dentry_invalidate_cb_handle(NULL),
>      getgroups_cb(NULL),
>      getgroups_cb_handle(NULL),
>      async_ino_invalidator(m->cct),
> +    async_dentry_invalidator(m->cct),
>      tick_event(NULL),
>      monclient(mc), messenger(m), whoami(m->get_myname().num()),
>      initialized(false), mounted(false), unmounting(false),
> @@ -403,11 +406,17 @@ void Client::shutdown()
>    admin_socket->unregister_command("dump_cache");
>
>    if (ino_invalidate_cb) {
> -    ldout(cct, 10) << "shutdown stopping invalidator finisher" << dendl;
> +    ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
>      async_ino_invalidator.wait_for_empty();
>      async_ino_invalidator.stop();
>    }
>
> +  if (dentry_invalidate_cb) {
> +    ldout(cct, 10) << "shutdown stopping dentry invalidator finisher" << dendl;
> +    async_dentry_invalidator.wait_for_empty();
> +    async_dentry_invalidator.stop();
> +  }
> +
>    objectcacher->stop();  // outside of client_lock! this does a join.
>
>    client_lock.Lock();
> @@ -3526,6 +3535,45 @@ void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, MClientCa
>    m->put();
>  }
>
> +class C_Client_DentryInvalidate : public Context  {
> +private:
> +  Client *client;
> +  vinodeno_t dirino;
> +  vinodeno_t ino;
> +  string name;
> +public:
> +  C_Client_DentryInvalidate(Client *c, Dentry *dn) :
> +                           client(c), dirino(dn->dir->parent_inode->vino()),
> +                           ino(dn->inode->vino()), name(dn->name) { }
> +  void finish(int r) {
> +    client->_async_dentry_invalidate(dirino, ino, name);
> +  }
> +};
> +
> +void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name)
> +{
> +  ldout(cct, 10) << "_async_dentry_invalidate '" << name << "' ino " << ino
> +                << " in dir " << dirino << dendl;
> +  dentry_invalidate_cb(dentry_invalidate_cb_handle, dirino, ino, name);
> +}
> +
> +void Client::_schedule_invalidate_dentry_callback(Dentry *dn)
> +{
> +  if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
> +    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn));
> +}
> +
> +void Client::_invalidate_inode_parents(Inode *in)
> +{
> +  set<Dentry*>::iterator q = in->dn_set.begin();
> +  while (q != in->dn_set.end()) {
> +    Dentry *dn = *q++;
> +    // FIXME: we play lots of unlink/link tricks when handling MDS replies,
> +    //        so in->dn_set doesn't always reflect the state of kernel's dcache.
> +    _schedule_invalidate_dentry_callback(dn);
> +    unlink(dn, false);
> +  }
> +}
>
>  void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m)
>  {
> @@ -3553,8 +3601,12 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
>      in->uid = m->head.uid;
>      in->gid = m->head.gid;
>    }
> +  bool deleted_inode = false;
>    if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
>      in->nlink = m->head.nlink;
> +    if (in->nlink == 0 &&
> +       (new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
> +      deleted_inode = true;
>    }
>    if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
>        m->xattrbl.length() &&
> @@ -3608,6 +3660,10 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
>    if (new_caps)
>      signal_cond_list(in->waitfor_caps);
>
> +  // may drop inode's last ref
> +  if (deleted_inode)
> +    _invalidate_inode_parents(in);
> +
>    m->put();
>  }
>
> @@ -6294,6 +6350,17 @@ void Client::ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handl
>    async_ino_invalidator.start();
>  }
>
> +void Client::ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle)
> +{
> +  Mutex::Locker l(client_lock);
> +  ldout(cct, 10) << "ll_register_dentry_invalidate_cb cb " << (void*)cb << " p " << (void*)handle << dendl;
> +  if (cb == NULL)
> +    return;
> +  dentry_invalidate_cb = cb;
> +  dentry_invalidate_cb_handle = handle;
> +  async_dentry_invalidator.start();
> +}
> +
>  void Client::ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle)
>  {
>    Mutex::Locker l(client_lock);
> diff --git a/src/client/Client.h b/src/client/Client.h
> index 96e8937..9579711 100644
> --- a/src/client/Client.h
> +++ b/src/client/Client.h
> @@ -119,6 +119,9 @@ class MetaRequest;
>
>  typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len);
>
> +typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino,
> +                                        vinodeno_t ino, string& name);
> +
>  typedef int (*client_getgroups_callback_t)(void *handle, uid_t uid, gid_t **sgids);
>
>  // ========================================================
> @@ -209,10 +212,14 @@ class Client : public Dispatcher {
>    client_ino_callback_t ino_invalidate_cb;
>    void *ino_invalidate_cb_handle;
>
> +  client_dentry_callback_t dentry_invalidate_cb;
> +  void *dentry_invalidate_cb_handle;
> +
>    client_getgroups_callback_t getgroups_cb;
>    void *getgroups_cb_handle;
>
>    Finisher async_ino_invalidator;
> +  Finisher async_dentry_invalidator;
>
>    Context *tick_event;
>    utime_t last_cap_renew;
> @@ -352,6 +359,7 @@ protected:
>
>    friend class C_Client_PutInode; // calls put_inode()
>    friend class C_Client_CacheInvalidate;  // calls ino_invalidate_cb
> +  friend class C_Client_DentryInvalidate;  // calls dentry_invalidate_cb
>
>    //int get_cache_size() { return lru.lru_get_size(); }
>    //void set_cache_size(int m) { lru.lru_set_max(m); }
> @@ -454,6 +462,10 @@ protected:
>    void finish_cap_snap(Inode *in, CapSnap *capsnap, int used);
>    void _flushed_cap_snap(Inode *in, snapid_t seq);
>
> +  void _schedule_invalidate_dentry_callback(Dentry *dn);
> +  void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
> +  void _invalidate_inode_parents(Inode *in);
> +
>    void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps);
>    void _invalidate_inode_cache(Inode *in, bool keep_caps);
>    void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps);
> @@ -727,6 +739,8 @@ public:
>
>    void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle);
>
> +  void ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle);
> +
>    void ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle);
>  };
>
> diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
> index 8339553..82761b9 100644
> --- a/src/client/fuse_ll.cc
> +++ b/src/client/fuse_ll.cc
> @@ -534,7 +534,7 @@ static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
>    return 0;
>  }
>
> -static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
> +static void ino_invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
>  {
>    CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
>    fuse_ino_t fino = cfuse->make_fake_ino(vino.ino, vino.snapid);
> @@ -543,6 +543,19 @@ static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t le
>  #endif
>  }
>
> +static void dentry_invalidate_cb(void *handle, vinodeno_t dirino,
> +                                vinodeno_t ino, string& name)
> +{
> +  CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
> +  fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid);
> +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
> +  fuse_ino_t fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
> +  fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name.c_str(), name.length());
> +#elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
> +  fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name.c_str(), name.length());
> +#endif
> +}
> +
>  static void do_init(void *data, fuse_conn_info *bar)
>  {
>    CephFuse::Handle *cfuse = (CephFuse::Handle *)data;
> @@ -703,8 +716,10 @@ int CephFuse::Handle::init(int argc, const char *argv[])
>
>    client->ll_register_getgroups_cb(getgroups_cb, this);
>
> +  client->ll_register_dentry_invalidate_cb(dentry_invalidate_cb, this);
> +
>    if (g_conf->fuse_use_invalidate_cb)
> -    client->ll_register_ino_invalidate_cb(invalidate_cb, this);
> +    client->ll_register_ino_invalidate_cb(ino_invalidate_cb, this);
>
>  done:
>    fuse_opt_free_args(&args);
> --
> 1.8.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] client: trim deleted inode
  2013-08-23 20:19   ` Gregory Farnum
@ 2013-08-23 20:36     ` Sage Weil
  0 siblings, 0 replies; 9+ messages in thread
From: Sage Weil @ 2013-08-23 20:36 UTC (permalink / raw)
  To: Gregory Farnum; +Cc: Yan, Zheng, ceph-devel

On Fri, 23 Aug 2013, Gregory Farnum wrote:
> Looks like this patch hasn't been merged in yet, although its partner
> to make the MDS notify about deleted inodes was. Any particular
> reason, or just still waiting for review? :)

I got as far as pushing it to wip-fuse but didn't run any tests.

sage


> -Greg
> Software Engineer #42 @ http://inktank.com | http://ceph.com
> 
> 
> On Sat, Jul 20, 2013 at 7:21 PM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
> > From: "Yan, Zheng" <zheng.z.yan@intel.com>
> >
> > previous patch makes MDS send notification to clients when an inode
> > is deleted. When receiving a such notification, we invalidate any
> > dentry link to the deleted inode. If there is no other reference to
> > the inode, the inode gets trimmed.
> >
> > For cephfs fuse client, we use fuse_lowlevel_notify_inval_entry() or
> > fuse_lowlevel_notify_delete() to notify the kernel to trim the deleted
> > inode. (this is not completely reliable because we play unlink/link
> > tricks when  handle MDS replies. it's difficult to keep the user space
> > cache and kernel dcache in sync)
> >
> > Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> > ---
> >  src/client/Client.cc  | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  src/client/Client.h   | 14 +++++++++++
> >  src/client/fuse_ll.cc | 19 ++++++++++++--
> >  3 files changed, 99 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/client/Client.cc b/src/client/Client.cc
> > index ae7ddf6..f9c4f2b 100644
> > --- a/src/client/Client.cc
> > +++ b/src/client/Client.cc
> > @@ -141,9 +141,12 @@ Client::Client(Messenger *m, MonClient *mc)
> >      timer(m->cct, client_lock),
> >      ino_invalidate_cb(NULL),
> >      ino_invalidate_cb_handle(NULL),
> > +    dentry_invalidate_cb(NULL),
> > +    dentry_invalidate_cb_handle(NULL),
> >      getgroups_cb(NULL),
> >      getgroups_cb_handle(NULL),
> >      async_ino_invalidator(m->cct),
> > +    async_dentry_invalidator(m->cct),
> >      tick_event(NULL),
> >      monclient(mc), messenger(m), whoami(m->get_myname().num()),
> >      initialized(false), mounted(false), unmounting(false),
> > @@ -403,11 +406,17 @@ void Client::shutdown()
> >    admin_socket->unregister_command("dump_cache");
> >
> >    if (ino_invalidate_cb) {
> > -    ldout(cct, 10) << "shutdown stopping invalidator finisher" << dendl;
> > +    ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
> >      async_ino_invalidator.wait_for_empty();
> >      async_ino_invalidator.stop();
> >    }
> >
> > +  if (dentry_invalidate_cb) {
> > +    ldout(cct, 10) << "shutdown stopping dentry invalidator finisher" << dendl;
> > +    async_dentry_invalidator.wait_for_empty();
> > +    async_dentry_invalidator.stop();
> > +  }
> > +
> >    objectcacher->stop();  // outside of client_lock! this does a join.
> >
> >    client_lock.Lock();
> > @@ -3526,6 +3535,45 @@ void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, MClientCa
> >    m->put();
> >  }
> >
> > +class C_Client_DentryInvalidate : public Context  {
> > +private:
> > +  Client *client;
> > +  vinodeno_t dirino;
> > +  vinodeno_t ino;
> > +  string name;
> > +public:
> > +  C_Client_DentryInvalidate(Client *c, Dentry *dn) :
> > +                           client(c), dirino(dn->dir->parent_inode->vino()),
> > +                           ino(dn->inode->vino()), name(dn->name) { }
> > +  void finish(int r) {
> > +    client->_async_dentry_invalidate(dirino, ino, name);
> > +  }
> > +};
> > +
> > +void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name)
> > +{
> > +  ldout(cct, 10) << "_async_dentry_invalidate '" << name << "' ino " << ino
> > +                << " in dir " << dirino << dendl;
> > +  dentry_invalidate_cb(dentry_invalidate_cb_handle, dirino, ino, name);
> > +}
> > +
> > +void Client::_schedule_invalidate_dentry_callback(Dentry *dn)
> > +{
> > +  if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
> > +    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn));
> > +}
> > +
> > +void Client::_invalidate_inode_parents(Inode *in)
> > +{
> > +  set<Dentry*>::iterator q = in->dn_set.begin();
> > +  while (q != in->dn_set.end()) {
> > +    Dentry *dn = *q++;
> > +    // FIXME: we play lots of unlink/link tricks when handling MDS replies,
> > +    //        so in->dn_set doesn't always reflect the state of kernel's dcache.
> > +    _schedule_invalidate_dentry_callback(dn);
> > +    unlink(dn, false);
> > +  }
> > +}
> >
> >  void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m)
> >  {
> > @@ -3553,8 +3601,12 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
> >      in->uid = m->head.uid;
> >      in->gid = m->head.gid;
> >    }
> > +  bool deleted_inode = false;
> >    if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
> >      in->nlink = m->head.nlink;
> > +    if (in->nlink == 0 &&
> > +       (new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
> > +      deleted_inode = true;
> >    }
> >    if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
> >        m->xattrbl.length() &&
> > @@ -3608,6 +3660,10 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
> >    if (new_caps)
> >      signal_cond_list(in->waitfor_caps);
> >
> > +  // may drop inode's last ref
> > +  if (deleted_inode)
> > +    _invalidate_inode_parents(in);
> > +
> >    m->put();
> >  }
> >
> > @@ -6294,6 +6350,17 @@ void Client::ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handl
> >    async_ino_invalidator.start();
> >  }
> >
> > +void Client::ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle)
> > +{
> > +  Mutex::Locker l(client_lock);
> > +  ldout(cct, 10) << "ll_register_dentry_invalidate_cb cb " << (void*)cb << " p " << (void*)handle << dendl;
> > +  if (cb == NULL)
> > +    return;
> > +  dentry_invalidate_cb = cb;
> > +  dentry_invalidate_cb_handle = handle;
> > +  async_dentry_invalidator.start();
> > +}
> > +
> >  void Client::ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle)
> >  {
> >    Mutex::Locker l(client_lock);
> > diff --git a/src/client/Client.h b/src/client/Client.h
> > index 96e8937..9579711 100644
> > --- a/src/client/Client.h
> > +++ b/src/client/Client.h
> > @@ -119,6 +119,9 @@ class MetaRequest;
> >
> >  typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len);
> >
> > +typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino,
> > +                                        vinodeno_t ino, string& name);
> > +
> >  typedef int (*client_getgroups_callback_t)(void *handle, uid_t uid, gid_t **sgids);
> >
> >  // ========================================================
> > @@ -209,10 +212,14 @@ class Client : public Dispatcher {
> >    client_ino_callback_t ino_invalidate_cb;
> >    void *ino_invalidate_cb_handle;
> >
> > +  client_dentry_callback_t dentry_invalidate_cb;
> > +  void *dentry_invalidate_cb_handle;
> > +
> >    client_getgroups_callback_t getgroups_cb;
> >    void *getgroups_cb_handle;
> >
> >    Finisher async_ino_invalidator;
> > +  Finisher async_dentry_invalidator;
> >
> >    Context *tick_event;
> >    utime_t last_cap_renew;
> > @@ -352,6 +359,7 @@ protected:
> >
> >    friend class C_Client_PutInode; // calls put_inode()
> >    friend class C_Client_CacheInvalidate;  // calls ino_invalidate_cb
> > +  friend class C_Client_DentryInvalidate;  // calls dentry_invalidate_cb
> >
> >    //int get_cache_size() { return lru.lru_get_size(); }
> >    //void set_cache_size(int m) { lru.lru_set_max(m); }
> > @@ -454,6 +462,10 @@ protected:
> >    void finish_cap_snap(Inode *in, CapSnap *capsnap, int used);
> >    void _flushed_cap_snap(Inode *in, snapid_t seq);
> >
> > +  void _schedule_invalidate_dentry_callback(Dentry *dn);
> > +  void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
> > +  void _invalidate_inode_parents(Inode *in);
> > +
> >    void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len, bool keep_caps);
> >    void _invalidate_inode_cache(Inode *in, bool keep_caps);
> >    void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps);
> > @@ -727,6 +739,8 @@ public:
> >
> >    void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle);
> >
> > +  void ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle);
> > +
> >    void ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle);
> >  };
> >
> > diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
> > index 8339553..82761b9 100644
> > --- a/src/client/fuse_ll.cc
> > +++ b/src/client/fuse_ll.cc
> > @@ -534,7 +534,7 @@ static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
> >    return 0;
> >  }
> >
> > -static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
> > +static void ino_invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
> >  {
> >    CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
> >    fuse_ino_t fino = cfuse->make_fake_ino(vino.ino, vino.snapid);
> > @@ -543,6 +543,19 @@ static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t le
> >  #endif
> >  }
> >
> > +static void dentry_invalidate_cb(void *handle, vinodeno_t dirino,
> > +                                vinodeno_t ino, string& name)
> > +{
> > +  CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
> > +  fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid);
> > +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
> > +  fuse_ino_t fino = cfuse->make_fake_ino(ino.ino, ino.snapid);
> > +  fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name.c_str(), name.length());
> > +#elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
> > +  fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name.c_str(), name.length());
> > +#endif
> > +}
> > +
> >  static void do_init(void *data, fuse_conn_info *bar)
> >  {
> >    CephFuse::Handle *cfuse = (CephFuse::Handle *)data;
> > @@ -703,8 +716,10 @@ int CephFuse::Handle::init(int argc, const char *argv[])
> >
> >    client->ll_register_getgroups_cb(getgroups_cb, this);
> >
> > +  client->ll_register_dentry_invalidate_cb(dentry_invalidate_cb, this);
> > +
> >    if (g_conf->fuse_use_invalidate_cb)
> > -    client->ll_register_ino_invalidate_cb(invalidate_cb, this);
> > +    client->ll_register_ino_invalidate_cb(ino_invalidate_cb, this);
> >
> >  done:
> >    fuse_opt_free_args(&args);
> > --
> > 1.8.1.4
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-08-23 20:36 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-21  2:21 [PATCH] ceph: trim deleted inode Yan, Zheng
2013-07-21  2:21 ` [PATCH 1/2] mds: notify clients about " Yan, Zheng
2013-07-21  2:21 ` [PATCH 2/2] client: trim " Yan, Zheng
2013-08-23 20:19   ` Gregory Farnum
2013-08-23 20:36     ` Sage Weil
2013-07-23  1:41 ` [PATCH] ceph: " Sage Weil
2013-07-23  2:01   ` Yan, Zheng
2013-07-23  5:25     ` Sage Weil
2013-07-23  5:33       ` Yan, Zheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.