All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn
@ 2015-02-05 22:37 Trond Myklebust
  2015-02-05 22:37 ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Trond Myklebust
  2015-02-06  1:45 ` [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Peng Tao
  0 siblings, 2 replies; 11+ messages in thread
From: Trond Myklebust @ 2015-02-05 22:37 UTC (permalink / raw)
  To: linux-nfs; +Cc: Peng Tao

If we have to do a return-on-close in the delegreturn code, then
we must ensure that the inode and super block remain referenced.

Cc: Peng Tao <tao.peng@primarydata.com>
Cc: stable@vger.kernel.org # 3.17.x
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cd4295d84d54..b803c1d363e7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5175,9 +5175,16 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 static void nfs4_delegreturn_release(void *calldata)
 {
 	struct nfs4_delegreturndata *data = calldata;
+	struct inode *inode = data->inode;
+
+	if (inode) {
+		struct super_block *sb = inode->i_sb;
 
-	if (data->roc)
-		pnfs_roc_release(data->inode);
+		if (data->roc)
+			pnfs_roc_release(inode);
+		iput(inode);
+		nfs_sb_deactive(sb);
+	}
 	kfree(calldata);
 }
 
@@ -5234,9 +5241,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	nfs_fattr_init(data->res.fattr);
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
-	data->inode = inode;
-	data->roc = list_empty(&NFS_I(inode)->open_files) ?
-		    pnfs_roc(inode) : false;
+	data->inode = igrab(inode);
+	if (data->inode) {
+		nfs_sb_active(inode->i_sb);
+		data->roc = nfs4_roc(inode);
+	}
 
 	task_setup_data.callback_data = data;
 	msg.rpc_argp = &data->args;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit
  2015-02-05 22:37 [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Trond Myklebust
@ 2015-02-05 22:37 ` Trond Myklebust
  2015-02-05 22:37   ` [PATCH 3/5] NFSv4.1: Pin the inode and super block in asynchronous layoutreturns Trond Myklebust
  2015-02-06  2:03   ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Peng Tao
  2015-02-06  1:45 ` [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Peng Tao
  1 sibling, 2 replies; 11+ messages in thread
From: Trond Myklebust @ 2015-02-05 22:37 UTC (permalink / raw)
  To: linux-nfs; +Cc: Peng Tao

If we're sending an asynchronous layoutcommit, then we need to ensure
that the inode and the super block remain pinned.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h       | 18 ++++++++++++++++++
 fs/nfs/nfs4proc.c       | 19 +++++++++++--------
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a98cf2006179..d65f693c013b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -514,6 +514,24 @@ extern int nfs41_walk_client_list(struct nfs_client *clp,
 				struct nfs_client **result,
 				struct rpc_cred *cred);
 
+static inline struct inode *nfs_igrab_and_active(struct inode *inode)
+{
+	if (igrab(inode) == NULL)	/* no inode reference could be taken */
+		return NULL;
+	nfs_sb_active(inode->i_sb);	/* also pin the inode's super block */
+	return inode;
+}
+
+static inline void nfs_iput_and_deactive(struct inode *inode)
+{
+	if (inode != NULL) {	/* NULL means nothing was pinned */
+		struct super_block *sb = inode->i_sb;	/* read before iput() */
+
+		iput(inode);
+		nfs_sb_deactive(sb);
+	}
+}
+
 /*
  * Determine the device name as a string
  */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b803c1d363e7..770495bcd525 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7994,6 +7994,7 @@ static void nfs4_layoutcommit_release(void *calldata)
 	nfs_post_op_update_inode_force_wcc(data->args.inode,
 					   data->res.fattr);
 	put_rpccred(data->cred);
+	nfs_iput_and_deactive(data->inode);
 	kfree(data);
 }
 
@@ -8018,7 +8019,6 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_layoutcommit_ops,
 		.callback_data = data,
-		.flags = RPC_TASK_ASYNC,
 	};
 	struct rpc_task *task;
 	int status = 0;
@@ -8029,18 +8029,21 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
 		data->args.lastbytewritten,
 		data->args.inode->i_ino);
 
+	if (!sync) {
+		data->inode = nfs_igrab_and_active(data->args.inode);
+		if (data->inode == NULL) {
+			nfs4_layoutcommit_release(data);
+			return -EAGAIN;
+		}
+		task_setup_data.flags = RPC_TASK_ASYNC;
+	}
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-	if (sync == false)
-		goto out;
-	status = nfs4_wait_for_completion_rpc_task(task);
-	if (status != 0)
-		goto out;
-	status = task->tk_status;
+	if (sync)
+		status = task->tk_status;
 	trace_nfs4_layoutcommit(data->args.inode, status);
-out:
 	dprintk("%s: status %d\n", __func__, status);
 	rpc_put_task(task);
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2c35e2affa6f..bb0d56f737e0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -285,6 +285,7 @@ struct nfs4_layoutcommit_data {
 	struct nfs_fattr fattr;
 	struct list_head lseg_list;
 	struct rpc_cred *cred;
+	struct inode *inode;
 	struct nfs4_layoutcommit_args args;
 	struct nfs4_layoutcommit_res res;
 };
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/5] NFSv4.1: Pin the inode and super block in asynchronous layoutreturns
  2015-02-05 22:37 ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Trond Myklebust
@ 2015-02-05 22:37   ` Trond Myklebust
  2015-02-05 22:37     ` [PATCH 4/5] NFSv4.1: pnfs_send_layoutreturn should use GFP_NOFS Trond Myklebust
  2015-02-06  2:03   ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Peng Tao
  1 sibling, 1 reply; 11+ messages in thread
From: Trond Myklebust @ 2015-02-05 22:37 UTC (permalink / raw)
  To: linux-nfs; +Cc: Peng Tao

If we're sending an asynchronous layoutreturn, then we need to ensure
that the inode and the super block remain pinned.

Cc: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 19 +++++++++++--------
 include/linux/nfs_xdr.h |  1 +
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 770495bcd525..3e1ff2bb547d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7861,6 +7861,7 @@ static void nfs4_layoutreturn_release(void *calldata)
 	lo->plh_block_lgets--;
 	spin_unlock(&lo->plh_inode->i_lock);
 	pnfs_put_layout_hdr(lrp->args.layout);
+	nfs_iput_and_deactive(lrp->inode);
 	kfree(calldata);
 	dprintk("<-- %s\n", __func__);
 }
@@ -7885,23 +7886,25 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_layoutreturn_call_ops,
 		.callback_data = lrp,
-		.flags = RPC_TASK_ASYNC,
 	};
 	int status = 0;
 
 	dprintk("--> %s\n", __func__);
+	if (!sync) {
+		lrp->inode = nfs_igrab_and_active(lrp->args.inode);
+		if (!lrp->inode) {
+			nfs4_layoutreturn_release(lrp);
+			return -EAGAIN;
+		}
+		task_setup_data.flags |= RPC_TASK_ASYNC;
+	}
 	nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-	if (sync == false)
-		goto out;
-	status = nfs4_wait_for_completion_rpc_task(task);
-	if (status != 0)
-		goto out;
-	status = task->tk_status;
+	if (sync)
+		status = task->tk_status;
 	trace_nfs4_layoutreturn(lrp->args.inode, status);
-out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	rpc_put_task(task);
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bb0d56f737e0..38d96ba935c2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -310,6 +310,7 @@ struct nfs4_layoutreturn {
 	struct nfs4_layoutreturn_res res;
 	struct rpc_cred *cred;
 	struct nfs_client *clp;
+	struct inode *inode;
 	int rpc_status;
 };
 
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/5] NFSv4.1: pnfs_send_layoutreturn should use GFP_NOFS
  2015-02-05 22:37   ` [PATCH 3/5] NFSv4.1: Pin the inode and super block in asynchronous layoutreturns Trond Myklebust
@ 2015-02-05 22:37     ` Trond Myklebust
  2015-02-05 22:37       ` [PATCH 5/5] NFSv4.1: Fix pnfs_put_lseg races Trond Myklebust
  0 siblings, 1 reply; 11+ messages in thread
From: Trond Myklebust @ 2015-02-05 22:37 UTC (permalink / raw)
  To: linux-nfs; +Cc: Peng Tao

If we want to be able to call pnfs_send_layoutreturn() from within the
writeback path, we really want it to use GFP_NOFS in order to prevent
recursion.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 703501d3ed19..a1d8620e8cb7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -948,7 +948,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
 	struct nfs4_layoutreturn *lrp;
 	int status = 0;
 
-	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
 	if (unlikely(lrp == NULL)) {
 		status = -ENOMEM;
 		spin_lock(&ino->i_lock);
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/5] NFSv4.1: Fix pnfs_put_lseg races
  2015-02-05 22:37     ` [PATCH 4/5] NFSv4.1: pnfs_send_layoutreturn should use GFP_NOFS Trond Myklebust
@ 2015-02-05 22:37       ` Trond Myklebust
  0 siblings, 0 replies; 11+ messages in thread
From: Trond Myklebust @ 2015-02-05 22:37 UTC (permalink / raw)
  To: linux-nfs; +Cc: Peng Tao

pnfs_layoutreturn_free_lseg_async() can also race with inode put in
the general case. We can now fix this, and also simplify the code.

Cc: Peng Tao <tao.peng@primarydata.com>
---
 fs/nfs/pnfs.c | 54 +++++++++++++++++++-----------------------------------
 1 file changed, 19 insertions(+), 35 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a1d8620e8cb7..79878611fdb0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,14 +361,9 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
 	return true;
 }
 
-static void pnfs_layoutreturn_free_lseg(struct work_struct *work)
+static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
+		struct pnfs_layout_hdr *lo, struct inode *inode)
 {
-	struct pnfs_layout_segment *lseg;
-	struct pnfs_layout_hdr *lo;
-	struct inode *inode;
-
-	lseg = container_of(work, struct pnfs_layout_segment, pls_work);
-	WARN_ON(atomic_read(&lseg->pls_refcount));
 	lo = lseg->pls_layout;
 	inode = lo->plh_inode;
 
@@ -383,24 +378,11 @@ static void pnfs_layoutreturn_free_lseg(struct work_struct *work)
 		lo->plh_block_lgets++;
 		lo->plh_return_iomode = 0;
 		spin_unlock(&inode->i_lock);
+		pnfs_get_layout_hdr(lo);
 
-		pnfs_send_layoutreturn(lo, stateid, iomode, true);
-		spin_lock(&inode->i_lock);
-	} else
-		/* match pnfs_get_layout_hdr #2 in pnfs_put_lseg */
-		pnfs_put_layout_hdr(lo);
-	pnfs_layout_remove_lseg(lo, lseg);
-	spin_unlock(&inode->i_lock);
-	pnfs_free_lseg(lseg);
-	/* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */
-	pnfs_put_layout_hdr(lo);
-}
-
-static void
-pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg)
-{
-	INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg);
-	queue_work(nfsiod_workqueue, &lseg->pls_work);
+		/* Send an async layoutreturn so we dont deadlock */
+		pnfs_send_layoutreturn(lo, stateid, iomode, false);
+	}
 }
 
 void
@@ -415,21 +397,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
 		atomic_read(&lseg->pls_refcount),
 		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+
+	/* Handle the case where refcount != 1 */
+	if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
+		return;
+
 	lo = lseg->pls_layout;
 	inode = lo->plh_inode;
+	/* Do we need a layoutreturn? */
+	if (pnfs_layout_need_return(lo, lseg))
+		pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
+
 	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 		pnfs_get_layout_hdr(lo);
-		if (pnfs_layout_need_return(lo, lseg)) {
-			spin_unlock(&inode->i_lock);
-			/* hdr reference dropped in nfs4_layoutreturn_release */
-			pnfs_get_layout_hdr(lo);
-			pnfs_layoutreturn_free_lseg_async(lseg);
-		} else {
-			pnfs_layout_remove_lseg(lo, lseg);
-			spin_unlock(&inode->i_lock);
-			pnfs_free_lseg(lseg);
-			pnfs_put_layout_hdr(lo);
-		}
+		pnfs_layout_remove_lseg(lo, lseg);
+		spin_unlock(&inode->i_lock);
+		pnfs_free_lseg(lseg);
+		pnfs_put_layout_hdr(lo);
 	}
 }
 EXPORT_SYMBOL_GPL(pnfs_put_lseg);
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn
  2015-02-05 22:37 [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Trond Myklebust
  2015-02-05 22:37 ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Trond Myklebust
@ 2015-02-06  1:45 ` Peng Tao
  2015-02-06  1:57   ` Peng Tao
  1 sibling, 1 reply; 11+ messages in thread
From: Peng Tao @ 2015-02-06  1:45 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List

On Fri, Feb 6, 2015 at 6:37 AM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> If we have to do a return-on-close in the delegreturn code, then
> we must ensure that the inode and super block remain referenced.
>
looks good. One nit is that maybe it's better to reuse the two helpers
in your 2nd patch.

Reviewed-by: Peng Tao <tao.peng@primarydata.com>
> Cc: Peng Tao <tao.peng@primarydata.com>
> Cc: stable@vger.kernel.org # 3.17.x
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/nfs4proc.c | 19 ++++++++++++++-----
>  1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index cd4295d84d54..b803c1d363e7 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -5175,9 +5175,16 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
>  static void nfs4_delegreturn_release(void *calldata)
>  {
>         struct nfs4_delegreturndata *data = calldata;
> +       struct inode *inode = data->inode;
> +
> +       if (inode) {
> +               struct super_block *sb = inode->i_sb;
>
> -       if (data->roc)
> -               pnfs_roc_release(data->inode);
> +               if (data->roc)
> +                       pnfs_roc_release(inode);
> +               iput(inode);
> +               nfs_sb_deactive(sb);
> +       }
>         kfree(calldata);
>  }
>
> @@ -5234,9 +5241,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
>         nfs_fattr_init(data->res.fattr);
>         data->timestamp = jiffies;
>         data->rpc_status = 0;
> -       data->inode = inode;
> -       data->roc = list_empty(&NFS_I(inode)->open_files) ?
> -                   pnfs_roc(inode) : false;
> +       data->inode = igrab(inode);
> +       if (data->inode) {
> +               nfs_sb_active(inode->i_sb);
> +               data->roc = nfs4_roc(inode);
> +       }
>
>         task_setup_data.callback_data = data;
>         msg.rpc_argp = &data->args;
> --
> 2.1.0
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn
  2015-02-06  1:45 ` [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Peng Tao
@ 2015-02-06  1:57   ` Peng Tao
  2015-02-06  2:53     ` Trond Myklebust
  2015-02-06  2:54     ` Peng Tao
  0 siblings, 2 replies; 11+ messages in thread
From: Peng Tao @ 2015-02-06  1:57 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List

On Fri, Feb 6, 2015 at 9:45 AM, Peng Tao <tao.peng@primarydata.com> wrote:
> On Fri, Feb 6, 2015 at 6:37 AM, Trond Myklebust
> <trond.myklebust@primarydata.com> wrote:
>> If we have to do a return-on-close in the delegreturn code, then
>> we must ensure that the inode and super block remain referenced.
>>
ah, a second thought. I looked for call sites of nfs_sb_active() and
it gets called at five places in current tree:
alloc_nfs_open_context, nfs4_opendata_alloc, nfs4_do_close,
nfs_do_call_unlink, nfs_do_call_unlink

So it appears that sb is activated while any file keeps opened and
between unlink calls. Then it looks that we are allowed to keep
delegations after sb is released? Maybe the best way to fix the sb
reference part is to pin sb when getting the first delegation.

Cheers,
Tao

> looks good. One nit is that maybe it's better to reuse the two helpers
> in your 2ed patch.
>
> Reviewed-by: Peng Tao <tao.peng@primarydata.com>
>> Cc: Peng Tao <tao.peng@primarydata.com>
>> Cc: stable@vger.kernel.org # 3.17.x
>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> ---
>>  fs/nfs/nfs4proc.c | 19 ++++++++++++++-----
>>  1 file changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index cd4295d84d54..b803c1d363e7 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -5175,9 +5175,16 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
>>  static void nfs4_delegreturn_release(void *calldata)
>>  {
>>         struct nfs4_delegreturndata *data = calldata;
>> +       struct inode *inode = data->inode;
>> +
>> +       if (inode) {
>> +               struct super_block *sb = inode->i_sb;
>>
>> -       if (data->roc)
>> -               pnfs_roc_release(data->inode);
>> +               if (data->roc)
>> +                       pnfs_roc_release(inode);
>> +               iput(inode);
>> +               nfs_sb_deactive(sb);
>> +       }
>>         kfree(calldata);
>>  }
>>
>> @@ -5234,9 +5241,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
>>         nfs_fattr_init(data->res.fattr);
>>         data->timestamp = jiffies;
>>         data->rpc_status = 0;
>> -       data->inode = inode;
>> -       data->roc = list_empty(&NFS_I(inode)->open_files) ?
>> -                   pnfs_roc(inode) : false;
>> +       data->inode = igrab(inode);
>> +       if (data->inode) {
>> +               nfs_sb_active(inode->i_sb);
>> +               data->roc = nfs4_roc(inode);
>> +       }
>>
>>         task_setup_data.callback_data = data;
>>         msg.rpc_argp = &data->args;
>> --
>> 2.1.0
>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit
  2015-02-05 22:37 ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Trond Myklebust
  2015-02-05 22:37   ` [PATCH 3/5] NFSv4.1: Pin the inode and super block in asynchronous layoutreturns Trond Myklebust
@ 2015-02-06  2:03   ` Peng Tao
  1 sibling, 0 replies; 11+ messages in thread
From: Peng Tao @ 2015-02-06  2:03 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List

On Fri, Feb 6, 2015 at 6:37 AM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> If we're sending an asynchronous layoutcommit, then we need to ensure
> that the inode and the super block remain pinned.
>
same thing here. I guess what we should do is to pin inode and super
block in the layout header, except for the case that the ROC bit is
set, which means layout segments cannot live after file is closed.
Then we are ensured not to allow lsegs to pass the life cycle of inode
and super block. Does it make sense?

Cheers,
Tao

> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/internal.h       | 18 ++++++++++++++++++
>  fs/nfs/nfs4proc.c       | 19 +++++++++++--------
>  include/linux/nfs_xdr.h |  1 +
>  3 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index a98cf2006179..d65f693c013b 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -514,6 +514,24 @@ extern int nfs41_walk_client_list(struct nfs_client *clp,
>                                 struct nfs_client **result,
>                                 struct rpc_cred *cred);
>
> +static inline struct inode *nfs_igrab_and_active(struct inode *inode)
> +{
> +       if (igrab(inode) == NULL)
> +               return NULL;
> +       nfs_sb_active(inode->i_sb);
> +       return inode;
> +}
> +
> +static inline void nfs_iput_and_deactive(struct inode *inode)
> +{
> +       if (!inode) {
> +               struct super_block *sb = inode->i_sb;
> +
> +               iput(inode);
> +               nfs_sb_deactive(sb);
> +       }
> +}
> +
>  /*
>   * Determine the device name as a string
>   */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index b803c1d363e7..770495bcd525 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7994,6 +7994,7 @@ static void nfs4_layoutcommit_release(void *calldata)
>         nfs_post_op_update_inode_force_wcc(data->args.inode,
>                                            data->res.fattr);
>         put_rpccred(data->cred);
> +       nfs_iput_and_deactive(data->inode);
>         kfree(data);
>  }
>
> @@ -8018,7 +8019,6 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs4_layoutcommit_ops,
>                 .callback_data = data,
> -               .flags = RPC_TASK_ASYNC,
>         };
>         struct rpc_task *task;
>         int status = 0;
> @@ -8029,18 +8029,21 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
>                 data->args.lastbytewritten,
>                 data->args.inode->i_ino);
>
> +       if (!sync) {
> +               data->inode = nfs_igrab_and_active(data->args.inode);
> +               if (data->inode == NULL) {
> +                       nfs4_layoutcommit_release(data);
> +                       return -EAGAIN;
> +               }
> +               task_setup_data.flags = RPC_TASK_ASYNC;
> +       }
>         nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
>         task = rpc_run_task(&task_setup_data);
>         if (IS_ERR(task))
>                 return PTR_ERR(task);
> -       if (sync == false)
> -               goto out;
> -       status = nfs4_wait_for_completion_rpc_task(task);
> -       if (status != 0)
> -               goto out;
> -       status = task->tk_status;
> +       if (sync)
> +               status = task->tk_status;
>         trace_nfs4_layoutcommit(data->args.inode, status);
> -out:
>         dprintk("%s: status %d\n", __func__, status);
>         rpc_put_task(task);
>         return status;
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 2c35e2affa6f..bb0d56f737e0 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -285,6 +285,7 @@ struct nfs4_layoutcommit_data {
>         struct nfs_fattr fattr;
>         struct list_head lseg_list;
>         struct rpc_cred *cred;
> +       struct inode *inode;
>         struct nfs4_layoutcommit_args args;
>         struct nfs4_layoutcommit_res res;
>  };
> --
> 2.1.0
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn
  2015-02-06  1:57   ` Peng Tao
@ 2015-02-06  2:53     ` Trond Myklebust
  2015-02-06  3:05       ` Peng Tao
  2015-02-06  2:54     ` Peng Tao
  1 sibling, 1 reply; 11+ messages in thread
From: Trond Myklebust @ 2015-02-06  2:53 UTC (permalink / raw)
  To: Peng Tao; +Cc: Linux NFS Mailing List

On Thu, Feb 5, 2015 at 8:57 PM, Peng Tao <tao.peng@primarydata.com> wrote:
>
> On Fri, Feb 6, 2015 at 9:45 AM, Peng Tao <tao.peng@primarydata.com> wrote:
> > On Fri, Feb 6, 2015 at 6:37 AM, Trond Myklebust
> > <trond.myklebust@primarydata.com> wrote:
> >> If we have to do a return-on-close in the delegreturn code, then
> >> we must ensure that the inode and super block remain referenced.
> >>
> ah, a second thought. I looked for call sites of nfs_sb_active() and
> it gets called at five places in current tree:
> alloc_nfs_open_context, nfs4_opendata_alloc, nfs4_do_close,
> nfs_do_call_unlink, nfs_do_call_unlink
>
> So it appears that sb is activated while any file keeps opened and
> between unlink calls. Then it looks that we are allowed to keep
> delegations after sb is released? Maybe the best way to fix the sb
> reference part is to pin sb when getting the first delegation.

The superblock reference is only there in order to allow us to perform
asynchronous delegreturns without any danger. The problem here is that
we'd end up pinning the superblock even after umount if there are
still unreturned delegations.
That said, I do see that there is a problem with calling
nfs_sb_active() when sb->s_active is zero, so I think I'd like to fix
that up.

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn
  2015-02-06  1:57   ` Peng Tao
  2015-02-06  2:53     ` Trond Myklebust
@ 2015-02-06  2:54     ` Peng Tao
  1 sibling, 0 replies; 11+ messages in thread
From: Peng Tao @ 2015-02-06  2:54 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List

On Fri, Feb 6, 2015 at 9:57 AM, Peng Tao <tao.peng@primarydata.com> wrote:
> On Fri, Feb 6, 2015 at 9:45 AM, Peng Tao <tao.peng@primarydata.com> wrote:
>> On Fri, Feb 6, 2015 at 6:37 AM, Trond Myklebust
>> <trond.myklebust@primarydata.com> wrote:
>>> If we have to do a return-on-close in the delegreturn code, then
>>> we must ensure that the inode and super block remain referenced.
>>>
> ah, a second thought. I looked for call sites of nfs_sb_active() and
> it gets called at five places in current tree:
> alloc_nfs_open_context, nfs4_opendata_alloc, nfs4_do_close,
> nfs_do_call_unlink, nfs_do_call_unlink
>
> So it appears that sb is activated while any file keeps opened and
> between unlink calls. Then it looks that we are allowed to keep
> delegations after sb is released? Maybe the best way to fix the sb
> reference part is to pin sb when getting the first delegation.
>
err, that cannot be working at all... by pinning super block, we
prevent umount from happening. and we are returning delegations while
shutting down nfs_server and evicting inode. I see your point that the
patch actually intends to deal with race between async delegation
return vs. nfs_server shutting down and inode eviction.

oops! sorry for the noise... your patch is definitely the way we want to go.

Cheers,
Tao

> Cheers,
> Tao
>
>> looks good. One nit is that maybe it's better to reuse the two helpers
>> in your 2ed patch.
>>
>> Reviewed-by: Peng Tao <tao.peng@primarydata.com>
>>> Cc: Peng Tao <tao.peng@primarydata.com>
>>> Cc: stable@vger.kernel.org # 3.17.x
>>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>>> ---
>>>  fs/nfs/nfs4proc.c | 19 ++++++++++++++-----
>>>  1 file changed, 14 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>> index cd4295d84d54..b803c1d363e7 100644
>>> --- a/fs/nfs/nfs4proc.c
>>> +++ b/fs/nfs/nfs4proc.c
>>> @@ -5175,9 +5175,16 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
>>>  static void nfs4_delegreturn_release(void *calldata)
>>>  {
>>>         struct nfs4_delegreturndata *data = calldata;
>>> +       struct inode *inode = data->inode;
>>> +
>>> +       if (inode) {
>>> +               struct super_block *sb = inode->i_sb;
>>>
>>> -       if (data->roc)
>>> -               pnfs_roc_release(data->inode);
>>> +               if (data->roc)
>>> +                       pnfs_roc_release(inode);
>>> +               iput(inode);
>>> +               nfs_sb_deactive(sb);
>>> +       }
>>>         kfree(calldata);
>>>  }
>>>
>>> @@ -5234,9 +5241,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
>>>         nfs_fattr_init(data->res.fattr);
>>>         data->timestamp = jiffies;
>>>         data->rpc_status = 0;
>>> -       data->inode = inode;
>>> -       data->roc = list_empty(&NFS_I(inode)->open_files) ?
>>> -                   pnfs_roc(inode) : false;
>>> +       data->inode = igrab(inode);
>>> +       if (data->inode) {
>>> +               nfs_sb_active(inode->i_sb);
>>> +               data->roc = nfs4_roc(inode);
>>> +       }
>>>
>>>         task_setup_data.callback_data = data;
>>>         msg.rpc_argp = &data->args;
>>> --
>>> 2.1.0
>>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn
  2015-02-06  2:53     ` Trond Myklebust
@ 2015-02-06  3:05       ` Peng Tao
  0 siblings, 0 replies; 11+ messages in thread
From: Peng Tao @ 2015-02-06  3:05 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List

On Fri, Feb 6, 2015 at 10:53 AM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> On Thu, Feb 5, 2015 at 8:57 PM, Peng Tao <tao.peng@primarydata.com> wrote:
>>
>> On Fri, Feb 6, 2015 at 9:45 AM, Peng Tao <tao.peng@primarydata.com> wrote:
>> > On Fri, Feb 6, 2015 at 6:37 AM, Trond Myklebust
>> > <trond.myklebust@primarydata.com> wrote:
>> >> If we have to do a return-on-close in the delegreturn code, then
>> >> we must ensure that the inode and super block remain referenced.
>> >>
>> ah, a second thought. I looked for call sites of nfs_sb_active() and
>> it gets called at five places in current tree:
>> alloc_nfs_open_context, nfs4_opendata_alloc, nfs4_do_close,
>> nfs_do_call_unlink, nfs_do_call_unlink
>>
>> So it appears that sb is activated while any file keeps opened and
>> between unlink calls. Then it looks that we are allowed to keep
>> delegations after sb is released? Maybe the best way to fix the sb
>> reference part is to pin sb when getting the first delegation.
>
> The superblock reference is only there in order to allow us to perform
> asynchronous delegreturns without any danger. The problem here is that
> we'd end up pinning the superblock even after umount if there are
> still unreturned delegations.
> That said, I do see that there is a problem with calling
> nfs_sb_active() when sb->s_active is zero, so I think I'd like to fix
> that up.
yeah, I see your point. Thanks for the explanation.

Cheers,
Tao

>
> --
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-02-06  3:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-05 22:37 [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Trond Myklebust
2015-02-05 22:37 ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Trond Myklebust
2015-02-05 22:37   ` [PATCH 3/5] NFSv4.1: Pin the inode and super block in asynchronous layoutreturns Trond Myklebust
2015-02-05 22:37     ` [PATCH 4/5] NFSv4.1: pnfs_send_layoutreturn should use GFP_NOFS Trond Myklebust
2015-02-05 22:37       ` [PATCH 5/5] NFSv4.1: Fix pnfs_put_lseg races Trond Myklebust
2015-02-06  2:03   ` [PATCH 2/5] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit Peng Tao
2015-02-06  1:45 ` [PATCH 1/5] NFSv4: Ensure we reference the inode for return-on-close in delegreturn Peng Tao
2015-02-06  1:57   ` Peng Tao
2015-02-06  2:53     ` Trond Myklebust
2015-02-06  3:05       ` Peng Tao
2015-02-06  2:54     ` Peng Tao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.