All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NFS: filelayout should use nfs_generic_pg_test
@ 2011-06-01  3:18 Weston Andros Adamson
  2011-06-01  5:47 ` Boaz Harrosh
  0 siblings, 1 reply; 23+ messages in thread
From: Weston Andros Adamson @ 2011-06-01  3:18 UTC (permalink / raw)
  To: trond; +Cc: linux-nfs, Weston Andros Adamson

Use nfs_generic_pg_test instead of pnfs_generic_pg_test.

This fixes the BUG at fs/nfs/write.c:941 introduced by
89a58e32d9105c01022a757fb32ddc3b51bf0025.

I was able to trigger this BUG reliably using pynfs in pnfs mode,
by using dd(1) to write many small blocks.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
---
Fix proposed by Trond.

Benny- Does this make sense?

 fs/nfs/nfs4filelayout.c  |    2 +-
 fs/nfs/pagelist.c        |    5 ++++-
 include/linux/nfs_page.h |    3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..1c3bb72 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	u64 p_stripe, r_stripe;
 	u32 stripe_unit;
 
-	if (!pnfs_generic_pg_test(pgio, prev, req))
+	if (!nfs_generic_pg_test(pgio, prev, req))
 		return 0;
 
 	if (!pgio->pg_lseg)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961..1a4b0de 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,9 @@ nfs_wait_on_request(struct nfs_page *req)
 			TASK_UNINTERRUPTIBLE);
 }
 
-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool
+nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev,
+		    struct nfs_page *req)
 {
 	/*
 	 * FIXME: ideally we should be able to coalesce all requests
@@ -218,6 +220,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
 
 	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
 }
+EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
 /**
  * nfs_pageio_init - initialise a page io descriptor
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a34e80..7d8a779 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -96,7 +96,8 @@ extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	int nfs_set_page_tag_locked(struct nfs_page *req);
 extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
-
+extern  bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+				 struct nfs_page *prev, struct nfs_page *req);
 
 /*
  * Lock the page of an asynchronous request without getting a new reference
-- 
1.7.5.2


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01  3:18 [PATCH] NFS: filelayout should use nfs_generic_pg_test Weston Andros Adamson
@ 2011-06-01  5:47 ` Boaz Harrosh
  2011-06-01 12:14   ` Trond Myklebust
  2011-06-01 14:44   ` Weston Andros Adamson
  0 siblings, 2 replies; 23+ messages in thread
From: Boaz Harrosh @ 2011-06-01  5:47 UTC (permalink / raw)
  To: Weston Andros Adamson; +Cc: trond, linux-nfs

On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
> 
> This fixes the BUG at fs/nfs/write.c:941 introduced by
> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
> 
> I was able to trigger this BUG reliably using pynfs in pnfs mode,
> by using dd(1) to write many small blocks.
> 
> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
> ---
> Fix proposed by Trond.
> 
> Benny- Does this make sense?
> 
>  fs/nfs/nfs4filelayout.c  |    2 +-
>  fs/nfs/pagelist.c        |    5 ++++-
>  include/linux/nfs_page.h |    3 ++-
>  3 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..1c3bb72 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>  	u64 p_stripe, r_stripe;
>  	u32 stripe_unit;
>  
> -	if (!pnfs_generic_pg_test(pgio, prev, req))
> +	if (!nfs_generic_pg_test(pgio, prev, req))
>  		return 0;
>  

pnfs_generic_pg_test is the one that gets the layout.

What you've done is revert to MDS IO

Boaz

>  	if (!pgio->pg_lseg)
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index 7913961..1a4b0de 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -204,7 +204,9 @@ nfs_wait_on_request(struct nfs_page *req)
>  			TASK_UNINTERRUPTIBLE);
>  }
>  
> -static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
> +bool
> +nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev,
> +		    struct nfs_page *req)
>  {
>  	/*
>  	 * FIXME: ideally we should be able to coalesce all requests
> @@ -218,6 +220,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
>  
>  	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
>  }
> +EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
>  
>  /**
>   * nfs_pageio_init - initialise a page io descriptor
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 3a34e80..7d8a779 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -96,7 +96,8 @@ extern  int nfs_wait_on_request(struct nfs_page *);
>  extern	void nfs_unlock_request(struct nfs_page *req);
>  extern	int nfs_set_page_tag_locked(struct nfs_page *req);
>  extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
> -
> +extern  bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
> +				 struct nfs_page *prev, struct nfs_page *req);
>  
>  /*
>   * Lock the page of an asynchronous request without getting a new reference


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01  5:47 ` Boaz Harrosh
@ 2011-06-01 12:14   ` Trond Myklebust
  2011-06-01 13:36     ` Boaz Harrosh
  2011-06-01 14:44   ` Weston Andros Adamson
  1 sibling, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2011-06-01 12:14 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Weston Andros Adamson, trond, linux-nfs

On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
> > Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
> > 
> > This fixes the BUG at fs/nfs/write.c:941 introduced by
> > 89a58e32d9105c01022a757fb32ddc3b51bf0025.
> > 
> > I was able to trigger this BUG reliably using pynfs in pnfs mode,
> > by using dd(1) to write many small blocks.
> > 
> > Signed-off-by: Weston Andros Adamson <dros@netapp.com>
> > ---
> > Fix proposed by Trond.
> > 
> > Benny- Does this make sense?
> > 
> >  fs/nfs/nfs4filelayout.c  |    2 +-
> >  fs/nfs/pagelist.c        |    5 ++++-
> >  include/linux/nfs_page.h |    3 ++-
> >  3 files changed, 7 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> > index 4269088..1c3bb72 100644
> > --- a/fs/nfs/nfs4filelayout.c
> > +++ b/fs/nfs/nfs4filelayout.c
> > @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> >  	u64 p_stripe, r_stripe;
> >  	u32 stripe_unit;
> >  
> > -	if (!pnfs_generic_pg_test(pgio, prev, req))
> > +	if (!nfs_generic_pg_test(pgio, prev, req))
> >  		return 0;
> >  
> 
> pnfs_generic_pg_test is the one that gets the layout.
> 
> What you've done is revert to MDS IO

The "files" layout type always gets the layout in the pg_doio() method
instead of the pg_test().

Cheers
  Trond

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 12:14   ` Trond Myklebust
@ 2011-06-01 13:36     ` Boaz Harrosh
  2011-06-01 13:43       ` Benny Halevy
  0 siblings, 1 reply; 23+ messages in thread
From: Boaz Harrosh @ 2011-06-01 13:36 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Weston Andros Adamson, trond, linux-nfs

On 06/01/2011 03:14 PM, Trond Myklebust wrote:
> On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>
>> pnfs_generic_pg_test is the one that gets the layout.
>>
>> What you've done is revert to MDS IO
> 
> The "files" layout type always gets the layout in the pg_doio() method
> instead of the pg_test().
> 

Well I don't see where? I fought this all day, when trying to make the
new code run with objlayout, which was missing the implementation of pg_test().
And never got a pnfs-IO.

I've searched the full tree for calls to pnfs_update_layout() the only
one I can see are in:
nfs_pagein_multi() - which means within a single page, right?
nfs_pagein_one()   - But is protected with list_is_singular() so only in the
                     single page case
nfs_flush_multi()  - Same as nfs_pagein_multi
nfs_flush_one()    - Also here protected with list_is_singular()

and the all mighty
pnfs_generic_pg_test()

I cannot see where the filelayout is different then other layouts
in that respect. Sorry to be slow, I would like to understand?

And also be careful with nfs_generic_pg_test() it inspects
desc->bsize which is negotiated with MDS, it's very small.

> Cheers
>   Trond
> 

Thanks
Boaz

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 13:36     ` Boaz Harrosh
@ 2011-06-01 13:43       ` Benny Halevy
  2011-06-01 14:32         ` Benny Halevy
  0 siblings, 1 reply; 23+ messages in thread
From: Benny Halevy @ 2011-06-01 13:43 UTC (permalink / raw)
  To: Boaz Harrosh, Trond Myklebust; +Cc: Weston Andros Adamson, trond, linux-nfs

On 2011-06-01 16:36, Boaz Harrosh wrote:
> On 06/01/2011 03:14 PM, Trond Myklebust wrote:
>> On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>
>>> pnfs_generic_pg_test is the one that gets the layout.
>>>
>>> What you've done is revert to MDS IO
>>
>> The "files" layout type always gets the layout in the pg_doio() method
>> instead of the pg_test().
>>
> 
> Well I don't see where? I fought this all day, when trying to make the
> new code run with objlayout, which was missing the implementation of pg_test().
> And never got a pnfs-IO.
> 
> I've searched the full tree for calls to pnfs_update_layout() the only
> one I can see are in:
> nfs_pagein_multi() - which means within a single page, right?
> nfs_pagein_one()   - But is protected with list_is_singular() so only in the
>                      single page case
> nfs_flush_multi()  - Same as nfs_pagein_multi
> nfs_flush_one()    - Also here protected with list_is_singular()
> 
> and the all mighty
> pnfs_generic_pg_test()
> 
> I cannot see where the filelayout is different then other layouts
> in that respect. Sorry to be slow, I would like to understand?
> 
> And also be careful with nfs_generic_pg_test() it inspects
> desc->bsize which is negotiated with MDS, it's very small.
> 

I'm also looking into this.
The call to pnfs_generic_pg_test wasn't a typo.
As pre dfed206 "NFSv4.1: unify pnfs_pageio_init functions"
we were setting pg_pgio->test to pnfs_write_pg_test
which is equivalent to pnfs_generic_pg_test
and 89a58e3 "NFSv4.1: use pnfs_generic_pg_test directly by layout driver"
only reversed the call from pnfs_generic_pg_test to ld->pg_test
to a call from ld->pg_test to pnfs_generic_pg_test

Benny

>> Cheers
>>   Trond
>>
> 
> Thanks
> Boaz
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 13:43       ` Benny Halevy
@ 2011-06-01 14:32         ` Benny Halevy
  0 siblings, 0 replies; 23+ messages in thread
From: Benny Halevy @ 2011-06-01 14:32 UTC (permalink / raw)
  To: Boaz Harrosh, Trond Myklebust; +Cc: Weston Andros Adamson, trond, linux-nfs

I guess the culprit is the moving of the
desc->pg_bsize < PAGE_SIZE condition to nfs_generic_pg_test
in 5b36c7dc.

Previously, it stopped coalescing early for both nfs and pnfs
but with this patch it is performed only when pnfs is disabled.

Benny

On 2011-06-01 16:43, Benny Halevy wrote:
> On 2011-06-01 16:36, Boaz Harrosh wrote:
>> On 06/01/2011 03:14 PM, Trond Myklebust wrote:
>>> On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
>>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>>
>>>> pnfs_generic_pg_test is the one that gets the layout.
>>>>
>>>> What you've done is revert to MDS IO
>>>
>>> The "files" layout type always gets the layout in the pg_doio() method
>>> instead of the pg_test().
>>>
>>
>> Well I don't see where? I fought this all day, when trying to make the
>> new code run with objlayout, which was missing the implementation of pg_test().
>> And never got a pnfs-IO.
>>
>> I've searched the full tree for calls to pnfs_update_layout() the only
>> one I can see are in:
>> nfs_pagein_multi() - which means within a single page, right?
>> nfs_pagein_one()   - But is protected with list_is_singular() so only in the
>>                      single page case
>> nfs_flush_multi()  - Same as nfs_pagein_multi
>> nfs_flush_one()    - Also here protected with list_is_singular()
>>
>> and the all mighty
>> pnfs_generic_pg_test()
>>
>> I cannot see where the filelayout is different then other layouts
>> in that respect. Sorry to be slow, I would like to understand?
>>
>> And also be careful with nfs_generic_pg_test() it inspects
>> desc->bsize which is negotiated with MDS, it's very small.
>>
> 
> I'm also looking into this.
> The call to pnfs_generic_pg_test wasn't a typo.
> As pre dfed206 "NFSv4.1: unify pnfs_pageio_init functions"
> we were setting pg_pgio->test to pnfs_write_pg_test
> which is equivalent to pnfs_generic_pg_test
> and 89a58e3 "NFSv4.1: use pnfs_generic_pg_test directly by layout driver"
> only reversed the call from pnfs_generic_pg_test to ld->pg_test
> to a call from ld->pg_test to pnfs_generic_pg_test
> 
> Benny
> 
>>> Cheers
>>>   Trond
>>>
>>
>> Thanks
>> Boaz
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01  5:47 ` Boaz Harrosh
  2011-06-01 12:14   ` Trond Myklebust
@ 2011-06-01 14:44   ` Weston Andros Adamson
  2011-06-01 14:51     ` Benny Halevy
  1 sibling, 1 reply; 23+ messages in thread
From: Weston Andros Adamson @ 2011-06-01 14:44 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: trond, linux-nfs


On Jun 1, 2011, at 1:47 AM, Boaz Harrosh wrote:

> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
>> 
>> This fixes the BUG at fs/nfs/write.c:941 introduced by
>> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
>> 
>> I was able to trigger this BUG reliably using pynfs in pnfs mode,
>> by using dd(1) to write many small blocks.
>> 
>> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
>> ---
>> Fix proposed by Trond.
>> 
>> Benny- Does this make sense?
>> 
>> fs/nfs/nfs4filelayout.c  |    2 +-
>> fs/nfs/pagelist.c        |    5 ++++-
>> include/linux/nfs_page.h |    3 ++-
>> 3 files changed, 7 insertions(+), 3 deletions(-)
>> 
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 4269088..1c3bb72 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>> 	u64 p_stripe, r_stripe;
>> 	u32 stripe_unit;
>> 
>> -	if (!pnfs_generic_pg_test(pgio, prev, req))
>> +	if (!nfs_generic_pg_test(pgio, prev, req))
>> 		return 0;
>> 
> 
> pnfs_generic_pg_test is the one that gets the layout.
> 
> What you've done is revert to MDS IO
> 
> Boaz

Ah, you're right - I didn't even notice that!  I usually confirm client -> DS communication with tcpdump.  I was working for too long yesterday :)

Patch: recalled.  Discussion about a real fix: started.

-dros


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 14:44   ` Weston Andros Adamson
@ 2011-06-01 14:51     ` Benny Halevy
  2011-06-01 15:36       ` Weston Andros Adamson
                         ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Benny Halevy @ 2011-06-01 14:51 UTC (permalink / raw)
  To: Weston Andros Adamson; +Cc: Boaz Harrosh, trond, linux-nfs

On 2011-06-01 17:44, Weston Andros Adamson wrote:
> 
> On Jun 1, 2011, at 1:47 AM, Boaz Harrosh wrote:
> 
>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
>>>
>>> This fixes the BUG at fs/nfs/write.c:941 introduced by
>>> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
>>>
>>> I was able to trigger this BUG reliably using pynfs in pnfs mode,
>>> by using dd(1) to write many small blocks.
>>>
>>> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
>>> ---
>>> Fix proposed by Trond.
>>>
>>> Benny- Does this make sense?
>>>
>>> fs/nfs/nfs4filelayout.c  |    2 +-
>>> fs/nfs/pagelist.c        |    5 ++++-
>>> include/linux/nfs_page.h |    3 ++-
>>> 3 files changed, 7 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>> index 4269088..1c3bb72 100644
>>> --- a/fs/nfs/nfs4filelayout.c
>>> +++ b/fs/nfs/nfs4filelayout.c
>>> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>>> 	u64 p_stripe, r_stripe;
>>> 	u32 stripe_unit;
>>>
>>> -	if (!pnfs_generic_pg_test(pgio, prev, req))
>>> +	if (!nfs_generic_pg_test(pgio, prev, req))
>>> 		return 0;
>>>
>>
>> pnfs_generic_pg_test is the one that gets the layout.
>>
>> What you've done is revert to MDS IO
>>
>> Boaz
> 
> Ah, you're right - I didn't even notice that!  I usually confirm client -> DS communication with tcpdump.  I was working for too long yesterday :)
> 
> Patch: recalled.  Discussion about a real fix: started.
> 
> -dros

I think the following should work:

Benny

git diff --stat -p -M
 fs/nfs/nfs4filelayout.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..9f1d445 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
*pgio, struct nfs_page *prev,
 	u64 p_stripe, r_stripe;
 	u32 stripe_unit;

+	/*
+	 * FIXME: ideally we should be able to coalesce all requests
+	 * that are not block boundary aligned, but currently this
+	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
+	 * since nfs_flush_multi and nfs_pagein_multi assume you
+	 * can have only one struct nfs_page.
+	 */
+	if (desc->pg_bsize < PAGE_SIZE)
+		return 0;
+
 	if (!pnfs_generic_pg_test(pgio, prev, req))
 		return 0;


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 14:51     ` Benny Halevy
@ 2011-06-01 15:36       ` Weston Andros Adamson
  2011-06-01 16:01       ` Fred Isaman
  2011-06-01 18:07       ` Trond Myklebust
  2 siblings, 0 replies; 23+ messages in thread
From: Weston Andros Adamson @ 2011-06-01 15:36 UTC (permalink / raw)
  To: Benny Halevy; +Cc: Boaz Harrosh, trond, linux-nfs

On Jun 1, 2011, at 10:51 AM, Benny Halevy wrote:

> On 2011-06-01 17:44, Weston Andros Adamson wrote:
>> 
>> On Jun 1, 2011, at 1:47 AM, Boaz Harrosh wrote:
>> 
>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
>>>> 
>>>> This fixes the BUG at fs/nfs/write.c:941 introduced by
>>>> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
>>>> 
>>>> I was able to trigger this BUG reliably using pynfs in pnfs mode,
>>>> by using dd(1) to write many small blocks.
>>>> 
>>>> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
>>>> ---
>>>> Fix proposed by Trond.
>>>> 
>>>> Benny- Does this make sense?
>>>> 
>>>> fs/nfs/nfs4filelayout.c  |    2 +-
>>>> fs/nfs/pagelist.c        |    5 ++++-
>>>> include/linux/nfs_page.h |    3 ++-
>>>> 3 files changed, 7 insertions(+), 3 deletions(-)
>>>> 
>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>> index 4269088..1c3bb72 100644
>>>> --- a/fs/nfs/nfs4filelayout.c
>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>>>> 	u64 p_stripe, r_stripe;
>>>> 	u32 stripe_unit;
>>>> 
>>>> -	if (!pnfs_generic_pg_test(pgio, prev, req))
>>>> +	if (!nfs_generic_pg_test(pgio, prev, req))
>>>> 		return 0;
>>>> 
>>> 
>>> pnfs_generic_pg_test is the one that gets the layout.
>>> 
>>> What you've done is revert to MDS IO
>>> 
>>> Boaz
>> 
>> Ah, you're right - I didn't even notice that!  I usually confirm client -> DS communication with tcpdump.  I was working for too long yesterday :)
>> 
>> Patch: recalled.  Discussion about a real fix: started.
>> 
>> -dros
> 
> I think the following should work:
> 
> Benny
> 

This diff fixes the problem for me (but with s/desc/pgio).

-dros

> git diff --stat -p -M
> fs/nfs/nfs4filelayout.c |   10 ++++++++++
> 1 files changed, 10 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..9f1d445 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
> *pgio, struct nfs_page *prev,
> 	u64 p_stripe, r_stripe;
> 	u32 stripe_unit;
> 
> +	/*
> +	 * FIXME: ideally we should be able to coalesce all requests
> +	 * that are not block boundary aligned, but currently this
> +	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
> +	 * since nfs_flush_multi and nfs_pagein_multi assume you
> +	 * can have only one struct nfs_page.
> +	 */
> +	if (desc->pg_bsize < PAGE_SIZE)
> +		return 0;
> +
> 	if (!pnfs_generic_pg_test(pgio, prev, req))
> 		return 0;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 14:51     ` Benny Halevy
  2011-06-01 15:36       ` Weston Andros Adamson
@ 2011-06-01 16:01       ` Fred Isaman
  2011-06-01 18:56         ` Benny Halevy
  2011-06-01 18:07       ` Trond Myklebust
  2 siblings, 1 reply; 23+ messages in thread
From: Fred Isaman @ 2011-06-01 16:01 UTC (permalink / raw)
  To: Benny Halevy; +Cc: Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On Wed, Jun 1, 2011 at 10:51 AM, Benny Halevy <bhalevy@panasas.com> wrote:
> On 2011-06-01 17:44, Weston Andros Adamson wrote:
>>
>> On Jun 1, 2011, at 1:47 AM, Boaz Harrosh wrote:
>>
>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
>>>>
>>>> This fixes the BUG at fs/nfs/write.c:941 introduced by
>>>> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
>>>>
>>>> I was able to trigger this BUG reliably using pynfs in pnfs mode,
>>>> by using dd(1) to write many small blocks.
>>>>
>>>> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
>>>> ---
>>>> Fix proposed by Trond.
>>>>
>>>> Benny- Does this make sense?
>>>>
>>>> fs/nfs/nfs4filelayout.c  |    2 +-
>>>> fs/nfs/pagelist.c        |    5 ++++-
>>>> include/linux/nfs_page.h |    3 ++-
>>>> 3 files changed, 7 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>> index 4269088..1c3bb72 100644
>>>> --- a/fs/nfs/nfs4filelayout.c
>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>>>>     u64 p_stripe, r_stripe;
>>>>     u32 stripe_unit;
>>>>
>>>> -   if (!pnfs_generic_pg_test(pgio, prev, req))
>>>> +   if (!nfs_generic_pg_test(pgio, prev, req))
>>>>             return 0;
>>>>
>>>
>>> pnfs_generic_pg_test is the one that gets the layout.
>>>
>>> What you've done is revert to MDS IO
>>>
>>> Boaz
>>
>> Ah, you're right - I didn't even notice that!  I usually confirm client -> DS communication with tcpdump.  I was working for too long yesterday :)
>>
>> Patch: recalled.  Discussion about a real fix: started.
>>
>> -dros
>
> I think the following should work:
>
> Benny
>
> git diff --stat -p -M
>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>  1 files changed, 10 insertions(+), 0 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..9f1d445 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
> *pgio, struct nfs_page *prev,
>        u64 p_stripe, r_stripe;
>        u32 stripe_unit;
>
> +       /*
> +        * FIXME: ideally we should be able to coalesce all requests
> +        * that are not block boundary aligned, but currently this
> +        * is problematic for the case of bsize < PAGE_CACHE_SIZE,
> +        * since nfs_flush_multi and nfs_pagein_multi assume you
> +        * can have only one struct nfs_page.
> +        */
> +       if (desc->pg_bsize < PAGE_SIZE)
> +               return 0;
> +
>        if (!pnfs_generic_pg_test(pgio, prev, req))
>                return 0;
>

Note this moves a test that was once part of the plain nfs code into
the file layout driver.  Why don't other drivers need this test?

Fred

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 14:51     ` Benny Halevy
  2011-06-01 15:36       ` Weston Andros Adamson
  2011-06-01 16:01       ` Fred Isaman
@ 2011-06-01 18:07       ` Trond Myklebust
  2011-06-01 19:13         ` Benny Halevy
  2 siblings, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2011-06-01 18:07 UTC (permalink / raw)
  To: Benny Halevy; +Cc: Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote: 
> I think the following should work:
> 
> Benny
> 
> git diff --stat -p -M
>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>  1 files changed, 10 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..9f1d445 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
> *pgio, struct nfs_page *prev,
>  	u64 p_stripe, r_stripe;
>  	u32 stripe_unit;
> 
> +	/*
> +	 * FIXME: ideally we should be able to coalesce all requests
> +	 * that are not block boundary aligned, but currently this
> +	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
> +	 * since nfs_flush_multi and nfs_pagein_multi assume you
> +	 * can have only one struct nfs_page.
> +	 */
> +	if (desc->pg_bsize < PAGE_SIZE)
> +		return 0;
> +
>  	if (!pnfs_generic_pg_test(pgio, prev, req))
>  		return 0;

So, there are several things that bother me about pnfs_generic_pg_test()
too now that I'm looking more closely at it:

     1. If the intention is to coalesce 'prev' and 'req', shouldn't we
        be asking for a layout with req_offset(prev) instead of
        req_offset(req)? 
     2. If we're only requesting a layout of length pg_count, don't we
        still need to test the layout length that the server actually
        returned before we can allow the coalescing? 
     3. if (!pgio->lseg), shouldn't we be returning an error of some
        sort? Right now we're returning 'true', and allowing the
        coalesce to occur. 
     4. Furthermore, shouldn't that test guarding the
        pnfs_update_layout() call rather be an 'if (pgio->pg_lseg ==
        NULL)' instead of looking at the values of pg_count and
        prev->wb_bytes?

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 16:01       ` Fred Isaman
@ 2011-06-01 18:56         ` Benny Halevy
  2011-06-01 19:17           ` Trond Myklebust
  0 siblings, 1 reply; 23+ messages in thread
From: Benny Halevy @ 2011-06-01 18:56 UTC (permalink / raw)
  To: Fred Isaman; +Cc: Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On 2011-06-01 19:01, Fred Isaman wrote:
> On Wed, Jun 1, 2011 at 10:51 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>> On 2011-06-01 17:44, Weston Andros Adamson wrote:
>>>
>>> On Jun 1, 2011, at 1:47 AM, Boaz Harrosh wrote:
>>>
>>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>>> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
>>>>>
>>>>> This fixes the BUG at fs/nfs/write.c:941 introduced by
>>>>> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
>>>>>
>>>>> I was able to trigger this BUG reliably using pynfs in pnfs mode,
>>>>> by using dd(1) to write many small blocks.
>>>>>
>>>>> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
>>>>> ---
>>>>> Fix proposed by Trond.
>>>>>
>>>>> Benny- Does this make sense?
>>>>>
>>>>> fs/nfs/nfs4filelayout.c  |    2 +-
>>>>> fs/nfs/pagelist.c        |    5 ++++-
>>>>> include/linux/nfs_page.h |    3 ++-
>>>>> 3 files changed, 7 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>> index 4269088..1c3bb72 100644
>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>>>>>     u64 p_stripe, r_stripe;
>>>>>     u32 stripe_unit;
>>>>>
>>>>> -   if (!pnfs_generic_pg_test(pgio, prev, req))
>>>>> +   if (!nfs_generic_pg_test(pgio, prev, req))
>>>>>             return 0;
>>>>>
>>>>
>>>> pnfs_generic_pg_test is the one that gets the layout.
>>>>
>>>> What you've done is revert to MDS IO
>>>>
>>>> Boaz
>>>
>>> Ah, you're right - I didn't even notice that!  I usually confirm client -> DS communication with tcpdump.  I was working for too long yesterday :)
>>>
>>> Patch: recalled.  Discussion about a real fix: started.
>>>
>>> -dros
>>
>> I think the following should work:
>>
>> Benny
>>
>> git diff --stat -p -M
>>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>>  1 files changed, 10 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 4269088..9f1d445 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
>> *pgio, struct nfs_page *prev,
>>        u64 p_stripe, r_stripe;
>>        u32 stripe_unit;
>>
>> +       /*
>> +        * FIXME: ideally we should be able to coalesce all requests
>> +        * that are not block boundary aligned, but currently this
>> +        * is problematic for the case of bsize < PAGE_CACHE_SIZE,
>> +        * since nfs_flush_multi and nfs_pagein_multi assume you
>> +        * can have only one struct nfs_page.
>> +        */
>> +       if (desc->pg_bsize < PAGE_SIZE)
>> +               return 0;
>> +
>>        if (!pnfs_generic_pg_test(pgio, prev, req))
>>                return 0;
>>
> 
> Note this moves a test that was once part of the plain nfs code into
> the file layout driver.  Why don't other drivers need this test?

True.  Note I said it would work, not that it's the right fix? :-/
This just tells us what change exposed this issue...

Boaz moved this check to the nfs only path assuming that pg_bsize,
which holds the MDS's wsize/rsize is irrelevant for coalescing requests
for striping over pnfs.

I'm still convinced why nfs_flush_multi cannot use desc->pg_lseg
if it exists, but at the same time it seems like not doing the
right thing for pnfs coalescing in nfs_pageio_init_write and
nfs_pageio_do_add_request.

For pnfs, we need to ignore wsize, meaning we first need to try
to coalesce the pages and then decide if we're going the nfs_flush_multi
or the nfs_flush_one way, based on the coalesced length.

Benny

> 
> Fred
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 18:07       ` Trond Myklebust
@ 2011-06-01 19:13         ` Benny Halevy
  2011-06-01 19:29           ` Trond Myklebust
  0 siblings, 1 reply; 23+ messages in thread
From: Benny Halevy @ 2011-06-01 19:13 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On 2011-06-01 21:07, Trond Myklebust wrote:
> On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote: 
>> I think the following should work:
>>
>> Benny
>>
>> git diff --stat -p -M
>>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>>  1 files changed, 10 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 4269088..9f1d445 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
>> *pgio, struct nfs_page *prev,
>>  	u64 p_stripe, r_stripe;
>>  	u32 stripe_unit;
>>
>> +	/*
>> +	 * FIXME: ideally we should be able to coalesce all requests
>> +	 * that are not block boundary aligned, but currently this
>> +	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
>> +	 * since nfs_flush_multi and nfs_pagein_multi assume you
>> +	 * can have only one struct nfs_page.
>> +	 */
>> +	if (desc->pg_bsize < PAGE_SIZE)
>> +		return 0;
>> +
>>  	if (!pnfs_generic_pg_test(pgio, prev, req))
>>  		return 0;
> 
> So, there are several things that bother me about pnfs_generic_pg_test()
> too now that I'm looking more closely at it:
> 
>      1. If the intention is to coalesce 'prev' and 'req', shouldn't we
>         be asking for a layout with req_offset(prev) instead of
>         req_offset(req)? 
>      2. If we're only requesting a layout of length pg_count, don't we
>         still need to test the layout length that the server actually
>         returned before we can allow the coalescing? 
>      3. if (!pgio->lseg), shouldn't we be returning an error of some
>         sort? Right now we're returning 'true', and allowing the
>         coalesce to occur. 
>      4. Furthermore, shouldn't that test guarding the
>         pnfs_update_layout() call rather be an 'if (pgio->pg_lseg ==
>         NULL)' instead of looking at the values of pg_count and
>         prev->wb_bytes?
> 

or rather we get the layout for the first page in
nfs_pageio_do_add_request when desc->pg_count == 0?

Then, this lseg would be useful for nfs_flush_multi
if we failed to coalesce, or we failed to get a layout
altogether we go the nfs path and can reset pg_test to
nfs_generic_pg_test.

Otherwise I agree with your assertions above.

Benny

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 18:56         ` Benny Halevy
@ 2011-06-01 19:17           ` Trond Myklebust
  2011-06-01 19:29             ` Boaz Harrosh
  0 siblings, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2011-06-01 19:17 UTC (permalink / raw)
  To: Benny Halevy
  Cc: Fred Isaman, Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On Wed, 2011-06-01 at 21:56 +0300, Benny Halevy wrote:

> For pnfs, we need to ignore wsize, meaning we first need to try
> to coalesce the pages and then decide if we're going the nfs_flush_multi
> or the nfs_flush_one way, based on the coalesced length.

No! Ignoring the wsize is definitely wrong... If the stripe size is
larger than the 'maxwrite' recommended attribute, then the DS is allowed
to do a short write, in which case we have to resend.

In any case, nfs_flush_multi and nfs_flush_one need a rewrite in order
to deal properly with O_DIRECT writes, and so I'm expecting to get rid
of the single nfs_page limit for the r/wsize<PAGE_SIZE case.
Please don't make any large changes to this code at this time.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 19:13         ` Benny Halevy
@ 2011-06-01 19:29           ` Trond Myklebust
  2011-06-01 20:09             ` Benny Halevy
  0 siblings, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2011-06-01 19:29 UTC (permalink / raw)
  To: Benny Halevy; +Cc: Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On Wed, 2011-06-01 at 22:13 +0300, Benny Halevy wrote: 
> On 2011-06-01 21:07, Trond Myklebust wrote:
> > On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote: 
> >> I think the following should work:
> >>
> >> Benny
> >>
> >> git diff --stat -p -M
> >>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
> >>  1 files changed, 10 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> >> index 4269088..9f1d445 100644
> >> --- a/fs/nfs/nfs4filelayout.c
> >> +++ b/fs/nfs/nfs4filelayout.c
> >> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
> >> *pgio, struct nfs_page *prev,
> >>  	u64 p_stripe, r_stripe;
> >>  	u32 stripe_unit;
> >>
> >> +	/*
> >> +	 * FIXME: ideally we should be able to coalesce all requests
> >> +	 * that are not block boundary aligned, but currently this
> >> +	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
> >> +	 * since nfs_flush_multi and nfs_pagein_multi assume you
> >> +	 * can have only one struct nfs_page.
> >> +	 */
> >> +	if (desc->pg_bsize < PAGE_SIZE)
> >> +		return 0;
> >> +
> >>  	if (!pnfs_generic_pg_test(pgio, prev, req))
> >>  		return 0;
> > 
> > So, there are several things that bother me about pnfs_generic_pg_test()
> > too now that I'm looking more closely at it:
> > 
> >      1. If the intention is to coalesce 'prev' and 'req', shouldn't we
> >         be asking for a layout with req_offset(prev) instead of
> >         req_offset(req)? 
> >      2. If we're only requesting a layout of length pg_count, don't we
> >         still need to test the layout length that the server actually
> >         returned before we can allow the coalescing? 
> >      3. if (!pgio->lseg), shouldn't we be returning an error of some
> >         sort? Right now we're returning 'true', and allowing the
> >         coalesce to occur. 
> >      4. Furthermore, shouldn't that test guarding the
> >         pnfs_update_layout() call rather be an 'if (pgio->pg_lseg ==
> >         NULL)' instead of looking at the values of pg_count and
> >         prev->wb_bytes?
> > 
> 
> or rather we get the layout for the first page in
> nfs_pageio_do_add_request when desc->pg_count == 0?

I can live with a desc->pg_init() callback or rather, converting
pg_test() and pg_doio() into a

struct nfs_pageio_ops {
	int (*pg_init)(struct nfs_pageio_descriptor *desc, struct nfs_page *req);
	bool (*pg_test)(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req);
	int (*pg_doio)(struct nfs_pageio_descriptor *desc);
};

and then replacing the two callback functions in the existing struct
nfs_pageio_descriptor with a single pointer to a 'const struct
nfs_pageio_ops'...

> Then, this lseg would be useful for nfs_flush_multi
> if we failed to coalesce, or we failed to get a layout
> altogether we go the nfs path and can reset pg_test to
> nfs_generic_pg_test.

It would presumably also get rid of all those pnfs_update_layout() calls
in read.c and write.c.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 19:17           ` Trond Myklebust
@ 2011-06-01 19:29             ` Boaz Harrosh
  2011-06-01 19:38               ` Trond Myklebust
  0 siblings, 1 reply; 23+ messages in thread
From: Boaz Harrosh @ 2011-06-01 19:29 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Benny Halevy, Fred Isaman, Weston Andros Adamson, trond, linux-nfs

On 06/01/2011 10:17 PM, Trond Myklebust wrote:
> On Wed, 2011-06-01 at 21:56 +0300, Benny Halevy wrote:
> 
>> For pnfs, we need to ignore wsize, meaning we first need to try
>> to coalesce the pages and then decide if we're going the nfs_flush_multi
>> or the nfs_flush_one way, based on the coalesced length.
> 
> No! Ignoring the wsize is definitely wrong... If the stripe size is
> larger than the 'maxwrite' recommended attribute, then the DS is allowed
> to do a short write, in which case we have to resend.
> 

As far as I could understand the current code, desc->bsize which derives
from wsize/rsize is negotiated with the MDS. But the wsize in question
is the DS's one. So I think in pnfs it is only the layout-driver that
can check this properly against the filer in question. Only if IO is
to go through MDS the bsize check is relevant.

BTW: The BUG_ON() Andy hit, does not look like an hard bug to fix ;-)

> In any case, nfs_flush_multi and nfs_flush_one need a rewrite in order
> to deal properly with O_DIRECT writes, and so I'm expecting to get rid
> of the single nfs_page limit for the r/wsize<PAGE_SIZE case.
> Please don't make any large changes to this code at this time.
> 

Amen, Good riddance
Boaz

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 19:29             ` Boaz Harrosh
@ 2011-06-01 19:38               ` Trond Myklebust
  2011-06-01 19:49                 ` Boaz Harrosh
  0 siblings, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2011-06-01 19:38 UTC (permalink / raw)
  To: Boaz Harrosh
  Cc: Benny Halevy, Fred Isaman, Weston Andros Adamson, trond, linux-nfs

On Wed, 2011-06-01 at 22:29 +0300, Boaz Harrosh wrote: 
> On 06/01/2011 10:17 PM, Trond Myklebust wrote:
> > On Wed, 2011-06-01 at 21:56 +0300, Benny Halevy wrote:
> > 
> >> For pnfs, we need to ignore wsize, meaning we first need to try
> >> to coalesce the pages and then decide if we're going the nfs_flush_multi
> >> or the nfs_flush_one way, based on the coalesced length.
> > 
> > No! Ignoring the wsize is definitely wrong... If the stripe size is
> > larger than the 'maxwrite' recommended attribute, then the DS is allowed
> > to do a short write, in which case we have to resend.
> > 
> 
> As far as I could understand the current code, desc->bsize which derives
> from wsize/rsize is negotiated with the MDS. But the wsize in question
> is the DS's one. So I think in pnfs it is only the layout-driver that
> can check this properly against the filer in question. Only if IO is
> to go through MDS the bsize check is relevant.

There is no distinction between the MDS's idea of the wsize and the DS's
idea of the wsize. The DS has to support the same wsize as the MDS,
since the client has no way of querying the wsize from the DS (GETATTR
maxwrite is not on the allowed set of DS operations)...

> BTW: The BUG_ON() Andy hit, does not look like an hard bug to fix ;-)
> 
> > In any case, nfs_flush_multi and nfs_flush_one need a rewrite in order
> > to deal properly with O_DIRECT writes, and so I'm expecting to get rid
> > of the single nfs_page limit for the r/wsize<PAGE_SIZE case.
> > Please don't make any large changes to this code at this time.
> > 
> 
> Amen, Good riddance

:-)

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 19:38               ` Trond Myklebust
@ 2011-06-01 19:49                 ` Boaz Harrosh
  2011-06-01 19:52                   ` Trond Myklebust
  0 siblings, 1 reply; 23+ messages in thread
From: Boaz Harrosh @ 2011-06-01 19:49 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Benny Halevy, Fred Isaman, Weston Andros Adamson, trond, linux-nfs

On 06/01/2011 10:38 PM, Trond Myklebust wrote:
> 
> There is no distinction between the MDS's idea of the wsize and the DS's
> idea of the wsize. The DS has to support the same wsize as the MDS,
> since the client has no way of querying the wsize from the DS (GETATTR
> maxwrite is not on the allowed set of DS operations)...
> 

OK I did not know that. Good god how broken is that?

Any way you are probably right. But then the check properly belongs in
files-layout driver. Surly it has nothing to do with objects, and blocks

Thanks
Boaz

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 19:49                 ` Boaz Harrosh
@ 2011-06-01 19:52                   ` Trond Myklebust
  0 siblings, 0 replies; 23+ messages in thread
From: Trond Myklebust @ 2011-06-01 19:52 UTC (permalink / raw)
  To: Boaz Harrosh
  Cc: Benny Halevy, Fred Isaman, Weston Andros Adamson, trond, linux-nfs

On Wed, 2011-06-01 at 22:49 +0300, Boaz Harrosh wrote: 
> On 06/01/2011 10:38 PM, Trond Myklebust wrote:
> > 
> > There is no distinction between the MDS's idea of the wsize and the DS's
> > idea of the wsize. The DS has to support the same wsize as the MDS,
> > since the client has no way of querying the wsize from the DS (GETATTR
> > maxwrite is not on the allowed set of DS operations)...
> > 
> 
> OK I did not know that. Good god how broken is that?
> 
> Any way you are probably right. But then the check properly belongs in
> files-layout driver. Surly it has nothing to do with objects, and blocks

It belongs in the 'files' layout and in the generic (i.e. v2/v3/v4)
code.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 19:29           ` Trond Myklebust
@ 2011-06-01 20:09             ` Benny Halevy
  2011-06-06 16:47               ` William A. (Andy) Adamson
  0 siblings, 1 reply; 23+ messages in thread
From: Benny Halevy @ 2011-06-01 20:09 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On 2011-06-01 22:29, Trond Myklebust wrote:
> On Wed, 2011-06-01 at 22:13 +0300, Benny Halevy wrote: 
>> On 2011-06-01 21:07, Trond Myklebust wrote:
>>> On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote: 
>>>> I think the following should work:
>>>>
>>>> Benny
>>>>
>>>> git diff --stat -p -M
>>>>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>>>>  1 files changed, 10 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>> index 4269088..9f1d445 100644
>>>> --- a/fs/nfs/nfs4filelayout.c
>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
>>>> *pgio, struct nfs_page *prev,
>>>>  	u64 p_stripe, r_stripe;
>>>>  	u32 stripe_unit;
>>>>
>>>> +	/*
>>>> +	 * FIXME: ideally we should be able to coalesce all requests
>>>> +	 * that are not block boundary aligned, but currently this
>>>> +	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
>>>> +	 * since nfs_flush_multi and nfs_pagein_multi assume you
>>>> +	 * can have only one struct nfs_page.
>>>> +	 */
>>>> +	if (desc->pg_bsize < PAGE_SIZE)
>>>> +		return 0;
>>>> +
>>>>  	if (!pnfs_generic_pg_test(pgio, prev, req))
>>>>  		return 0;
>>>
>>> So, there are several things that bother me about pnfs_generic_pg_test()
>>> too now that I'm looking more closely at it:
>>>
>>>      1. If the intention is to coalesce 'prev' and 'req', shouldn't we
>>>         be asking for a layout with req_offset(prev) instead of
>>>         req_offset(req)? 
>>>      2. If we're only requesting a layout of length pg_count, don't we
>>>         still need to test the layout length that the server actually
>>>         returned before we can allow the coalescing? 
>>>      3. if (!pgio->lseg), shouldn't we be returning an error of some
>>>         sort? Right now we're returning 'true', and allowing the
>>>         coalesce to occur. 
>>>      4. Furthermore, shouldn't that test guarding the
>>>         pnfs_update_layout() call rather be an 'if (pgio->pg_lseg ==
>>>         NULL)' instead of looking at the values of pg_count and
>>>         prev->wb_bytes?
>>>
>>
>> or rather we get the layout for the first page in
>> nfs_pageio_do_add_request when desc->pg_count == 0?
> 
> I can live with a desc->pg_init() callback or rather, converting
> pg_test() and pg_doio() into a
> 
> struct nfs_pageio_ops {
> 	int (*pg_init)(struct nfs_pageio_descriptor *desc, struct nfs_page *req);
> 	bool (*pg_test)(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req);
> 	int (*pg_doio)(struct nfs_pageio_descriptor *desc);
> };
> 
> and then replacing the two callback functions in the existing struct
> nfs_pageio_descriptor with a single pointer to a 'const struct
> nfs_pageio_ops'...
> 

looks like a good way to do this!

>> Then, this lseg would be useful for nfs_flush_multi
>> if we failed to coalesce, or we failed to get a layout
>> altogether we go the nfs path and can reset pg_test to
>> nfs_generic_pg_test.
> 
> It would presumably also get rid of all those pnfs_update_layout() calls
> in read.c and write.c.
> 

Yup.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-01 20:09             ` Benny Halevy
@ 2011-06-06 16:47               ` William A. (Andy) Adamson
  2011-06-06 18:21                 ` Benny Halevy
  0 siblings, 1 reply; 23+ messages in thread
From: William A. (Andy) Adamson @ 2011-06-06 16:47 UTC (permalink / raw)
  To: Benny Halevy
  Cc: Trond Myklebust, Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On Wed, Jun 1, 2011 at 4:09 PM, Benny Halevy <bhalevy@panasas.com> wrote:
> On 2011-06-01 22:29, Trond Myklebust wrote:
>> On Wed, 2011-06-01 at 22:13 +0300, Benny Halevy wrote:
>>> On 2011-06-01 21:07, Trond Myklebust wrote:
>>>> On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote:
>>>>> I think the following should work:
>>>>>
>>>>> Benny
>>>>>
>>>>> git diff --stat -p -M
>>>>>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>>>>>  1 files changed, 10 insertions(+), 0 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>> index 4269088..9f1d445 100644
>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
>>>>> *pgio, struct nfs_page *prev,
>>>>>    u64 p_stripe, r_stripe;
>>>>>    u32 stripe_unit;
>>>>>
>>>>> +  /*
>>>>> +   * FIXME: ideally we should be able to coalesce all requests
>>>>> +   * that are not block boundary aligned, but currently this
>>>>> +   * is problematic for the case of bsize < PAGE_CACHE_SIZE,
>>>>> +   * since nfs_flush_multi and nfs_pagein_multi assume you
>>>>> +   * can have only one struct nfs_page.
>>>>> +   */
>>>>> +  if (desc->pg_bsize < PAGE_SIZE)
>>>>> +          return 0;
>>>>> +
>>>>>    if (!pnfs_generic_pg_test(pgio, prev, req))
>>>>>            return 0;
>>>>
>>>> So, there are several things that bother me about pnfs_generic_pg_test()
>>>> too now that I'm looking more closely at it:
>>>>
>>>>      1. If the intention is to coalesce 'prev' and 'req', shouldn't we
>>>>         be asking for a layout with req_offset(prev) instead of
>>>>         req_offset(req)?
>>>>      2. If we're only requesting a layout of length pg_count, don't we
>>>>         still need to test the layout length that the server actually
>>>>         returned before we can allow the coalescing?
>>>>      3. if (!pgio->lseg), shouldn't we be returning an error of some
>>>>         sort? Right now we're returning 'true', and allowing the
>>>>         coalesce to occur.
>>>>      4. Furthermore, shouldn't that test guarding the
>>>>         pnfs_update_layout() call rather be an 'if (pgio->pg_lseg ==
>>>>         NULL)' instead of looking at the values of pg_count and
>>>>         prev->wb_bytes?
>>>>
>>>
>>> or rather we get the layout for the first page in
>>> nfs_pageio_do_add_request when desc->pg_count == 0?
>>
>> I can live with a desc->pg_init() callback or rather, converting
>> pg_test() and pg_doio() into a
>>
>> struct nfs_pageio_ops {
>>       int (*pg_init)(struct nfs_pageio_descriptor *desc, struct nfs_page *req);
>>       bool (*pg_test)(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req);
>>       int (*pg_doio)(struct nfs_pageio_descriptor *desc);
>> };
>>
>> and then replacing the two callback functions in the existing struct
>> nfs_pageio_descriptor with a single pointer to a 'const struct
>> nfs_pageio_ops'...
>>
>
> looks like a good way to do this!

Is anyone coding this fix?

-->Andy

>
>>> Then, this lseg would be useful for nfs_flush_multi
>>> if we failed to coalesce, or we failed to get a layout
>>> altogether we go the nfs path and can reset pg_test to
>>> nfs_generic_pg_test.
>>
>> It would presumably also get rid of all those pnfs_update_layout() calls
>> in read.c and write.c.
>>
>
> Yup.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-06 16:47               ` William A. (Andy) Adamson
@ 2011-06-06 18:21                 ` Benny Halevy
  2011-06-06 18:22                   ` Myklebust, Trond
  0 siblings, 1 reply; 23+ messages in thread
From: Benny Halevy @ 2011-06-06 18:21 UTC (permalink / raw)
  To: William A. (Andy) Adamson
  Cc: Trond Myklebust, Weston Andros Adamson, Boaz Harrosh, trond, linux-nfs

On 2011-06-06 12:47, William A. (Andy) Adamson wrote:
> On Wed, Jun 1, 2011 at 4:09 PM, Benny Halevy <bhalevy@panasas.com> wrote:
>> On 2011-06-01 22:29, Trond Myklebust wrote:
>>> On Wed, 2011-06-01 at 22:13 +0300, Benny Halevy wrote:
>>>> On 2011-06-01 21:07, Trond Myklebust wrote:
>>>>> On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote:
>>>>>> I think the following should work:
>>>>>>
>>>>>> Benny
>>>>>>
>>>>>> git diff --stat -p -M
>>>>>>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>>>>>>  1 files changed, 10 insertions(+), 0 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>>> index 4269088..9f1d445 100644
>>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>>> @@ -661,6 +661,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor
>>>>>> *pgio, struct nfs_page *prev,
>>>>>>    u64 p_stripe, r_stripe;
>>>>>>    u32 stripe_unit;
>>>>>>
>>>>>> +  /*
>>>>>> +   * FIXME: ideally we should be able to coalesce all requests
>>>>>> +   * that are not block boundary aligned, but currently this
>>>>>> +   * is problematic for the case of bsize < PAGE_CACHE_SIZE,
>>>>>> +   * since nfs_flush_multi and nfs_pagein_multi assume you
>>>>>> +   * can have only one struct nfs_page.
>>>>>> +   */
>>>>>> +  if (desc->pg_bsize < PAGE_SIZE)
>>>>>> +          return 0;
>>>>>> +
>>>>>>    if (!pnfs_generic_pg_test(pgio, prev, req))
>>>>>>            return 0;
>>>>>
>>>>> So, there are several things that bother me about pnfs_generic_pg_test()
>>>>> too now that I'm looking more closely at it:
>>>>>
>>>>>      1. If the intention is to coalesce 'prev' and 'req', shouldn't we
>>>>>         be asking for a layout with req_offset(prev) instead of
>>>>>         req_offset(req)?
>>>>>      2. If we're only requesting a layout of length pg_count, don't we
>>>>>         still need to test the layout length that the server actually
>>>>>         returned before we can allow the coalescing?
>>>>>      3. if (!pgio->lseg), shouldn't we be returning an error of some
>>>>>         sort? Right now we're returning 'true', and allowing the
>>>>>         coalesce to occur.
>>>>>      4. Furthermore, shouldn't that test guarding the
>>>>>         pnfs_update_layout() call rather be an 'if (pgio->pg_lseg ==
>>>>>         NULL)' instead of looking at the values of pg_count and
>>>>>         prev->wb_bytes?
>>>>>
>>>>
>>>> or rather we get the layout for the first page in
>>>> nfs_pageio_do_add_request when desc->pg_count == 0?
>>>
>>> I can live with a desc->pg_init() callback or rather, converting
>>> pg_test() and pg_doio() into a
>>>
>>> struct nfs_pageio_ops {
>>>       int (*pg_init)(struct nfs_pageio_descriptor *desc, struct nfs_page *req);
>>>       bool (*pg_test)(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req);
>>>       int (*pg_doio)(struct nfs_pageio_descriptor *desc);
>>> };
>>>
>>> and then replacing the two callback functions in the existing struct
>>> nfs_pageio_descriptor with a single pointer to a 'const struct
>>> nfs_pageio_ops'...
>>>
>>
>> looks like a good way to do this!
> 
> Is anyone coding this fix?
> 

I started working on this but switched to porting forward spnfs and
spnfs-block (which I've just pushed out).

Benny

> -->Andy
> 
>>
>>>> Then, this lseg would be useful for nfs_flush_multi
>>>> if we failed to coalesce, or we failed to get a layout
>>>> altogether we go the nfs path and can reset pg_test to
>>>> nfs_generic_pg_test.
>>>
>>> It would presumably also get rid of all those pnfs_update_layout() calls
>>> in read.c and write.c.
>>>
>>
>> Yup.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH] NFS: filelayout should use nfs_generic_pg_test
  2011-06-06 18:21                 ` Benny Halevy
@ 2011-06-06 18:22                   ` Myklebust, Trond
  0 siblings, 0 replies; 23+ messages in thread
From: Myklebust, Trond @ 2011-06-06 18:22 UTC (permalink / raw)
  To: Benny Halevy, William A. (Andy) Adamson
  Cc: Adamson, Dros, Boaz Harrosh, linux-nfs

I'm working on it. I'm doing a lot of surgery in that general area
anyway...

-----Original Message-----
From: Benny Halevy [mailto:bhalevy@panasas.com] 
Sent: Monday, June 06, 2011 2:21 PM
To: William A. (Andy) Adamson
Cc: Myklebust, Trond; Adamson, Dros; Boaz Harrosh; Myklebust, Trond;
linux-nfs@vger.kernel.org
Subject: Re: [PATCH] NFS: filelayout should use nfs_generic_pg_test

On 2011-06-06 12:47, William A. (Andy) Adamson wrote:
> On Wed, Jun 1, 2011 at 4:09 PM, Benny Halevy <bhalevy@panasas.com>
wrote:
>> On 2011-06-01 22:29, Trond Myklebust wrote:
>>> On Wed, 2011-06-01 at 22:13 +0300, Benny Halevy wrote:
>>>> On 2011-06-01 21:07, Trond Myklebust wrote:
>>>>> On Wed, 2011-06-01 at 17:51 +0300, Benny Halevy wrote:
>>>>>> I think the following should work:
>>>>>>
>>>>>> Benny
>>>>>>
>>>>>> git diff --stat -p -M
>>>>>>  fs/nfs/nfs4filelayout.c |   10 ++++++++++
>>>>>>  1 files changed, 10 insertions(+), 0 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>>> index 4269088..9f1d445 100644
>>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>>> @@ -661,6 +661,16 @@ filelayout_pg_test(struct
nfs_pageio_descriptor
>>>>>> *pgio, struct nfs_page *prev,
>>>>>>    u64 p_stripe, r_stripe;
>>>>>>    u32 stripe_unit;
>>>>>>
>>>>>> +  /*
>>>>>> +   * FIXME: ideally we should be able to coalesce all requests
>>>>>> +   * that are not block boundary aligned, but currently this
>>>>>> +   * is problematic for the case of bsize < PAGE_CACHE_SIZE,
>>>>>> +   * since nfs_flush_multi and nfs_pagein_multi assume you
>>>>>> +   * can have only one struct nfs_page.
>>>>>> +   */
>>>>>> +  if (desc->pg_bsize < PAGE_SIZE)
>>>>>> +          return 0;
>>>>>> +
>>>>>>    if (!pnfs_generic_pg_test(pgio, prev, req))
>>>>>>            return 0;
>>>>>
>>>>> So, there are several things that bother me about
pnfs_generic_pg_test()
>>>>> too now that I'm looking more closely at it:
>>>>>
>>>>>      1. If the intention is to coalesce 'prev' and 'req',
shouldn't we
>>>>>         be asking for a layout with req_offset(prev) instead of
>>>>>         req_offset(req)?
>>>>>      2. If we're only requesting a layout of length pg_count,
don't we
>>>>>         still need to test the layout length that the server
actually
>>>>>         returned before we can allow the coalescing?
>>>>>      3. if (!pgio->lseg), shouldn't we be returning an error of
some
>>>>>         sort? Right now we're returning 'true', and allowing the
>>>>>         coalesce to occur.
>>>>>      4. Furthermore, shouldn't that test guarding the
>>>>>         pnfs_update_layout() call rather be an 'if (pgio->pg_lseg
==
>>>>>         NULL)' instead of looking at the values of pg_count and
>>>>>         prev->wb_bytes?
>>>>>
>>>>
>>>> or rather we get the layout for the first page in
>>>> nfs_pageio_do_add_request when desc->pg_count == 0?
>>>
>>> I can live with a desc->pg_init() callback or rather, converting
>>> pg_test() and pg_doio() into a
>>>
>>> struct nfs_pageio_ops {
>>>       int (*pg_init)(struct nfs_pageio_descriptor *desc, struct
nfs_page *req);
>>>       bool (*pg_test)(struct nfs_pageio_descriptor *desc, struct
nfs_page *prev, struct nfs_page *req);
>>>       int (*pg_doio)(struct nfs_pageio_descriptor *desc);
>>> };
>>>
>>> and then replacing the two callback functions in the existing struct
>>> nfs_pageio_descriptor with a single pointer to a 'const struct
>>> nfs_pageio_ops'...
>>>
>>
>> looks like a good way to do this!
> 
> Is anyone coding this fix?
> 

I started working on this but switched to porting forward spnfs and
spnfs-block (which I've just pushed out).

Benny

> -->Andy
> 
>>
>>>> Then, this lseg would be useful for nfs_flush_multi
>>>> if we failed to coalesce, or we failed to get a layout
>>>> altogether we go the nfs path and can reset pg_test to
>>>> nfs_generic_pg_test.
>>>
>>> It would presumably also get rid of all those pnfs_update_layout()
calls
>>> in read.c and write.c.
>>>
>>
>> Yup.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2011-06-06 18:23 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-01  3:18 [PATCH] NFS: filelayout should use nfs_generic_pg_test Weston Andros Adamson
2011-06-01  5:47 ` Boaz Harrosh
2011-06-01 12:14   ` Trond Myklebust
2011-06-01 13:36     ` Boaz Harrosh
2011-06-01 13:43       ` Benny Halevy
2011-06-01 14:32         ` Benny Halevy
2011-06-01 14:44   ` Weston Andros Adamson
2011-06-01 14:51     ` Benny Halevy
2011-06-01 15:36       ` Weston Andros Adamson
2011-06-01 16:01       ` Fred Isaman
2011-06-01 18:56         ` Benny Halevy
2011-06-01 19:17           ` Trond Myklebust
2011-06-01 19:29             ` Boaz Harrosh
2011-06-01 19:38               ` Trond Myklebust
2011-06-01 19:49                 ` Boaz Harrosh
2011-06-01 19:52                   ` Trond Myklebust
2011-06-01 18:07       ` Trond Myklebust
2011-06-01 19:13         ` Benny Halevy
2011-06-01 19:29           ` Trond Myklebust
2011-06-01 20:09             ` Benny Halevy
2011-06-06 16:47               ` William A. (Andy) Adamson
2011-06-06 18:21                 ` Benny Halevy
2011-06-06 18:22                   ` Myklebust, Trond

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.