All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pnfsblock: init pg_bsize properly
@ 2011-08-13  1:04 Peng Tao
  2011-08-16 21:05 ` Boaz Harrosh
  0 siblings, 1 reply; 12+ messages in thread
From: Peng Tao @ 2011-08-13  1:04 UTC (permalink / raw)
  To: benny; +Cc: linux-nfs, Peng Tao

pg_bsize is server->wsize/rsize by default. We would want to use the lseg length.

Signed-off-by: Peng Tao <peng_tao@emc.com>
---
 fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
 1 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 36648e1..9143e61 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
 	return 0;
 }
 
+static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
+			    struct nfs_page *req)
+{
+	pnfs_generic_pg_init_read(pgio, req);
+	if (pgio->pg_lseg)
+		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
+}
+
+static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
+			     struct nfs_page *req)
+{
+	pnfs_generic_pg_init_write(pgio, req);
+	if (pgio->pg_lseg)
+		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
+}
+
 static const struct nfs_pageio_ops bl_pg_read_ops = {
-	.pg_init = pnfs_generic_pg_init_read,
+	.pg_init = bl_pg_init_read,
 	.pg_test = pnfs_generic_pg_test,
 	.pg_doio = pnfs_generic_pg_readpages,
 };
 
 static const struct nfs_pageio_ops bl_pg_write_ops = {
-	.pg_init = pnfs_generic_pg_init_write,
+	.pg_init = bl_pg_init_write,
 	.pg_test = pnfs_generic_pg_test,
 	.pg_doio = pnfs_generic_pg_writepages,
 };
-- 
1.7.1.262.g5ef3d


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-13  1:04 [PATCH] pnfsblock: init pg_bsize properly Peng Tao
@ 2011-08-16 21:05 ` Boaz Harrosh
  2011-08-17  7:15   ` Benny Halevy
  0 siblings, 1 reply; 12+ messages in thread
From: Boaz Harrosh @ 2011-08-16 21:05 UTC (permalink / raw)
  To: Peng Tao; +Cc: benny, linux-nfs, Peng Tao

On 08/12/2011 06:04 PM, Peng Tao wrote:
> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length.
> 

Hi

What is the problem you are trying to solve with this patch?

>From what I understand the only place that actually cares about
pg_bsize is nfs_generic_pg_test() which is only used in MDS
read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test()
check that should not concern with pg_bsize (Unless for pnfs-files
which does). So the idea is that pg_bsize is the maximum set by
MDS server in regard to IO through MDS. And it should not be changed
by client.

If it is not what you see then we should fix it. But should never
override MDS wsize/rsize.

> Signed-off-by: Peng Tao <peng_tao@emc.com>
> ---
>  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
>  1 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index 36648e1..9143e61 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
>  	return 0;
>  }
>  
> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
> +			    struct nfs_page *req)
> +{
> +	pnfs_generic_pg_init_read(pgio, req);
> +	if (pgio->pg_lseg)
> +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
> +}
> +
> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
> +			     struct nfs_page *req)
> +{
> +	pnfs_generic_pg_init_write(pgio, req);
> +	if (pgio->pg_lseg)
> +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
> +}
> +
>  static const struct nfs_pageio_ops bl_pg_read_ops = {
> -	.pg_init = pnfs_generic_pg_init_read,
> +	.pg_init = bl_pg_init_read,
>  	.pg_test = pnfs_generic_pg_test,

I see here that you do not override .pg_test. This is your problem
look at objio_osd::objio_pg_test() it checks for similar boundaries
at the objects side. This is where you need to do these checks
for blocks as well.

>  	.pg_doio = pnfs_generic_pg_readpages,
>  };
>  
>  static const struct nfs_pageio_ops bl_pg_write_ops = {
> -	.pg_init = pnfs_generic_pg_init_write,
> +	.pg_init = bl_pg_init_write,
>  	.pg_test = pnfs_generic_pg_test,

Same here

>  	.pg_doio = pnfs_generic_pg_writepages,
>  };

Thanks
Boaz

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-16 21:05 ` Boaz Harrosh
@ 2011-08-17  7:15   ` Benny Halevy
  2011-08-17  9:35     ` Peng Tao
  0 siblings, 1 reply; 12+ messages in thread
From: Benny Halevy @ 2011-08-17  7:15 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Peng Tao, benny, linux-nfs, Peng Tao


On 2011-08-17 00:05, Boaz Harrosh wrote:
> On 08/12/2011 06:04 PM, Peng Tao wrote:
>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length.
>>
> 
> Hi
> 
> What is the problem you are trying to solve with this patch?
> 
> From what I understand the only place that actually cares about
> pg_bsize is nfs_generic_pg_test() which is only used in MDS
> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test()
> check that should not concern with pg_bsize (Unless for pnfs-files
> which does). So the idea is that pg_bsize is the maximum set by
> MDS server in regard to IO through MDS. And it should not be changed
> by client.
> 
> If it is not what you see then we should fix it. But should never
> override MDS wsize/rsize.

I second that.

Benny

> 
>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>> ---
>>  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
>>  1 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
>> index 36648e1..9143e61 100644
>> --- a/fs/nfs/blocklayout/blocklayout.c
>> +++ b/fs/nfs/blocklayout/blocklayout.c
>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
>>  	return 0;
>>  }
>>  
>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
>> +			    struct nfs_page *req)
>> +{
>> +	pnfs_generic_pg_init_read(pgio, req);
>> +	if (pgio->pg_lseg)
>> +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>> +}
>> +
>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
>> +			     struct nfs_page *req)
>> +{
>> +	pnfs_generic_pg_init_write(pgio, req);
>> +	if (pgio->pg_lseg)
>> +		pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>> +}
>> +
>>  static const struct nfs_pageio_ops bl_pg_read_ops = {
>> -	.pg_init = pnfs_generic_pg_init_read,
>> +	.pg_init = bl_pg_init_read,
>>  	.pg_test = pnfs_generic_pg_test,
> 
> I see here that you do not override .pg_test. This is your problem
> look at objio_osd::objio_pg_test() it checks for similar boundaries
> at the objects side. This is where you need to do these checks
> for blocks as well.
> 
>>  	.pg_doio = pnfs_generic_pg_readpages,
>>  };
>>  
>>  static const struct nfs_pageio_ops bl_pg_write_ops = {
>> -	.pg_init = pnfs_generic_pg_init_write,
>> +	.pg_init = bl_pg_init_write,
>>  	.pg_test = pnfs_generic_pg_test,
> 
> Same here
> 
>>  	.pg_doio = pnfs_generic_pg_writepages,
>>  };
> 
> Thanks
> Boaz
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-17  7:15   ` Benny Halevy
@ 2011-08-17  9:35     ` Peng Tao
  2011-08-17 16:27       ` Benny Halevy
  2011-08-22 23:52       ` Boaz Harrosh
  0 siblings, 2 replies; 12+ messages in thread
From: Peng Tao @ 2011-08-17  9:35 UTC (permalink / raw)
  To: Benny Halevy, Boaz Harrosh; +Cc: linux-nfs, Peng Tao

Hi, Benny and Boaz,

On Wed, Aug 17, 2011 at 3:15 PM, Benny Halevy <bhalevy@tonian.com> wrote:
>
> On 2011-08-17 00:05, Boaz Harrosh wrote:
>> On 08/12/2011 06:04 PM, Peng Tao wrote:
>>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length.
>>>
>>
>> Hi
>>
>> What is the problem you are trying to solve with this patch?
>>
>> From what I understand the only place that actually cares about
>> pg_bsize is nfs_generic_pg_test() which is only used in MDS
>> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test()
>> check that should not concern with pg_bsize (Unless for pnfs-files
>> which does). So the idea is that pg_bsize is the maximum set by
>> MDS server in regard to IO through MDS. And it should not be changed
>> by client.
>>
>> If it is not what you see then we should fix it. But should never
>> override MDS wsize/rsize.
In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will
be set as desc->pg_rpc_callops, which is determined in
nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For
blocklayout, we wouldn't want to set data->mds_ops to
partial_read/write ops, so I write the patch to use lseg length as
pg_bsize.

LD can override pg_bsize in pg_init because
nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to
server rsize/wsize if pnfs is not tried.

Sorry that I didn't explain it clearly in the commit log...


>
> I second that.
>
> Benny
>
>>
>>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>>> ---
>>>  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
>>>  1 files changed, 18 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
>>> index 36648e1..9143e61 100644
>>> --- a/fs/nfs/blocklayout/blocklayout.c
>>> +++ b/fs/nfs/blocklayout/blocklayout.c
>>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
>>>      return 0;
>>>  }
>>>
>>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
>>> +                        struct nfs_page *req)
>>> +{
>>> +    pnfs_generic_pg_init_read(pgio, req);
>>> +    if (pgio->pg_lseg)
>>> +            pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>>> +}
>>> +
>>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
>>> +                         struct nfs_page *req)
>>> +{
>>> +    pnfs_generic_pg_init_write(pgio, req);
>>> +    if (pgio->pg_lseg)
>>> +            pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>>> +}
>>> +
>>>  static const struct nfs_pageio_ops bl_pg_read_ops = {
>>> -    .pg_init = pnfs_generic_pg_init_read,
>>> +    .pg_init = bl_pg_init_read,
>>>      .pg_test = pnfs_generic_pg_test,
>>
>> I see here that you do not override .pg_test. This is your problem
>> look at objio_osd::objio_pg_test() it checks for similar boundaries
>> at the objects side. This is where you need to do these checks
>> for blocks as well.
For blocklayout, we don't need to force each IO under a certain size.
Currently (w/ and w/o this patch) the lseg coverage is the only
constraint for pagelist length. So pnfs_generic_pg_test is enough for
blocklayout.

Thanks,
Tao

>>
>>>      .pg_doio = pnfs_generic_pg_readpages,
>>>  };
>>>
>>>  static const struct nfs_pageio_ops bl_pg_write_ops = {
>>> -    .pg_init = pnfs_generic_pg_init_write,
>>> +    .pg_init = bl_pg_init_write,
>>>      .pg_test = pnfs_generic_pg_test,
>>
>> Same here
>>
>>>      .pg_doio = pnfs_generic_pg_writepages,
>>>  };
>>
>> Thanks
>> Boaz
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-17  9:35     ` Peng Tao
@ 2011-08-17 16:27       ` Benny Halevy
  2011-08-18 14:34         ` Peng Tao
  2011-08-22 23:52       ` Boaz Harrosh
  1 sibling, 1 reply; 12+ messages in thread
From: Benny Halevy @ 2011-08-17 16:27 UTC (permalink / raw)
  To: Peng Tao; +Cc: Boaz Harrosh, linux-nfs, Peng Tao

On 2011-08-17 12:35, Peng Tao wrote:
> Hi, Benny and Boaz,
> 
> On Wed, Aug 17, 2011 at 3:15 PM, Benny Halevy <bhalevy@tonian.com> wrote:
>>
>> On 2011-08-17 00:05, Boaz Harrosh wrote:
>>> On 08/12/2011 06:04 PM, Peng Tao wrote:
>>>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length.
>>>>
>>>
>>> Hi
>>>
>>> What is the problem you are trying to solve with this patch?
>>>
>>> From what I understand the only place that actually cares about
>>> pg_bsize is nfs_generic_pg_test() which is only used in MDS
>>> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test()
>>> check that should not concern with pg_bsize (Unless for pnfs-files
>>> which does). So the idea is that pg_bsize is the maximum set by
>>> MDS server in regard to IO through MDS. And it should not be changed
>>> by client.
>>>
>>> If it is not what you see then we should fix it. But should never
>>> override MDS wsize/rsize.
> In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will
> be set as desc->pg_rpc_callops, which is determined in
> nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For
> blocklayout, we wouldn't want to set data->mds_ops to
> partial_read/write ops, so I write the patch to use lseg length as
> pg_bsize.
> 
> LD can override pg_bsize in pg_init because
> nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to
> server rsize/wsize if pnfs is not tried.
> 
> Sorry that I didn't explain it clearly in the commit log...
> 
> 

To reflect that maybe we should also rename pg_bsize to pg_iosize.

Benny

>>
>> I second that.
>>
>> Benny
>>
>>>
>>>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>>>> ---
>>>>  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
>>>>  1 files changed, 18 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
>>>> index 36648e1..9143e61 100644
>>>> --- a/fs/nfs/blocklayout/blocklayout.c
>>>> +++ b/fs/nfs/blocklayout/blocklayout.c
>>>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
>>>>      return 0;
>>>>  }
>>>>
>>>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
>>>> +                        struct nfs_page *req)
>>>> +{
>>>> +    pnfs_generic_pg_init_read(pgio, req);
>>>> +    if (pgio->pg_lseg)
>>>> +            pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>>>> +}
>>>> +
>>>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
>>>> +                         struct nfs_page *req)
>>>> +{
>>>> +    pnfs_generic_pg_init_write(pgio, req);
>>>> +    if (pgio->pg_lseg)
>>>> +            pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>>>> +}
>>>> +
>>>>  static const struct nfs_pageio_ops bl_pg_read_ops = {
>>>> -    .pg_init = pnfs_generic_pg_init_read,
>>>> +    .pg_init = bl_pg_init_read,
>>>>      .pg_test = pnfs_generic_pg_test,
>>>
>>> I see here that you do not override .pg_test. This is your problem
>>> look at objio_osd::objio_pg_test() it checks for similar boundaries
>>> at the objects side. This is where you need to do these checks
>>> for blocks as well.
> For blocklayout, we don't need to force each IO under a certain size.
> Currently (w/ and w/o this patch) the lseg coverage is the only
> constraint for pagelist length. So pnfs_generic_pg_test is enough for
> blocklayout.
> 
> Thanks,
> Tao
> 
>>>
>>>>      .pg_doio = pnfs_generic_pg_readpages,
>>>>  };
>>>>
>>>>  static const struct nfs_pageio_ops bl_pg_write_ops = {
>>>> -    .pg_init = pnfs_generic_pg_init_write,
>>>> +    .pg_init = bl_pg_init_write,
>>>>      .pg_test = pnfs_generic_pg_test,
>>>
>>> Same here
>>>
>>>>      .pg_doio = pnfs_generic_pg_writepages,
>>>>  };
>>>
>>> Thanks
>>> Boaz
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-17 16:27       ` Benny Halevy
@ 2011-08-18 14:34         ` Peng Tao
  0 siblings, 0 replies; 12+ messages in thread
From: Peng Tao @ 2011-08-18 14:34 UTC (permalink / raw)
  To: Benny Halevy; +Cc: Boaz Harrosh, linux-nfs, Peng Tao

On Thu, Aug 18, 2011 at 12:27 AM, Benny Halevy <bhalevy@tonian.com> wrote:
> On 2011-08-17 12:35, Peng Tao wrote:
>> Hi, Benny and Boaz,
>>
>> On Wed, Aug 17, 2011 at 3:15 PM, Benny Halevy <bhalevy@tonian.com> wrote:
>>>
>>> On 2011-08-17 00:05, Boaz Harrosh wrote:
>>>> On 08/12/2011 06:04 PM, Peng Tao wrote:
>>>>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length.
>>>>>
>>>>
>>>> Hi
>>>>
>>>> What is the problem you are trying to solve with this patch?
>>>>
>>>> From what I understand the only place that actually cares about
>>>> pg_bsize is nfs_generic_pg_test() which is only used in MDS
>>>> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test()
>>>> check that should not concern with pg_bsize (Unless for pnfs-files
>>>> which does). So the idea is that pg_bsize is the maximum set by
>>>> MDS server in regard to IO through MDS. And it should not be changed
>>>> by client.
>>>>
>>>> If it is not what you see then we should fix it. But should never
>>>> override MDS wsize/rsize.
>> In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will
>> be set as desc->pg_rpc_callops, which is determined in
>> nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For
>> blocklayout, we wouldn't want to set data->mds_ops to
>> partial_read/write ops, so I write the patch to use lseg length as
>> pg_bsize.
>>
>> LD can override pg_bsize in pg_init because
>> nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to
>> server rsize/wsize if pnfs is not tried.
>>
>> Sorry that I didn't explain it clearly in the commit log...
>>
>>
>
> To reflect that maybe we should also rename pg_bsize to pg_iosize.
For pnfs, in fact we are not using pg_bsize as the iosize limit. It's
just that if pg_bsize is smaller than PAGE_CACHE_SIZE, partial
read/write ops will be used. I'm afraid that if we rename pg_bsize to
pg_iosize, people would really think it is the limit for read/write
iosize, which it really isn't. :)

Thanks,
Tao
>
> Benny
>
>>>
>>> I second that.
>>>
>>> Benny
>>>
>>>>
>>>>> Signed-off-by: Peng Tao <peng_tao@emc.com>
>>>>> ---
>>>>>  fs/nfs/blocklayout/blocklayout.c |   20 ++++++++++++++++++--
>>>>>  1 files changed, 18 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
>>>>> index 36648e1..9143e61 100644
>>>>> --- a/fs/nfs/blocklayout/blocklayout.c
>>>>> +++ b/fs/nfs/blocklayout/blocklayout.c
>>>>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server)
>>>>>      return 0;
>>>>>  }
>>>>>
>>>>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio,
>>>>> +                        struct nfs_page *req)
>>>>> +{
>>>>> +    pnfs_generic_pg_init_read(pgio, req);
>>>>> +    if (pgio->pg_lseg)
>>>>> +            pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>>>>> +}
>>>>> +
>>>>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio,
>>>>> +                         struct nfs_page *req)
>>>>> +{
>>>>> +    pnfs_generic_pg_init_write(pgio, req);
>>>>> +    if (pgio->pg_lseg)
>>>>> +            pgio->pg_bsize = pgio->pg_lseg->pls_range.length;
>>>>> +}
>>>>> +
>>>>>  static const struct nfs_pageio_ops bl_pg_read_ops = {
>>>>> -    .pg_init = pnfs_generic_pg_init_read,
>>>>> +    .pg_init = bl_pg_init_read,
>>>>>      .pg_test = pnfs_generic_pg_test,
>>>>
>>>> I see here that you do not override .pg_test. This is your problem
>>>> look at objio_osd::objio_pg_test() it checks for similar boundaries
>>>> at the objects side. This is where you need to do these checks
>>>> for blocks as well.
>> For blocklayout, we don't need to force each IO under a certain size.
>> Currently (w/ and w/o this patch) the lseg coverage is the only
>> constraint for pagelist length. So pnfs_generic_pg_test is enough for
>> blocklayout.
>>
>> Thanks,
>> Tao
>>
>>>>
>>>>>      .pg_doio = pnfs_generic_pg_readpages,
>>>>>  };
>>>>>
>>>>>  static const struct nfs_pageio_ops bl_pg_write_ops = {
>>>>> -    .pg_init = pnfs_generic_pg_init_write,
>>>>> +    .pg_init = bl_pg_init_write,
>>>>>      .pg_test = pnfs_generic_pg_test,
>>>>
>>>> Same here
>>>>
>>>>>      .pg_doio = pnfs_generic_pg_writepages,
>>>>>  };
>>>>
>>>> Thanks
>>>> Boaz
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>



-- 
Thanks,
-Bergwolf

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-17  9:35     ` Peng Tao
  2011-08-17 16:27       ` Benny Halevy
@ 2011-08-22 23:52       ` Boaz Harrosh
  2011-08-23  0:00         ` Myklebust, Trond
  1 sibling, 1 reply; 12+ messages in thread
From: Boaz Harrosh @ 2011-08-22 23:52 UTC (permalink / raw)
  To: Peng Tao; +Cc: Benny Halevy, linux-nfs, Peng Tao, Trond Myklebust, Fred Isaman

On 08/17/2011 02:35 AM, Peng Tao wrote:
> Hi, Benny and Boaz,
> 
<snip>

> In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will
> be set as desc->pg_rpc_callops, which is determined in
> nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For
> blocklayout, we wouldn't want to set data->mds_ops to
> partial_read/write ops, so I write the patch to use lseg length as
> pg_bsize.
> 

Do you mean in the case where MDS sets (pg_bsize < PAGE_SIZE) ?

Right, that is a problem. (Theoretically, because the pNFSD-Linux server
does not do that. Do you have a Server that does?)

> LD can override pg_bsize in pg_init because
> nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to
> server rsize/wsize if pnfs is not tried.
> 

So if it is the "pg_bsize < PAGE_SIZE" but pNFS-IO case then I don't
like your patch, at all. We should fix the generic code to behave
properly, and not let LDs hack their way out. (For example what about
objects and files LDs)

There is a few ways you can fix the generic code. One is override the
desc->pg_rpc_callops for the pNFS case to always be the same one. Or
override the test for (pg_bsize < PAGE_SIZE) in the pNFS case if we have
a lseg. Or some other clean way.

But please don't fix it like that, inside each LD driver.

[ Trond Fred
  One thing I do not understand about the files-layout operations. You
  have explained in the passed that r/wsize sent from the MDS is also the
  same one for each DS. So if we take an example of rsize beeing 2MB
  and there is a stripping of 2 DS for that layout.(Say strip_unit==rsize)
  Then we need to read 1/2 of that page from one DS and the 2/2 half from the
  second. Will current partial_read/write work if going through files-LD?
]

Thanks
Boaz

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-22 23:52       ` Boaz Harrosh
@ 2011-08-23  0:00         ` Myklebust, Trond
  2011-08-23 15:01           ` Peng Tao
  0 siblings, 1 reply; 12+ messages in thread
From: Myklebust, Trond @ 2011-08-23  0:00 UTC (permalink / raw)
  To: Boaz Harrosh, Peng Tao; +Cc: Benny Halevy, linux-nfs, Peng Tao, Isaman, Fred

PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0KPiBGcm9tOiBCb2F6IEhhcnJvc2ggW21haWx0
bzpiaGFycm9zaEBwYW5hc2FzLmNvbV0NCj4gU2VudDogTW9uZGF5LCBBdWd1c3QgMjIsIDIwMTEg
Nzo1MiBQTQ0KPiBUbzogUGVuZyBUYW8NCj4gQ2M6IEJlbm55IEhhbGV2eTsgbGludXgtbmZzQHZn
ZXIua2VybmVsLm9yZzsgUGVuZyBUYW87IE15a2xlYnVzdCwNCj4gVHJvbmQ7IElzYW1hbiwgRnJl
ZA0KPiBTdWJqZWN0OiBSZTogW1BBVENIXSBwbmZzYmxvY2s6IGluaXQgcGdfYnNpemUgcHJvcGVy
bHkNCj4gDQo+IE9uIDA4LzE3LzIwMTEgMDI6MzUgQU0sIFBlbmcgVGFvIHdyb3RlOg0KPiA+IEhp
LCBCZW5ueSBhbmQgQm9heiwNCj4gPg0KPiA8c25pcD4NCj4gDQo+ID4gSW4gcG5mc19kb19tdWx0
aXBsZV9yZWFkcy9wbmZzX2RvX211bHRpcGxlX3dyaXRlcywgZGF0YS0+bWRzX29wcyB3aWxsDQo+
ID4gYmUgc2V0IGFzIGRlc2MtPnBnX3JwY19jYWxsb3BzLCB3aGljaCBpcyBkZXRlcm1pbmVkIGlu
DQo+ID4gbmZzX2dlbmVyaWNfZmx1c2gvbmZzX2dlbmVyaWNfcGFnZWluIGFjY29yZGluZyB0byBk
ZXNjLT5wZ19ic2l6ZS4gRm9yDQo+ID4gYmxvY2tsYXlvdXQsIHdlIHdvdWxkbid0IHdhbnQgdG8g
c2V0IGRhdGEtPm1kc19vcHMgdG8NCj4gPiBwYXJ0aWFsX3JlYWQvd3JpdGUgb3BzLCBzbyBJIHdy
aXRlIHRoZSBwYXRjaCB0byB1c2UgbHNlZyBsZW5ndGggYXMNCj4gPiBwZ19ic2l6ZS4NCj4gPg0K
PiANCj4gRG8geW91IG1lYW4gaW4gdGhlIGNhc2Ugd2hlcmUgTURTIHNldHMgKHBnX2JzaXplIDwg
UEFHRV9TSVpFKSA/DQo+IA0KPiBSaWdodCwgdGhhdCBpcyBhIHByb2JsZW0uIChUaGVvcmV0aWNh
bGx5LCBiZWNhdXNlIHRoZSBwTkZTRC1MaW51eA0KPiBzZXJ2ZXINCj4gZG9lcyBub3QgZG8gdGhh
dC4gRG8geW91IGhhdmUgYSBTZXJ2ZXIgdGhhdCBkb2VzPykNCj4gDQo+ID4gTEQgY2FuIG92ZXJy
aWRlIHBnX2JzaXplIGluIHBnX2luaXQgYmVjYXVzZQ0KPiA+IG5mc19wYWdlaW9fcmVzZXRfcmVh
ZF9tZHMvbmZzX3BhZ2Vpb19yZXNldF93cml0ZV9tZHMgd2lsbCByZXNldCBpdCB0bw0KPiA+IHNl
cnZlciByc2l6ZS93c2l6ZSBpZiBwbmZzIGlzIG5vdCB0cmllZC4NCj4gPg0KPiANCj4gU28gaWYg
aXQgaXMgdGhlICJwZ19ic2l6ZSA8IFBBR0VfU0laRSIgYnV0IHBORlMtSU8gY2FzZSB0aGVuIEkg
ZG9uJ3QNCj4gbGlrZSB5b3VyIHBhdGNoLCBhdCBhbGwuIFdlIHNob3VsZCBmaXggdGhlIGdlbmVy
aWMgY29kZSB0byBiZWhhdmUNCj4gcHJvcGVybHksIGFuZCBub3QgbGV0IExEcyBoYWNrIHRoZWly
IHdheSBvdXQuIChGb3IgZXhhbXBsZSB3aGF0IGFib3V0DQo+IG9iamVjdHMgYW5kIGZpbGVzIExE
cykNCj4gDQo+IFRoZXJlIGlzIGEgZmV3IHdheXMgeW91IGNhbiBmaXggdGhlIGdlbmVyaWMgY29k
ZS4gT25lIGlzIG92ZXJyaWRlIHRoZQ0KPiBkZXNjLT5wZ19ycGNfY2FsbG9wcyBmb3IgdGhlIHBO
RlMgY2FzZSB0byBhbHdheXMgYmUgdGhlIHNhbWUgb25lLiBPcg0KPiBvdmVycmlkZSB0aGUgdGVz
dCBmb3IgKHBnX2JzaXplIDwgUEFHRV9TSVpFKSBpbiB0aGUgcE5GUyBjYXNlIGlmIHdlDQo+IGhh
dmUNCj4gYSBsc2VnLiBPciBzb21lIG90aGVyIGNsZWFuIHdheS4NCj4gDQo+IEJ1dCBwbGVhc2Ug
ZG9uJ3QgZml4IGl0IGxpa2UgdGhhdCwgaW5zaWRlIGVhY2ggTEQgZHJpdmVyLg0KPiANCj4gWyBU
cm9uZCBGcmVkDQo+ICAgT25lIHRoaW5nIEkgZG8gbm90IHVuZGVyc3RhbmQgYWJvdXQgdGhlIGZp
bGVzLWxheW91dCBvcGVyYXRpb25zLiBZb3UNCj4gICBoYXZlIGV4cGxhaW5lZCBpbiB0aGUgcGFz
c2VkIHRoYXQgci93c2l6ZSBzZW50IGZyb20gdGhlIE1EUyBpcyBhbHNvDQo+IHRoZQ0KPiAgIHNh
bWUgb25lIGZvciBlYWNoIERTLiBTbyBpZiB3ZSB0YWtlIGFuIGV4YW1wbGUgb2YgcnNpemUgYmVl
aW5nIDJNQg0KPiAgIGFuZCB0aGVyZSBpcyBhIHN0cmlwcGluZyBvZiAyIERTIGZvciB0aGF0IGxh
eW91dC4oU2F5DQo+IHN0cmlwX3VuaXQ9PXJzaXplKQ0KPiAgIFRoZW4gd2UgbmVlZCB0byByZWFk
IDEvMiBvZiB0aGF0IHBhZ2UgZnJvbSBvbmUgRFMgYW5kIHRoZSAyLzIgaGFsZg0KPiBmcm9tIHRo
ZQ0KPiAgIHNlY29uZC4gV2lsbCBjdXJyZW50IHBhcnRpYWxfcmVhZC93cml0ZSB3b3JrIGlmIGdv
aW5nIHRocm91Z2ggZmlsZXMtDQo+IExEPw0KPiBdDQoNCk5vLiBUaGUgc3RyaXBlIHNpemUgbWF5
IGJlIHNtYWxsZXIgdGhhbiB0aGUgci93c2l6ZSwgaW4gd2hpY2ggY2FzZSB3ZSdyZSBpbiB0aGUg
c2FtZSBib2F0IGFzIHRoZSBibG9ja3MgYW5kIG9iamVjdHMuDQoNCkNoZWVycw0KICBUcm9uZA0K
Tu+/ve+/ve+/ve+/ve+/vXLvv73vv71577+977+977+9Yu+/vVjvv73vv73Hp3bvv71e77+9Kd66
ey5u77+9K++/ve+/ve+/ve+/vXvvv73vv73vv70i77+977+9Xm7vv71y77+977+977+9eu+/vRrv
v73vv71o77+977+977+977+9Ju+/ve+/vR7vv71H77+977+977+9aO+/vQMo77+96ZqO77+93aJq
Iu+/ve+/vRrvv70bbe+/ve+/ve+/ve+/ve+/vXrvv73elu+/ve+/ve+/vWbvv73vv73vv71o77+9
77+977+9fu+/vW3vv70=

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-23  0:00         ` Myklebust, Trond
@ 2011-08-23 15:01           ` Peng Tao
  2011-08-23 21:19             ` Boaz Harrosh
  0 siblings, 1 reply; 12+ messages in thread
From: Peng Tao @ 2011-08-23 15:01 UTC (permalink / raw)
  To: Myklebust, Trond, Boaz Harrosh
  Cc: Benny Halevy, linux-nfs, Peng Tao, Isaman, Fred

Hi, Trond and Boaz,

On Tue, Aug 23, 2011 at 8:00 AM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
>> -----Original Message-----
>> From: Boaz Harrosh [mailto:bharrosh@panasas.com]
>> Sent: Monday, August 22, 2011 7:52 PM
>> To: Peng Tao
>> Cc: Benny Halevy; linux-nfs@vger.kernel.org; Peng Tao; Myklebust,
>> Trond; Isaman, Fred
>> Subject: Re: [PATCH] pnfsblock: init pg_bsize properly
>>
>> On 08/17/2011 02:35 AM, Peng Tao wrote:
>> > Hi, Benny and Boaz,
>> >
>> <snip>
>>
>> > In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will
>> > be set as desc->pg_rpc_callops, which is determined in
>> > nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For
>> > blocklayout, we wouldn't want to set data->mds_ops to
>> > partial_read/write ops, so I write the patch to use lseg length as
>> > pg_bsize.
>> >
>>
>> Do you mean in the case where MDS sets (pg_bsize < PAGE_SIZE) ?
>>
>> Right, that is a problem. (Theoretically, because the pNFSD-Linux
>> server
>> does not do that. Do you have a Server that does?)
No, I don't have a server does that. But it is a server config option
and we can't force users not to change it. So better fix it at client
side.

>>
>> > LD can override pg_bsize in pg_init because
>> > nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to
>> > server rsize/wsize if pnfs is not tried.
>> >
>>
>> So if it is the "pg_bsize < PAGE_SIZE" but pNFS-IO case then I don't
>> like your patch, at all. We should fix the generic code to behave
>> properly, and not let LDs hack their way out. (For example what about
>> objects and files LDs)
>>
>> There is a few ways you can fix the generic code. One is override the
>> desc->pg_rpc_callops for the pNFS case to always be the same one. Or
>> override the test for (pg_bsize < PAGE_SIZE) in the pNFS case if we
>> have
>> a lseg. Or some other clean way.
I was under the impression that for object and file layouts, partial
read/write rpc ops are still needed for DS IO when DS r/wsize is
smaller than PAGE_SIZE...

>>
>> But please don't fix it like that, inside each LD driver.
>>
>> [ Trond Fred
>>   One thing I do not understand about the files-layout operations. You
>>   have explained in the passed that r/wsize sent from the MDS is also
>> the
>>   same one for each DS. So if we take an example of rsize beeing 2MB
>>   and there is a stripping of 2 DS for that layout.(Say
>> strip_unit==rsize)
>>   Then we need to read 1/2 of that page from one DS and the 2/2 half
>> from the
>>   second. Will current partial_read/write work if going through files-
>> LD?
>> ]
>
> No. The stripe size may be smaller than the r/wsize, in which case we're in the same boat as the blocks and objects.
So this is a generic issue. For file and object layout, do you need to
use partial read/write rpc ops in any case? For block layout, we would
like to never use it in LD. But I'm not sure about file and object
case. Could you confirm?

Thanks,
Tao

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-23 15:01           ` Peng Tao
@ 2011-08-23 21:19             ` Boaz Harrosh
  2011-08-25 20:15               ` Jim Rees
  0 siblings, 1 reply; 12+ messages in thread
From: Boaz Harrosh @ 2011-08-23 21:19 UTC (permalink / raw)
  To: Peng Tao, Isaman, Fred, Andy Adamson
  Cc: Myklebust, Trond, Benny Halevy, linux-nfs, Peng Tao

On 08/23/2011 08:01 AM, Peng Tao wrote:
> So this is a generic issue. For file and object layout, do you need to
> use partial read/write rpc ops in any case? For block layout, we would
> like to never use it in LD. But I'm not sure about file and object
> case. Could you confirm?
> 

For objects it is like blocks. NEVER (ever) use partial rpc ops. r/wsize
are not relevant for obj-LD IO.

(As I understand from Trond also with files the LD should inspect the
 situation and have the final disposition. But someone will need to write
 some files-LD code for that, Fred, Andy?)

> Thanks,
> Tao

Thanks
Boaz

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-23 21:19             ` Boaz Harrosh
@ 2011-08-25 20:15               ` Jim Rees
  2011-08-26  0:16                 ` Boaz Harrosh
  0 siblings, 1 reply; 12+ messages in thread
From: Jim Rees @ 2011-08-25 20:15 UTC (permalink / raw)
  To: Boaz Harrosh
  Cc: Peng Tao, Isaman, Fred, Andy Adamson, Myklebust, Trond,
	Benny Halevy, linux-nfs, Peng Tao

Boaz Harrosh wrote:

  On 08/23/2011 08:01 AM, Peng Tao wrote:
  > So this is a generic issue. For file and object layout, do you need to
  > use partial read/write rpc ops in any case? For block layout, we would
  > like to never use it in LD. But I'm not sure about file and object
  > case. Could you confirm?
  > 
  
  For objects it is like blocks. NEVER (ever) use partial rpc ops. r/wsize
  are not relevant for obj-LD IO.
  
  (As I understand from Trond also with files the LD should inspect the
   situation and have the final disposition. But someone will need to write
   some files-LD code for that, Fred, Andy?)
  
  > Thanks,
  > Tao

We discussed this on the call today.  Boaz is going to write a brief
description of how to fix this in the generic layer, then I'm going to
implement it.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pnfsblock: init pg_bsize properly
  2011-08-25 20:15               ` Jim Rees
@ 2011-08-26  0:16                 ` Boaz Harrosh
  0 siblings, 0 replies; 12+ messages in thread
From: Boaz Harrosh @ 2011-08-26  0:16 UTC (permalink / raw)
  To: Jim Rees
  Cc: Peng Tao, Isaman, Fred, Andy Adamson, Myklebust, Trond,
	Benny Halevy, linux-nfs, Peng Tao

On 08/25/2011 01:15 PM, Jim Rees wrote:
> 
> We discussed this on the call today.  Boaz is going to write a brief
> description of how to fix this in the generic layer, then I'm going to
> implement it.

For blocks and objects we need something like the below [1].
(Done only for reads)

But I suspect I now broke files-LD. For files-LD what it actually
needs, (As I understood from trond) Is the same code like today
but with a similar patch as Peng's but for files-LD that sets
pg_bsize to the minimum of w/rsize and stripe_unit.

This is mainly because it needs that nfs_readpage_result/release_partial
which waits for all RPCs before it actually calls nfs_end_page_writeback
(PageUpTodate for reads) on that page that was shared between multiple
requests.

So I guess we need to do [2] option below (Only done for writes). 
+ With added code to set this bit_flag in objects and blocks.
  (Just like PNFS_LAYOUTRET_ON_SETATTR)
+ files-LD code to override pnfs_generic_pg_init_read/write and
  set pg_bsize to min(pg_bsize, stripe_unit). (Can be its own patch)
+ Define empty pnfs_ld_ignore_rwsize() for !CONFIG_NFS_V4_1

---------------------------------------------------------------
[1] (only reads)

   Do not use nfs_pagein_multi() for the pNFS case ...

-----
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ab12913..a4d0191 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,7 +296,7 @@ extern int nfs_access_cache_shrinker(struct shrinker *shrink,
 extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
 			     const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+extern int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
 		struct list_head *head);
 
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e550e88..b7e3e41 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1356,7 +1356,7 @@ struct pnfs_layout_segment *
 	LIST_HEAD(head);
 	int ret;
 
-	ret = nfs_generic_pagein(desc, &head);
+	ret = nfs_pagein_one(desc, &head);
 	if (ret != 0) {
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2171c04..ce5982a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -336,7 +336,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	return -ENOMEM;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
@@ -369,19 +369,15 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
 	return ret;
 }
 
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
-{
-	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_pagein_multi(desc, head);
-	return nfs_pagein_one(desc, head);
-}
-
 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
 	LIST_HEAD(head);
 	int ret;
 
-	ret = nfs_generic_pagein(desc, &head);
+	if (desc->pg_bsize < PAGE_CACHE_SIZE)
+		ret = nfs_pagein_multi(desc, &head);
+	else
+		ret = nfs_pagein_one(desc, &head);
 	if (ret == 0)
 		ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
 	return ret;

---------------------------------------------------------------
[2] (only writes)
  
     Do not use nfs_pagein_multi() for layout drivers that
     must not use it. (Objects and Blocks) ...

-------
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 01cbfd5..d32538a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -68,6 +68,8 @@ enum {
 enum layoutdriver_policy_flags {
 	/* Should the pNFS client commit and return the layout upon a setattr */
 	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 0,
+	/* Do not use nfs_xxx_partial_ops */
+	PNFS_IGNOR_RWSIZE		= 2 << 0,
 };
 
 struct nfs4_deviceid_node;
@@ -315,6 +317,15 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req)
 		PNFS_LAYOUTRET_ON_SETATTR;
 }
 
+static inline bool
+pnfs_ld_ignore_rwsize(struct inode *inode)
+{
+	if (!pnfs_enabled_sb(NFS_SERVER(inode)))
+		return false;
+	return NFS_SERVER(inode)->pnfs_curr_ld->flags &
+		PNFS_IGNOR_RWSIZE;
+}
+
 static inline int pnfs_return_layout(struct inode *ino)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b39b37f..6b25073 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1029,7 +1029,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 
 int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
 {
-	if (desc->pg_bsize < PAGE_CACHE_SIZE)
+	if (!pnfs_ld_ignore_rwsize(desc->pg_inode) &&
+	    desc->pg_bsize < PAGE_CACHE_SIZE)
 		return nfs_flush_multi(desc, head);
 	return nfs_flush_one(desc, head);
 }

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-08-26  0:17 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-13  1:04 [PATCH] pnfsblock: init pg_bsize properly Peng Tao
2011-08-16 21:05 ` Boaz Harrosh
2011-08-17  7:15   ` Benny Halevy
2011-08-17  9:35     ` Peng Tao
2011-08-17 16:27       ` Benny Halevy
2011-08-18 14:34         ` Peng Tao
2011-08-22 23:52       ` Boaz Harrosh
2011-08-23  0:00         ` Myklebust, Trond
2011-08-23 15:01           ` Peng Tao
2011-08-23 21:19             ` Boaz Harrosh
2011-08-25 20:15               ` Jim Rees
2011-08-26  0:16                 ` Boaz Harrosh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.