All of lore.kernel.org
 help / color / mirror / Atom feed
From: Trond Myklebust <trondmy@hammerspace.com>
To: "dwysocha@redhat.com" <dwysocha@redhat.com>
Cc: "linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH v3 05/17] NFS: Don't discard readdir results
Date: Fri, 6 Nov 2020 15:05:18 +0000	[thread overview]
Message-ID: <482aa15584d90773068da4af772c7aaf43db183c.camel@hammerspace.com> (raw)
In-Reply-To: <CALF+zOnirS++y=pW8HRtzwdric15ixuAiqTL9YiYh2-NdDd=0Q@mail.gmail.com>

On Fri, 2020-11-06 at 08:30 -0500, David Wysochanski wrote:
> On Wed, Nov 4, 2020 at 11:27 AM <trondmy@gmail.com> wrote:
> > 
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > If a readdir call returns more data than we can fit into one page
> > cache page, then allocate a new one for that data rather than
> > discarding the data.
> > 
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 42 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index 842f69120a01..f7248145c333 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct
> > page *page)
> >         kunmap_atomic(array);
> >  }
> > 
> > +static void nfs_readdir_page_unlock_and_put(struct page *page)
> > +{
> > +       unlock_page(page);
> > +       put_page(page);
> > +}
> > +
> > +static struct page *nfs_readdir_page_get_next(struct address_space
> > *mapping,
> > +                                             pgoff_t index, u64
> > cookie)
> > +{
> > +       struct page *page;
> > +
> > +       page = nfs_readdir_page_get_locked(mapping, index, cookie);
> > +       if (page) {
> > +               if (nfs_readdir_page_last_cookie(page) == cookie)
> > +                       return page;
> > +               nfs_readdir_page_unlock_and_put(page);
> > +       }
> > +       return NULL;
> > +}
> > +
> >  static inline
> >  int is_32bit_api(void)
> >  {
> > @@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent,
> > struct nfs_entry *entry,
> >  }
> > 
> >  /* Perform conversion from xdr to cache array */
> > -static
> > -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > nfs_entry *entry,
> > -                               struct page **xdr_pages, struct
> > page *page, unsigned int buflen)
> > +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor
> > *desc,
> > +                                  struct nfs_entry *entry,
> > +                                  struct page **xdr_pages,
> > +                                  struct page *fillme, unsigned
> > int buflen)
> >  {
> > +       struct address_space *mapping = desc->file->f_mapping;
> >         struct xdr_stream stream;
> >         struct xdr_buf buf;
> > -       struct page *scratch;
> > +       struct page *scratch, *new, *page = fillme;
> >         int status;
> > 
> >         scratch = alloc_page(GFP_KERNEL);
> > @@ -666,6 +688,19 @@ int
> > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > nfs_entry *en
> >                                         desc->dir_verifier);
> > 
> >                 status = nfs_readdir_add_to_array(entry, page);
> > +               if (status != -ENOSPC)
> > +                       continue;
> > +
> > +               if (page->mapping != mapping)
> > +                       break;
> > +               new = nfs_readdir_page_get_next(mapping, page-
> > >index + 1,
> > +                                               entry-
> > >prev_cookie);
> > +               if (!new)
> > +                       break;
> > +               if (page != fillme)
> > +                       nfs_readdir_page_unlock_and_put(page);
> > +               page = new;
> > +               status = nfs_readdir_add_to_array(entry, page);
> >         } while (!status && !entry->eof);
> > 
> >         switch (status) {
> > @@ -681,6 +716,9 @@ int
> > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct
> > nfs_entry *en
> >                 break;
> >         }
> > 
> > +       if (page != fillme)
> > +               nfs_readdir_page_unlock_and_put(page);
> > +
> >         put_page(scratch);
> >         return status;
> >  }
> > --
> > 2.28.0
> > 
> 
> It doesn't look like this handles uncached_readdir.  Were you
> planning
> on addressing that somehow, or should we think about something like
> this to move dtsize up as a parameter to nfs_readdir_xdr_to_array(),
> and force uncached_readdir() to 1 page:
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index b6c3501e8f61..ca30e2dbb9c3 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -791,13 +791,12 @@ static struct page
> **nfs_readdir_alloc_pages(size_t npages)
> 
>  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor
> *desc,
>                                     struct page *page, __be32
> *verf_arg,
> -                                   __be32 *verf_res)
> +                                   __be32 *verf_res, size_t dtsize)
>  {
>         struct page **pages;
>         struct nfs_entry *entry;
>         size_t array_size;
>         struct inode *inode = file_inode(desc->file);
> -       size_t dtsize = NFS_SERVER(inode)->dtsize;
>         int status = -ENOMEM;
> 
>         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> @@ -879,13 +878,15 @@ static int find_and_lock_cache_page(struct
> nfs_readdir_descriptor *desc)
>         struct nfs_inode *nfsi = NFS_I(inode);
>         __be32 verf[NFS_DIR_VERIFIER_SIZE];
>         int res;
> +       size_t dtsize = NFS_SERVER(inode)->dtsize;
> 
>         desc->page = nfs_readdir_page_get_cached(desc);
>         if (!desc->page)
>                 return -ENOMEM;
>         if (nfs_readdir_page_needs_filling(desc->page)) {
>                 res = nfs_readdir_xdr_to_array(desc, desc->page,
> -                                              nfsi->cookieverf,
> verf);
> +                                              nfsi->cookieverf,
> verf,
> +                                              dtsize);
>                 if (res < 0) {
>                         nfs_readdir_page_unlock_and_put_cached(desc);
>                         if (res == -EBADCOOKIE || res == -ENOTSYNC) {
> @@ -995,7 +996,8 @@ static int uncached_readdir(struct
> nfs_readdir_descriptor *desc)
>         desc->duped = 0;
> 
>         nfs_readdir_page_init_array(page, desc->dir_cookie);
> -       status = nfs_readdir_xdr_to_array(desc, page, desc->verf,
> verf);
> +       status = nfs_readdir_xdr_to_array(desc, page, desc->verf,
> verf,
> +                                         PAGE_SIZE);
>         if (status < 0)
>                 goto out_release;
> 

Actually for uncached readdir, I was thinking we might want to convert
nfs_readdir_xdr_to_array() and nfs_readdir_page_filler() to take an
array of pages + buffer size.
IOW: convert uncached_readdir() to allocate an array of pages, and pass
in a 'struct page **' + a buffer length.

I don't like the idea of passing in a dtsize because that restricts the
size of the READDIR RPC request buffer instead of restricting the
number of entries the server returns. For any given buffer size, that
number of entries fluctuates wildly depending on the filenames in that
directory and their differing lengths, whereas your page can take a
fixed number of entries irrespective of the filename lengths (in fact
it can always take 127 entries on an x86_64).

It is true that the number of entries that nfs_do_filldir() can handle
also depends on the filename length, but we don't have any information
in the filesystem about the buffer size that was passed in to the
getdents() system call or how much space remains in that buffer. All
that information is hidden in the opaque 'struct dir_context'. So for
that reason, we can't use that information to set a dtsize either.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



  reply	other threads:[~2020-11-06 15:05 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-04 16:16 [PATCH v3 00/17] Readdir enhancements trondmy
2020-11-04 16:16 ` [PATCH v3 01/17] NFS: Ensure contents of struct nfs_open_dir_context are consistent trondmy
2020-11-04 16:16   ` [PATCH v3 02/17] NFS: Clean up readdir struct nfs_cache_array trondmy
2020-11-04 16:16     ` [PATCH v3 03/17] NFS: Clean up nfs_readdir_page_filler() trondmy
2020-11-04 16:16       ` [PATCH v3 04/17] NFS: Clean up directory array handling trondmy
2020-11-04 16:16         ` [PATCH v3 05/17] NFS: Don't discard readdir results trondmy
2020-11-04 16:16           ` [PATCH v3 06/17] NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array() trondmy
2020-11-04 16:16             ` [PATCH v3 07/17] NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array() trondmy
2020-11-04 16:16               ` [PATCH v3 08/17] NFS: Simplify struct nfs_cache_array_entry trondmy
2020-11-04 16:16                 ` [PATCH v3 09/17] NFS: Support larger readdir buffers trondmy
2020-11-04 16:16                   ` [PATCH v3 10/17] NFS: More readdir cleanups trondmy
2020-11-04 16:16                     ` [PATCH v3 11/17] NFS: nfs_do_filldir() does not return a value trondmy
2020-11-04 16:16                       ` [PATCH v3 12/17] NFS: Reduce readdir stack usage trondmy
2020-11-04 16:16                         ` [PATCH v3 13/17] NFS: Cleanup to remove nfs_readdir_descriptor_t typedef trondmy
2020-11-04 16:16                           ` [PATCH v3 14/17] NFS: Allow the NFS generic code to pass in a verifier to readdir trondmy
2020-11-04 16:16                             ` [PATCH v3 15/17] NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls trondmy
2020-11-04 16:16                               ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers trondmy
2020-11-04 16:16                                 ` [PATCH v3 17/17] NFS: Optimisations for monotonically increasing readdir cookies trondmy
2020-11-04 21:01                                 ` [PATCH v3 16/17] NFS: Improve handling of directory verifiers David Wysochanski
2020-11-04 21:31                                   ` Trond Myklebust
2020-11-04 21:40                                     ` David Wysochanski
2020-11-06 13:30           ` [PATCH v3 05/17] NFS: Don't discard readdir results David Wysochanski
2020-11-06 15:05             ` Trond Myklebust [this message]
2020-11-06 18:00               ` David Wysochanski
2020-11-07 12:49 ` [PATCH v3 00/17] Readdir enhancements Benjamin Coddington
2020-11-07 14:23   ` Trond Myklebust
2020-11-08 11:05     ` Benjamin Coddington
2020-11-08 18:15   ` Mkrtchyan, Tigran

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=482aa15584d90773068da4af772c7aaf43db183c.camel@hammerspace.com \
    --to=trondmy@hammerspace.com \
    --cc=dwysocha@redhat.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.