All of lore.kernel.org
 help / color / mirror / Atom feed
From: Trond Myklebust <trondmy@hammerspace.com>
To: "trbecker@gmail.com" <trbecker@gmail.com>,
	"steved@redhat.com" <steved@redhat.com>
Cc: "tbecker@redhat.com" <tbecker@redhat.com>,
	"linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Subject: Re: [RFC PATCH] mount.nfs: Add readahead option
Date: Tue, 3 Aug 2021 15:15:54 +0000	[thread overview]
Message-ID: <6b627dc6ebc9ee1cbd37a62b48d08b1a031f0f08.camel@hammerspace.com> (raw)
In-Reply-To: <20210803130717.2890565-1-trbecker@gmail.com>

On Tue, 2021-08-03 at 10:07 -0300, Thiago Rafael Becker wrote:
> Linux kernel defined the default read ahead to 128KiB, and this is
> making read perform poorly. To mitigate it, add readahead as a mount
> option that is handled by userspace, with some refactoring included.
> 
> Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
> ---
>  utils/mount/mount.c | 114 ++++++++++++++++++++++++++++++++++++++++--
> --
>  utils/mount/nfs.man |   3 ++
>  2 files changed, 108 insertions(+), 9 deletions(-)
> 
> diff --git a/utils/mount/mount.c b/utils/mount/mount.c
> index b98f9e00..15076ca8 100644
> --- a/utils/mount/mount.c
> +++ b/utils/mount/mount.c
> @@ -56,9 +56,11 @@ int nomtab;
>  int verbose;
>  int sloppy;
>  int string;
> +static int readahead_kb = 0;
>  
>  #define FOREGROUND     (0)
>  #define BACKGROUND     (1)
> +#define READAHEAD_VALUE_LEN 24
>  
>  static struct option longopts[] = {
>    { "fake", 0, 0, 'f' },
> @@ -292,6 +294,16 @@ static int add_mtab(char *spec, char
> *mount_point, char *fstype,
>         return result;
>  }
>  
> +static void append_extra_opt(const char *opt, char *extra_opts,
> size_t len) {
> +       len -= strlen(extra_opts);
> +
> +       if (*extra_opts && --len > 0)
> +               strcat(extra_opts, ",");
> +
> +       if ((len -= strlen(opt)) > 0)
> +               strcat(extra_opts, opt);
> +}
> +
>  static void parse_opt(const char *opt, int *mask, char *extra_opts,
> size_t len)
>  {
>         const struct opt_map *om;
> @@ -306,13 +318,37 @@ static void parse_opt(const char *opt, int
> *mask, char *extra_opts, size_t len)
>                 }
>         }
>  
> -       len -= strlen(extra_opts);
> +       append_extra_opt(opt, extra_opts, len);
> +}
>  
> -       if (*extra_opts && --len > 0)
> -               strcat(extra_opts, ",");
> +static void parse_opt_val(const char *opt, const char *val, char
> *extra_opts, size_t len)
> +{
> +       size_t ov_len;
> +       char *opt_val;
>  
> -       if ((len -= strlen(opt)) > 0)
> -               strcat(extra_opts, opt);
> +       /* readahead is a special value that is handled by userspace
> */
> +       if (!strcmp(opt,  "readahead")) {
> +               char *endptr = NULL;
> +               const char *expected_endptr = val + strlen(val);
> +
> +               readahead_kb = strtol(val, &endptr, 10);
> +
> +               if (endptr != expected_endptr) {
> +                       nfs_error(_("%s: invalid readahead value
> %s"),
> +                                      progname, val);
> +                       readahead_kb = 0;
> +               }
> +               return;
> +       }
> +
> +       /* Send the option to the kernel. */
> +       ov_len = strlen(opt) + strlen(val) + 3;
> +       opt_val = malloc(sizeof(char) * ov_len);
> +       snprintf(opt_val, ov_len, ",%s=%s", opt, val);
> +
> +       append_extra_opt(opt_val, extra_opts, len);
> +
> +       free(opt_val);
>  }
>  
>  /*
> @@ -325,7 +361,7 @@ static void parse_opts(const char *options, int
> *flags, char **extra_opts)
>  {
>         if (options != NULL) {
>                 char *opts = xstrdup(options);
> -               char *opt, *p;
> +               char *opt, *p, *val = NULL;
>                 size_t len = strlen(opts) + 1;  /* include room for a
> null */
>                 int open_quote = 0;
>  
> @@ -341,17 +377,75 @@ static void parse_opts(const char *options, int
> *flags, char **extra_opts)
>                                 continue;       /* still in a quoted
> block */
>                         if (*p == ',')
>                                 *p = '\0';      /* terminate the
> option item */
> +                       if (*p == '=') {        /* key=val option */
> +                               if (!val) {
> +                                       *p = '\0';      /* terminate
> key */
> +                                       val = ++p;      /* start the
> value */
> +                               }
> +                       }
>  
>                         /* end of option item or last item */
>                         if (*p == '\0' || *(p + 1) == '\0') {
> -                               parse_opt(opt, flags, *extra_opts,
> len);
> -                               opt = NULL;
> +                               if (val) {
> +                                       parse_opt_val(opt, val,
> *extra_opts, len);
> +                               } else
> +                                       parse_opt(opt, flags,
> *extra_opts, len);
> +                               opt = val = NULL;
>                         }
>                 }
>                 free(opts);
>         }
>  }
>  
> +/*
> + * Set the read ahead value for the mount point. On failure, uses
> the default kernel
> + * read ahead value (for new mounts) or the current value (for
> remounts).
> + */
> +static void set_readahead(const char *mount_point) {
> +       int error;
> +       struct statx mp_stat;
> +       char *mount_point_readahead_file, value[READAHEAD_VALUE_LEN];
> +       size_t len;
> +       int fp;
> +
> +       /* If readahead_kb is unset, or set to 0, do not change the
> value */
> +       if (!readahead_kb)
> +               return;
> +
> +       if ((error = statx(0, mount_point, 0, 0, &mp_stat)) != 0) {
> +               goto out_error;
> +       }
> +
> +       if (!(mount_point_readahead_file = malloc(PATH_MAX))) {
> +               error = -ENOMEM;
> +               goto out_error;
> +       }
> +
> +       snprintf(mount_point_readahead_file, PATH_MAX,
> "/sys/class/bdi/%d:%d/read_ahead_kb",
> +                       mp_stat.stx_dev_major,
> mp_stat.stx_dev_minor);
> +
> +       len = snprintf(value, READAHEAD_VALUE_LEN, "%d",
> readahead_kb);
> +
> +       if ((fp = open(mount_point_readahead_file, O_WRONLY)) < 0) {
> +               error = errno;
> +               goto out_free;
> +       }
> +
> +       if ((error = write(fp, value, len)) < 0)
> +               goto out_close;
> +
> +       close(fp);
> +       return;
> +
> +out_close:
> +       close(fp);
> +out_free:
> +       free(mount_point_readahead_file);
> +out_error:
> +       nfs_error(_("%s: unable to set readahead value, using default
> kernel value (error = %d)\n"),
> +                       progname, error);
> +}
> +
>  static int try_mount(char *spec, char *mount_point, int flags,
>                         char *fs_type, char **extra_opts, char
> *mount_opts,
>                         int fake, int bg)
> @@ -373,8 +467,10 @@ static int try_mount(char *spec, char
> *mount_point, int flags,
>         if (ret)
>                 return ret;
>  
> -       if (!fake)
> +       if (!fake) {
> +               set_readahead(mount_point);
>                 print_one(spec, mount_point, fs_type, mount_opts);
> +       }
>  
>         return add_mtab(spec, mount_point, fs_type, flags,
> *extra_opts);
>  }
> diff --git a/utils/mount/nfs.man b/utils/mount/nfs.man
> index f1b76936..9832a377 100644
> --- a/utils/mount/nfs.man
> +++ b/utils/mount/nfs.man
> @@ -561,6 +561,9 @@ The
>  .B sloppy
>  option is an alternative to specifying
>  .BR mount.nfs " -s " option.
> +.TP 1.5i
> +.B readahead=n
> +Set the read ahead of the mount to n KiB. This is handled entirely
> in userspace and will not appear on mountinfo. If unset or set to 0,
> it will not set the a value, using the current value (for a remount)
> or the kernel default for a new mount.
>  
>  .SS "Options for NFS versions 2 and 3 only"
>  Use these options, along with the options in the above subsection,


There is already a method for doing this: you set up an appropriate
entry in /etc/udev/rules.d/. Adding a mount option, particularly one
that is NFS only and is handled in userspace rather than by the kernel
parser, just causes fragmentation and confusion.

If you want to help users configure readahead, I'd suggest focussing on
a utility to help them set up the udev rule correctly.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



  reply	other threads:[~2021-08-03 15:16 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-03 13:07 [RFC PATCH] mount.nfs: Add readahead option Thiago Rafael Becker
2021-08-03 15:15 ` Trond Myklebust [this message]
2021-08-04 17:13   ` Thiago Rafael Becker
2021-08-05 18:23   ` Steve Dickson
2021-08-05 20:11     ` Thiago Rafael Becker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6b627dc6ebc9ee1cbd37a62b48d08b1a031f0f08.camel@hammerspace.com \
    --to=trondmy@hammerspace.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=steved@redhat.com \
    --cc=tbecker@redhat.com \
    --cc=trbecker@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.