All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thiago Rafael Becker <trbecker@gmail.com>
To: Steve Dickson <steved@redhat.com>
Cc: linux-nfs@vger.kernel.org, Thiago Becker <tbecker@redhat.com>,
	Thiago Rafael Becker <trbecker@gmail.com>
Subject: [RFC PATCH] mount.nfs: Add readahead option
Date: Tue,  3 Aug 2021 10:07:17 -0300	[thread overview]
Message-ID: <20210803130717.2890565-1-trbecker@gmail.com> (raw)

Linux kernel defined the default read ahead to 128KiB, and this is
making read perform poorly. To mitigate it, add readahead as a mount
option that is handled by userspace, with some refactoring included.

Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
---
 utils/mount/mount.c | 114 ++++++++++++++++++++++++++++++++++++++++----
 utils/mount/nfs.man |   3 ++
 2 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/utils/mount/mount.c b/utils/mount/mount.c
index b98f9e00..15076ca8 100644
--- a/utils/mount/mount.c
+++ b/utils/mount/mount.c
@@ -56,9 +56,11 @@ int nomtab;
 int verbose;
 int sloppy;
 int string;
+static int readahead_kb = 0;
 
 #define FOREGROUND	(0)
 #define BACKGROUND	(1)
+#define READAHEAD_VALUE_LEN 24
 
 static struct option longopts[] = {
   { "fake", 0, 0, 'f' },
@@ -292,6 +294,16 @@ static int add_mtab(char *spec, char *mount_point, char *fstype,
 	return result;
 }
 
+static void append_extra_opt(const char *opt, char *extra_opts, size_t len) {
+	len -= strlen(extra_opts);
+
+	if (*extra_opts && --len > 0)
+		strcat(extra_opts, ",");
+
+	if ((len -= strlen(opt)) > 0)
+		strcat(extra_opts, opt);
+}
+
 static void parse_opt(const char *opt, int *mask, char *extra_opts, size_t len)
 {
 	const struct opt_map *om;
@@ -306,13 +318,37 @@ static void parse_opt(const char *opt, int *mask, char *extra_opts, size_t len)
 		}
 	}
 
-	len -= strlen(extra_opts);
+	append_extra_opt(opt, extra_opts, len);
+}
 
-	if (*extra_opts && --len > 0)
-		strcat(extra_opts, ",");
+static void parse_opt_val(const char *opt, const char *val, char *extra_opts, size_t len)
+{
+	size_t ov_len;
+	char *opt_val;
 
-	if ((len -= strlen(opt)) > 0)
-		strcat(extra_opts, opt);
+	/* readahead is a special value that is handled by userspace */
+	if (!strcmp(opt,  "readahead")) {
+		char *endptr = NULL;
+		const char *expected_endptr = val + strlen(val);
+
+		readahead_kb = strtol(val, &endptr, 10);
+
+		if (endptr != expected_endptr) {
+			nfs_error(_("%s: invalid readahead value %s"),
+				       progname, val);
+			readahead_kb = 0;
+		}
+		return;
+	}
+
+	/* Send the option to the kernel. */
+	ov_len = strlen(opt) + strlen(val) + 3;
+	opt_val = malloc(sizeof(char) * ov_len);
+	snprintf(opt_val, ov_len, ",%s=%s", opt, val);
+
+	append_extra_opt(opt_val, extra_opts, len);
+
+	free(opt_val);
 }
 
 /*
@@ -325,7 +361,7 @@ static void parse_opts(const char *options, int *flags, char **extra_opts)
 {
 	if (options != NULL) {
 		char *opts = xstrdup(options);
-		char *opt, *p;
+		char *opt, *p, *val = NULL;
 		size_t len = strlen(opts) + 1;	/* include room for a null */
 		int open_quote = 0;
 
@@ -341,17 +377,75 @@ static void parse_opts(const char *options, int *flags, char **extra_opts)
 				continue;	/* still in a quoted block */
 			if (*p == ',')
 				*p = '\0';	/* terminate the option item */
+			if (*p == '=') {        /* key=val option */
+				if (!val) {
+					*p = '\0';      /* terminate key */
+					val = ++p;      /* start the value */
+				}
+			}
 
 			/* end of option item or last item */
 			if (*p == '\0' || *(p + 1) == '\0') {
-				parse_opt(opt, flags, *extra_opts, len);
-				opt = NULL;
+				if (val) {
+					parse_opt_val(opt, val, *extra_opts, len);
+				} else
+					parse_opt(opt, flags, *extra_opts, len);
+				opt = val = NULL;
 			}
 		}
 		free(opts);
 	}
 }
 
+/*
+ * Set the read ahead value for the mount point. On failure, uses the default kernel
+ * read ahead value (for new mounts) or the current value (for remounts).
+ */
+static void set_readahead(const char *mount_point) {
+	int error;
+	struct statx mp_stat;
+	char *mount_point_readahead_file, value[READAHEAD_VALUE_LEN];
+	size_t len;
+	int fp;
+
+	/* If readahead_kb is unset, or set to 0, do not change the value */
+	if (!readahead_kb)
+		return;
+
+	if ((error = statx(0, mount_point, 0, 0, &mp_stat)) != 0) {
+		goto out_error;
+	}
+
+	if (!(mount_point_readahead_file = malloc(PATH_MAX))) {
+		error = -ENOMEM;
+		goto out_error;
+	}
+
+	snprintf(mount_point_readahead_file, PATH_MAX, "/sys/class/bdi/%d:%d/read_ahead_kb",
+			mp_stat.stx_dev_major, mp_stat.stx_dev_minor);
+
+	len = snprintf(value, READAHEAD_VALUE_LEN, "%d", readahead_kb);
+
+	if ((fp = open(mount_point_readahead_file, O_WRONLY)) < 0) {
+		error = errno;
+		goto out_free;
+	}
+
+	if ((error = write(fp, value, len)) < 0)
+		goto out_close;
+
+	close(fp);
+	return;
+
+out_close:
+	close(fp);
+out_free:
+	free(mount_point_readahead_file);
+out_error:
+	nfs_error(_("%s: unable to set readahead value, using default kernel value (error = %d)\n"),
+			progname, error);
+}
+
 static int try_mount(char *spec, char *mount_point, int flags,
 			char *fs_type, char **extra_opts, char *mount_opts,
 			int fake, int bg)
@@ -373,8 +467,10 @@ static int try_mount(char *spec, char *mount_point, int flags,
 	if (ret)
 		return ret;
 
-	if (!fake)
+	if (!fake) {
+		set_readahead(mount_point);
 		print_one(spec, mount_point, fs_type, mount_opts);
+	}
 
 	return add_mtab(spec, mount_point, fs_type, flags, *extra_opts);
 }
diff --git a/utils/mount/nfs.man b/utils/mount/nfs.man
index f1b76936..9832a377 100644
--- a/utils/mount/nfs.man
+++ b/utils/mount/nfs.man
@@ -561,6 +561,9 @@ The
 .B sloppy
 option is an alternative to specifying
 .BR mount.nfs " -s " option.
+.TP 1.5i
+.B readahead=n
+Set the read ahead of the mount to n KiB. This is handled entirely in userspace and will not appear on mountinfo. If unset or set to 0, it will not set the a value, using the current value (for a remount) or the kernel default for a new mount.
 
 .SS "Options for NFS versions 2 and 3 only"
 Use these options, along with the options in the above subsection,
-- 
2.31.1


             reply	other threads:[~2021-08-03 13:07 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-03 13:07 Thiago Rafael Becker [this message]
2021-08-03 15:15 ` [RFC PATCH] mount.nfs: Add readahead option Trond Myklebust
2021-08-04 17:13   ` Thiago Rafael Becker
2021-08-05 18:23   ` Steve Dickson
2021-08-05 20:11     ` Thiago Rafael Becker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210803130717.2890565-1-trbecker@gmail.com \
    --to=trbecker@gmail.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=steved@redhat.com \
    --cc=tbecker@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.