ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v7 0/2] ceph: fix blindly expanding the readahead windows
@ 2023-06-05  7:21 xiubli
  2023-06-05  7:21 ` [PATCH v7 1/2] ceph: add a dedicated private data for netfs rreq xiubli
  2023-06-05  7:21 ` [PATCH v7 2/2] ceph: fix blindly expanding the readahead windows xiubli
  0 siblings, 2 replies; 5+ messages in thread
From: xiubli @ 2023-06-05  7:21 UTC (permalink / raw)
  To: idryomov, ceph-devel; +Cc: jlayton, vshankar, mchangir, sehuww, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

V7:
- two typo fixes.

Xiubo Li (2):
  ceph: add a dedicated private data for netfs rreq
  ceph: fix blindly expanding the readahead windows

 fs/ceph/addr.c  | 85 ++++++++++++++++++++++++++++++++++++++-----------
 fs/ceph/super.h | 13 ++++++++
 2 files changed, 80 insertions(+), 18 deletions(-)

-- 
2.40.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v7 1/2] ceph: add a dedicated private data for netfs rreq
  2023-06-05  7:21 [PATCH v7 0/2] ceph: fix blindly expanding the readahead windows xiubli
@ 2023-06-05  7:21 ` xiubli
  2023-06-06  5:30   ` Milind Changire
  2023-06-05  7:21 ` [PATCH v7 2/2] ceph: fix blindly expanding the readahead windows xiubli
  1 sibling, 1 reply; 5+ messages in thread
From: xiubli @ 2023-06-05  7:21 UTC (permalink / raw)
  To: idryomov, ceph-devel
  Cc: jlayton, vshankar, mchangir, sehuww, Xiubo Li, stable

From: Xiubo Li <xiubli@redhat.com>

We need to save the 'f_ra.ra_pages' to expand the readahead window
later.

Cc: stable@vger.kernel.org
Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead")
URL: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn
URL: https://www.spinics.net/lists/ceph-users/msg76183.html
Cc: Hu Weiwen <sehuww@mail.scut.edu.cn>
Reviewed-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
Tested-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/addr.c  | 45 ++++++++++++++++++++++++++++++++++-----------
 fs/ceph/super.h | 13 +++++++++++++
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3b20873733af..93fff1a7373f 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -404,18 +404,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
 {
 	struct inode *inode = rreq->inode;
 	int got = 0, want = CEPH_CAP_FILE_CACHE;
+	struct ceph_netfs_request_data *priv;
 	int ret = 0;
 
 	if (rreq->origin != NETFS_READAHEAD)
 		return 0;
 
+	priv = kzalloc(sizeof(*priv), GFP_NOFS);
+	if (!priv)
+		return -ENOMEM;
+
 	if (file) {
 		struct ceph_rw_context *rw_ctx;
 		struct ceph_file_info *fi = file->private_data;
 
+		priv->file_ra_pages = file->f_ra.ra_pages;
+		priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;
+
 		rw_ctx = ceph_find_rw_context(fi);
-		if (rw_ctx)
+		if (rw_ctx) {
+			rreq->netfs_priv = priv;
 			return 0;
+		}
 	}
 
 	/*
@@ -425,27 +435,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
 	ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
 	if (ret < 0) {
 		dout("start_read %p, error getting cap\n", inode);
-		return ret;
+		goto out;
 	}
 
 	if (!(got & want)) {
 		dout("start_read %p, no cache cap\n", inode);
-		return -EACCES;
+		ret = -EACCES;
+		goto out;
+	}
+	if (ret == 0) {
+		ret = -EACCES;
+		goto out;
 	}
-	if (ret == 0)
-		return -EACCES;
 
-	rreq->netfs_priv = (void *)(uintptr_t)got;
-	return 0;
+	priv->caps = got;
+	rreq->netfs_priv = priv;
+
+out:
+	if (ret < 0)
+		kfree(priv);
+
+	return ret;
 }
 
 static void ceph_netfs_free_request(struct netfs_io_request *rreq)
 {
-	struct ceph_inode_info *ci = ceph_inode(rreq->inode);
-	int got = (uintptr_t)rreq->netfs_priv;
+	struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+
+	if (!priv)
+		return;
 
-	if (got)
-		ceph_put_cap_refs(ci, got);
+	if (priv->caps)
+		ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
+	kfree(priv);
+	rreq->netfs_priv = NULL;
 }
 
 const struct netfs_request_ops ceph_netfs_ops = {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a226d36b3ecb..3a24b7974d46 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -470,6 +470,19 @@ struct ceph_inode_info {
 #endif
 };
 
+struct ceph_netfs_request_data {
+	int caps;
+
+	/*
+	 * Maximum size of a file readahead request.
+	 * The fadvise could update the bdi's default ra_pages.
+	 */
+	unsigned int file_ra_pages;
+
+	/* Set it if fadvise disables file readahead entirely */
+	bool file_ra_disabled;
+};
+
 static inline struct ceph_inode_info *
 ceph_inode(const struct inode *inode)
 {
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v7 2/2] ceph: fix blindly expanding the readahead windows
  2023-06-05  7:21 [PATCH v7 0/2] ceph: fix blindly expanding the readahead windows xiubli
  2023-06-05  7:21 ` [PATCH v7 1/2] ceph: add a dedicated private data for netfs rreq xiubli
@ 2023-06-05  7:21 ` xiubli
  2023-06-06  5:11   ` Milind Changire
  1 sibling, 1 reply; 5+ messages in thread
From: xiubli @ 2023-06-05  7:21 UTC (permalink / raw)
  To: idryomov, ceph-devel
  Cc: jlayton, vshankar, mchangir, sehuww, Xiubo Li, stable

From: Xiubo Li <xiubli@redhat.com>

Blindly expanding the readahead windows will cause unneccessary
pagecache thrashing and also will introduce the network workload.
We should disable expanding the windows if the readahead is disabled
and also shouldn't expand the windows too much.

Expanding forward firstly instead of expanding backward for possible
sequential reads.

Bound `rreq->len` to the actual file size to restore the previous page
cache usage.

The posix_fadvise may change the maximum size of a file readahead.

Cc: stable@vger.kernel.org
Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead")
URL: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn
URL: https://www.spinics.net/lists/ceph-users/msg76183.html
Cc: Hu Weiwen <sehuww@mail.scut.edu.cn>
Reviewed-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
Tested-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/addr.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 93fff1a7373f..0c4fb3d23078 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -188,16 +188,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
 	struct inode *inode = rreq->inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_layout *lo = &ci->i_layout;
+	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+	loff_t end = rreq->start + rreq->len, new_end;
+	struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+	unsigned long max_len;
 	u32 blockoff;
-	u64 blockno;
 
-	/* Expand the start downward */
-	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
-	rreq->start = blockno * lo->stripe_unit;
-	rreq->len += blockoff;
+	if (priv) {
+		/* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
+		if (priv->file_ra_disabled)
+			max_pages = 0;
+		else
+			max_pages = priv->file_ra_pages;
+
+	}
+
+	/* Readahead is disabled */
+	if (!max_pages)
+		return;
 
-	/* Now, round up the length to the next block */
-	rreq->len = roundup(rreq->len, lo->stripe_unit);
+	max_len = max_pages << PAGE_SHIFT;
+
+	/*
+	 * Try to expand the length forward by rounding up it to the next
+	 * block, but do not exceed the file size, unless the original
+	 * request already exceeds it.
+	 */
+	new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
+	if (new_end > end && new_end <= rreq->start + max_len)
+		rreq->len = new_end - rreq->start;
+
+	/* Try to expand the start downward */
+	div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+	if (rreq->len + blockoff <= max_len) {
+		rreq->start -= blockoff;
+		rreq->len += blockoff;
+	}
 }
 
 static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v7 2/2] ceph: fix blindly expanding the readahead windows
  2023-06-05  7:21 ` [PATCH v7 2/2] ceph: fix blindly expanding the readahead windows xiubli
@ 2023-06-06  5:11   ` Milind Changire
  0 siblings, 0 replies; 5+ messages in thread
From: Milind Changire @ 2023-06-06  5:11 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, ceph-devel, jlayton, vshankar, sehuww, stable

Looks good to me.

Reviewed-by: Milind Changire <mchangir@redhat.com>


On Mon, Jun 5, 2023 at 12:53 PM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> Blindly expanding the readahead windows will cause unneccessary
> pagecache thrashing and also will introduce the network workload.
> We should disable expanding the windows if the readahead is disabled
> and also shouldn't expand the windows too much.
>
> Expanding forward firstly instead of expanding backward for possible
> sequential reads.
>
> Bound `rreq->len` to the actual file size to restore the previous page
> cache usage.
>
> The posix_fadvise may change the maximum size of a file readahead.
>
> Cc: stable@vger.kernel.org
> Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead")
> URL: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn
> URL: https://www.spinics.net/lists/ceph-users/msg76183.html
> Cc: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Reviewed-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Tested-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/addr.c | 40 +++++++++++++++++++++++++++++++++-------
>  1 file changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 93fff1a7373f..0c4fb3d23078 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -188,16 +188,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
>         struct inode *inode = rreq->inode;
>         struct ceph_inode_info *ci = ceph_inode(inode);
>         struct ceph_file_layout *lo = &ci->i_layout;
> +       unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
> +       loff_t end = rreq->start + rreq->len, new_end;
> +       struct ceph_netfs_request_data *priv = rreq->netfs_priv;
> +       unsigned long max_len;
>         u32 blockoff;
> -       u64 blockno;
>
> -       /* Expand the start downward */
> -       blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> -       rreq->start = blockno * lo->stripe_unit;
> -       rreq->len += blockoff;
> +       if (priv) {
> +               /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
> +               if (priv->file_ra_disabled)
> +                       max_pages = 0;
> +               else
> +                       max_pages = priv->file_ra_pages;
> +
> +       }
> +
> +       /* Readahead is disabled */
> +       if (!max_pages)
> +               return;
>
> -       /* Now, round up the length to the next block */
> -       rreq->len = roundup(rreq->len, lo->stripe_unit);
> +       max_len = max_pages << PAGE_SHIFT;
> +
> +       /*
> +        * Try to expand the length forward by rounding up it to the next
> +        * block, but do not exceed the file size, unless the original
> +        * request already exceeds it.
> +        */
> +       new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
> +       if (new_end > end && new_end <= rreq->start + max_len)
> +               rreq->len = new_end - rreq->start;
> +
> +       /* Try to expand the start downward */
> +       div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> +       if (rreq->len + blockoff <= max_len) {
> +               rreq->start -= blockoff;
> +               rreq->len += blockoff;
> +       }
>  }
>
>  static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
> --
> 2.40.1
>


-- 
Milind


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v7 1/2] ceph: add a dedicated private data for netfs rreq
  2023-06-05  7:21 ` [PATCH v7 1/2] ceph: add a dedicated private data for netfs rreq xiubli
@ 2023-06-06  5:30   ` Milind Changire
  0 siblings, 0 replies; 5+ messages in thread
From: Milind Changire @ 2023-06-06  5:30 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, ceph-devel, jlayton, vshankar, sehuww, stable

Looks good to me.

Reviewed-by: Milind Changire <mchangir@redhat.com>

On Mon, Jun 5, 2023 at 12:53 PM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> We need to save the 'f_ra.ra_pages' to expand the readahead window
> later.
>
> Cc: stable@vger.kernel.org
> Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead")
> URL: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn
> URL: https://www.spinics.net/lists/ceph-users/msg76183.html
> Cc: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Reviewed-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Tested-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/addr.c  | 45 ++++++++++++++++++++++++++++++++++-----------
>  fs/ceph/super.h | 13 +++++++++++++
>  2 files changed, 47 insertions(+), 11 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 3b20873733af..93fff1a7373f 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -404,18 +404,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
>  {
>         struct inode *inode = rreq->inode;
>         int got = 0, want = CEPH_CAP_FILE_CACHE;
> +       struct ceph_netfs_request_data *priv;
>         int ret = 0;
>
>         if (rreq->origin != NETFS_READAHEAD)
>                 return 0;
>
> +       priv = kzalloc(sizeof(*priv), GFP_NOFS);
> +       if (!priv)
> +               return -ENOMEM;
> +
>         if (file) {
>                 struct ceph_rw_context *rw_ctx;
>                 struct ceph_file_info *fi = file->private_data;
>
> +               priv->file_ra_pages = file->f_ra.ra_pages;
> +               priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;
> +
>                 rw_ctx = ceph_find_rw_context(fi);
> -               if (rw_ctx)
> +               if (rw_ctx) {
> +                       rreq->netfs_priv = priv;
>                         return 0;
> +               }
>         }
>
>         /*
> @@ -425,27 +435,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
>         ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
>         if (ret < 0) {
>                 dout("start_read %p, error getting cap\n", inode);
> -               return ret;
> +               goto out;
>         }
>
>         if (!(got & want)) {
>                 dout("start_read %p, no cache cap\n", inode);
> -               return -EACCES;
> +               ret = -EACCES;
> +               goto out;
> +       }
> +       if (ret == 0) {
> +               ret = -EACCES;
> +               goto out;
>         }
> -       if (ret == 0)
> -               return -EACCES;
>
> -       rreq->netfs_priv = (void *)(uintptr_t)got;
> -       return 0;
> +       priv->caps = got;
> +       rreq->netfs_priv = priv;
> +
> +out:
> +       if (ret < 0)
> +               kfree(priv);
> +
> +       return ret;
>  }
>
>  static void ceph_netfs_free_request(struct netfs_io_request *rreq)
>  {
> -       struct ceph_inode_info *ci = ceph_inode(rreq->inode);
> -       int got = (uintptr_t)rreq->netfs_priv;
> +       struct ceph_netfs_request_data *priv = rreq->netfs_priv;
> +
> +       if (!priv)
> +               return;
>
> -       if (got)
> -               ceph_put_cap_refs(ci, got);
> +       if (priv->caps)
> +               ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
> +       kfree(priv);
> +       rreq->netfs_priv = NULL;
>  }
>
>  const struct netfs_request_ops ceph_netfs_ops = {
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a226d36b3ecb..3a24b7974d46 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -470,6 +470,19 @@ struct ceph_inode_info {
>  #endif
>  };
>
> +struct ceph_netfs_request_data {
> +       int caps;
> +
> +       /*
> +        * Maximum size of a file readahead request.
> +        * The fadvise could update the bdi's default ra_pages.
> +        */
> +       unsigned int file_ra_pages;
> +
> +       /* Set it if fadvise disables file readahead entirely */
> +       bool file_ra_disabled;
> +};
> +
>  static inline struct ceph_inode_info *
>  ceph_inode(const struct inode *inode)
>  {
> --
> 2.40.1
>


-- 
Milind


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-06-06  5:31 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-05  7:21 [PATCH v7 0/2] ceph: fix blindly expanding the readahead windows xiubli
2023-06-05  7:21 ` [PATCH v7 1/2] ceph: add a dedicated private data for netfs rreq xiubli
2023-06-06  5:30   ` Milind Changire
2023-06-05  7:21 ` [PATCH v7 2/2] ceph: fix blindly expanding the readahead windows xiubli
2023-06-06  5:11   ` Milind Changire

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).