* [PATCH] ceph: move sb->wb_pagevec_pool to be a global mempool
From: Jeff Layton @ 2020-07-30 15:48 UTC
  To: ceph-devel; +Cc: idryomov, ukernel

When doing some testing recently, I hit some page allocation failures
on mount, when creating the wb_pagevec_pool for the mount. That
requires 128k (32 contiguous pages), and after thrashing the memory
during an xfstests run, sometimes that would fail.
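
For reference, the 128k figure works out like this, assuming 4k pages,
8-byte page pointers and the default wsize of CEPH_MAX_WRITE_SIZE (64M):

    page_count = wsize >> PAGE_SHIFT                = 64M >> 12 = 16384
    size       = page_count * sizeof(struct page *) = 16384 * 8 = 128k

i.e. each pool element is an order-5 allocation: 32 contiguous 4k pages.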

128k for each mount seems like a lot to hold in reserve for a rainy
day, so let's change this to a global mempool that gets allocated
when the module is plugged in.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/addr.c               | 23 +++++++++++------------
 fs/ceph/super.c              | 22 ++++++++--------------
 fs/ceph/super.h              |  2 --
 include/linux/ceph/libceph.h |  1 +
 4 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 01ad09733ac7..6ea761c84494 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -862,8 +862,7 @@ static void writepages_finish(struct ceph_osd_request *req)
 
 	osd_data = osd_req_op_extent_osd_data(req, 0);
 	if (osd_data->pages_from_pool)
-		mempool_free(osd_data->pages,
-			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
+		mempool_free(osd_data->pages, ceph_wb_pagevec_pool);
 	else
 		kfree(osd_data->pages);
 	ceph_osdc_put_request(req);
@@ -955,10 +954,10 @@ static int ceph_writepages_start(struct address_space *mapping,
 		int num_ops = 0, op_idx;
 		unsigned i, pvec_pages, max_pages, locked_pages = 0;
 		struct page **pages = NULL, **data_pages;
-		mempool_t *pool = NULL;	/* Becomes non-null if mempool used */
 		struct page *page;
 		pgoff_t strip_unit_end = 0;
 		u64 offset = 0, len = 0;
+		bool from_pool = false;
 
 		max_pages = wsize >> PAGE_SHIFT;
 
@@ -1057,16 +1056,16 @@ static int ceph_writepages_start(struct address_space *mapping,
 						      sizeof(*pages),
 						      GFP_NOFS);
 				if (!pages) {
-					pool = fsc->wb_pagevec_pool;
-					pages = mempool_alloc(pool, GFP_NOFS);
+					from_pool = true;
+					pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
 					BUG_ON(!pages);
 				}
 
 				len = 0;
 			} else if (page->index !=
 				   (offset + len) >> PAGE_SHIFT) {
-				if (num_ops >= (pool ?  CEPH_OSD_SLAB_OPS :
-							CEPH_OSD_MAX_OPS)) {
+				if (num_ops >= (from_pool ?  CEPH_OSD_SLAB_OPS :
+							     CEPH_OSD_MAX_OPS)) {
 					redirty_page_for_writepage(wbc, page);
 					unlock_page(page);
 					break;
@@ -1161,7 +1160,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 				     offset, len);
 				osd_req_op_extent_osd_data_pages(req, op_idx,
 							data_pages, len, 0,
-							!!pool, false);
+							from_pool, false);
 				osd_req_op_extent_update(req, op_idx, len);
 
 				len = 0;
@@ -1188,12 +1187,12 @@ static int ceph_writepages_start(struct address_space *mapping,
 		dout("writepages got pages at %llu~%llu\n", offset, len);
 
 		osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
-						 0, !!pool, false);
+						 0, from_pool, false);
 		osd_req_op_extent_update(req, op_idx, len);
 
 		BUG_ON(op_idx + 1 != req->r_num_ops);
 
-		pool = NULL;
+		from_pool = false;
 		if (i < locked_pages) {
 			BUG_ON(num_ops <= req->r_num_ops);
 			num_ops -= req->r_num_ops;
@@ -1204,8 +1203,8 @@ static int ceph_writepages_start(struct address_space *mapping,
 			pages = kmalloc_array(locked_pages, sizeof(*pages),
 					      GFP_NOFS);
 			if (!pages) {
-				pool = fsc->wb_pagevec_pool;
-				pages = mempool_alloc(pool, GFP_NOFS);
+				from_pool = true;
+				pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
 				BUG_ON(!pages);
 			}
 			memcpy(pages, data_pages + i,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 585aecea5cad..7ec0e6d03d10 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -637,8 +637,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 					struct ceph_options *opt)
 {
 	struct ceph_fs_client *fsc;
-	int page_count;
-	size_t size;
 	int err;
 
 	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
@@ -686,22 +684,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc->cap_wq)
 		goto fail_inode_wq;
 
-	/* set up mempools */
-	err = -ENOMEM;
-	page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
-	size = sizeof (struct page *) * (page_count ? page_count : 1);
-	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
-	if (!fsc->wb_pagevec_pool)
-		goto fail_cap_wq;
-
 	spin_lock(&ceph_fsc_lock);
 	list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
 	spin_unlock(&ceph_fsc_lock);
 
 	return fsc;
 
-fail_cap_wq:
-	destroy_workqueue(fsc->cap_wq);
 fail_inode_wq:
 	destroy_workqueue(fsc->inode_wq);
 fail_client:
@@ -732,8 +720,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 	destroy_workqueue(fsc->inode_wq);
 	destroy_workqueue(fsc->cap_wq);
 
-	mempool_destroy(fsc->wb_pagevec_pool);
-
 	destroy_mount_options(fsc->mount_options);
 
 	ceph_destroy_client(fsc->client);
@@ -752,6 +738,7 @@ struct kmem_cache *ceph_dentry_cachep;
 struct kmem_cache *ceph_file_cachep;
 struct kmem_cache *ceph_dir_file_cachep;
 struct kmem_cache *ceph_mds_request_cachep;
+mempool_t *ceph_wb_pagevec_pool;
 
 static void ceph_inode_init_once(void *foo)
 {
@@ -796,6 +783,10 @@ static int __init init_caches(void)
 	if (!ceph_mds_request_cachep)
 		goto bad_mds_req;
 
+	ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT);
+	if (!ceph_wb_pagevec_pool)
+		goto bad_pagevec_pool;
+
 	error = ceph_fscache_register();
 	if (error)
 		goto bad_fscache;
@@ -804,6 +795,8 @@ static int __init init_caches(void)
 
 bad_fscache:
 	kmem_cache_destroy(ceph_mds_request_cachep);
+bad_pagevec_pool:
+	mempool_destroy(ceph_wb_pagevec_pool);
 bad_mds_req:
 	kmem_cache_destroy(ceph_dir_file_cachep);
 bad_dir_file:
@@ -834,6 +827,7 @@ static void destroy_caches(void)
 	kmem_cache_destroy(ceph_file_cachep);
 	kmem_cache_destroy(ceph_dir_file_cachep);
 	kmem_cache_destroy(ceph_mds_request_cachep);
+	mempool_destroy(ceph_wb_pagevec_pool);
 
 	ceph_fscache_unregister();
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 9001a896ae8c..4c3c964b1c54 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -118,8 +118,6 @@ struct ceph_fs_client {
 
 	struct ceph_mds_client *mdsc;
 
-	/* writeback */
-	mempool_t *wb_pagevec_pool;
 	atomic_long_t writeback_count;
 
 	struct workqueue_struct *inode_wq;
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index e5ed1c541e7f..c8645f0b797d 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -282,6 +282,7 @@ extern struct kmem_cache *ceph_dentry_cachep;
 extern struct kmem_cache *ceph_file_cachep;
 extern struct kmem_cache *ceph_dir_file_cachep;
 extern struct kmem_cache *ceph_mds_request_cachep;
+extern mempool_t *ceph_wb_pagevec_pool;
 
 /* ceph_common.c */
 extern bool libceph_compatible(void *data);
-- 
2.26.2

* Re: [PATCH] ceph: move sb->wb_pagevec_pool to be a global mempool
From: Ilya Dryomov @ 2020-08-03  9:27 UTC
  To: Jeff Layton; +Cc: Ceph Development, Yan, Zheng

On Thu, Jul 30, 2020 at 5:48 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> When doing some testing recently, I hit some page allocation failures
> on mount, when creating the wb_pagevec_pool for the mount. That
> requires 128k (32 contiguous pages), and after thrashing the memory
> during an xfstests run, sometimes that would fail.
>
> 128k for each mount seems like a lot to hold in reserve for a rainy
> day, so let's change this to a global mempool that gets allocated
> when the module is plugged in.
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks fine to me.

I think it used to be just a single page per mount before
95cca2b44e54 ("ceph: limit osd write size") because fsopt->wsize
defaulted to 0 which meant "no limit".  And CEPH_MSG_MAX_DATA_LEN
got increased from 16M to 64M as well...
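
Spelling that out against the sizing code this patch removes from
create_fs_client(), assuming 4k pages and 8-byte page pointers:

    wsize = 0   (old default, "no limit")      ->  size = sizeof(struct page *) * 1 = 8 bytes
    wsize = 16M (old CEPH_MAX_WRITE_SIZE)      ->  size =  4096 * 8 =  32k
    wsize = 64M (current CEPH_MAX_WRITE_SIZE)  ->  size = 16384 * 8 = 128k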

Thanks,

                Ilya

* Re: [PATCH] ceph: move sb->wb_pagevec_pool to be a global mempool
From: Jeff Layton @ 2020-08-03 13:05 UTC
  To: Ilya Dryomov; +Cc: Ceph Development, Yan, Zheng

On Mon, 2020-08-03 at 11:27 +0200, Ilya Dryomov wrote:
> Looks fine to me.
> 
> I think it used to be just a single page per mount before
> 95cca2b44e54 ("ceph: limit osd write size") because fsopt->wsize
> defaulted to 0 which meant "no limit".  And CEPH_MSG_MAX_DATA_LEN
> got increased from 16M to 64M as well...
> 
> Thanks,
> 
>                 Ilya

Yeah, I think this is something that has changed over time. Regardless,
though, keeping a per-sb mempool is rather weird. Most filesystems make
do with global ones.

As a side note (it would be a separate patch), I think we can also fix
some of the places that use a "try kmalloc and fall back to the mempool
if that fails" pattern.

mempools already operate like that -- they only fall back to allocating
from the reserved pool when allocations fail, so doing it manually is
redundant.
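
For the curious, mempool_alloc() looks roughly like this (heavily
simplified; see mm/mempool.c for the real thing):

	/* mempool_alloc(pool, gfp), simplified sketch */
	element = pool->alloc(gfp_temp, pool->pool_data);  /* plain kmalloc attempt */
	if (element)
		return element;
	if (pool->curr_nr > 0)
		return remove_element(pool);  /* only now dip into the reserve */
	/* otherwise, if the gfp mask allows sleeping, wait for a free and retry */
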
-- 
Jeff Layton <jlayton@kernel.org>
