* [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
       [not found]                   ` <20180416144638.GA22484@redhat.com>
@ 2018-04-16 19:32                     ` Mikulas Patocka
  2018-04-17 14:45                       ` Christopher Lameter
       [not found]                     ` <alpine.LRH.2.02.1804161054410.17807@file01.intranet.prod.int.rdu2.redhat.com>
  1 sibling, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-16 19:32 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Vlastimil Babka, Christopher Lameter, Matthew Wilcox,
	Pekka Enberg, linux-mm, dm-devel, David Rientjes, Joonsoo Kim,
	Andrew Morton, linux-kernel

This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
flag causes allocation of larger slab caches in order to minimize wasted
space.

This is needed because we want to use dm-bufio for deduplication index and
there are existing installations with non-power-of-two block sizes (such
as 640KB). The performance of the whole solution depends on efficient
memory use, so we must waste as little memory as possible.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-bufio.c |    2 +-
 include/linux/slab.h  |    7 +++++++
 mm/slab.c             |    4 ++--
 mm/slab.h             |    7 ++++---
 mm/slab_common.c      |    2 +-
 mm/slub.c             |   25 ++++++++++++++++++++-----
 6 files changed, 35 insertions(+), 12 deletions(-)

Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h	2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/include/linux/slab.h	2018-04-16 21:10:45.000000000 +0200
@@ -108,6 +108,13 @@
 #define SLAB_KASAN		0
 #endif
 
+/*
+ * Use higher-order allocations to minimize wasted space.
+ * Note: the allocation is unreliable if this flag is used; the caller
+ * must handle allocation failures gracefully.
+ */
+#define SLAB_MINIMIZE_WASTE	((slab_flags_t __force)0x10000000U)
+
 /* The following flags affect the page allocator grouping pages by mobility */
 /* Objects are reclaimable */
 #define SLAB_RECLAIM_ACCOUNT	((slab_flags_t __force)0x00020000U)
Index: linux-2.6/mm/slab_common.c
===================================================================
--- linux-2.6.orig/mm/slab_common.c	2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slab_common.c	2018-04-16 21:10:45.000000000 +0200
@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_d
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
-			 SLAB_ACCOUNT)
+			 SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slub.c	2018-04-16 21:12:41.000000000 +0200
@@ -3249,7 +3249,7 @@ static inline unsigned int slab_order(un
 	return order;
 }
 
-static inline int calculate_order(unsigned int size, unsigned int reserved)
+static inline int calculate_order(unsigned int size, unsigned int reserved, slab_flags_t flags)
 {
 	unsigned int order;
 	unsigned int min_objects;
@@ -3277,7 +3277,7 @@ static inline int calculate_order(unsign
 			order = slab_order(size, min_objects,
 					slub_max_order, fraction, reserved);
 			if (order <= slub_max_order)
-				return order;
+				goto ret_order;
 			fraction /= 2;
 		}
 		min_objects--;
@@ -3289,15 +3289,30 @@ static inline int calculate_order(unsign
 	 */
 	order = slab_order(size, 1, slub_max_order, 1, reserved);
 	if (order <= slub_max_order)
-		return order;
+		goto ret_order;
 
 	/*
 	 * Doh this slab cannot be placed using slub_max_order.
 	 */
 	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
 	if (order < MAX_ORDER)
-		return order;
+		goto ret_order;
 	return -ENOSYS;
+
+ret_order:
+	if (flags & SLAB_MINIMIZE_WASTE) {
+		/* Increase the order if it decreases waste */
+		int test_order;
+		for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
+			unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
+			unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
+			if (test_order_objects >= min(32, MAX_OBJS_PER_PAGE))
+				break;
+			if (test_order_objects > order_objects << (test_order - order))
+				order = test_order;
+		}
+	}
+	return order;
 }
 
 static void
@@ -3562,7 +3577,7 @@ static int calculate_sizes(struct kmem_c
 	if (forced_order >= 0)
 		order = forced_order;
 	else
-		order = calculate_order(size, s->reserved);
+		order = calculate_order(size, s->reserved, flags);
 
 	if ((int)order < 0)
 		return 0;
Index: linux-2.6/drivers/md/dm-bufio.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-bufio.c	2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/drivers/md/dm-bufio.c	2018-04-16 21:11:23.000000000 +0200
@@ -1683,7 +1683,7 @@ struct dm_bufio_client *dm_bufio_client_
 	    (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
 		snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
 		c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
-						  SLAB_RECLAIM_ACCOUNT, NULL);
+						  SLAB_RECLAIM_ACCOUNT | SLAB_MINIMIZE_WASTE, NULL);
 		if (!c->slab_cache) {
 			r = -ENOMEM;
 			goto bad;
Index: linux-2.6/mm/slab.h
===================================================================
--- linux-2.6.orig/mm/slab.h	2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slab.h	2018-04-16 21:10:45.000000000 +0200
@@ -142,10 +142,10 @@ static inline slab_flags_t kmem_cache_fl
 #if defined(CONFIG_SLAB)
 #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
 			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
-			  SLAB_ACCOUNT)
+			  SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 #elif defined(CONFIG_SLUB)
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
-			  SLAB_TEMPORARY | SLAB_ACCOUNT)
+			  SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_MINIMIZE_WASTE)
 #else
 #define SLAB_CACHE_FLAGS (0)
 #endif
@@ -164,7 +164,8 @@ static inline slab_flags_t kmem_cache_fl
 			      SLAB_NOLEAKTRACE | \
 			      SLAB_RECLAIM_ACCOUNT | \
 			      SLAB_TEMPORARY | \
-			      SLAB_ACCOUNT)
+			      SLAB_ACCOUNT | \
+			      SLAB_MINIMIZE_WASTE)
 
 bool __kmem_cache_empty(struct kmem_cache *);
 int __kmem_cache_shutdown(struct kmem_cache *);
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c	2018-04-16 21:10:45.000000000 +0200
+++ linux-2.6/mm/slab.c	2018-04-16 21:10:45.000000000 +0200
@@ -1790,14 +1790,14 @@ static size_t calculate_slab_order(struc
 		 * as GFP_NOFS and we really don't want to have to be allocating
 		 * higher-order pages when we are unable to shrink dcache.
 		 */
-		if (flags & SLAB_RECLAIM_ACCOUNT)
+		if (flags & SLAB_RECLAIM_ACCOUNT && !(flags & SLAB_MINIMIZE_WASTE))
 			break;
 
 		/*
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_max_order)
+		if (gfporder >= slab_max_order && !(flags & SLAB_MINIMIZE_WASTE))
 			break;
 
 		/*


* Re: slab: introduce the flag SLAB_MINIMIZE_WASTE
       [not found]                               ` <b0e6ccf6-06ce-e50b-840e-c8d3072382fd@suse.cz>
@ 2018-04-16 21:01                                 ` Mikulas Patocka
  2018-04-17 14:40                                   ` Christopher Lameter
  0 siblings, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-16 21:01 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Christopher Lameter, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Mon, 16 Apr 2018, Vlastimil Babka wrote:

> On 04/16/2018 09:36 PM, Mikulas Patocka wrote:
> 
> >>> I need to increase it just for dm-bufio slabs.
> >>
> >> If you do this then others will want the same...
> > 
> > If others need it, they can turn on the flag SLAB_MINIMIZE_WASTE too.
> 
> I think it should be possible without a new flag. The slub allocator
> could just balance priorities (performance vs memory efficiency) better.
> Currently I get the impression that "slub_max_order" is a performance
> tunable. Let's add another criterion for selecting an order, that would
> try to pick an order to minimize wasted space below e.g. 10% with some
> different kind of max order. Pick good defaults, add tunables if you must.
> 
> I mean, anyone who's creating a cache for 640KB objects most likely
> doesn't want to waste another 384KB by each such object. They shouldn't
> have to add a flag to let the slub allocator figure out that using 2MB
> pages is the right thing to do here.
> 
> Vlastimil

The problem is that higher-order allocations (larger than 32K) are 
unreliable. So, if you increase page order beyond that, the allocation may 
randomly fail.

dm-bufio deals gracefully with allocation failure, because it preallocates 
some buffers with vmalloc, but other subsystems may not deal with it and 
they could return ENOMEM randomly or misbehave in other ways. So, the 
"SLAB_MINIMIZE_WASTE" flag is also saying that the allocation may fail and 
the caller is prepared to deal with it.
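
Roughly, the pattern looks like this (just a sketch of the idea with
made-up names, not the actual dm-bufio code):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

static void *alloc_buffer_data(struct kmem_cache *buffer_cache,
			       size_t block_size, gfp_t gfp)
{
	void *data;

	/* may fail quickly, because the cache can use a high page order */
	data = kmem_cache_alloc(buffer_cache,
				gfp | __GFP_NORETRY | __GFP_NOWARN);
	if (data)
		return data;

	/* fallback that only needs order-0 pages */
	return vmalloc(block_size);
}

static void free_buffer_data(struct kmem_cache *buffer_cache, void *data)
{
	if (is_vmalloc_addr(data))
		vfree(data);
	else
		kmem_cache_free(buffer_cache, data);
}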

The slub subsystem does actual fallback to low-order when the allocation 
fails (it allows different order for each slab in the same cache), but 
slab doesn't fall back and you get NULL if higher-order allocation fails. 
So, SLAB_MINIMIZE_WASTE is needed for slab because it will just randomly 
fail with higher order.

Mikulas


* Re: slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-16 21:01                                 ` Mikulas Patocka
@ 2018-04-17 14:40                                   ` Christopher Lameter
  2018-04-17 18:53                                     ` Mikulas Patocka
  0 siblings, 1 reply; 23+ messages in thread
From: Christopher Lameter @ 2018-04-17 14:40 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Vlastimil Babka, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Mon, 16 Apr 2018, Mikulas Patocka wrote:

> dm-bufio deals gracefully with allocation failure, because it preallocates
> some buffers with vmalloc, but other subsystems may not deal with it and
> they could return ENOMEM randomly or misbehave in other ways. So, the
> "SLAB_MINIMIZE_WASTE" flag is also saying that the allocation may fail and
> the caller is prepared to deal with it.
>
> The slub subsystem does actual fallback to low-order when the allocation
> fails (it allows different order for each slab in the same cache), but
> slab doesn't fall back and you get NULL if higher-order allocation fails.
> So, SLAB_MINIMIZE_WASTE is needed for slab because it will just randomly
> fail with higher order.

Fix Slab instead of adding a flag that is only useful for one allocator?


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-16 19:32                     ` [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE Mikulas Patocka
@ 2018-04-17 14:45                       ` Christopher Lameter
  2018-04-17 16:16                         ` Vlastimil Babka
  2018-04-17 19:06                         ` Mikulas Patocka
  0 siblings, 2 replies; 23+ messages in thread
From: Christopher Lameter @ 2018-04-17 14:45 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Mon, 16 Apr 2018, Mikulas Patocka wrote:

> This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
> flag causes allocation of larger slab caches in order to minimize wasted
> space.
>
> This is needed because we want to use dm-bufio for deduplication index and
> there are existing installations with non-power-of-two block sizes (such
> as 640KB). The performance of the whole solution depends on efficient
> memory use, so we must waste as little memory as possible.

Hmmm. Can we come up with a generic solution instead?

This may mean relaxing the enforcement of the allocation max order a bit
so that we can get dense allocation through higher order allocs.

But then higher order allocs are generally seen as problematic.

Note that SLUB will fall back to smallest order already if a failure
occurs so increasing slub_max_order may not be that much of an issue.

Maybe drop the max order limit completely and use MAX_ORDER instead? That
means that callers need to be able to tolerate failures.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 14:45                       ` Christopher Lameter
@ 2018-04-17 16:16                         ` Vlastimil Babka
  2018-04-17 16:38                           ` Christopher Lameter
  2018-04-17 17:26                           ` Mikulas Patocka
  2018-04-17 19:06                         ` Mikulas Patocka
  1 sibling, 2 replies; 23+ messages in thread
From: Vlastimil Babka @ 2018-04-17 16:16 UTC (permalink / raw)
  To: Christopher Lameter, Mikulas Patocka
  Cc: Mike Snitzer, Matthew Wilcox, Pekka Enberg, linux-mm, dm-devel,
	David Rientjes, Joonsoo Kim, Andrew Morton, linux-kernel

On 04/17/2018 04:45 PM, Christopher Lameter wrote:
> On Mon, 16 Apr 2018, Mikulas Patocka wrote:
> 
>> This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
>> flag causes allocation of larger slab caches in order to minimize wasted
>> space.
>>
>> This is needed because we want to use dm-bufio for deduplication index and
>> there are existing installations with non-power-of-two block sizes (such
>> as 640KB). The performance of the whole solution depends on efficient
>> memory use, so we must waste as little memory as possible.
> 
> Hmmm. Can we come up with a generic solution instead?

Yes please.

> This may mean relaxing the enforcement of the allocation max order a bit
> so that we can get dense allocation through higher order allocs.
> 
> But then higher order allocs are generally seen as problematic.

I think in this case they are better than wasting/fragmenting 384kB for
640kB object.
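
To put numbers on that (assuming 4kB pages): rounding a 640kB object up to
a power of two means an order-8 (1MB) slab holding a single object, so
384kB (37.5%) of every slab is padding, whereas an order-9 (2MB) slab would
hold three such objects and waste only 128kB (6.25%).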

> Note that SLUB will fall back to smallest order already if a failure
> occurs so increasing slub_max_order may not be that much of an issue.
> 
> Maybe drop the max order limit completely and use MAX_ORDER instead?

For packing, sure. For performance, please no (i.e. don't try to
allocate MAX_ORDER for each and every cache).

> That
> means that callers need to be able to tolerate failures.

Is it any different from now? I suppose there would still be
smallest-order fallback involved in sl*b itself? And if your allocation
is so large it can fail even with the fallback (i.e. >= costly order),
you need to tolerate failures anyway?

One corner case I see is if there is anyone who would rather use their
own fallback instead of the space-wasting smallest-order fallback.
Maybe we could map some GFP flag to indicate that.

> 


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 16:16                         ` Vlastimil Babka
@ 2018-04-17 16:38                           ` Christopher Lameter
  2018-04-17 19:09                             ` Mikulas Patocka
  2018-04-17 17:26                           ` Mikulas Patocka
  1 sibling, 1 reply; 23+ messages in thread
From: Christopher Lameter @ 2018-04-17 16:38 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Mikulas Patocka, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Tue, 17 Apr 2018, Vlastimil Babka wrote:

> On 04/17/2018 04:45 PM, Christopher Lameter wrote:

> > But then higher order allocs are generally seen as problematic.
>
> I think in this case they are better than wasting/fragmenting 384kB for
> 640kB object.

Well typically we have suggested that people use vmalloc in the past.


> > Note that SLUB will fall back to smallest order already if a failure
> > occurs so increasing slub_max_order may not be that much of an issue.
> >
> > Maybe drop the max order limit completely and use MAX_ORDER instead?
>
> For packing, sure. For performance, please no (i.e. don't try to
> allocate MAX_ORDER for each and every cache).

No of course not. We would have to modify the order selection on kmem
cache creation.

> > That
> > means that callers need to be able to tolerate failures.
>
> Is it any different from now? I suppose there would still be
> smallest-order fallback involved in sl*b itself? And if your allocation
> is so large it can fail even with the fallback (i.e. >= costly order),
> you need to tolerate failures anyway?

Failures can occur even with < costly order as far as I can tell. Order 0
is the only safe one.

> One corner case I see is if there is anyone who would rather use their
> own fallback instead of the space-wasting smallest-order fallback.
> Maybe we could map some GFP flag to indicate that.

Well if you have a fallback then maybe the slab allocator should not fall
back on its own but let the caller deal with it.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 16:16                         ` Vlastimil Babka
  2018-04-17 16:38                           ` Christopher Lameter
@ 2018-04-17 17:26                           ` Mikulas Patocka
  2018-04-17 19:13                             ` Vlastimil Babka
  1 sibling, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-17 17:26 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: Christopher Lameter, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Tue, 17 Apr 2018, Vlastimil Babka wrote:

> On 04/17/2018 04:45 PM, Christopher Lameter wrote:
> > On Mon, 16 Apr 2018, Mikulas Patocka wrote:
> > 
> >> This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
> >> flag causes allocation of larger slab caches in order to minimize wasted
> >> space.
> >>
> >> This is needed because we want to use dm-bufio for deduplication index and
> >> there are existing installations with non-power-of-two block sizes (such
> >> as 640KB). The performance of the whole solution depends on efficient
> >> memory use, so we must waste as little memory as possible.
> > 
> > Hmmm. Can we come up with a generic solution instead?
> 
> Yes please.
> 
> > This may mean relaxing the enforcement of the allocation max order a bit
> > so that we can get dense allocation through higher order allocs.
> > 
> > But then higher order allocs are generally seen as problematic.
> 
> I think in this case they are better than wasting/fragmenting 384kB for
> 640kB object.

Wasting 37% of memory is still better than the kernel randomly returning 
-ENOMEM when higher-order allocation fails.

> > That
> > means that callers need to be able to tolerate failures.
> 
> Is it any different from now? I suppose there would still be
> smallest-order fallback involved in sl*b itself? And if your allocation
> is so large it can fail even with the fallback (i.e. >= costly order),
> you need to tolerate failures anyway?
> 
> One corner case I see is if there is anyone who would rather use their
> own fallback instead of the space-wasting smallest-order fallback.
> Maybe we could map some GFP flag to indicate that.

For example, if you create a cache with 17KB objects, the slab subsystem 
will pad it up to 32KB. You are wasting almost half the memory, but the 
allocation is reliable and it won't fail.

If you use a slab size larger than 32KB, you get less wasted memory, but 
you also get random -ENOMEMs (yes, we had a problem in dm-thin where it 
was randomly failing during initialization due to a 64KB allocation).
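
The trade-off is easy to see with a trivial standalone calculation
(assuming 4kB pages, 17KB meaning 17*1024 bytes and no per-slab reserved
space):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096, size = 17 * 1024;
	int order;

	/* objects per slab and wasted bytes for each page order */
	for (order = 3; order <= 5; order++) {
		unsigned long slab = page_size << order;
		unsigned long objects = slab / size;
		unsigned long waste = slab - objects * size;

		printf("order %d: %lu objects, %lu bytes wasted (%lu%%)\n",
		       order, objects, waste, waste * 100 / slab);
	}
	return 0;
}

Order 3 (32KB) fits one object and wastes 15360 bytes, order 5 (128KB)
fits seven and wastes only 9216 bytes - but the order-5 allocation is the
one that can randomly fail.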

Mikulas


* Re: slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 14:40                                   ` Christopher Lameter
@ 2018-04-17 18:53                                     ` Mikulas Patocka
  2018-04-17 21:42                                       ` Christopher Lameter
  0 siblings, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-17 18:53 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Vlastimil Babka, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Tue, 17 Apr 2018, Christopher Lameter wrote:

> On Mon, 16 Apr 2018, Mikulas Patocka wrote:
> 
> > dm-bufio deals gracefully with allocation failure, because it preallocates
> > some buffers with vmalloc, but other subsystems may not deal with it and
> > they could return ENOMEM randomly or misbehave in other ways. So, the
> > "SLAB_MINIMIZE_WASTE" flag is also saying that the allocation may fail and
> > the caller is prepared to deal with it.
> >
> > The slub subsystem does actual fallback to low-order when the allocation
> > fails (it allows different order for each slab in the same cache), but
> > slab doesn't fall back and you get NULL if higher-order allocation fails.
> > So, SLAB_MINIMIZE_WASTE is needed for slab because it will just randomly
> > fail with higher order.
> 
> Fix Slab instead of adding a flag that is only useful for one allocator?

Slab assumes that all slabs have the same order, so it's not so easy to 
fix it.

Mikulas


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 14:45                       ` Christopher Lameter
  2018-04-17 16:16                         ` Vlastimil Babka
@ 2018-04-17 19:06                         ` Mikulas Patocka
  2018-04-18 14:55                           ` Christopher Lameter
  1 sibling, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-17 19:06 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Tue, 17 Apr 2018, Christopher Lameter wrote:

> On Mon, 16 Apr 2018, Mikulas Patocka wrote:
> 
> > This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
> > flag causes allocation of larger slab caches in order to minimize wasted
> > space.
> >
> > This is needed because we want to use dm-bufio for deduplication index and
> > there are existing installations with non-power-of-two block sizes (such
> > as 640KB). The performance of the whole solution depends on efficient
> > memory use, so we must waste as little memory as possible.
> 
> Hmmm. Can we come up with a generic solution instead?
> 
> This may mean relaxing the enforcement of the allocation max order a bit
> so that we can get dense allocation through higher order allocs.
> 
> But then higher order allocs are generally seen as problematic.
> 
> Note that SLUB will fall back to smallest order already if a failure
> occurs so increasing slub_max_order may not be that much of an issue.
> 
> Maybe drop the max order limit completely and use MAX_ORDER instead? That
> means that callers need to be able to tolerate failures.

I can make a slub-only patch with no extra flag (on a freshly booted 
system it increases only the order of caches "TCPv6" and "sighand_cache" 
by one - so it should not have unexpected effects):

Doing a generic solution for slab would be more complicated because slab 
assumes that all slabs have the same order, so it can't fall back to 
lower-order allocations.


From: Mikulas Patocka <mpatocka@redhat.com>
Subject: [PATCH] slub: minimize wasted space

When the object size is larger than what fits into a slab of slub_max_order,
the slub subsystem rounds the allocation up to the next power of two. This
causes a lot of wasted space - e.g. a 640KB block consumes 1MB of memory.

This patch makes the slub subsystem increase the order if it is beneficial.
The order is increased as long as it reduces wasted space. There is a cutoff
at 32 objects per slab.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 mm/slub.c |   21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2018-04-17 19:59:49.000000000 +0200
+++ linux-2.6/mm/slub.c	2018-04-17 20:58:23.000000000 +0200
@@ -3252,6 +3252,7 @@ static inline unsigned int slab_order(un
 static inline int calculate_order(unsigned int size, unsigned int reserved)
 {
 	unsigned int order;
+	unsigned int test_order;
 	unsigned int min_objects;
 	unsigned int max_objects;
 
@@ -3277,7 +3278,7 @@ static inline int calculate_order(unsign
 			order = slab_order(size, min_objects,
 					slub_max_order, fraction, reserved);
 			if (order <= slub_max_order)
-				return order;
+				goto ret_order;
 			fraction /= 2;
 		}
 		min_objects--;
@@ -3289,15 +3290,25 @@ static inline int calculate_order(unsign
 	 */
 	order = slab_order(size, 1, slub_max_order, 1, reserved);
 	if (order <= slub_max_order)
-		return order;
+		goto ret_order;
 
 	/*
 	 * Doh this slab cannot be placed using slub_max_order.
 	 */
 	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
-	if (order < MAX_ORDER)
-		return order;
-	return -ENOSYS;
+	if (order >= MAX_ORDER)
+		return -ENOSYS;
+
+ret_order:
+	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
+		unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
+		unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
+		if (test_order_objects > min(32, MAX_OBJS_PER_PAGE))
+			break;
+		if (test_order_objects > order_objects << (test_order - order))
+			order = test_order;
+	}
+	return order;
 }
 
 static void


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 16:38                           ` Christopher Lameter
@ 2018-04-17 19:09                             ` Mikulas Patocka
  0 siblings, 0 replies; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-17 19:09 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Vlastimil Babka, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Tue, 17 Apr 2018, Christopher Lameter wrote:

> On Tue, 17 Apr 2018, Vlastimil Babka wrote:
> 
> > On 04/17/2018 04:45 PM, Christopher Lameter wrote:
> 
> > > But then higher order allocs are generally seen as problematic.
> >
> > I think in this case they are better than wasting/fragmenting 384kB for
> > 640kB object.
> 
> Well typically we have suggested that people use vmalloc in the past.

vmalloc is slow - it is unusable for a buffer cache.

> > > That
> > > means that callers need to be able to tolerate failures.
> >
> > Is it any different from now? I suppose there would still be
> > smallest-order fallback involved in sl*b itself? And if your allocation
> > is so large it can fail even with the fallback (i.e. >= costly order),
> > you need to tolerate failures anyway?
> 
> Failures can occur even with < costly order as far as I can telkl. Order 0
> is the only safe one.

The alloc_pages function seems to retry indefinitely for order <=
PAGE_ALLOC_COSTLY_ORDER. Do you have some explanation why it should fail?

> > One corner case I see is if there is anyone who would rather use their
> > own fallback instead of the space-wasting smallest-order fallback.
> > Maybe we could map some GFP flag to indicate that.
> 
> Well if you have a fallback then maybe the slab allocator should not fall
> back on its own but let the caller deal with it.

Mikulas


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 17:26                           ` Mikulas Patocka
@ 2018-04-17 19:13                             ` Vlastimil Babka
  0 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2018-04-17 19:13 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christopher Lameter, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On 04/17/2018 07:26 PM, Mikulas Patocka wrote:
> 
> 
> On Tue, 17 Apr 2018, Vlastimil Babka wrote:
> 
>> On 04/17/2018 04:45 PM, Christopher Lameter wrote:
>>> On Mon, 16 Apr 2018, Mikulas Patocka wrote:
>>>
>>>> This patch introduces a flag SLAB_MINIMIZE_WASTE for slab and slub. This
>>>> flag causes allocation of larger slab caches in order to minimize wasted
>>>> space.
>>>>
>>>> This is needed because we want to use dm-bufio for deduplication index and
>>>> there are existing installations with non-power-of-two block sizes (such
>>>> as 640KB). The performance of the whole solution depends on efficient
>>>> memory use, so we must waste as little memory as possible.
>>>
>>> Hmmm. Can we come up with a generic solution instead?
>>
>> Yes please.
>>
>>> This may mean relaxing the enforcement of the allocation max order a bit
>>> so that we can get dense allocation through higher order allocs.
>>>
>>> But then higher order allocs are generally seen as problematic.
>>
>> I think in this case they are better than wasting/fragmenting 384kB for
>> 640kB object.
> 
> Wasting 37% of memory is still better than the kernel randomly returning 
> -ENOMEM when higher-order allocation fails.

Of course, see below.

>>> That
>>> means that callers need to be able to tolerate failures.
>>
>> Is it any different from now? I suppose there would still be
>> smallest-order fallback involved in sl*b itself? And if your allocation

^ There: "I suppose there would still be smallest-order fallback
involved in sl*b itself?"

If SLAB doesn't currently support fallback to different order, it either
learns to do that, or keeps wasting memory and more people will migrate
to SLUB. Simple.


* Re: slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 18:53                                     ` Mikulas Patocka
@ 2018-04-17 21:42                                       ` Christopher Lameter
  0 siblings, 0 replies; 23+ messages in thread
From: Christopher Lameter @ 2018-04-17 21:42 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Vlastimil Babka, Mike Snitzer, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Tue, 17 Apr 2018, Mikulas Patocka wrote:

> > > The slub subsystem does actual fallback to low-order when the allocation
> > > fails (it allows different order for each slab in the same cache), but
> > > slab doesn't fallback and you get NULL if higher-order allocation fails.
> > > So, SLAB_MINIMIZE_WASTE is needed for slab because it will just randomly
> > > fail with higher order.
> >
> > Fix Slab instead of adding a flag that is only useful for one allocator?
>
> Slab assumes that all slabs have the same order, so it's not so easy to
> fix it.

Well since SLAB uses compound pages one could easily determine the order
of the page and thus also support multiple page orders there.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-17 19:06                         ` Mikulas Patocka
@ 2018-04-18 14:55                           ` Christopher Lameter
  2018-04-25 21:04                             ` Mikulas Patocka
  0 siblings, 1 reply; 23+ messages in thread
From: Christopher Lameter @ 2018-04-18 14:55 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Tue, 17 Apr 2018, Mikulas Patocka wrote:

> I can make a slub-only patch with no extra flag (on a freshly booted
> system it increases only the order of caches "TCPv6" and "sighand_cache"
> by one - so it should not have unexpected effects):
>
> > Doing a generic solution for slab would be more complicated because slab
> > assumes that all slabs have the same order, so it can't fall back to
> lower-order allocations.

Well again SLAB uses compound pages and thus would be able to detect the
size of the page. It may be some work but it could be done.

>
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c	2018-04-17 19:59:49.000000000 +0200
> +++ linux-2.6/mm/slub.c	2018-04-17 20:58:23.000000000 +0200
> @@ -3252,6 +3252,7 @@ static inline unsigned int slab_order(un
>  static inline int calculate_order(unsigned int size, unsigned int reserved)
>  {
>  	unsigned int order;
> +	unsigned int test_order;
>  	unsigned int min_objects;
>  	unsigned int max_objects;
>
> @@ -3277,7 +3278,7 @@ static inline int calculate_order(unsign
>  			order = slab_order(size, min_objects,
>  					slub_max_order, fraction, reserved);
>  			if (order <= slub_max_order)
> -				return order;
> +				goto ret_order;
>  			fraction /= 2;
>  		}
>  		min_objects--;
> @@ -3289,15 +3290,25 @@ static inline int calculate_order(unsign
>  	 */
>  	order = slab_order(size, 1, slub_max_order, 1, reserved);

The slab order is determined in slab_order()

>  	if (order <= slub_max_order)
> -		return order;
> +		goto ret_order;
>
>  	/*
>  	 * Doh this slab cannot be placed using slub_max_order.
>  	 */
>  	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
> -	if (order < MAX_ORDER)
> -		return order;
> -	return -ENOSYS;
> +	if (order >= MAX_ORDER)
> +		return -ENOSYS;
> +
> +ret_order:
> +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> +		unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
> +		unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
> +		if (test_order_objects > min(32, MAX_OBJS_PER_PAGE))
> +			break;
> +		if (test_order_objects > order_objects << (test_order - order))
> +			order = test_order;
> +	}
> +	return order;

Could yo move that logic into slab_order()? It does something awfully
similar.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-18 14:55                           ` Christopher Lameter
@ 2018-04-25 21:04                             ` Mikulas Patocka
  2018-04-25 23:24                               ` Mikulas Patocka
  2018-04-26 18:51                               ` Christopher Lameter
  0 siblings, 2 replies; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-25 21:04 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Wed, 18 Apr 2018, Christopher Lameter wrote:

> On Tue, 17 Apr 2018, Mikulas Patocka wrote:
> 
> > I can make a slub-only patch with no extra flag (on a freshly booted
> > system it increases only the order of caches "TCPv6" and "sighand_cache"
> > by one - so it should not have unexpected effects):
> >
> > Doing a generic solution for slab would be more complicated because slab
> > assumes that all slabs have the same order, so it can't fall back to
> > lower-order allocations.
> 
> Well again SLAB uses compound pages and thus would be able to detect the
> size of the page. It may be some work but it could be done.
> 
> >
> > Index: linux-2.6/mm/slub.c
> > ===================================================================
> > --- linux-2.6.orig/mm/slub.c	2018-04-17 19:59:49.000000000 +0200
> > +++ linux-2.6/mm/slub.c	2018-04-17 20:58:23.000000000 +0200
> > @@ -3252,6 +3252,7 @@ static inline unsigned int slab_order(un
> >  static inline int calculate_order(unsigned int size, unsigned int reserved)
> >  {
> >  	unsigned int order;
> > +	unsigned int test_order;
> >  	unsigned int min_objects;
> >  	unsigned int max_objects;
> >
> > @@ -3277,7 +3278,7 @@ static inline int calculate_order(unsign
> >  			order = slab_order(size, min_objects,
> >  					slub_max_order, fraction, reserved);
> >  			if (order <= slub_max_order)
> > -				return order;
> > +				goto ret_order;
> >  			fraction /= 2;
> >  		}
> >  		min_objects--;
> > @@ -3289,15 +3290,25 @@ static inline int calculate_order(unsign
> >  	 */
> >  	order = slab_order(size, 1, slub_max_order, 1, reserved);
> 
> The slab order is determined in slab_order()
> 
> >  	if (order <= slub_max_order)
> > -		return order;
> > +		goto ret_order;
> >
> >  	/*
> >  	 * Doh this slab cannot be placed using slub_max_order.
> >  	 */
> >  	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
> > -	if (order < MAX_ORDER)
> > -		return order;
> > -	return -ENOSYS;
> > +	if (order >= MAX_ORDER)
> > +		return -ENOSYS;
> > +
> > +ret_order:
> > +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> > +		unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
> > +		unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
> > +		if (test_order_objects > min(32, MAX_OBJS_PER_PAGE))
> > +			break;
> > +		if (test_order_objects > order_objects << (test_order - order))
> > +			order = test_order;
> > +	}
> > +	return order;
> 
> Could yo move that logic into slab_order()? It does something awfully
> similar.

But slab_order (and its caller) limits the order to "max_order" and we 
want more.

Perhaps slab_order should be dropped and calculate_order totally 
rewritten?

Mikulas


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-25 21:04                             ` Mikulas Patocka
@ 2018-04-25 23:24                               ` Mikulas Patocka
  2018-04-26 19:01                                 ` Christopher Lameter
  2018-04-26 18:51                               ` Christopher Lameter
  1 sibling, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-25 23:24 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Wed, 25 Apr 2018, Mikulas Patocka wrote:

> 
> 
> On Wed, 18 Apr 2018, Christopher Lameter wrote:
> 
> > On Tue, 17 Apr 2018, Mikulas Patocka wrote:
> > 
> > > I can make a slub-only patch with no extra flag (on a freshly booted
> > > system it increases only the order of caches "TCPv6" and "sighand_cache"
> > > by one - so it should not have unexpected effects):
> > >
> > > Doing a generic solution for slab would be more complicated because slab
> > > assumes that all slabs have the same order, so it can't fall back to
> > > lower-order allocations.
> > 
> > Well again SLAB uses compound pages and thus would be able to detect the
> > size of the page. It may be some work but it could be done.
> > 
> > >
> > > Index: linux-2.6/mm/slub.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/slub.c	2018-04-17 19:59:49.000000000 +0200
> > > +++ linux-2.6/mm/slub.c	2018-04-17 20:58:23.000000000 +0200
> > > @@ -3252,6 +3252,7 @@ static inline unsigned int slab_order(un
> > >  static inline int calculate_order(unsigned int size, unsigned int reserved)
> > >  {
> > >  	unsigned int order;
> > > +	unsigned int test_order;
> > >  	unsigned int min_objects;
> > >  	unsigned int max_objects;
> > >
> > > @@ -3277,7 +3278,7 @@ static inline int calculate_order(unsign
> > >  			order = slab_order(size, min_objects,
> > >  					slub_max_order, fraction, reserved);
> > >  			if (order <= slub_max_order)
> > > -				return order;
> > > +				goto ret_order;
> > >  			fraction /= 2;
> > >  		}
> > >  		min_objects--;
> > > @@ -3289,15 +3290,25 @@ static inline int calculate_order(unsign
> > >  	 */
> > >  	order = slab_order(size, 1, slub_max_order, 1, reserved);
> > 
> > The slab order is determined in slab_order()
> > 
> > >  	if (order <= slub_max_order)
> > > -		return order;
> > > +		goto ret_order;
> > >
> > >  	/*
> > >  	 * Doh this slab cannot be placed using slub_max_order.
> > >  	 */
> > >  	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
> > > -	if (order < MAX_ORDER)
> > > -		return order;
> > > -	return -ENOSYS;
> > > +	if (order >= MAX_ORDER)
> > > +		return -ENOSYS;
> > > +
> > > +ret_order:
> > > +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> > > +		unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
> > > +		unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
> > > +		if (test_order_objects > min(32, MAX_OBJS_PER_PAGE))
> > > +			break;
> > > +		if (test_order_objects > order_objects << (test_order - order))
> > > +			order = test_order;
> > > +	}
> > > +	return order;
> > 
> > Could yo move that logic into slab_order()? It does something awfully
> > similar.
> 
> But slab_order (and its caller) limits the order to "max_order" and we 
> want more.
> 
> Perhaps slab_order should be dropped and calculate_order totally 
> rewritten?
> 
> Mikulas

Do you want this? It deletes slab_order and replaces it with the 
"minimize_waste" logic directly.

The patch starts with a minimal order for a given size and increases the 
order if one of these conditions is met:
* we are below slub_min_order
* we are below min_objects and slub_max_order
* we go above slub_max_order only if it minimizes waste and if we don't 
  increase the object count above 32

It simplifies the code and it is very similar to the old algorithm; most 
slab caches keep the same order, so it shouldn't cause any regressions.

This patch changes order of these slabs:
TCPv6: 3 -> 4
sighand_cache: 3 -> 4
task_struct: 3 -> 4
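
For the 2112-byte caches above (the size reported later in the thread for
TCPv6 and sighand_cache) this is just the waste test doing its job: an
order-3 slab (32kB) holds 15 objects with 1088 bytes left over, an order-4
slab (64kB) holds 31 objects with only 64 bytes left over; 31 > (15 << 1),
so the order is increased, and 31 is still below the 32-object cutoff.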

---
 mm/slub.c |   76 +++++++++++++++++++++-----------------------------------------
 1 file changed, 26 insertions(+), 50 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2018-04-26 00:07:30.000000000 +0200
+++ linux-2.6/mm/slub.c	2018-04-26 00:21:37.000000000 +0200
@@ -3224,34 +3224,10 @@ static unsigned int slub_min_objects;
  * requested a higher mininum order then we start with that one instead of
  * the smallest order which will fit the object.
  */
-static inline unsigned int slab_order(unsigned int size,
-		unsigned int min_objects, unsigned int max_order,
-		unsigned int fract_leftover, unsigned int reserved)
-{
-	unsigned int min_order = slub_min_order;
-	unsigned int order;
-
-	if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
-		return get_order(size * MAX_OBJS_PER_PAGE) - 1;
-
-	for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved));
-			order <= max_order; order++) {
-
-		unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
-		unsigned int rem;
-
-		rem = (slab_size - reserved) % size;
-
-		if (rem <= slab_size / fract_leftover)
-			break;
-	}
-
-	return order;
-}
-
 static inline int calculate_order(unsigned int size, unsigned int reserved)
 {
 	unsigned int order;
+	unsigned int test_order;
 	unsigned int min_objects;
 	unsigned int max_objects;
 
@@ -3269,35 +3245,35 @@ static inline int calculate_order(unsign
 	max_objects = order_objects(slub_max_order, size, reserved);
 	min_objects = min(min_objects, max_objects);
 
-	while (min_objects > 1) {
-		unsigned int fraction;
+	/* Get the minimum acceptable order for one object */
+	order = get_order(size + reserved);
+
+	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
+		unsigned order_obj = order_objects(order, size, reserved);
+		unsigned test_order_obj = order_objects(test_order, size, reserved);
+
+		/* If there are too many objects, stop searching */
+		if (test_order_obj > MAX_OBJS_PER_PAGE)
+			break;
 
-		fraction = 16;
-		while (fraction >= 4) {
-			order = slab_order(size, min_objects,
-					slub_max_order, fraction, reserved);
-			if (order <= slub_max_order)
-				return order;
-			fraction /= 2;
-		}
-		min_objects--;
+		/* Always increase up to slub_min_order */
+		if (test_order <= slub_min_order)
+			order = test_order;
+
+		/* If we are below min_objects and slub_max_order, increase order */
+		if (order_obj < min_objects && test_order <= slub_max_order)
+			order = test_order;
+
+		/* Increase order even more, but only if it reduces waste */
+		if (test_order_obj <= 32 &&
+		    test_order_obj > order_obj << (test_order - order))
+			order = test_order;
 	}
 
-	/*
-	 * We were unable to place multiple objects in a slab. Now
-	 * lets see if we can place a single object there.
-	 */
-	order = slab_order(size, 1, slub_max_order, 1, reserved);
-	if (order <= slub_max_order)
-		return order;
+	if (order >= MAX_ORDER)
+		return -ENOSYS;
 
-	/*
-	 * Doh this slab cannot be placed using slub_max_order.
-	 */
-	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
-	if (order < MAX_ORDER)
-		return order;
-	return -ENOSYS;
+	return order;
 }
 
 static void


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-25 21:04                             ` Mikulas Patocka
  2018-04-25 23:24                               ` Mikulas Patocka
@ 2018-04-26 18:51                               ` Christopher Lameter
  1 sibling, 0 replies; 23+ messages in thread
From: Christopher Lameter @ 2018-04-26 18:51 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Wed, 25 Apr 2018, Mikulas Patocka wrote:

> >
> > Could yo move that logic into slab_order()? It does something awfully
> > similar.
>
> But slab_order (and its caller) limits the order to "max_order" and we
> want more.
>
> Perhaps slab_order should be dropped and calculate_order totally
> rewritten?

Yes you likely need to do something creative with max_order if not with
more stuff.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-25 23:24                               ` Mikulas Patocka
@ 2018-04-26 19:01                                 ` Christopher Lameter
  2018-04-26 21:09                                   ` Mikulas Patocka
  0 siblings, 1 reply; 23+ messages in thread
From: Christopher Lameter @ 2018-04-26 19:01 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Wed, 25 Apr 2018, Mikulas Patocka wrote:

> Do you want this? It deletes slab_order and replaces it with the
> "minimize_waste" logic directly.

Well yes that looks better. Now we need to make it easy to read and less
complicated. Maybe try to keep as much as possible of the old code
and also the names of variables to make it easier to review?

> It simplifies the code and it is very similar to the old algorithms, most
> slab caches have the same order, so it shouldn't cause any regressions.
>
> This patch changes order of these slabs:
> TCPv6: 3 -> 4
> sighand_cache: 3 -> 4
> task_struct: 3 -> 4

Hmmm... order 4 for these caches may cause some concern. These should stay
under costly order I think. Otherwise allocations are no longer
guaranteed.

> @@ -3269,35 +3245,35 @@ static inline int calculate_order(unsign
>  	max_objects = order_objects(slub_max_order, size, reserved);
>  	min_objects = min(min_objects, max_objects);
>
> -	while (min_objects > 1) {
> -		unsigned int fraction;
> +	/* Get the minimum acceptable order for one object */
> +	order = get_order(size + reserved);
> +
> +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> +		unsigned order_obj = order_objects(order, size, reserved);
> +		unsigned test_order_obj = order_objects(test_order, size, reserved);
> +
> +		/* If there are too many objects, stop searching */
> +		if (test_order_obj > MAX_OBJS_PER_PAGE)
> +			break;
>
> -		fraction = 16;
> -		while (fraction >= 4) {
> -			order = slab_order(size, min_objects,
> -					slub_max_order, fraction, reserved);
> -			if (order <= slub_max_order)
> -				return order;
> -			fraction /= 2;
> -		}
> -		min_objects--;
> +		/* Always increase up to slub_min_order */
> +		if (test_order <= slub_min_order)
> +			order = test_order;

Well that is a significant change. In our current scheme the order
boundary wins.


> +
> +		/* If we are below min_objects and slub_max_order, increase order */
> +		if (order_obj < min_objects && test_order <= slub_max_order)
> +			order = test_order;
> +
> +		/* Increase order even more, but only if it reduces waste */
> +		if (test_order_obj <= 32 &&

Where does the 32 come from?

> +		    test_order_obj > order_obj << (test_order - order))

Add more () to make the condition better readable.

> +			order = test_order;

Can we just call test_order order and avoid using the long variable names
here? Variable names in functions are typically short.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-26 19:01                                 ` Christopher Lameter
@ 2018-04-26 21:09                                   ` Mikulas Patocka
  2018-04-27 16:41                                     ` Christopher Lameter
  0 siblings, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-26 21:09 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Thu, 26 Apr 2018, Christopher Lameter wrote:

> On Wed, 25 Apr 2018, Mikulas Patocka wrote:
> 
> > Do you want this? It deletes slab_order and replaces it with the
> > "minimize_waste" logic directly.
> 
> Well yes that looks better. Now we need to make it easy to read and less
> complicated. Maybe try to keep as much as possible of the old code
> and also the names of variables to make it easier to review?
> 
> > It simplifies the code and it is very similar to the old algorithms, most
> > slab caches have the same order, so it shouldn't cause any regressions.
> >
> > This patch changes order of these slabs:
> > TCPv6: 3 -> 4
> > sighand_cache: 3 -> 4
> > task_struct: 3 -> 4
> 
> Hmmm... order 4 for these caches may cause some concern. These should stay
> under costly order I think. Otherwise allocations are no longer
> guaranteed.

You said that slub has fallback to smaller order allocations.

The whole purpose of this "minimize waste" approach is to use higher-order 
allocations to use memory more efficiently, so it is just doing its job. 
(for these 3 caches, order-4 really wastes less memory than order-3 - on 
my system TCPv6 and sighand_cache have size 2112, task_struct 2752).

We could improve the fallback code, so that if order-4 allocation fails, 
it tries order-3 allocation, and then falls back to order-0. But I think 
that these failures are rare enough that it is not a problem.
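
Something along these lines could do it - just a sketch of that idea with
made-up names, not how the current SLUB fallback works (today it goes
straight from the calculated order to the minimum order):

#include <linux/gfp.h>
#include <linux/mm_types.h>

static struct page *alloc_slab_pages_fallback(int node, gfp_t flags,
					      unsigned int order,
					      unsigned int min_order)
{
	struct page *page;

	/* opportunistic attempts at the higher orders */
	for (; order > min_order; order--) {
		page = alloc_pages_node(node,
					flags | __GFP_NORETRY | __GFP_NOWARN,
					order);
		if (page)
			return page;
	}

	/* final attempt at the minimum order, allowed to retry/reclaim */
	return alloc_pages_node(node, flags, min_order);
}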

> > @@ -3269,35 +3245,35 @@ static inline int calculate_order(unsign
> >  	max_objects = order_objects(slub_max_order, size, reserved);
> >  	min_objects = min(min_objects, max_objects);
> >
> > -	while (min_objects > 1) {
> > -		unsigned int fraction;
> > +	/* Get the minimum acceptable order for one object */
> > +	order = get_order(size + reserved);
> > +
> > +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> > +		unsigned order_obj = order_objects(order, size, reserved);
> > +		unsigned test_order_obj = order_objects(test_order, size, reserved);
> > +
> > +		/* If there are too many objects, stop searching */
> > +		if (test_order_obj > MAX_OBJS_PER_PAGE)
> > +			break;
> >
> > -		fraction = 16;
> > -		while (fraction >= 4) {
> > -			order = slab_order(size, min_objects,
> > -					slub_max_order, fraction, reserved);
> > -			if (order <= slub_max_order)
> > -				return order;
> > -			fraction /= 2;
> > -		}
> > -		min_objects--;
> > +		/* Always increase up to slub_min_order */
> > +		if (test_order <= slub_min_order)
> > +			order = test_order;
> 
> Well that is a significant change. In our current scheme the order
> boundart wins.

I think it's not a change. The existing function slab_order() starts with 
min_order (unless it overshoots MAX_OBJS_PER_PAGE) and then goes upwards. 
My code does the same - my code tests for MAX_OBJS_PER_PAGE (and bails out 
if we would overshoot it) and increases the order until it reaches 
slub_min_order (and then increases it even more if it satisfies the other 
conditions).

If you believe that it behaves differently, please describe the situation 
in detail.

> > +
> > +		/* If we are below min_objects and slub_max_order, increase order */
> > +		if (order_obj < min_objects && test_order <= slub_max_order)
> > +			order = test_order;
> > +
> > +		/* Increase order even more, but only if it reduces waste */
> > +		if (test_order_obj <= 32 &&
> 
> Where does the 32 come from?

It is to avoid extremely high order for extremely small slabs.

For example, see kmalloc-96.
10922 96-byte objects would fit into 1MiB
21845 96-byte objects would fit into 2MiB

The algorithm would recognize this one more object that fits into 2MiB 
slab as "waste reduction" and increase the order to 2MiB - and we don't 
want this.

So, the general reasoning is - if we have 32 objects in a slab, then it is 
already considered that wasted space is reasonably low and we don't want 
to increase the order more.

Currently, kmalloc-96 uses order-0 - that is reasonable (we already have 
42 objects in a 4k page, so we don't need to use a higher order, even if it 
wastes one less object).
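
Plugging those numbers into the comparison: 21845 > (10922 << 1) = 21844,
so without the 32-object cutoff the "reduces waste" test really would trade
a 1MiB slab for a 2MiB one just to gain a single object.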

> > +		    test_order_obj > order_obj << (test_order - order))
> 
> Add more () to make the condition better readable.
> 
> > +			order = test_order;
> 
> Can we just call test_order order and avoid using the long variable names
> here? Variable names in functions are typically short.

You need two variables - "order" and "test_order".

"order" is the best order found so far and "test_order" is the order that 
we are now testing. If "test_order" wastes less space than "order", we 
assign order = test_order.

Mikulas


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-26 21:09                                   ` Mikulas Patocka
@ 2018-04-27 16:41                                     ` Christopher Lameter
  2018-04-27 19:19                                       ` Mikulas Patocka
  0 siblings, 1 reply; 23+ messages in thread
From: Christopher Lameter @ 2018-04-27 16:41 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

On Thu, 26 Apr 2018, Mikulas Patocka wrote:

> > Hmmm... order 4 for these caches may cause some concern. These should stay
> > under costly order I think. Otherwise allocations are no longer
> > guaranteed.
>
> You said that slub has fallback to smaller order allocations.

Yes it does...

> The whole purpose of this "minimize waste" approach is to use higher-order
> allocations to use memory more efficiently, so it is just doing its job.
> (for these 3 caches, order-4 really wastes less memory than order-3 - on
> my system TCPv6 and sighand_cache have size 2112, task_struct 2752).

Hmmm... Ok if the others are fine with this as well. I got some pushback
there in the past.

> We could improve the fallback code, so that if order-4 allocation fails,
> it tries order-3 allocation, and then falls back to order-0. But I think
> that these failures are rare enough that it is not a problem.

I also think that would be too many fallbacks.

> > > +		/* Increase order even more, but only if it reduces waste */
> > > +		if (test_order_obj <= 32 &&
> >
> > Where does the 32 come from?
>
> It is to avoid extremely high order for extremely small slabs.
>
> For example, see kmalloc-96.
> 10922 96-byte objects would fit into 1MiB
> 21845 96-byte objects would fit into 2MiB

That is the result of considering absolute byte wastage.

> The algorithm would recognize this one more object that fits into 2MiB
> slab as "waste reduction" and increase the order to 2MiB - and we don't
> want this.
>
> So, the general reasoning is - if we have 32 objects in a slab, then it is
> already considered that wasted space is reasonably low and we don't want
> to increase the order more.
>
> Currently, kmalloc-96 uses order-0 - that is reasonable (we already have
> 42 objects in 4k page, so we don't need to use higher order, even if it
> wastes one-less object).


The old code uses the concept of a "fraction" to calculate overhead. The
code here uses absolute counts of bytes. Fraction looks better to me.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-27 16:41                                     ` Christopher Lameter
@ 2018-04-27 19:19                                       ` Mikulas Patocka
  2018-06-13 17:01                                         ` Mikulas Patocka
  0 siblings, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-04-27 19:19 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel



On Fri, 27 Apr 2018, Christopher Lameter wrote:

> On Thu, 26 Apr 2018, Mikulas Patocka wrote:
> 
> > > Hmmm... order 4 for these caches may cause some concern. These should stay
> > > under costly order I think. Otherwise allocations are no longer
> > > guaranteed.
> >
> > You said that slub has fallback to smaller order allocations.
> 
> Yes it does...
> 
> > The whole purpose of this "minimize waste" approach is to use higher-order
> > allocations to use memory more efficiently, so it is just doing its job.
> > (for these 3 caches, order-4 really wastes less memory than order-3 - on
> > my system TCPv6 and sighand_cache have size 2112, task_struct 2752).
> 
> Hmmm... Ok if the others are fine with this as well. I got some pushback
> there in the past.
> 
> > We could improve the fallback code, so that if order-4 allocation fails,
> > it tries order-3 allocation, and then falls back to order-0. But I think
> > that these failures are rare enough that it is not a problem.
> 
> I also think that would be too many fallbacks.

You are right - it's better to fall back to the minimum possible size, so 
that the allocation is faster.

> The old code uses the concept of a "fraction" to calculate overhead. The
> code here uses absolute counts of bytes. Fraction looks better to me.

OK - I reworked the patch using the same "fraction" calculation as before.  
The existing logic targets 1/16 wasted space, so I used this target in 
this patch too.

This patch increases only the order of task_struct (from 3 to 4); all the 
other caches keep the same order as before.
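
For a concrete check of those numbers, here is a small userspace 
calculation (assuming 4 KiB pages, no reserved bytes, and the object sizes 
quoted earlier in this thread; real caches may differ because of debugging 
or alignment options):

#include <stdio.h>

#define PAGE_SIZE 4096u

static void report(const char *name, unsigned int size)
{
	unsigned int order;

	for (order = 3; order <= 4; order++) {
		unsigned int slab_size = PAGE_SIZE << order;
		unsigned int objects = slab_size / size;
		unsigned int rem = slab_size % size;

		printf("%-14s order %u: %2u objects, %4u bytes wasted%s\n",
		       name, order, objects, rem,
		       rem >= slab_size / 16 ? " (>= 1/16 of the slab)" : "");
	}
}

int main(void)
{
	report("TCPv6/sighand", 2112);	/* waste < 1/16 at order 3, left alone */
	report("task_struct", 2752);	/* waste >= 1/16 at order 3, moves to order 4 */
	return 0;
}

With these numbers, the 2112-byte caches already waste less than 1/16 of 
an order-3 slab and are left alone, while task_struct wastes 2496 of 
32768 bytes at order 3, so the new code moves it to order 4, where 23 
objects fit instead of 2 x 11.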

Mikulas



From: Mikulas Patocka <mpatocka@redhat.com>
Subject: [PATCH] slub: use higher order to reduce wasted space

If we create a slub cache with a large object size (larger than
slub_max_order), the slub subsystem currently rounds up the object size to
the next power of two.

This is inefficient, because it wastes too much space. We use the slab
cache as a buffer cache in dm-bufio; in order to use the memory
efficiently, we need to reduce the wasted space.

This patch reworks the slub order calculation algorithm, so that it uses
higher-order allocations if they would reduce wasted space. The slub
subsystem has a fallback if a higher-order allocation fails, so using an
order higher than PAGE_ALLOC_COSTLY_ORDER is ok.

The new algorithm first calculates the minimum order that can be used for
a given object size and then increases the order according to these
conditions:
conditions:
* if we would overshoot MAX_OBJS_PER_PAGE, don't increase
* if we are below slub_min_order, increase
* if we are below slub_max_order and below min_objects, increase
* we increase above slub_max_order only if it reduces wasted space and if
  we already waste at least 1/16 of the compound page

The new algorithm gives very similar results to the old one: all the
caches on my system have the same order as before; only the order of
task_struct (size 2752) is increased from 3 to 4.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 mm/slub.c |   82 +++++++++++++++++++++++---------------------------------------
 1 file changed, 31 insertions(+), 51 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2018-04-27 19:30:34.000000000 +0200
+++ linux-2.6/mm/slub.c	2018-04-27 21:05:53.000000000 +0200
@@ -3224,34 +3224,10 @@ static unsigned int slub_min_objects;
  * requested a higher mininum order then we start with that one instead of
  * the smallest order which will fit the object.
  */
-static inline unsigned int slab_order(unsigned int size,
-		unsigned int min_objects, unsigned int max_order,
-		unsigned int fract_leftover, unsigned int reserved)
+static int calculate_order(unsigned int size, unsigned int reserved)
 {
-	unsigned int min_order = slub_min_order;
-	unsigned int order;
-
-	if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
-		return get_order(size * MAX_OBJS_PER_PAGE) - 1;
-
-	for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved));
-			order <= max_order; order++) {
-
-		unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
-		unsigned int rem;
-
-		rem = (slab_size - reserved) % size;
-
-		if (rem <= slab_size / fract_leftover)
-			break;
-	}
-
-	return order;
-}
-
-static inline int calculate_order(unsigned int size, unsigned int reserved)
-{
-	unsigned int order;
+	unsigned int best_order;
+	unsigned int test_order;
 	unsigned int min_objects;
 	unsigned int max_objects;
 
@@ -3269,34 +3245,38 @@ static inline int calculate_order(unsign
 	max_objects = order_objects(slub_max_order, size, reserved);
 	min_objects = min(min_objects, max_objects);
 
-	while (min_objects > 1) {
-		unsigned int fraction;
+	/* Get the minimum acceptable order for one object */
+	best_order = get_order(size + reserved);
+
+	for (test_order = best_order + 1; test_order < MAX_ORDER; test_order++) {
+		unsigned best_order_obj = order_objects(best_order, size, reserved);
+		unsigned test_order_obj = order_objects(test_order, size, reserved);
+
+		unsigned best_order_slab_size = (unsigned int)PAGE_SIZE << best_order;
+		unsigned best_order_rem = (best_order_slab_size - reserved) % size;
+
+		/* If there would be too many objects, stop searching */
+		if (test_order_obj > MAX_OBJS_PER_PAGE)
+			break;
 
-		fraction = 16;
-		while (fraction >= 4) {
-			order = slab_order(size, min_objects,
-					slub_max_order, fraction, reserved);
-			if (order <= slub_max_order)
-				return order;
-			fraction /= 2;
-		}
-		min_objects--;
+		/* Always increase up to slub_min_order */
+		if (test_order <= slub_min_order)
+			best_order = test_order;
+
+		/* If we are below min_objects and slub_max_order, increase the order */
+		if (best_order_obj < min_objects && test_order <= slub_max_order)
+			best_order = test_order;
+
+		/* Increase the order even more, but only if it reduces waste */
+		/* If we already waste less than 1/16, don't increase it */
+		if (best_order_rem >= (best_order_slab_size / 16) &&
+		    test_order_obj > (best_order_obj << (test_order - best_order)))
+			best_order = test_order;
 	}
 
-	/*
-	 * We were unable to place multiple objects in a slab. Now
-	 * lets see if we can place a single object there.
-	 */
-	order = slab_order(size, 1, slub_max_order, 1, reserved);
-	if (order <= slub_max_order)
-		return order;
+	if (best_order < MAX_ORDER)
+		return best_order;
 
-	/*
-	 * Doh this slab cannot be placed using slub_max_order.
-	 */
-	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
-	if (order < MAX_ORDER)
-		return order;
 	return -ENOSYS;
 }
 


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-04-27 19:19                                       ` Mikulas Patocka
@ 2018-06-13 17:01                                         ` Mikulas Patocka
  2018-06-13 18:16                                           ` Christoph Hellwig
  0 siblings, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2018-06-13 17:01 UTC (permalink / raw)
  To: Christopher Lameter
  Cc: Mike Snitzer, Vlastimil Babka, Matthew Wilcox, Pekka Enberg,
	linux-mm, dm-devel, David Rientjes, Joonsoo Kim, Andrew Morton,
	linux-kernel

Hi

I'd like to ask about this patch - will you commit it, or do you want to 
make some more changes to it?

Mikulas




On Fri, 27 Apr 2018, Mikulas Patocka wrote:

> 
> 
> On Fri, 27 Apr 2018, Christopher Lameter wrote:
> 
> > On Thu, 26 Apr 2018, Mikulas Patocka wrote:
> > 
> > > > Hmmm... order 4 for these caches may cause some concern. These should stay
> > > > under costly order I think. Otherwise allocations are no longer
> > > > guaranteed.
> > >
> > > You said that slub has fallback to smaller order allocations.
> > 
> > Yes it does...
> > 
> > > The whole purpose of this "minimize waste" approach is to use higher-order
> > > allocations to use memory more efficiently, so it is just doing its job.
> > > (for these 3 caches, order-4 really wastes less memory than order-3 - on
> > > my system TCPv6 and sighand_cache have size 2112, task_struct 2752).
> > 
> > Hmmm... Ok if the others are fine with this as well. I got some pushback
> > there in the past.
> > 
> > > We could improve the fallback code, so that if order-4 allocation fails,
> > > it tries order-3 allocation, and then falls back to order-0. But I think
> > > that these failures are rare enough that it is not a problem.
> > 
> > I also think that would be too many fallbacks.
> 
> You are right - it's better to fallback to the minimum possible size, so 
> that the allocation is faster.
> 
> > The old code uses the concept of a "fraction" to calculate overhead. The
> > code here uses absolute counts of bytes. Fraction looks better to me.
> 
> OK - I reworked the patch using the same "fraction" calculation as before.  
> The existing logic targets 1/16 wasted space, so I used this target in 
> this patch too.
> 
> This patch increases only the order of task_struct (from 3 to 4), all the 
> other caches have the same order as before.
> 
> Mikulas


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-06-13 17:01                                         ` Mikulas Patocka
@ 2018-06-13 18:16                                           ` Christoph Hellwig
  2018-06-13 18:53                                             ` Mikulas Patocka
  0 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2018-06-13 18:16 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christopher Lameter, Mike Snitzer, Vlastimil Babka,
	Matthew Wilcox, Pekka Enberg, linux-mm, dm-devel, David Rientjes,
	Joonsoo Kim, Andrew Morton, linux-kernel

On Wed, Jun 13, 2018 at 01:01:22PM -0400, Mikulas Patocka wrote:
> Hi
> 
> I'd like to ask about this patch - will you commit it, or do you want to 
> make some more changes to it?

How about you resend it with the series adding an actual user once
ready?  I haven't actually seen patches using it posted on any list yet.


* Re: [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE
  2018-06-13 18:16                                           ` Christoph Hellwig
@ 2018-06-13 18:53                                             ` Mikulas Patocka
  0 siblings, 0 replies; 23+ messages in thread
From: Mikulas Patocka @ 2018-06-13 18:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christopher Lameter, Mike Snitzer, Vlastimil Babka,
	Matthew Wilcox, Pekka Enberg, linux-mm, dm-devel, David Rientjes,
	Joonsoo Kim, Andrew Morton, linux-kernel



On Wed, 13 Jun 2018, Christoph Hellwig wrote:

> On Wed, Jun 13, 2018 at 01:01:22PM -0400, Mikulas Patocka wrote:
> > Hi
> > 
> > I'd like to ask about this patch - will you commit it, or do you want to 
> > make some more changes to it?
> 
> How about you resend it with the series adding an actual user once
> ready?  I haven't actually seen patches using it posted on any list yet.

dm-bufio is already using it. Starting with kernel 4.17 (f51f2e0a7fb1 
- "dm bufio: support non-power-of-two block sizes"), dm-bufio can use 
non-power-of-two buffers. It uses a slab cache for its buffers, so we 
would like to have this slab optimization to avoid excessive memory 
waste.
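
For context, the cache dm-bufio creates for such buffers looks roughly 
like this (a hypothetical sketch only; the cache name, block size and 
flags are made up and this is not the actual dm-bufio code):

#include <linux/slab.h>
#include <linux/errno.h>

/*
 * A slab cache holding 640 KiB buffers - the kind of non-power-of-two
 * object that the order rework is meant to pack without rounding the
 * allocation up to the next power of two.
 */
static struct kmem_cache *example_buffer_cache;

static int example_buffer_cache_init(void)
{
	example_buffer_cache = kmem_cache_create("example-buffer-640k",
						 640 * 1024, 0, 0, NULL);
	if (!example_buffer_cache)
		return -ENOMEM;
	return 0;
}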

Originally, the slub patch used a new flag SLAB_MINIMIZE_WASTE, but after 
a suggestion from others, I reworked the patch so that it minimizes the 
waste of all slub caches and doesn't need an extra flag to activate it.

Mikulas


end of thread, other threads:[~2018-06-13 18:53 UTC | newest]

Thread overview: 23+ messages
     [not found] <alpine.LRH.2.02.1803201740280.21066@file01.intranet.prod.int.rdu2.redhat.com>
     [not found] ` <alpine.DEB.2.20.1803211024220.2175@nuc-kabylake>
     [not found]   ` <alpine.LRH.2.02.1803211153320.16017@file01.intranet.prod.int.rdu2.redhat.com>
     [not found]     ` <alpine.DEB.2.20.1803211226350.3174@nuc-kabylake>
     [not found]       ` <alpine.LRH.2.02.1803211425330.26409@file01.intranet.prod.int.rdu2.redhat.com>
     [not found]         ` <20c58a03-90a8-7e75-5fc7-856facfb6c8a@suse.cz>
     [not found]           ` <20180413151019.GA5660@redhat.com>
     [not found]             ` <ee8807ff-d650-0064-70bf-e1d77fa61f5c@suse.cz>
     [not found]               ` <20180416142703.GA22422@redhat.com>
     [not found]                 ` <alpine.LRH.2.02.1804161031300.24222@file01.intranet.prod.int.rdu2.redhat.com>
     [not found]                   ` <20180416144638.GA22484@redhat.com>
2018-04-16 19:32                     ` [PATCH RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE Mikulas Patocka
2018-04-17 14:45                       ` Christopher Lameter
2018-04-17 16:16                         ` Vlastimil Babka
2018-04-17 16:38                           ` Christopher Lameter
2018-04-17 19:09                             ` Mikulas Patocka
2018-04-17 17:26                           ` Mikulas Patocka
2018-04-17 19:13                             ` Vlastimil Babka
2018-04-17 19:06                         ` Mikulas Patocka
2018-04-18 14:55                           ` Christopher Lameter
2018-04-25 21:04                             ` Mikulas Patocka
2018-04-25 23:24                               ` Mikulas Patocka
2018-04-26 19:01                                 ` Christopher Lameter
2018-04-26 21:09                                   ` Mikulas Patocka
2018-04-27 16:41                                     ` Christopher Lameter
2018-04-27 19:19                                       ` Mikulas Patocka
2018-06-13 17:01                                         ` Mikulas Patocka
2018-06-13 18:16                                           ` Christoph Hellwig
2018-06-13 18:53                                             ` Mikulas Patocka
2018-04-26 18:51                               ` Christopher Lameter
     [not found]                     ` <alpine.LRH.2.02.1804161054410.17807@file01.intranet.prod.int.rdu2.redhat.com>
     [not found]                       ` <alpine.DEB.2.20.1804161018030.9397@nuc-kabylake>
     [not found]                         ` <alpine.LRH.2.02.1804161123400.17807@file01.intranet.prod.int.rdu2.redhat.com>
     [not found]                           ` <alpine.DEB.2.20.1804161043430.9622@nuc-kabylake>
     [not found]                             ` <alpine.LRH.2.02.1804161532480.19492@file01.intranet.prod.int.rdu2.redhat.com>
     [not found]                               ` <b0e6ccf6-06ce-e50b-840e-c8d3072382fd@suse.cz>
2018-04-16 21:01                                 ` Mikulas Patocka
2018-04-17 14:40                                   ` Christopher Lameter
2018-04-17 18:53                                     ` Mikulas Patocka
2018-04-17 21:42                                       ` Christopher Lameter
