From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f41.google.com (mail-qg0-f41.google.com [209.85.192.41]) by kanga.kvack.org (Postfix) with ESMTP id 3988A6B0253 for ; Thu, 3 Dec 2015 10:56:36 -0500 (EST) Received: by qgea14 with SMTP id a14so63632184qge.0 for ; Thu, 03 Dec 2015 07:56:36 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id r190si9101136qhb.108.2015.12.03.07.56.35 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 03 Dec 2015 07:56:35 -0800 (PST) Subject: [RFC PATCH 0/2] slab: implement bulking for SLAB allocator From: Jesper Dangaard Brouer Date: Thu, 03 Dec 2015 16:56:32 +0100 Message-ID: <20151203155600.3589.86568.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton This patchset implements bulking for the SLAB allocator. I split the implementation into two patches, the alloc and free "side" for easier review. (Based on Linus tree at v4.4-rc3-24-g25364a9e54fb) Normal SLAB fastpath 95 cycles(tsc) 23.852 ns, when compiled without debugging options enabled. Benchmarked[1] obj size 256 bytes on CPU i7-4790K @ 4.00GHz: 1 - 115 cycles(tsc) 28.812 ns - 42 cycles(tsc) 10.715 ns - improved 63.5% 2 - 103 cycles(tsc) 25.956 ns - 27 cycles(tsc) 6.985 ns - improved 73.8% 3 - 101 cycles(tsc) 25.336 ns - 22 cycles(tsc) 5.733 ns - improved 78.2% 4 - 100 cycles(tsc) 25.147 ns - 21 cycles(tsc) 5.319 ns - improved 79.0% 8 - 98 cycles(tsc) 24.616 ns - 18 cycles(tsc) 4.620 ns - improved 81.6% 16 - 97 cycles(tsc) 24.408 ns - 17 cycles(tsc) 4.344 ns - improved 82.5% 30 - 98 cycles(tsc) 24.641 ns - 16 cycles(tsc) 4.202 ns - improved 83.7% 32 - 98 cycles(tsc) 24.607 ns - 16 cycles(tsc) 4.199 ns - improved 83.7% 34 - 98 cycles(tsc) 24.605 ns - 18 cycles(tsc) 4.579 ns - improved 81.6% 48 - 97 cycles(tsc) 24.463 ns - 17 cycles(tsc) 4.405 ns - improved 82.5% 64 - 97 cycles(tsc) 24.370 ns - 17 cycles(tsc) 4.384 ns - improved 82.5% 128 - 99 cycles(tsc) 24.763 ns - 19 cycles(tsc) 4.755 ns - improved 80.8% 158 - 98 cycles(tsc) 24.708 ns - 18 cycles(tsc) 4.723 ns - improved 81.6% 250 - 101 cycles(tsc) 25.342 ns - 20 cycles(tsc) 5.035 ns - improved 80.2% [1] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/mm/slab_bulk_test01.c --- Jesper Dangaard Brouer (2): slab: implement bulk alloc in SLAB allocator slab: implement bulk free in SLAB allocator mm/slab.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 8 deletions(-) -- -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f41.google.com (mail-qg0-f41.google.com [209.85.192.41]) by kanga.kvack.org (Postfix) with ESMTP id CD96C6B0255 for ; Thu, 3 Dec 2015 10:57:33 -0500 (EST) Received: by qgeb1 with SMTP id b1so64362379qge.1 for ; Thu, 03 Dec 2015 07:57:33 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. 
[209.132.183.28]) by mx.google.com with ESMTPS id 18si9122525qgl.97.2015.12.03.07.57.33 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 03 Dec 2015 07:57:33 -0800 (PST) Subject: [RFC PATCH 1/2] slab: implement bulk alloc in SLAB allocator From: Jesper Dangaard Brouer Date: Thu, 03 Dec 2015 16:57:31 +0100 Message-ID: <20151203155637.3589.62609.stgit@firesoul> In-Reply-To: <20151203155600.3589.86568.stgit@firesoul> References: <20151203155600.3589.86568.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton (will add more desc after RFC) Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 4765c97ce690..3354489547ec 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3420,9 +3420,59 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) EXPORT_SYMBOL(kmem_cache_free_bulk); int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) + void **p) { - return __kmem_cache_alloc_bulk(s, flags, size, p); + size_t i; + + flags &= gfp_allowed_mask; + lockdep_trace_alloc(flags); + + if (slab_should_failslab(s, flags)) + return 0; + + s = memcg_kmem_get_cache(s, flags); + + cache_alloc_debugcheck_before(s, flags); + + local_irq_disable(); + for (i = 0; i < size; i++) { + void *objp = __do_cache_alloc(s, flags); + + // this call could be done outside IRQ disabled section + objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_); + + if (unlikely(!objp)) + goto error; + + prefetchw(objp); + p[i] = objp; + } + local_irq_enable(); + + /* Kmemleak and kmemcheck outside IRQ disabled section */ + for (i = 0; i < size; i++) { + void *x = p[i]; + + kmemleak_alloc_recursive(x, s->object_size, 1, s->flags, flags); + kmemcheck_slab_alloc(s, flags, x, s->object_size); + } + + /* Clear memory outside IRQ disabled section */ + if (unlikely(flags & __GFP_ZERO)) + for (i = 0; i < size; i++) + memset(p[i], 0, s->object_size); + +// FIXME: Trace call missing... should we create a bulk variant? +/* Like: + trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, flags); +*/ + memcg_kmem_put_cache(s); + return size; +error: + local_irq_enable(); + memcg_kmem_put_cache(s); + __kmem_cache_free_bulk(s, i, p); + return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qk0-f173.google.com (mail-qk0-f173.google.com [209.85.220.173]) by kanga.kvack.org (Postfix) with ESMTP id 0C5E46B0256 for ; Thu, 3 Dec 2015 10:57:44 -0500 (EST) Received: by qkda6 with SMTP id a6so31636999qkd.3 for ; Thu, 03 Dec 2015 07:57:43 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. 
[209.132.183.28]) by mx.google.com with ESMTPS id g205si1556667qkb.9.2015.12.03.07.57.43 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 03 Dec 2015 07:57:43 -0800 (PST) Subject: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator From: Jesper Dangaard Brouer Date: Thu, 03 Dec 2015 16:57:41 +0100 Message-ID: <20151203155736.3589.67424.stgit@firesoul> In-Reply-To: <20151203155600.3589.86568.stgit@firesoul> References: <20151203155600.3589.86568.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton (will add more desc after RFC) Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 3354489547ec..b676ac1dad34 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3413,12 +3413,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) } EXPORT_SYMBOL(kmem_cache_alloc); -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) -{ - __kmem_cache_free_bulk(s, size, p); -} -EXPORT_SYMBOL(kmem_cache_free_bulk); - int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { @@ -3619,6 +3613,31 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) } EXPORT_SYMBOL(kmem_cache_free); +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +{ + struct kmem_cache *s; + size_t i; + + local_irq_disable(); + for (i = 0; i < size; i++) { + void *objp = p[i]; + + s = cache_from_obj(orig_s, objp); + + debug_check_no_locks_freed(objp, s->object_size); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(objp, s->object_size); + + __cache_free(s, objp, _RET_IP_); + } + local_irq_enable(); + + // FIXME: tracing + // trace_kmem_cache_free(_RET_IP_, objp); +} +EXPORT_SYMBOL(kmem_cache_free_bulk); + + /** * kfree - free previously allocated memory * @objp: pointer returned by kmalloc. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f48.google.com (mail-qg0-f48.google.com [209.85.192.48]) by kanga.kvack.org (Postfix) with ESMTP id 94EC66B0258 for ; Fri, 4 Dec 2015 04:01:57 -0500 (EST) Received: by qgec40 with SMTP id c40so82728604qge.2 for ; Fri, 04 Dec 2015 01:01:57 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. 
[209.132.183.28]) by mx.google.com with ESMTPS id 127si12961296qhr.57.2015.12.04.01.01.56 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 04 Dec 2015 01:01:56 -0800 (PST)
Date: Fri, 4 Dec 2015 10:01:51 +0100
From: Jesper Dangaard Brouer
Subject: Re: [RFC PATCH 0/2] slab: implement bulking for SLAB allocator
Message-ID: <20151204100151.1e96935a@redhat.com>
In-Reply-To: <20151203155600.3589.86568.stgit@firesoul>
References: <20151203155600.3589.86568.stgit@firesoul>
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Sender: owner-linux-mm@kvack.org
List-ID:
To: linux-mm@kvack.org
Cc: Christoph Lameter , Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com

On Thu, 03 Dec 2015 16:56:32 +0100 Jesper Dangaard Brouer wrote:

> Normal SLAB fastpath 95 cycles(tsc) 23.852 ns, when compiled without
> debugging options enabled.
>
> Benchmarked[1] obj size 256 bytes on CPU i7-4790K @ 4.00GHz:
> [1] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/mm/slab_bulk_test01.c
>
> 1 - 115 cycles(tsc) 28.812 ns - 42 cycles(tsc) 10.715 ns - improved 63.5%
> 2 - 103 cycles(tsc) 25.956 ns - 27 cycles(tsc) 6.985 ns - improved 73.8%
> 3 - 101 cycles(tsc) 25.336 ns - 22 cycles(tsc) 5.733 ns - improved 78.2%
> 4 - 100 cycles(tsc) 25.147 ns - 21 cycles(tsc) 5.319 ns - improved 79.0%
> 8 - 98 cycles(tsc) 24.616 ns - 18 cycles(tsc) 4.620 ns - improved 81.6%
> 16 - 97 cycles(tsc) 24.408 ns - 17 cycles(tsc) 4.344 ns - improved 82.5%
> 30 - 98 cycles(tsc) 24.641 ns - 16 cycles(tsc) 4.202 ns - improved 83.7%
> 32 - 98 cycles(tsc) 24.607 ns - 16 cycles(tsc) 4.199 ns - improved 83.7%
> 34 - 98 cycles(tsc) 24.605 ns - 18 cycles(tsc) 4.579 ns - improved 81.6%
> 48 - 97 cycles(tsc) 24.463 ns - 17 cycles(tsc) 4.405 ns - improved 82.5%
> 64 - 97 cycles(tsc) 24.370 ns - 17 cycles(tsc) 4.384 ns - improved 82.5%
> 128 - 99 cycles(tsc) 24.763 ns - 19 cycles(tsc) 4.755 ns - improved 80.8%
> 158 - 98 cycles(tsc) 24.708 ns - 18 cycles(tsc) 4.723 ns - improved 81.6%
> 250 - 101 cycles(tsc) 25.342 ns - 20 cycles(tsc) 5.035 ns - improved 80.2%

Oops, a copy-paste mistake: these (above) were the old results, from the
old rejected patch [2].

[2] http://people.netfilter.org/hawk/patches/slab_rejected/slab-implement-bulking-for-slab-allocator.patch

The results from this patchset are:

1 - 112 cycles(tsc) 28.060 ns - 45 cycles(tsc) 11.454 ns - improved 59.8%
2 - 102 cycles(tsc) 25.735 ns - 28 cycles(tsc) 7.038 ns - improved 72.5%
3 - 98 cycles(tsc) 24.666 ns - 22 cycles(tsc) 5.518 ns - improved 77.6%
4 - 97 cycles(tsc) 24.437 ns - 18 cycles(tsc) 4.746 ns - improved 81.4%
8 - 95 cycles(tsc) 23.875 ns - 15 cycles(tsc) 3.782 ns - improved 84.2%
16 - 95 cycles(tsc) 24.002 ns - 14 cycles(tsc) 3.621 ns - improved 85.3%
30 - 95 cycles(tsc) 23.893 ns - 14 cycles(tsc) 3.577 ns - improved 85.3%
32 - 95 cycles(tsc) 23.875 ns - 13 cycles(tsc) 3.402 ns - improved 86.3%
34 - 95 cycles(tsc) 23.794 ns - 13 cycles(tsc) 3.385 ns - improved 86.3%
48 - 94 cycles(tsc) 23.721 ns - 14 cycles(tsc) 3.550 ns - improved 85.1%
64 - 94 cycles(tsc) 23.608 ns - 13 cycles(tsc) 3.427 ns - improved 86.2%
128 - 96 cycles(tsc) 24.045 ns - 15 cycles(tsc) 3.936 ns - improved 84.4%
158 - 95 cycles(tsc) 23.886 ns - 17 cycles(tsc) 4.289 ns - improved 82.1%
250 - 97 cycles(tsc) 24.358 ns - 17 cycles(tsc) 4.329 ns - improved 82.5%

These results are likely better because there are now fewer instructions
in the tighter bulk loops (mostly in kmem_cache_alloc_bulk).
SLUB itself might also have improved at bit since old patch. -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f45.google.com (mail-qg0-f45.google.com [209.85.192.45]) by kanga.kvack.org (Postfix) with ESMTP id 572DF6B0258 for ; Fri, 4 Dec 2015 05:16:44 -0500 (EST) Received: by qgcc31 with SMTP id c31so83713007qgc.3 for ; Fri, 04 Dec 2015 02:16:44 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id u102si11399043qge.90.2015.12.04.02.16.43 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 04 Dec 2015 02:16:43 -0800 (PST) Date: Fri, 4 Dec 2015 11:16:38 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH 1/2] slab: implement bulk alloc in SLAB allocator Message-ID: <20151204111638.2c581a9d@redhat.com> In-Reply-To: <20151203155637.3589.62609.stgit@firesoul> References: <20151203155600.3589.86568.stgit@firesoul> <20151203155637.3589.62609.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com On Thu, 03 Dec 2015 16:57:31 +0100 Jesper Dangaard Brouer wrote: > diff --git a/mm/slab.c b/mm/slab.c > index 4765c97ce690..3354489547ec 100644 > --- a/mm/slab.c > +++ b/mm/slab.c > @@ -3420,9 +3420,59 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) > EXPORT_SYMBOL(kmem_cache_free_bulk); > > int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, > - void **p) > + void **p) > { > - return __kmem_cache_alloc_bulk(s, flags, size, p); > + size_t i; > + > + flags &= gfp_allowed_mask; > + lockdep_trace_alloc(flags); > + > + if (slab_should_failslab(s, flags)) > + return 0; > + > + s = memcg_kmem_get_cache(s, flags); > + > + cache_alloc_debugcheck_before(s, flags); > + > + local_irq_disable(); > + for (i = 0; i < size; i++) { > + void *objp = __do_cache_alloc(s, flags); > + > + // this call could be done outside IRQ disabled section > + objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_); Profiling with SLAB mem debugging on (CONFIG_DEBUG_SLAB=y), this call cache_alloc_debugcheck_after() is the most expensive call, well actually the underlying check_poison_obj() call. Thus, it might be a good idea to, place it outside the IRQ disabled section? It might make the code look a little strange, but I can try and we can see how ugly that makes the code look (and the compiler still have to be able to remove the code in-case no debugging enabled). > + > + if (unlikely(!objp)) > + goto error; > + > + prefetchw(objp); > + p[i] = objp; > + } > + local_irq_enable(); > + > + /* Kmemleak and kmemcheck outside IRQ disabled section */ > + for (i = 0; i < size; i++) { > + void *x = p[i]; > + > + kmemleak_alloc_recursive(x, s->object_size, 1, s->flags, flags); > + kmemcheck_slab_alloc(s, flags, x, s->object_size); > + } > + > + /* Clear memory outside IRQ disabled section */ > + if (unlikely(flags & __GFP_ZERO)) > + for (i = 0; i < size; i++) > + memset(p[i], 0, s->object_size); [...] 
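
To illustrate the suggestion above, here is a rough sketch of how the
allocation loop could defer the debug check until after IRQs are
re-enabled. This is only a sketch of the idea using the identifiers from
the RFC patch, not the actual follow-up patch:

	local_irq_disable();
	for (i = 0; i < size; i++) {
		void *objp = __do_cache_alloc(s, flags);

		if (unlikely(!objp))
			goto error;
		prefetchw(objp);
		p[i] = objp;
	}
	local_irq_enable();

	/* Debug check moved outside the IRQ disabled section; with
	 * CONFIG_DEBUG_SLAB=n cache_alloc_debugcheck_after() is a no-op,
	 * so the compiler can remove this loop entirely.
	 */
	for (i = 0; i < size; i++)
		p[i] = cache_alloc_debugcheck_after(s, flags, p[i], _RET_IP_);
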
-- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f174.google.com (mail-io0-f174.google.com [209.85.223.174]) by kanga.kvack.org (Postfix) with ESMTP id 3D5616B0258 for ; Fri, 4 Dec 2015 12:10:07 -0500 (EST) Received: by ioc74 with SMTP id 74so122244750ioc.2 for ; Fri, 04 Dec 2015 09:10:07 -0800 (PST) Received: from resqmta-ch2-11v.sys.comcast.net (resqmta-ch2-11v.sys.comcast.net. [2001:558:fe21:29:69:252:207:43]) by mx.google.com with ESMTPS id m8si7218283igx.42.2015.12.04.09.10.06 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Fri, 04 Dec 2015 09:10:06 -0800 (PST) Date: Fri, 4 Dec 2015 11:10:05 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH 1/2] slab: implement bulk alloc in SLAB allocator In-Reply-To: <20151203155637.3589.62609.stgit@firesoul> Message-ID: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155637.3589.62609.stgit@firesoul> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Thu, 3 Dec 2015, Jesper Dangaard Brouer wrote: > + size_t i; > + > + flags &= gfp_allowed_mask; > + lockdep_trace_alloc(flags); > + > + if (slab_should_failslab(s, flags)) > + return 0; Ok here is an overlap with slub;'s pre_alloc_hook() and that stuff is really not allocator specific. Could make it generic and move the hook calls into slab_common.c/slab.h? That also gives you the opportunity to get the array option in there. > + s = memcg_kmem_get_cache(s, flags); > + > + cache_alloc_debugcheck_before(s, flags); > + > + local_irq_disable(); > + for (i = 0; i < size; i++) { > + void *objp = __do_cache_alloc(s, flags); > + > + // this call could be done outside IRQ disabled section > + objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_); > + > + if (unlikely(!objp)) > + goto error; > + > + prefetchw(objp); Is the prefetch really useful here? Only if these objects are immediately used I would think. > + p[i] = objp; > + } > + local_irq_enable(); > + > + /* Kmemleak and kmemcheck outside IRQ disabled section */ > + for (i = 0; i < size; i++) { > + void *x = p[i]; > + > + kmemleak_alloc_recursive(x, s->object_size, 1, s->flags, flags); > + kmemcheck_slab_alloc(s, flags, x, s->object_size); > + } > + > + /* Clear memory outside IRQ disabled section */ > + if (unlikely(flags & __GFP_ZERO)) > + for (i = 0; i < size; i++) > + memset(p[i], 0, s->object_size); Maybe make this one loop instead of two? > +// FIXME: Trace call missing... should we create a bulk variant? > +/* Like: > + trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, flags); > +*/ That trace call could be created when you do the genericization of the hooks() which also involve debugging stuff. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org

From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mail-io0-f169.google.com (mail-io0-f169.google.com [209.85.223.169]) by kanga.kvack.org (Postfix) with ESMTP id 9C7EA6B025C for ; Fri, 4 Dec 2015 12:17:04 -0500 (EST)
Received: by ioir85 with SMTP id r85so122728804ioi.1 for ; Fri, 04 Dec 2015 09:17:04 -0800 (PST)
Received: from resqmta-ch2-07v.sys.comcast.net (resqmta-ch2-07v.sys.comcast.net. [2001:558:fe21:29:69:252:207:39]) by mx.google.com with ESMTPS id c196si21544541ioe.212.2015.12.04.09.17.04 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Fri, 04 Dec 2015 09:17:04 -0800 (PST)
Date: Fri, 4 Dec 2015 11:17:02 -0600 (CST)
From: Christoph Lameter
Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator
In-Reply-To: <20151203155736.3589.67424.stgit@firesoul>
Message-ID:
References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul>
Content-Type: text/plain; charset=US-ASCII
Sender: owner-linux-mm@kvack.org
List-ID:
To: Jesper Dangaard Brouer
Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton

On Thu, 3 Dec 2015, Jesper Dangaard Brouer wrote:

> +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)

orig_s? That's strange

> +{
> + struct kmem_cache *s;

s?

> + size_t i;
> +
> + local_irq_disable();
> + for (i = 0; i < size; i++) {
> + void *objp = p[i];
> +
> + s = cache_from_obj(orig_s, objp);

Does this support freeing objects from a set of different caches? Otherwise
there needs to be a check in here that the objects come from the same cache.

> +
> + debug_check_no_locks_freed(objp, s->object_size);
> + if (!(s->flags & SLAB_DEBUG_OBJECTS))
> + debug_check_no_obj_freed(objp, s->object_size);
> +
> + __cache_free(s, objp, _RET_IP_);

The function could be further optimized if you take the code from
__cache_free() and move stuff outside of the loop. The alien cache check
f.e. and the Pfmemalloc checking may be moved out. The call to
virt_to_head_page() may also be avoided if the objects are on the same page
as the last. So you may be able to avoid function calls in the fastpath of
the inner loop, which may accelerate frees significantly.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org.
For more info on Linux MM, see: http://www.linux-mm.org/ .
Don't email: email@kvack.org

From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mail-qk0-f182.google.com (mail-qk0-f182.google.com [209.85.220.182]) by kanga.kvack.org (Postfix) with ESMTP id 4A2806B0257 for ; Mon, 7 Dec 2015 05:21:05 -0500 (EST)
Received: by qkcb135 with SMTP id b135so20732053qkc.3 for ; Mon, 07 Dec 2015 02:21:05 -0800 (PST)
Received: from mx1.redhat.com (mx1.redhat.com.
[209.132.183.28]) by mx.google.com with ESMTPS id 61si22132133qgz.37.2015.12.07.02.21.04 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 07 Dec 2015 02:21:04 -0800 (PST) Date: Mon, 7 Dec 2015 11:20:57 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH 1/2] slab: implement bulk alloc in SLAB allocator Message-ID: <20151207112057.1566dd5c@redhat.com> In-Reply-To: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155637.3589.62609.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com On Fri, 4 Dec 2015 11:10:05 -0600 (CST) Christoph Lameter wrote: > On Thu, 3 Dec 2015, Jesper Dangaard Brouer wrote: > > > + size_t i; > > + > > + flags &= gfp_allowed_mask; > > + lockdep_trace_alloc(flags); > > + > > + if (slab_should_failslab(s, flags)) > > + return 0; > > Ok here is an overlap with slub;'s pre_alloc_hook() and that stuff is > really not allocator specific. Could make it generic and move the hook > calls into slab_common.c/slab.h? That also gives you the opportunity to > get the array option in there. Perhaps we could consolidate some code here. (This would also help code SLAB elimination between slab_alloc_node() and slab_alloc()) A question: SLAB takes the "boot_cache" into account before calling should_failslab(), but SLUB does not. Should we also do so for SLUB? SLAB code: static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) { if (unlikely(cachep == kmem_cache)) return false; return should_failslab(cachep->object_size, flags, cachep->flags); } > > + s = memcg_kmem_get_cache(s, flags); > > + > > + cache_alloc_debugcheck_before(s, flags); > > + > > + local_irq_disable(); > > + for (i = 0; i < size; i++) { > > + void *objp = __do_cache_alloc(s, flags); > > + > > + // this call could be done outside IRQ disabled section > > + objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_); > > + > > + if (unlikely(!objp)) > > + goto error; > > + > > + prefetchw(objp); > > Is the prefetch really useful here? Only if these objects are immediately > used I would think. I primarily have prefetch here because I'm mimicking the behavior of slab_alloc(). We can remove it here. > > + p[i] = objp; > > + } > > + local_irq_enable(); > > + > > + /* Kmemleak and kmemcheck outside IRQ disabled section */ > > + for (i = 0; i < size; i++) { > > + void *x = p[i]; > > + > > + kmemleak_alloc_recursive(x, s->object_size, 1, s->flags, flags); > > + kmemcheck_slab_alloc(s, flags, x, s->object_size); > > + } > > + > > + /* Clear memory outside IRQ disabled section */ > > + if (unlikely(flags & __GFP_ZERO)) > > + for (i = 0; i < size; i++) > > + memset(p[i], 0, s->object_size); > > Maybe make this one loop instead of two? I kept it two loops to get the advantage of only needing to check the __GFP_ZERO flag once. (Plus, in case debugging is enabled, we might get a small advantage of better instruction and pipeline usage, as erms memset rep-stos operations flush the CPU pipeline). I also wrote it this way to make it more obvious what code I want the compiler to generate. If no debugging is enabled to top loop should be compiled out. 
If I didn't pull out the flag check, the compiler should be smart enough to
realize this optimization itself, but it can only do so in case the other
code compiles out (the case where the loops were combined). Thus, the
compiler might already do this optimization, but I'm making it explicit.

Besides, maybe we can consolidate the first loop and replace it with
slab_post_alloc_hook()?

> > +// FIXME: Trace call missing... should we create a bulk variant?
> > +/* Like:
> > + trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> > s->size, flags); +*/
>
> That trace call could be created when you do the genericization of the
> hooks() which also involve debugging stuff.

Should we call trace_kmem_cache_alloc() for each object? Or should we
create trace calls that are specific to bulking? (which would allow us to
study/record bulk sizes)

--
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org.
For more info on Linux MM, see: http://www.linux-mm.org/ .
Don't email: email@kvack.org

From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mail-qk0-f179.google.com (mail-qk0-f179.google.com [209.85.220.179]) by kanga.kvack.org (Postfix) with ESMTP id 149E86B0257 for ; Mon, 7 Dec 2015 06:25:56 -0500 (EST)
Received: by qkfb125 with SMTP id b125so1207108qkf.2 for ; Mon, 07 Dec 2015 03:25:55 -0800 (PST)
Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id g189si26876305qhd.50.2015.12.07.03.25.55 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 07 Dec 2015 03:25:55 -0800 (PST)
Date: Mon, 7 Dec 2015 12:25:49 +0100
From: Jesper Dangaard Brouer
Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator
Message-ID: <20151207122549.109e82db@redhat.com>
In-Reply-To:
References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul>
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Sender: owner-linux-mm@kvack.org
List-ID:
To: Christoph Lameter
Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com

On Fri, 4 Dec 2015 11:17:02 -0600 (CST) Christoph Lameter wrote:

> On Thu, 3 Dec 2015, Jesper Dangaard Brouer wrote:
>
> > +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
>
> orig_s? Thats strange
>
> > +{
> > + struct kmem_cache *s;
>
> s?

The "s" comes from slub.c, where the code uses "struct kmem_cache *s"
everywhere.

> > + size_t i;
> > +
> > + local_irq_disable();
> > + for (i = 0; i < size; i++) {
> > + void *objp = p[i];
> > +
> > + s = cache_from_obj(orig_s, objp);
>
> Does this support freeing objects from a set of different caches?

This is for supporting memcg (CONFIG_MEMCG_KMEM).

Quoting from commit 033745189b1b ("slub: add missing kmem cgroup
support to kmem_cache_free_bulk"):

Incoming bulk free objects can belong to different kmem cgroups, and
object free call can happen at a later point outside memcg context. Thus,
we need to keep the orig kmem_cache, to correctly verify if a memcg object
match against its "root_cache" (s->memcg_params.root_cache).
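
For reference, the verification in question happens inside cache_from_obj().
A paraphrased sketch of that mm/slab.h helper from around this time (not a
verbatim copy; details such as the debug flag name may differ):

static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;
	struct page *page;

	/* Fast path: no kmemcg and no debug checking -> trust the caller */
	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
		return s;

	/* Map object -> slab page -> the cache it was really allocated from */
	page = virt_to_head_page(x);
	cachep = page->slab_cache;

	/* OK if the passed-in cache is the object's cache, or its memcg root_cache */
	if (slab_equal_or_root(cachep, s))
		return cachep;

	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
	       __func__, s->name, cachep->name);
	WARN_ON_ONCE(1);
	return s;
}
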
> > + > > + debug_check_no_locks_freed(objp, s->object_size); > > + if (!(s->flags & SLAB_DEBUG_OBJECTS)) > > + debug_check_no_obj_freed(objp, s->object_size); > > + > > + __cache_free(s, objp, _RET_IP_); > > The function could be further optimized if you take the code from > __cache_free() and move stuff outside of the loop. The alien cache check > f.e. and the Pfmemalloc checking may be moved out. The call to > virt_to_head page may also be avoided if the objects are on the same > page as the last. So you may be able to function calls for the > fastpath in the inner loop which may accelerate frees significantly. Interesting! Maybe we can do a followup patch to pullout last optimization's. Right now I'm mostly interested in correctness and clean code. And we are already looking at a 80% speedup with these patches ;-) -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f169.google.com (mail-io0-f169.google.com [209.85.223.169]) by kanga.kvack.org (Postfix) with ESMTP id 4370A6B0257 for ; Mon, 7 Dec 2015 09:57:54 -0500 (EST) Received: by ioir85 with SMTP id r85so183284842ioi.1 for ; Mon, 07 Dec 2015 06:57:54 -0800 (PST) Received: from resqmta-ch2-08v.sys.comcast.net (resqmta-ch2-08v.sys.comcast.net. [2001:558:fe21:29:69:252:207:40]) by mx.google.com with ESMTPS id p19si21988503igs.16.2015.12.07.06.57.53 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Mon, 07 Dec 2015 06:57:53 -0800 (PST) Date: Mon, 7 Dec 2015 08:57:52 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH 1/2] slab: implement bulk alloc in SLAB allocator In-Reply-To: <20151207112057.1566dd5c@redhat.com> Message-ID: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155637.3589.62609.stgit@firesoul> <20151207112057.1566dd5c@redhat.com> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Mon, 7 Dec 2015, Jesper Dangaard Brouer wrote: > A question: SLAB takes the "boot_cache" into account before calling > should_failslab(), but SLUB does not. Should we also do so for SLUB? Not necessary in SLUB. > Besides, maybe we can consolidate first loop and replace it with > slab_post_alloc_hook()? Ok. > Or should we create trace calls that are specific to bulk'ing? > (which would allow us to study/record bulk sizes) I would prefer that. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f170.google.com (mail-io0-f170.google.com [209.85.223.170]) by kanga.kvack.org (Postfix) with ESMTP id 1B12D6B0257 for ; Mon, 7 Dec 2015 09:59:27 -0500 (EST) Received: by ioc74 with SMTP id 74so183093494ioc.2 for ; Mon, 07 Dec 2015 06:59:26 -0800 (PST) Received: from resqmta-ch2-01v.sys.comcast.net (resqmta-ch2-01v.sys.comcast.net. 
[2001:558:fe21:29:69:252:207:33]) by mx.google.com with ESMTPS id i80si5524137ioi.14.2015.12.07.06.59.26 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Mon, 07 Dec 2015 06:59:26 -0800 (PST) Date: Mon, 7 Dec 2015 08:59:25 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator In-Reply-To: <20151207122549.109e82db@redhat.com> Message-ID: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Mon, 7 Dec 2015, Jesper Dangaard Brouer wrote: > > s? > > The "s" comes from the slub.c code uses "struct kmem_cache *s" everywhere. Ok then use it. Why is there an orig_s here. > > > + > > > + local_irq_disable(); > > > + for (i = 0; i < size; i++) { > > > + void *objp = p[i]; > > > + > > > + s = cache_from_obj(orig_s, objp); > > > > Does this support freeing objects from a set of different caches? > > This is for supporting memcg (CONFIG_MEMCG_KMEM). > > Quoting from commit 033745189b1b ("slub: add missing kmem cgroup > support to kmem_cache_free_bulk"): > > Incoming bulk free objects can belong to different kmem cgroups, and > object free call can happen at a later point outside memcg context. Thus, > we need to keep the orig kmem_cache, to correctly verify if a memcg object > match against its "root_cache" (s->memcg_params.root_cache). Where is that verification? This looks like SLAB would support freeing objects from different caches. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f41.google.com (mail-qg0-f41.google.com [209.85.192.41]) by kanga.kvack.org (Postfix) with ESMTP id 4406D6B0038 for ; Tue, 8 Dec 2015 08:39:22 -0500 (EST) Received: by qgeb1 with SMTP id b1so15648488qge.1 for ; Tue, 08 Dec 2015 05:39:22 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id t64si3410117qht.71.2015.12.08.05.39.21 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 05:39:21 -0800 (PST) Date: Tue, 8 Dec 2015 14:39:15 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator Message-ID: <20151208143915.0ffbdf51@redhat.com> In-Reply-To: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com (To Vladimir can you comment on memcg?) On Mon, 7 Dec 2015 08:59:25 -0600 (CST) Christoph Lameter wrote: > On Mon, 7 Dec 2015, Jesper Dangaard Brouer wrote: > > > > > + > > > > + local_irq_disable(); > > > > + for (i = 0; i < size; i++) { > > > > + void *objp = p[i]; > > > > + > > > > + s = cache_from_obj(orig_s, objp); > > > > > > Does this support freeing objects from a set of different caches? > > > > This is for supporting memcg (CONFIG_MEMCG_KMEM). 
> > > > Quoting from commit 033745189b1b ("slub: add missing kmem cgroup > > support to kmem_cache_free_bulk"): > > > > Incoming bulk free objects can belong to different kmem cgroups, and > > object free call can happen at a later point outside memcg context. Thus, > > we need to keep the orig kmem_cache, to correctly verify if a memcg object > > match against its "root_cache" (s->memcg_params.root_cache). > > Where is that verification? This looks like SLAB would support freeing > objects from different caches. This is for supporting CONFIG_MEMCG_KMEM, thus I would like Vladimir input on this, as I don't know enough about memcg.... -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ig0-f171.google.com (mail-ig0-f171.google.com [209.85.213.171]) by kanga.kvack.org (Postfix) with ESMTP id 88AA16B0254 for ; Tue, 8 Dec 2015 09:11:25 -0500 (EST) Received: by igcph11 with SMTP id ph11so96425694igc.1 for ; Tue, 08 Dec 2015 06:11:25 -0800 (PST) Received: from resqmta-ch2-08v.sys.comcast.net (resqmta-ch2-08v.sys.comcast.net. [2001:558:fe21:29:69:252:207:40]) by mx.google.com with ESMTPS id 21si5588188iod.72.2015.12.08.06.11.24 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Tue, 08 Dec 2015 06:11:24 -0800 (PST) Date: Tue, 8 Dec 2015 08:11:23 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator In-Reply-To: <20151208143915.0ffbdf51@redhat.com> Message-ID: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> <20151208143915.0ffbdf51@redhat.com> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote: > > > > Does this support freeing objects from a set of different caches? > > > > > > This is for supporting memcg (CONFIG_MEMCG_KMEM). > > > > > > Quoting from commit 033745189b1b ("slub: add missing kmem cgroup > > > support to kmem_cache_free_bulk"): > > > > > > Incoming bulk free objects can belong to different kmem cgroups, and > > > object free call can happen at a later point outside memcg context. Thus, > > > we need to keep the orig kmem_cache, to correctly verify if a memcg object > > > match against its "root_cache" (s->memcg_params.root_cache). > > > > Where is that verification? This looks like SLAB would support freeing > > objects from different caches. > > This is for supporting CONFIG_MEMCG_KMEM, thus I would like Vladimir > input on this, as I don't know enough about memcg.... Oww... So far objects passed to bulk free must be confined to the *same* slab cache. Not the same "root_cache". Are we changing that? -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lb0-f172.google.com (mail-lb0-f172.google.com [209.85.217.172]) by kanga.kvack.org (Postfix) with ESMTP id 281966B0254 for ; Tue, 8 Dec 2015 09:12:33 -0500 (EST) Received: by lbblt2 with SMTP id lt2so11749895lbb.3 for ; Tue, 08 Dec 2015 06:12:32 -0800 (PST) Received: from relay.parallels.com (relay.parallels.com. [195.214.232.42]) by mx.google.com with ESMTPS id d190si1779437lfb.135.2015.12.08.06.12.31 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 06:12:31 -0800 (PST) Date: Tue, 8 Dec 2015 17:12:11 +0300 From: Vladimir Davydov Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator Message-ID: <20151208141211.GH11488@esperanza> References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Disposition: inline In-Reply-To: Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: Jesper Dangaard Brouer , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton On Mon, Dec 07, 2015 at 08:59:25AM -0600, Christoph Lameter wrote: > On Mon, 7 Dec 2015, Jesper Dangaard Brouer wrote: > > > > s? > > > > The "s" comes from the slub.c code uses "struct kmem_cache *s" everywhere. > > Ok then use it. Why is there an orig_s here. > > > > > + > > > > + local_irq_disable(); > > > > + for (i = 0; i < size; i++) { > > > > + void *objp = p[i]; > > > > + > > > > + s = cache_from_obj(orig_s, objp); > > > > > > Does this support freeing objects from a set of different caches? > > > > This is for supporting memcg (CONFIG_MEMCG_KMEM). > > > > Quoting from commit 033745189b1b ("slub: add missing kmem cgroup > > support to kmem_cache_free_bulk"): > > > > Incoming bulk free objects can belong to different kmem cgroups, and > > object free call can happen at a later point outside memcg context. Thus, > > we need to keep the orig kmem_cache, to correctly verify if a memcg object > > match against its "root_cache" (s->memcg_params.root_cache). > > Where is that verification? This looks like SLAB would support freeing > objects from different caches. As Jesper explained to me in the thread regarding the SLUB version of this API (see http://www.spinics.net/lists/linux-mm/msg96728.html), objects allocated by kmem_cache_alloc_bulk() will not necessarily be freed by kmem_cache_free_bulk() and vice-versa. For instance, it is possible that a bunch of objects allocated using kmem_cache_alloc() will be freed with a single kmem_cache_free_bulk() call. As a result, we can't prevent users of the API from doing something like this: 1. Multiple producers allocate objects of the same kind using kmem_cache_alloc() and pass them to the consumer 2. The consumer processes the objects and frees as many as possible using kmem_cache_bulk() If producers are represented by different processes, they can belong to different memory cgroups, so that objects passed to the consumer will come from different kmem caches (per memcg caches), although they are all of the same kind. This means, we must call cache_from_obj() on each object passed to kmem_cache_free_bulk() in order to free each object to the cache it was allocated from. Thanks, Vladimir -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ig0-f169.google.com (mail-ig0-f169.google.com [209.85.213.169]) by kanga.kvack.org (Postfix) with ESMTP id BEE006B0254 for ; Tue, 8 Dec 2015 09:15:22 -0500 (EST) Received: by igbxm8 with SMTP id xm8so17760389igb.1 for ; Tue, 08 Dec 2015 06:15:22 -0800 (PST) Received: from resqmta-ch2-04v.sys.comcast.net (resqmta-ch2-04v.sys.comcast.net. [2001:558:fe21:29:69:252:207:36]) by mx.google.com with ESMTPS id rt1si5608425igb.69.2015.12.08.06.15.22 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Tue, 08 Dec 2015 06:15:22 -0800 (PST) Date: Tue, 8 Dec 2015 08:15:21 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator In-Reply-To: <20151208141211.GH11488@esperanza> Message-ID: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> <20151208141211.GH11488@esperanza> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Vladimir Davydov Cc: Jesper Dangaard Brouer , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton On Tue, 8 Dec 2015, Vladimir Davydov wrote: > If producers are represented by different processes, they can belong to > different memory cgroups, so that objects passed to the consumer will > come from different kmem caches (per memcg caches), although they are > all of the same kind. This means, we must call cache_from_obj() on each > object passed to kmem_cache_free_bulk() in order to free each object to > the cache it was allocated from. The we should change the API so that we do not specify kmem_cache on bulk free. Do it like kfree without any cache spec. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lb0-f172.google.com (mail-lb0-f172.google.com [209.85.217.172]) by kanga.kvack.org (Postfix) with ESMTP id 407166B0254 for ; Tue, 8 Dec 2015 09:56:53 -0500 (EST) Received: by lbblt2 with SMTP id lt2so12516267lbb.3 for ; Tue, 08 Dec 2015 06:56:52 -0800 (PST) Received: from relay.parallels.com (relay.parallels.com. [195.214.232.42]) by mx.google.com with ESMTPS id r13si1879575lfe.161.2015.12.08.06.56.51 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 06:56:51 -0800 (PST) Date: Tue, 8 Dec 2015 17:56:35 +0300 From: Vladimir Davydov Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator Message-ID: <20151208145635.GI11488@esperanza> References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> <20151208141211.GH11488@esperanza> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Disposition: inline In-Reply-To: Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: Jesper Dangaard Brouer , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton On Tue, Dec 08, 2015 at 08:15:21AM -0600, Christoph Lameter wrote: > On Tue, 8 Dec 2015, Vladimir Davydov wrote: > > > If producers are represented by different processes, they can belong to > > different memory cgroups, so that objects passed to the consumer will > > come from different kmem caches (per memcg caches), although they are > > all of the same kind. 
This means, we must call cache_from_obj() on each > > object passed to kmem_cache_free_bulk() in order to free each object to > > the cache it was allocated from. > > The we should change the API so that we do not specify kmem_cache on bulk > free. Do it like kfree without any cache spec. > Don't think so, because AFAIU the whole kmem_cache_free_bulk optimization comes from the assumption that objects passed to it are likely to share the same slab page. So they must be of the same kind, otherwise no optimization would be possible and the function wouldn't perform any better than calling kfree directly in a for-loop. By requiring the caller to specify the cache we emphasize this. Enabling kmemcg might break the assumption and neglect the benefit of using kmem_cache_free_bulk, but it is to be expected, because kmem accounting does not come for free. Callers who do care about the performance and count every cpu cycle will surely disable it, in which case cache_from_obj() will be a no-op and kmem_cache_free_bulk will bear fruit. Thanks, Vladimir -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f47.google.com (mail-qg0-f47.google.com [209.85.192.47]) by kanga.kvack.org (Postfix) with ESMTP id 4E3E76B0038 for ; Tue, 8 Dec 2015 10:14:04 -0500 (EST) Received: by qgcc31 with SMTP id c31so20011942qgc.3 for ; Tue, 08 Dec 2015 07:14:04 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id h82si3746533qhd.119.2015.12.08.07.14.03 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 07:14:03 -0800 (PST) Date: Tue, 8 Dec 2015 16:13:57 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator Message-ID: <20151208161357.47323842@redhat.com> In-Reply-To: <20151208145635.GI11488@esperanza> References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> <20151208141211.GH11488@esperanza> <20151208145635.GI11488@esperanza> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Vladimir Davydov Cc: Christoph Lameter , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com On Tue, 8 Dec 2015 17:56:35 +0300 Vladimir Davydov wrote: > On Tue, Dec 08, 2015 at 08:15:21AM -0600, Christoph Lameter wrote: > > On Tue, 8 Dec 2015, Vladimir Davydov wrote: > > > > > If producers are represented by different processes, they can belong to > > > different memory cgroups, so that objects passed to the consumer will > > > come from different kmem caches (per memcg caches), although they are > > > all of the same kind. This means, we must call cache_from_obj() on each > > > object passed to kmem_cache_free_bulk() in order to free each object to > > > the cache it was allocated from. > > > > The we should change the API so that we do not specify kmem_cache on bulk > > free. Do it like kfree without any cache spec. > > > > Don't think so, because AFAIU the whole kmem_cache_free_bulk > optimization comes from the assumption that objects passed to it are > likely to share the same slab page. 
So they must be of the same kind, > otherwise no optimization would be possible and the function wouldn't > perform any better than calling kfree directly in a for-loop. By > requiring the caller to specify the cache we emphasize this. I agree with Vladimir here. The performance gain for SLUB is especially depended on that objects are likely to share the same slab page. It might not hurt SLAB as much. > Enabling kmemcg might break the assumption and neglect the benefit of > using kmem_cache_free_bulk, but it is to be expected, because kmem > accounting does not come for free. Callers who do care about the > performance and count every cpu cycle will surely disable it, in which > case cache_from_obj() will be a no-op and kmem_cache_free_bulk will bear > fruit. True, compiler does realize, when CONFIG_MEMCG_KMEM is disabled, that it can optimize the call to cache_from_obj() away. -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ob0-f173.google.com (mail-ob0-f173.google.com [209.85.214.173]) by kanga.kvack.org (Postfix) with ESMTP id F270D82F65 for ; Tue, 8 Dec 2015 10:32:34 -0500 (EST) Received: by obcse5 with SMTP id se5so14922132obc.3 for ; Tue, 08 Dec 2015 07:32:34 -0800 (PST) Received: from resqmta-po-05v.sys.comcast.net (resqmta-po-05v.sys.comcast.net. [2001:558:fe16:19:96:114:154:164]) by mx.google.com with ESMTPS id l3si3343747oia.102.2015.12.08.07.32.33 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Tue, 08 Dec 2015 07:32:34 -0800 (PST) Date: Tue, 8 Dec 2015 09:32:32 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH 2/2] slab: implement bulk free in SLAB allocator In-Reply-To: <20151208145635.GI11488@esperanza> Message-ID: References: <20151203155600.3589.86568.stgit@firesoul> <20151203155736.3589.67424.stgit@firesoul> <20151207122549.109e82db@redhat.com> <20151208141211.GH11488@esperanza> <20151208145635.GI11488@esperanza> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Vladimir Davydov Cc: Jesper Dangaard Brouer , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton On Tue, 8 Dec 2015, Vladimir Davydov wrote: > Don't think so, because AFAIU the whole kmem_cache_free_bulk > optimization comes from the assumption that objects passed to it are > likely to share the same slab page. So they must be of the same kind, > otherwise no optimization would be possible and the function wouldn't > perform any better than calling kfree directly in a for-loop. By > requiring the caller to specify the cache we emphasize this. This is likely but an implementation specific feature that only SLUB can exploit. However, one page can only contain objects from the same slab page. So checking the slab cache too is irrelevant. We could take it out. If the logic finds two objects that share the same page then they will be from the same slab cache. The checking of the cache is just not necessary and actually increases the code size and therefore reduces performance. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org

From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mail-qg0-f42.google.com (mail-qg0-f42.google.com [209.85.192.42]) by kanga.kvack.org (Postfix) with ESMTP id 386A66B0257 for ; Tue, 8 Dec 2015 11:18:56 -0500 (EST)
Received: by qgec40 with SMTP id c40so23104927qge.2 for ; Tue, 08 Dec 2015 08:18:56 -0800 (PST)
Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id 72si4036697qkw.84.2015.12.08.08.18.25 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:25 -0800 (PST)
Subject: [RFC PATCH V2 0/9] slab: cleanup and bulk API for SLAB
From: Jesper Dangaard Brouer
Date: Tue, 08 Dec 2015 17:18:22 +0100
Message-ID: <20151208161751.21945.53936.stgit@firesoul>
In-Reply-To: <20151203155600.3589.86568.stgit@firesoul>
References: <20151203155600.3589.86568.stgit@firesoul>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Sender: owner-linux-mm@kvack.org
List-ID:
To: linux-mm@kvack.org
Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton

RFC series for code sharing between SLUB and SLAB, and deduplication of
code in SLAB, as Christoph requested. See questions in code.

Plus adding a bulk API implementation for SLAB.

---

Jesper Dangaard Brouer (9):
      mm/slab: move SLUB alloc hooks to common mm/slab.h
      mm: generalize avoid fault-inject on bootstrap kmem_cache
      slab: use slab_pre_alloc_hook in SLAB allocator
      mm: kmemcheck skip object if slab allocation failed
      slab: use slab_post_alloc_hook in SLAB allocator
      slab: implement bulk alloc in SLAB allocator
      slab: avoid running debug SLAB code with IRQs disabled for alloc_bulk
      slab: implement bulk free in SLAB allocator
      slab: annotate code to generate more compact asm code

 mm/failslab.c  |   2 +
 mm/kmemcheck.c |   3 +
 mm/slab.c      | 122 +++++++++++++++++++++++++++++++++++---------------------
 mm/slab.h      |  92 ++++++++++++++++++++++++++++++++++++++++++
 mm/slub.c      |  54 -------------------------
 5 files changed, 174 insertions(+), 99 deletions(-)

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org.
For more info on Linux MM, see: http://www.linux-mm.org/ .
Don't email: email@kvack.org

From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mail-qg0-f48.google.com (mail-qg0-f48.google.com [209.85.192.48]) by kanga.kvack.org (Postfix) with ESMTP id 7FF076B0258 for ; Tue, 8 Dec 2015 11:19:02 -0500 (EST)
Received: by qgec40 with SMTP id c40so23109290qge.2 for ; Tue, 08 Dec 2015 08:19:02 -0800 (PST)
Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id c1si4083725qkh.3.2015.12.08.08.18.30 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:30 -0800 (PST)
Subject: [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h
From: Jesper Dangaard Brouer
Date: Tue, 08 Dec 2015 17:18:27 +0100
Message-ID: <20151208161827.21945.25463.stgit@firesoul>
In-Reply-To: <20151208161751.21945.53936.stgit@firesoul>
References: <20151208161751.21945.53936.stgit@firesoul>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Sender: owner-linux-mm@kvack.org
List-ID:
To: linux-mm@kvack.org
Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton

First step towards sharing alloc_hooks between SLUB and SLAB allocators.
Move the SLUB allocators *_alloc_hook to the common mm/slab.h for internal slab definitions. Signed-off-by: Jesper Dangaard Brouer --- mm/slab.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/slub.c | 54 ---------------------------------------------- 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 7b6087197997..588bc5281fc8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -38,6 +38,17 @@ struct kmem_cache { #endif #include +#include +/* Q: Howto handle this nicely? below includes are needed for alloc hooks + * + * e.g. mm/mempool.c and mm/slab_common.c does not include kmemcheck.h + * including it here solves the probem, but should they include it + * themselves? + */ +#include +// Below includes are already included in other users of "mm/slab.h" +//#include +//#include /* * State of the slab allocator. @@ -319,6 +330,66 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) return s; } +#ifdef CONFIG_SLUB +static inline size_t slab_ksize(const struct kmem_cache *s) +{ +#ifdef CONFIG_SLUB_DEBUG + /* + * Debugging requires use of the padding between object + * and whatever may come after it. + */ + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) + return s->object_size; +#endif + /* + * If we have the need to store the freelist pointer + * back there or track user information then we can + * only use the space before that information. + */ + if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) + return s->inuse; + /* + * Else we can use all the padding etc for the allocation + */ + return s->size; +} +#else /* !CONFIG_SLUB */ +static inline size_t slab_ksize(const struct kmem_cache *s) +{ + return s->object_size; +} +#endif + +static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, + gfp_t flags) +{ + flags &= gfp_allowed_mask; + lockdep_trace_alloc(flags); + might_sleep_if(gfpflags_allow_blocking(flags)); + + if (should_failslab(s->object_size, flags, s->flags)) + return NULL; + + return memcg_kmem_get_cache(s, flags); +} + +static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, + size_t size, void **p) +{ + size_t i; + + flags &= gfp_allowed_mask; + for (i = 0; i < size; i++) { + void *object = p[i]; + + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); + kmemleak_alloc_recursive(object, s->object_size, 1, + s->flags, flags); + kasan_slab_alloc(s, object); + } + memcg_kmem_put_cache(s); +} + #ifndef CONFIG_SLOB /* * The slab lists for all objects. diff --git a/mm/slub.c b/mm/slub.c index 46997517406e..6bc179952150 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -284,30 +284,6 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } -static inline size_t slab_ksize(const struct kmem_cache *s) -{ -#ifdef CONFIG_SLUB_DEBUG - /* - * Debugging requires use of the padding between object - * and whatever may come after it. - */ - if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) - return s->object_size; - -#endif - /* - * If we have the need to store the freelist pointer - * back there or track user information then we can - * only use the space before that information. 
- */ - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) - return s->inuse; - /* - * Else we can use all the padding etc for the allocation - */ - return s->size; -} - static inline int order_objects(int order, unsigned long size, int reserved) { return ((PAGE_SIZE << order) - reserved) / size; @@ -1279,36 +1255,6 @@ static inline void kfree_hook(const void *x) kasan_kfree_large(x); } -static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, - gfp_t flags) -{ - flags &= gfp_allowed_mask; - lockdep_trace_alloc(flags); - might_sleep_if(gfpflags_allow_blocking(flags)); - - if (should_failslab(s->object_size, flags, s->flags)) - return NULL; - - return memcg_kmem_get_cache(s, flags); -} - -static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, - size_t size, void **p) -{ - size_t i; - - flags &= gfp_allowed_mask; - for (i = 0; i < size; i++) { - void *object = p[i]; - - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); - kmemleak_alloc_recursive(object, s->object_size, 1, - s->flags, flags); - kasan_slab_alloc(s, object); - } - memcg_kmem_put_cache(s); -} - static inline void slab_free_hook(struct kmem_cache *s, void *x) { kmemleak_free_recursive(x, s->flags); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f41.google.com (mail-qg0-f41.google.com [209.85.192.41]) by kanga.kvack.org (Postfix) with ESMTP id 539726B0259 for ; Tue, 8 Dec 2015 11:19:08 -0500 (EST) Received: by qgeb1 with SMTP id b1so23378275qge.1 for ; Tue, 08 Dec 2015 08:19:08 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id d16si4010745qkb.125.2015.12.08.08.18.34 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:34 -0800 (PST) Subject: [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:18:32 +0100 Message-ID: <20151208161832.21945.55076.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Move slab_should_failslab() check from SLAB allocator to generic slab_pre_alloc_hook(). The check guards against slab alloc fault-injects failures for the bootstrap slab that is used for allocating "kmem_cache" objects to the allocator itself. I'm not really happy with this code... --- mm/failslab.c | 2 ++ mm/slab.c | 8 -------- mm/slab.h | 23 ++++++++++++++++++++++- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/mm/failslab.c b/mm/failslab.c index 79171b4a5826..a2ad28ba696c 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -13,6 +13,8 @@ static struct { bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) { + // Should we place bootstrap kmem_cache check here??? 
+ if (gfpflags & __GFP_NOFAIL) return false; diff --git a/mm/slab.c b/mm/slab.c index 4765c97ce690..4684c2496982 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2917,14 +2917,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif -static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) -{ - if (unlikely(cachep == kmem_cache)) - return false; - - return should_failslab(cachep->object_size, flags, cachep->flags); -} - static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; diff --git a/mm/slab.h b/mm/slab.h index 588bc5281fc8..4e7b0e62f3f4 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -360,6 +360,27 @@ static inline size_t slab_ksize(const struct kmem_cache *s) } #endif +/* FIXME: This construct sucks, because this compare+branch needs to + * get removed by compiler then !CONFIG_FAILSLAB (maybe compiler is + * smart enough to realize only "false" can be generated). + * + * Comments please: Pulling out CONFIG_FAILSLAB here looks ugly... + * should we instead change API of should_failslab() ?? + * + * Next question: is the bootstrap cache check okay to add for all + * allocators? (this would be the easiest, else need more ugly ifdef's) + */ +static inline bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) +{ + /* No fault-injection for bootstrap cache */ +#ifdef CONFIG_FAILSLAB + if (unlikely(cachep == kmem_cache)) + return false; +#endif + + return should_failslab(cachep->object_size, flags, cachep->flags); +} + static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { @@ -367,7 +388,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, lockdep_trace_alloc(flags); might_sleep_if(gfpflags_allow_blocking(flags)); - if (should_failslab(s->object_size, flags, s->flags)) + if (slab_should_failslab(s, flags)) return NULL; return memcg_kmem_get_cache(s, flags); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f54.google.com (mail-qg0-f54.google.com [209.85.192.54]) by kanga.kvack.org (Postfix) with ESMTP id 05BB96B025A for ; Tue, 8 Dec 2015 11:19:10 -0500 (EST) Received: by qgea14 with SMTP id a14so23342888qge.0 for ; Tue, 08 Dec 2015 08:19:09 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id e67si4045119qkb.67.2015.12.08.08.18.39 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:39 -0800 (PST) Subject: [RFC PATCH V2 3/9] slab: use slab_pre_alloc_hook in SLAB allocator From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:18:37 +0100 Message-ID: <20151208161837.21945.45442.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Dedublicate code in slab_alloc() and slab_alloc_node() by using the slab_pre_alloc_hook() call. 
Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 4684c2496982..17fd6268ad41 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3140,15 +3140,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, void *ptr; int slab_node = numa_mem_id(); - flags &= gfp_allowed_mask; - - lockdep_trace_alloc(flags); - - if (slab_should_failslab(cachep, flags)) + cachep = slab_pre_alloc_hook(cachep, flags); + if (!cachep) return NULL; - cachep = memcg_kmem_get_cache(cachep, flags); - cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); @@ -3228,15 +3223,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) unsigned long save_flags; void *objp; - flags &= gfp_allowed_mask; - - lockdep_trace_alloc(flags); - - if (slab_should_failslab(cachep, flags)) + cachep = slab_pre_alloc_hook(cachep, flags); + if (!cachep) return NULL; - cachep = memcg_kmem_get_cache(cachep, flags); - cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); objp = __do_cache_alloc(cachep, flags); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f53.google.com (mail-qg0-f53.google.com [209.85.192.53]) by kanga.kvack.org (Postfix) with ESMTP id 7C1956B025B for ; Tue, 8 Dec 2015 11:19:16 -0500 (EST) Received: by qgcc31 with SMTP id c31so23221359qgc.3 for ; Tue, 08 Dec 2015 08:19:16 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id m6si4087845qki.10.2015.12.08.08.18.44 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:44 -0800 (PST) Subject: [RFC PATCH V2 4/9] mm: kmemcheck skip object if slab allocation failed From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:18:42 +0100 Message-ID: <20151208161842.21945.131.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton In the SLAB allocator kmemcheck_slab_alloc() is guarded against being called in case the object is NULL. In SLUB allocator this NULL pointer invocation can happen, which seems like an oversight. Move the NULL pointer check into kmemcheck code (kmemcheck_slab_alloc) so the check gets moved out of the fastpath, when not compiled with CONFIG_KMEMCHECK. Signed-off-by: Jesper Dangaard Brouer --- mm/kmemcheck.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index cab58bb592d8..6f4f424037c0 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -60,6 +60,9 @@ void kmemcheck_free_shadow(struct page *page, int order) void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, size_t size) { + if (unlikely(!object)) /* Skip object if allocation failed */ + return; + /* * Has already been memset(), which initializes the shadow for us * as well. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f52.google.com (mail-qg0-f52.google.com [209.85.192.52]) by kanga.kvack.org (Postfix) with ESMTP id B18376B025C for ; Tue, 8 Dec 2015 11:19:22 -0500 (EST) Received: by qgeb1 with SMTP id b1so23389602qge.1 for ; Tue, 08 Dec 2015 08:19:22 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id d139si4035448qhd.89.2015.12.08.08.18.49 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:49 -0800 (PST) Subject: [RFC PATCH V2 5/9] slab: use slab_post_alloc_hook in SLAB allocator From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:18:47 +0100 Message-ID: <20151208161847.21945.28194.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Reviewers notice that the order in slab_post_alloc_hook() of kmemcheck_slab_alloc() and kmemleak_alloc_recursive() gets swapped compared to slab.c. Also notice memset now occurs before calling kmemcheck_slab_alloc() and kmemleak_alloc_recursive(). Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 17fd6268ad41..47e7bcab8c3b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3172,16 +3172,11 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); - kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, - flags); - if (likely(ptr)) { - kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size); - if (unlikely(flags & __GFP_ZERO)) - memset(ptr, 0, cachep->object_size); - } + if (unlikely(flags & __GFP_ZERO) && ptr) + memset(ptr, 0, cachep->object_size); - memcg_kmem_put_cache(cachep); + slab_post_alloc_hook(cachep, flags, 1, &ptr); return ptr; } @@ -3232,17 +3227,12 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) objp = __do_cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); - kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags, - flags); prefetchw(objp); - if (likely(objp)) { - kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size); - if (unlikely(flags & __GFP_ZERO)) - memset(objp, 0, cachep->object_size); - } + if (unlikely(flags & __GFP_ZERO) && objp) + memset(objp, 0, cachep->object_size); - memcg_kmem_put_cache(cachep); + slab_post_alloc_hook(cachep, flags, 1, &objp); return objp; } -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f54.google.com (mail-qg0-f54.google.com [209.85.192.54]) by kanga.kvack.org (Postfix) with ESMTP id E88C66B025D for ; Tue, 8 Dec 2015 11:19:27 -0500 (EST) Received: by qgeb1 with SMTP id b1so23393895qge.1 for ; Tue, 08 Dec 2015 08:19:27 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. 
[209.132.183.28]) by mx.google.com with ESMTPS id a59si4069225qgf.42.2015.12.08.08.18.54 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:18:55 -0800 (PST) Subject: [RFC PATCH V2 6/9] slab: implement bulk alloc in SLAB allocator From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:18:52 +0100 Message-ID: <20151208161852.21945.85266.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 47e7bcab8c3b..70be9235e083 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3392,9 +3392,42 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) EXPORT_SYMBOL(kmem_cache_free_bulk); int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) + void **p) { - return __kmem_cache_alloc_bulk(s, flags, size, p); + size_t i; + + s = slab_pre_alloc_hook(s, flags); + if (!s) + return 0; + + cache_alloc_debugcheck_before(s, flags); + + local_irq_disable(); + for (i = 0; i < size; i++) { + void *objp = __do_cache_alloc(s, flags); + + /* this call could be done outside IRQ disabled section */ + objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_); + + if (unlikely(!objp)) + goto error; + p[i] = objp; + } + local_irq_enable(); + + /* Clear memory outside IRQ disabled section */ + if (unlikely(flags & __GFP_ZERO)) + for (i = 0; i < size; i++) + memset(p[i], 0, s->object_size); + + slab_post_alloc_hook(s, flags, size, p); + /* FIXME: Trace call missing. Christoph would like a bulk variant */ + return size; +error: + local_irq_enable(); + slab_post_alloc_hook(s, flags, i, p); + __kmem_cache_free_bulk(s, i, p); + return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f41.google.com (mail-qg0-f41.google.com [209.85.192.41]) by kanga.kvack.org (Postfix) with ESMTP id 2ACA26B025E for ; Tue, 8 Dec 2015 11:19:32 -0500 (EST) Received: by qgea14 with SMTP id a14so23361335qge.0 for ; Tue, 08 Dec 2015 08:19:32 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. 
[209.132.183.28]) by mx.google.com with ESMTPS id n18si4068889qhc.21.2015.12.08.08.18.59 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:19:00 -0800 (PST) Subject: [RFC PATCH V2 7/9] slab: avoid running debug SLAB code with IRQs disabled for alloc_bulk From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:18:58 +0100 Message-ID: <20151208161857.21945.28254.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Move the call to cache_alloc_debugcheck_after() outside the IRQ disabled section in kmem_cache_alloc_bulk(). When CONFIG_DEBUG_SLAB is disabled the compiler should remove this code. Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 70be9235e083..35a4fb0970bd 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3391,6 +3391,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); +static __always_inline void +cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p, unsigned long caller) +{ + size_t i; + + for (i = 0; i < size; i++) + p[i] = cache_alloc_debugcheck_after(s, flags, p[i], _RET_IP_); +} + int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { @@ -3406,15 +3416,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, for (i = 0; i < size; i++) { void *objp = __do_cache_alloc(s, flags); - /* this call could be done outside IRQ disabled section */ - objp = cache_alloc_debugcheck_after(s, flags, objp, _RET_IP_); - if (unlikely(!objp)) goto error; p[i] = objp; } local_irq_enable(); + cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); + /* Clear memory outside IRQ disabled section */ if (unlikely(flags & __GFP_ZERO)) for (i = 0; i < size; i++) @@ -3425,6 +3434,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, return size; error: local_irq_enable(); + cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_); slab_post_alloc_hook(s, flags, i, p); __kmem_cache_free_bulk(s, i, p); return 0; -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f50.google.com (mail-qg0-f50.google.com [209.85.192.50]) by kanga.kvack.org (Postfix) with ESMTP id 6D4296B025E for ; Tue, 8 Dec 2015 11:19:38 -0500 (EST) Received: by qgcc31 with SMTP id c31so23238793qgc.3 for ; Tue, 08 Dec 2015 08:19:38 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. 
[209.132.183.28]) by mx.google.com with ESMTPS id 66si4074454qhp.11.2015.12.08.08.19.04 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:19:05 -0800 (PST) Subject: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:19:03 +0100 Message-ID: <20151208161903.21945.33876.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Signed-off-by: Jesper Dangaard Brouer --- mm/slab.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 35a4fb0970bd..72c7958b4075 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3385,12 +3385,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) } EXPORT_SYMBOL(kmem_cache_alloc); -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) -{ - __kmem_cache_free_bulk(s, size, p); -} -EXPORT_SYMBOL(kmem_cache_free_bulk); - static __always_inline void cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p, unsigned long caller) @@ -3584,6 +3578,29 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) } EXPORT_SYMBOL(kmem_cache_free); +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +{ + struct kmem_cache *s; + size_t i; + + local_irq_disable(); + for (i = 0; i < size; i++) { + void *objp = p[i]; + + s = cache_from_obj(orig_s, objp); + + debug_check_no_locks_freed(objp, s->object_size); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(objp, s->object_size); + + __cache_free(s, objp, _RET_IP_); + } + local_irq_enable(); + + /* FIXME: add tracing */ +} +EXPORT_SYMBOL(kmem_cache_free_bulk); + /** * kfree - free previously allocated memory * @objp: pointer returned by kmalloc. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f50.google.com (mail-qg0-f50.google.com [209.85.192.50]) by kanga.kvack.org (Postfix) with ESMTP id CE74B6B025F for ; Tue, 8 Dec 2015 11:19:44 -0500 (EST) Received: by qgcc31 with SMTP id c31so23243423qgc.3 for ; Tue, 08 Dec 2015 08:19:44 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id x66si4068166qhx.17.2015.12.08.08.19.10 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 08 Dec 2015 08:19:10 -0800 (PST) Subject: [RFC PATCH V2 9/9] slab: annotate code to generate more compact asm code From: Jesper Dangaard Brouer Date: Tue, 08 Dec 2015 17:19:08 +0100 Message-ID: <20151208161908.21945.177.stgit@firesoul> In-Reply-To: <20151208161751.21945.53936.stgit@firesoul> References: <20151208161751.21945.53936.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org Cc: Christoph Lameter , Vladimir Davydov , Jesper Dangaard Brouer , Joonsoo Kim , Linus Torvalds , Andrew Morton Premature optimizations for CONFIG_NUMA case... 
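For context (not part of the diff below): likely()/unlikely() are the standard kernel branch-prediction hints from include/linux/compiler.h, which essentially wrap GCC's __builtin_expect(), roughly:

    #define likely(x)    __builtin_expect(!!(x), 1)
    #define unlikely(x)  __builtin_expect(!!(x), 0)

They do not change behaviour; they only tell the compiler which branch to lay out as the fall-through path, which is what yields the slightly more compact asm for the common case (no mempolicy, local node allocation succeeds).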
--- mm/slab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 72c7958b4075..6d79fc5668c4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3185,7 +3185,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) { void *objp; - if (current->mempolicy || cpuset_do_slab_mem_spread()) { + if (unlikely(current->mempolicy || cpuset_do_slab_mem_spread())) { objp = alternate_node_alloc(cache, flags); if (objp) goto out; @@ -3196,7 +3196,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) * We may just have run out of memory on the local node. * ____cache_alloc_node() knows how to locate memory on other nodes */ - if (!objp) + if (unlikely(!objp)) objp = ____cache_alloc_node(cache, flags, numa_mem_id()); out: -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f169.google.com (mail-io0-f169.google.com [209.85.223.169]) by kanga.kvack.org (Postfix) with ESMTP id B42936B0255 for ; Tue, 8 Dec 2015 21:35:44 -0500 (EST) Received: by ioc74 with SMTP id 74so44880751ioc.2 for ; Tue, 08 Dec 2015 18:35:44 -0800 (PST) Received: from lgeamrelo13.lge.com (LGEAMRELO13.lge.com. [156.147.23.53]) by mx.google.com with ESMTPS id i4si9345552igu.39.2015.12.08.18.35.43 for (version=TLS1 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Tue, 08 Dec 2015 18:35:43 -0800 (PST) Date: Wed, 9 Dec 2015 11:36:58 +0900 From: Joonsoo Kim Subject: Re: [RFC PATCH V2 2/9] mm: generalize avoid fault-inject on bootstrap kmem_cache Message-ID: <20151209023658.GA12482@js1304-P5Q-DELUXE> References: <20151208161751.21945.53936.stgit@firesoul> <20151208161832.21945.55076.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20151208161832.21945.55076.stgit@firesoul> Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Christoph Lameter , Vladimir Davydov , Linus Torvalds , Andrew Morton On Tue, Dec 08, 2015 at 05:18:32PM +0100, Jesper Dangaard Brouer wrote: > Move slab_should_failslab() check from SLAB allocator to generic > slab_pre_alloc_hook(). The check guards against slab alloc > fault-injects failures for the bootstrap slab that is used for > allocating "kmem_cache" objects to the allocator itself. > > I'm not really happy with this code... > --- > mm/failslab.c | 2 ++ > mm/slab.c | 8 -------- > mm/slab.h | 23 ++++++++++++++++++++++- > 3 files changed, 24 insertions(+), 9 deletions(-) > > diff --git a/mm/failslab.c b/mm/failslab.c > index 79171b4a5826..a2ad28ba696c 100644 > --- a/mm/failslab.c > +++ b/mm/failslab.c > @@ -13,6 +13,8 @@ static struct { > > bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) > { > + // Should we place bootstrap kmem_cache check here??? 
> + > if (gfpflags & __GFP_NOFAIL) > return false; > > diff --git a/mm/slab.c b/mm/slab.c > index 4765c97ce690..4684c2496982 100644 > --- a/mm/slab.c > +++ b/mm/slab.c > @@ -2917,14 +2917,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, > #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) > #endif > > -static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) > -{ > - if (unlikely(cachep == kmem_cache)) > - return false; > - > - return should_failslab(cachep->object_size, flags, cachep->flags); > -} > - > static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) > { > void *objp; > diff --git a/mm/slab.h b/mm/slab.h > index 588bc5281fc8..4e7b0e62f3f4 100644 > --- a/mm/slab.h > +++ b/mm/slab.h > @@ -360,6 +360,27 @@ static inline size_t slab_ksize(const struct kmem_cache *s) > } > #endif > > +/* FIXME: This construct sucks, because this compare+branch needs to > + * get removed by compiler then !CONFIG_FAILSLAB (maybe compiler is > + * smart enough to realize only "false" can be generated). > + * > + * Comments please: Pulling out CONFIG_FAILSLAB here looks ugly... > + * should we instead change API of should_failslab() ?? > + * > + * Next question: is the bootstrap cache check okay to add for all > + * allocators? (this would be the easiest, else need more ugly ifdef's) > + */ > +static inline bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) > +{ > + /* No fault-injection for bootstrap cache */ > +#ifdef CONFIG_FAILSLAB > + if (unlikely(cachep == kmem_cache)) > + return false; > +#endif > + > + return should_failslab(cachep->object_size, flags, cachep->flags); > +} > + > static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, > gfp_t flags) > { > @@ -367,7 +388,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, > lockdep_trace_alloc(flags); > might_sleep_if(gfpflags_allow_blocking(flags)); > > - if (should_failslab(s->object_size, flags, s->flags)) > + if (slab_should_failslab(s, flags)) > return NULL; It'd be better to remove slab_should_failslab() and insert following code snippet here. if (IS_ENABLED(CONFIG_FAILSLAB) && cachep != kmem_cache && should_failslab()) return NULL; Thanks. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f179.google.com (mail-io0-f179.google.com [209.85.223.179]) by kanga.kvack.org (Postfix) with ESMTP id 5F5526B0254 for ; Wed, 9 Dec 2015 10:43:22 -0500 (EST) Received: by ioir85 with SMTP id r85so63365490ioi.1 for ; Wed, 09 Dec 2015 07:43:22 -0800 (PST) Received: from resqmta-ch2-04v.sys.comcast.net (resqmta-ch2-04v.sys.comcast.net. 
[2001:558:fe21:29:69:252:207:36]) by mx.google.com with ESMTPS id d16si13351896igo.8.2015.12.09.07.43.21 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Wed, 09 Dec 2015 07:43:21 -0800 (PST) Date: Wed, 9 Dec 2015 09:43:20 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH V2 1/9] mm/slab: move SLUB alloc hooks to common mm/slab.h In-Reply-To: <20151208161827.21945.25463.stgit@firesoul> Message-ID: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161827.21945.25463.stgit@firesoul> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote: > +/* Q: Howto handle this nicely? below includes are needed for alloc hooks > + * > + * e.g. mm/mempool.c and mm/slab_common.c does not include kmemcheck.h > + * including it here solves the probem, but should they include it > + * themselves? > + */ Including in mm/slab.h is enough. > +#ifdef CONFIG_SLUB Move this into slab_ksize? > +static inline size_t slab_ksize(const struct kmem_cache *s) > +{ > +#ifdef CONFIG_SLUB_DEBUG > + /* > + * Debugging requires use of the padding between object > + * and whatever may come after it. > + */ > + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) > + return s->object_size; > +#endif > + /* > + * If we have the need to store the freelist pointer > + * back there or track user information then we can > + * only use the space before that information. > + */ > + if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) > + return s->inuse; > + /* > + * Else we can use all the padding etc for the allocation > + */ > + return s->size; > +} > +#else /* !CONFIG_SLUB */ Abnd drop the else branch? > +static inline size_t slab_ksize(const struct kmem_cache *s) > +{ > + return s->object_size; > +} > +#endif -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f176.google.com (mail-io0-f176.google.com [209.85.223.176]) by kanga.kvack.org (Postfix) with ESMTP id 574E76B0256 for ; Wed, 9 Dec 2015 11:06:41 -0500 (EST) Received: by ioc74 with SMTP id 74so63867695ioc.2 for ; Wed, 09 Dec 2015 08:06:41 -0800 (PST) Received: from resqmta-ch2-10v.sys.comcast.net (resqmta-ch2-10v.sys.comcast.net. [2001:558:fe21:29:69:252:207:42]) by mx.google.com with ESMTPS id l5si10676471igr.22.2015.12.09.08.06.40 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Wed, 09 Dec 2015 08:06:40 -0800 (PST) Date: Wed, 9 Dec 2015 10:06:39 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator In-Reply-To: <20151208161903.21945.33876.stgit@firesoul> Message-ID: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote: > +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) Drop orig_s as a parameter? This makes the function have less code and makes it more universally useful for freeing large amount of objects. 
> +{ > + struct kmem_cache *s; > + size_t i; > + > + local_irq_disable(); > + for (i = 0; i < size; i++) { > + void *objp = p[i]; > + > + s = cache_from_obj(orig_s, objp); And just use the cache the object belongs to. s = virt_to_head_page(objp)->slab_cache; > + > + debug_check_no_locks_freed(objp, s->object_size); > + if (!(s->flags & SLAB_DEBUG_OBJECTS)) > + debug_check_no_obj_freed(objp, s->object_size); > + > + __cache_free(s, objp, _RET_IP_); > + } > + local_irq_enable(); > + > + /* FIXME: add tracing */ > +} > +EXPORT_SYMBOL(kmem_cache_free_bulk); Could we do the following API change patch before this series so that kmem_cache_free_bulk is properly generalized? From: Christoph Lameter Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free() It is desirable and necessary to free objects from different kmem_caches. It is required in order to support memcg object freeing across different5 cgroups. So drop the pointless parameter and allow freeing of arbitrary lists of slab allocated objects. This patch also does the proper compound page handling so that arbitrary objects allocated via kmalloc() can be handled by kmem_cache_bulk_free(). Signed-off-by: Christoph Lameter Index: linux/include/linux/slab.h =================================================================== --- linux.orig/include/linux/slab.h +++ linux/include/linux/slab.h @@ -315,7 +315,7 @@ void kmem_cache_free(struct kmem_cache * * * Note that interrupts must be enabled when calling these functions. */ -void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); +void kmem_cache_free_bulk(size_t, void **); int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA Index: linux/mm/slab.c =================================================================== --- linux.orig/mm/slab.c +++ linux/mm/slab.c @@ -3413,9 +3413,9 @@ void *kmem_cache_alloc(struct kmem_cache } EXPORT_SYMBOL(kmem_cache_alloc); -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) +void kmem_cache_free_bulk(size_t size, void **p) { - __kmem_cache_free_bulk(s, size, p); + __kmem_cache_free_bulk(size, p); } EXPORT_SYMBOL(kmem_cache_free_bulk); Index: linux/mm/slab.h =================================================================== --- linux.orig/mm/slab.h +++ linux/mm/slab.h @@ -166,10 +166,10 @@ ssize_t slabinfo_write(struct file *file /* * Generic implementation of bulk operations * These are useful for situations in which the allocator cannot - * perform optimizations. In that case segments of the objecct listed + * perform optimizations. In that case segments of the object listed * may be allocated or freed using these operations. */ -void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); +void __kmem_cache_free_bulk(size_t, void **); int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_MEMCG_KMEM Index: linux/mm/slub.c =================================================================== --- linux.orig/mm/slub.c +++ linux/mm/slub.c @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc /* Note that interrupts must be enabled when calling this function. 
*/ -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +void kmem_cache_free_bulk(size_t size, void **p) { if (WARN_ON(!size)) return; do { struct detached_freelist df; - struct kmem_cache *s; + struct page *page; - /* Support for memcg */ - s = cache_from_obj(orig_s, p[size - 1]); + page = virt_to_head_page(p[size - 1]); - size = build_detached_freelist(s, size, p, &df); + if (unlikely(!PageSlab(page))) { + BUG_ON(!PageCompound(page)); + kfree_hook(p[size - 1]); + __free_kmem_pages(page, compound_order(page)); + p[--size] = NULL; + continue; + } + + size = build_detached_freelist(page->slab_cache, size, p, &df); if (unlikely(!df.page)) continue; - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); @@ -2963,7 +2970,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca error: local_irq_enable(); slab_post_alloc_hook(s, flags, i, p); - __kmem_cache_free_bulk(s, i, p); + __kmem_cache_free_bulk(i, p); return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f46.google.com (mail-qg0-f46.google.com [209.85.192.46]) by kanga.kvack.org (Postfix) with ESMTP id 3476B6B0254 for ; Wed, 9 Dec 2015 13:53:34 -0500 (EST) Received: by qgeb1 with SMTP id b1so94765752qge.1 for ; Wed, 09 Dec 2015 10:53:33 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id d133si10154439qkb.91.2015.12.09.10.53.32 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 09 Dec 2015 10:53:33 -0800 (PST) Date: Wed, 9 Dec 2015 19:53:25 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Message-ID: <20151209195325.68eaf314@redhat.com> In-Reply-To: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com On Wed, 9 Dec 2015 10:06:39 -0600 (CST) Christoph Lameter wrote: > On Tue, 8 Dec 2015, Jesper Dangaard Brouer wrote: > > > +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) > > Drop orig_s as a parameter? This makes the function have less code and > makes it more universally useful for freeing large amount of objects. I really like the idea of making it able to free kmalloc'ed objects. But I hate to change the API again... (I do have a use-case in the network stack where I could use this feature). > Could we do the following API change patch before this series so that > kmem_cache_free_bulk is properly generalized? I'm traveling (to Sweden) Thursday (afternoon) and will be back late Friday (and have to play Viking in the weekend), thus to be realistic I'll start working on this Monday, so we can get some benchmark numbers to guide this decision. > From: Christoph Lameter > Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free() > > It is desirable and necessary to free objects from different kmem_caches. 
> It is required in order to support memcg object freeing across different5 > cgroups. > > So drop the pointless parameter and allow freeing of arbitrary lists > of slab allocated objects. > > This patch also does the proper compound page handling so that > arbitrary objects allocated via kmalloc() can be handled by > kmem_cache_bulk_free(). > > Signed-off-by: Christoph Lameter > > Index: linux/include/linux/slab.h > =================================================================== > --- linux.orig/include/linux/slab.h > +++ linux/include/linux/slab.h > @@ -315,7 +315,7 @@ void kmem_cache_free(struct kmem_cache * > * > * Note that interrupts must be enabled when calling these functions. > */ > -void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); > +void kmem_cache_free_bulk(size_t, void **); > int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); > > #ifdef CONFIG_NUMA > Index: linux/mm/slab.c > =================================================================== > --- linux.orig/mm/slab.c > +++ linux/mm/slab.c > @@ -3413,9 +3413,9 @@ void *kmem_cache_alloc(struct kmem_cache > } > EXPORT_SYMBOL(kmem_cache_alloc); > > -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) > +void kmem_cache_free_bulk(size_t size, void **p) > { > - __kmem_cache_free_bulk(s, size, p); > + __kmem_cache_free_bulk(size, p); > } > EXPORT_SYMBOL(kmem_cache_free_bulk); > > Index: linux/mm/slab.h > =================================================================== > --- linux.orig/mm/slab.h > +++ linux/mm/slab.h > @@ -166,10 +166,10 @@ ssize_t slabinfo_write(struct file *file > /* > * Generic implementation of bulk operations > * These are useful for situations in which the allocator cannot > - * perform optimizations. In that case segments of the objecct listed > + * perform optimizations. In that case segments of the object listed > * may be allocated or freed using these operations. > */ > -void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); > +void __kmem_cache_free_bulk(size_t, void **); > int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); > > #ifdef CONFIG_MEMCG_KMEM > Index: linux/mm/slub.c > =================================================================== > --- linux.orig/mm/slub.c > +++ linux/mm/slub.c > @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc > > > /* Note that interrupts must be enabled when calling this function. */ > -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) > +void kmem_cache_free_bulk(size_t size, void **p) > { > if (WARN_ON(!size)) > return; > > do { > struct detached_freelist df; > - struct kmem_cache *s; > + struct page *page; > > - /* Support for memcg */ > - s = cache_from_obj(orig_s, p[size - 1]); > + page = virt_to_head_page(p[size - 1]); Think we can drop this. > - size = build_detached_freelist(s, size, p, &df); > + if (unlikely(!PageSlab(page))) { > + BUG_ON(!PageCompound(page)); > + kfree_hook(p[size - 1]); > + __free_kmem_pages(page, compound_order(page)); > + p[--size] = NULL; > + continue; > + } and move above into build_detached_freelist() and make it a little more pretty code wise (avoiding the p[size -1] juggling). > + > + size = build_detached_freelist(page->slab_cache, size, p, &df); also think we should be able to drop the kmem_cache param "page->slab_cache". 
> if (unlikely(!df.page)) > continue; > > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); > + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); > } while (likely(size)); > } > EXPORT_SYMBOL(kmem_cache_free_bulk); > @@ -2963,7 +2970,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca > error: > local_irq_enable(); > slab_post_alloc_hook(s, flags, i, p); > - __kmem_cache_free_bulk(s, i, p); > + __kmem_cache_free_bulk(i, p); > return 0; > } > EXPORT_SYMBOL(kmem_cache_alloc_bulk); -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ob0-f178.google.com (mail-ob0-f178.google.com [209.85.214.178]) by kanga.kvack.org (Postfix) with ESMTP id EF1436B0038 for ; Wed, 9 Dec 2015 14:41:09 -0500 (EST) Received: by obc18 with SMTP id 18so42158614obc.2 for ; Wed, 09 Dec 2015 11:41:09 -0800 (PST) Received: from resqmta-po-01v.sys.comcast.net (resqmta-po-01v.sys.comcast.net. [2001:558:fe16:19:96:114:154:160]) by mx.google.com with ESMTPS id zd2si771996obb.43.2015.12.09.11.41.08 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Wed, 09 Dec 2015 11:41:08 -0800 (PST) Date: Wed, 9 Dec 2015 13:41:07 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator In-Reply-To: <20151209195325.68eaf314@redhat.com> Message-ID: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote: > I really like the idea of making it able to free kmalloc'ed objects. > But I hate to change the API again... (I do have a use-case in the > network stack where I could use this feature). Now is the time to fix the API since its not that much in use yet if at all. > I'm traveling (to Sweden) Thursday (afternoon) and will be back late > Friday (and have to play Viking in the weekend), thus to be realistic > I'll start working on this Monday, so we can get some benchmark numbers > to guide this decision. Ok great. > > - struct kmem_cache *s; > > + struct page *page; > > > > - /* Support for memcg */ > > - s = cache_from_obj(orig_s, p[size - 1]); > > + page = virt_to_head_page(p[size - 1]); > > Think we can drop this. Well then you wont be able to check for a compound page. And you do not want this check in build detached freelist. > > > - size = build_detached_freelist(s, size, p, &df); > > + if (unlikely(!PageSlab(page))) { > > + BUG_ON(!PageCompound(page)); > > + kfree_hook(p[size - 1]); > > + __free_kmem_pages(page, compound_order(page)); > > + p[--size] = NULL; > > + continue; > > + } > > and move above into build_detached_freelist() and make it a little more > pretty code wise (avoiding the p[size -1] juggling). If we do this check here then we wont be needing it in build_detached_freelist. > > + > > + size = build_detached_freelist(page->slab_cache, size, p, &df); > > also think we should be able to drop the kmem_cache param "page->slab_cache". Yep. 
> > > > if (unlikely(!df.page)) > > continue; > > > > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); > > + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); Then we need df.slab_cache or something. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f176.google.com (mail-io0-f176.google.com [209.85.223.176]) by kanga.kvack.org (Postfix) with ESMTP id AF1946B0038 for ; Wed, 9 Dec 2015 15:51:05 -0500 (EST) Received: by iouu10 with SMTP id u10so74669766iou.0 for ; Wed, 09 Dec 2015 12:51:05 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id xa3si9808620igb.56.2015.12.09.12.51.05 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 09 Dec 2015 12:51:05 -0800 (PST) Date: Wed, 9 Dec 2015 21:50:58 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Message-ID: <20151209215058.0ef5964a@redhat.com> In-Reply-To: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com, David Miller On Wed, 9 Dec 2015 13:41:07 -0600 (CST) Christoph Lameter wrote: > On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote: > > > I really like the idea of making it able to free kmalloc'ed objects. > > But I hate to change the API again... (I do have a use-case in the > > network stack where I could use this feature). > > Now is the time to fix the API since its not that much in use yet if > at all. True. I was just so close submitting the network use-case to DaveM. Guess, that will have to wait if we choose this API change (and I'll have to wait another 3 month before the trees are in sync again). > > I'm traveling (to Sweden) Thursday (afternoon) and will be back late > > Friday (and have to play Viking in the weekend), thus to be realistic > > I'll start working on this Monday, so we can get some benchmark numbers > > to guide this decision. > > Ok great. I'm actually very eager to see if this works out :-) > > > - struct kmem_cache *s; > > > + struct page *page; > > > > > > - /* Support for memcg */ > > > - s = cache_from_obj(orig_s, p[size - 1]); > > > + page = virt_to_head_page(p[size - 1]); > > > > Think we can drop this. > > Well then you wont be able to check for a compound page. And you do not > want this check in build detached freelist. > > > > > > - size = build_detached_freelist(s, size, p, &df); > > > + if (unlikely(!PageSlab(page))) { > > > + BUG_ON(!PageCompound(page)); > > > + kfree_hook(p[size - 1]); > > > + __free_kmem_pages(page, compound_order(page)); > > > + p[--size] = NULL; > > > + continue; > > > + } > > > > and move above into build_detached_freelist() and make it a little more > > pretty code wise (avoiding the p[size -1] juggling). > > If we do this check here then we wont be needing it in > build_detached_freelist. I'll try see what looks best coding style wise... 
> > > + > > > + size = build_detached_freelist(page->slab_cache, size, p, &df); > > > > also think we should be able to drop the kmem_cache param "page->slab_cache". > > Yep. > > > > > > > > if (unlikely(!df.page)) > > > continue; > > > > > > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); > > > + slab_free(page->slab_cache, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); > > Then we need df.slab_cache or something. What about df.page->slab_cache (?) -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-qg0-f54.google.com (mail-qg0-f54.google.com [209.85.192.54]) by kanga.kvack.org (Postfix) with ESMTP id 7B76E6B0255 for ; Thu, 10 Dec 2015 10:10:27 -0500 (EST) Received: by qgec40 with SMTP id c40so145497993qge.2 for ; Thu, 10 Dec 2015 07:10:27 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id f79si15213539qge.19.2015.12.10.07.10.23 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 10 Dec 2015 07:10:23 -0800 (PST) Date: Thu, 10 Dec 2015 16:10:18 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Message-ID: <20151210161018.28cedb68@redhat.com> In-Reply-To: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com On Wed, 9 Dec 2015 13:41:07 -0600 (CST) Christoph Lameter wrote: > On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote: > > > I really like the idea of making it able to free kmalloc'ed objects. > > But I hate to change the API again... (I do have a use-case in the > > network stack where I could use this feature). > > Now is the time to fix the API since its not that much in use yet if at > all. Lets start the naming thread/flame (while waiting for my flight ;-)) If we drop the "kmem_cache *s" parameter from kmem_cache_free_bulk(), and also make it handle kmalloc'ed objects. Why should we name it "kmem_cache_free_bulk"? ... what about naming it kfree_bulk() ??? Or should we keep the name to have a symmetric API kmem_cache_{alloc,free}_bulk() call convention? I'm undecided... -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat Author of http://www.iptv-analyzer.org LinkedIn: http://www.linkedin.com/in/brouer -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ig0-f170.google.com (mail-ig0-f170.google.com [209.85.213.170]) by kanga.kvack.org (Postfix) with ESMTP id 4CC046B0257 for ; Thu, 10 Dec 2015 10:15:52 -0500 (EST) Received: by mail-ig0-f170.google.com with SMTP id mv3so19693939igc.0 for ; Thu, 10 Dec 2015 07:15:52 -0800 (PST) Received: from resqmta-ch2-09v.sys.comcast.net (resqmta-ch2-09v.sys.comcast.net. [2001:558:fe21:29:69:252:207:41]) by mx.google.com with ESMTPS id v76si20745605ioi.35.2015.12.10.07.15.51 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Thu, 10 Dec 2015 07:15:51 -0800 (PST) Date: Thu, 10 Dec 2015 09:15:50 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator In-Reply-To: <20151209215058.0ef5964a@redhat.com> Message-ID: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> <20151209215058.0ef5964a@redhat.com> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , David Miller On Wed, 9 Dec 2015, Jesper Dangaard Brouer wrote: > True. I was just so close submitting the network use-case to DaveM. > Guess, that will have to wait if we choose this API change (and > I'll have to wait another 3 month before the trees are in sync again). why? Andrew can push that to next pretty soon and DaveM could carry that patch too. > > Then we need df.slab_cache or something. > > What about df.page->slab_cache (?) Fine come up with a patch. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f179.google.com (mail-io0-f179.google.com [209.85.223.179]) by kanga.kvack.org (Postfix) with ESMTP id 4D6E76B0257 for ; Thu, 10 Dec 2015 10:18:28 -0500 (EST) Received: by ioir85 with SMTP id r85so95465515ioi.1 for ; Thu, 10 Dec 2015 07:18:28 -0800 (PST) Received: from resqmta-ch2-06v.sys.comcast.net (resqmta-ch2-06v.sys.comcast.net. [2001:558:fe21:29:69:252:207:38]) by mx.google.com with ESMTPS id k65si20688341iok.56.2015.12.10.07.18.27 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Thu, 10 Dec 2015 07:18:27 -0800 (PST) Date: Thu, 10 Dec 2015 09:18:26 -0600 (CST) From: Christoph Lameter Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator In-Reply-To: <20151210161018.28cedb68@redhat.com> Message-ID: References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> <20151210161018.28cedb68@redhat.com> Content-Type: text/plain; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Jesper Dangaard Brouer Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton On Thu, 10 Dec 2015, Jesper Dangaard Brouer wrote: > If we drop the "kmem_cache *s" parameter from kmem_cache_free_bulk(), > and also make it handle kmalloc'ed objects. Why should we name it > "kmem_cache_free_bulk"? ... what about naming it kfree_bulk() ??? Yes makes sense. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f45.google.com (mail-pa0-f45.google.com [209.85.220.45]) by kanga.kvack.org (Postfix) with ESMTP id AFE8C6B0257 for ; Thu, 10 Dec 2015 10:26:43 -0500 (EST) Received: by padhk6 with SMTP id hk6so9054923pad.2 for ; Thu, 10 Dec 2015 07:26:43 -0800 (PST) Received: from mx2.parallels.com (mx2.parallels.com. [199.115.105.18]) by mx.google.com with ESMTPS id x16si20935010pfa.116.2015.12.10.07.26.42 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 10 Dec 2015 07:26:43 -0800 (PST) Date: Thu, 10 Dec 2015 18:26:32 +0300 From: Vladimir Davydov Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Message-ID: <20151210152632.GA11488@esperanza> References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> <20151210161018.28cedb68@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Disposition: inline In-Reply-To: Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: Jesper Dangaard Brouer , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton On Thu, Dec 10, 2015 at 09:18:26AM -0600, Christoph Lameter wrote: > On Thu, 10 Dec 2015, Jesper Dangaard Brouer wrote: > > > If we drop the "kmem_cache *s" parameter from kmem_cache_free_bulk(), > > and also make it handle kmalloc'ed objects. Why should we name it > > "kmem_cache_free_bulk"? ... what about naming it kfree_bulk() ??? > > Yes makes sense. IMHO kmem_cache_alloc_bulk/kfree_bulk looks awkward, especially taking into account the fact that we pair kmem_cache_alloc/kmem_cache_free and kmalloc/kfree, but never kmem_cache_alloc/kfree. So I'd vote for kmem_cache_free_bulk taking a kmem_cache as an argument, but I'm not a potential user of this API, so please don't count my vote :-) Thanks, Vladimir -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f173.google.com (mail-io0-f173.google.com [209.85.223.173]) by kanga.kvack.org (Postfix) with ESMTP id D78FB6B0038 for ; Thu, 10 Dec 2015 12:24:58 -0500 (EST) Received: by iofh3 with SMTP id h3so99899626iof.3 for ; Thu, 10 Dec 2015 09:24:58 -0800 (PST) Received: from resqmta-ch2-01v.sys.comcast.net (resqmta-ch2-01v.sys.comcast.net. 
[2001:558:fe21:29:69:252:207:33]) by mx.google.com with ESMTPS id c141si21357394ioc.40.2015.12.10.09.24.58 for (version=TLS1_2 cipher=AES128-SHA bits=128/128); Thu, 10 Dec 2015 09:24:58 -0800 (PST)
Date: Thu, 10 Dec 2015 11:24:57 -0600 (CST)
From: Christoph Lameter
Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
In-Reply-To: <20151210152632.GA11488@esperanza>
Message-ID:
References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151209195325.68eaf314@redhat.com> <20151210161018.28cedb68@redhat.com> <20151210152632.GA11488@esperanza>
Content-Type: text/plain; charset=US-ASCII
Sender: owner-linux-mm@kvack.org
List-ID:
To: Vladimir Davydov
Cc: Jesper Dangaard Brouer , linux-mm@kvack.org, Joonsoo Kim , Linus Torvalds , Andrew Morton

On Thu, 10 Dec 2015, Vladimir Davydov wrote:

> IMHO kmem_cache_alloc_bulk/kfree_bulk looks awkward, especially taking
> into account the fact that we pair kmem_cache_alloc/kmem_cache_free and
> kmalloc/kfree, but never kmem_cache_alloc/kfree.
>
> So I'd vote for kmem_cache_free_bulk taking a kmem_cache as an argument,
> but I'm not a potential user of this API, so please don't count my vote
> :-)

One way to make it less awkward is to keep the name kmem_cache_free_bulk
but omit the kmem_cache parameter, like what I did initially.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ .
Don't email: email@kvack.org

From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from mail-qk0-f171.google.com (mail-qk0-f171.google.com [209.85.220.171]) by kanga.kvack.org (Postfix) with ESMTP id D94CE6B0256 for ; Mon, 14 Dec 2015 10:20:06 -0500 (EST)
Received: by qkht125 with SMTP id t125so138869617qkh.3 for ; Mon, 14 Dec 2015 07:20:06 -0800 (PST)
Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id y206si6225420qka.77.2015.12.14.07.20.03 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 14 Dec 2015 07:20:03 -0800 (PST)
Date: Mon, 14 Dec 2015 16:19:58 +0100
From: Jesper Dangaard Brouer
Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator
Message-ID: <20151214161958.1a8edf79@redhat.com>
In-Reply-To:
References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul>
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Sender: owner-linux-mm@kvack.org
List-ID:
To: Christoph Lameter
Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com

On Wed, 9 Dec 2015 10:06:39 -0600 (CST) Christoph Lameter wrote:

> From: Christoph Lameter
> Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free()
>
> It is desirable and necessary to free objects from different kmem_caches.
> It is required in order to support memcg object freeing across different
> cgroups.
>
> So drop the pointless parameter and allow freeing of arbitrary lists
> of slab allocated objects.
>
> This patch also does the proper compound page handling so that
> arbitrary objects allocated via kmalloc() can be handled by
> kmem_cache_bulk_free().
>
> Signed-off-by: Christoph Lameter

I've modified this patch to instead introduce a kfree_bulk() and keep
the old behavior of kmem_cache_free_bulk(). This allows us to more easily
compare the two implementation approaches.

[...]
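Conceptually, the difference between the two approaches comes down to where the
kmem_cache pointer comes from. A simplified sketch of the per-object lookup that
the no-argument variant has to do (illustrative only, with a made-up helper name,
not part of the patch):

#include <linux/mm.h>
#include <linux/slab.h>

static struct kmem_cache *cache_of_object(const void *obj)
{
	struct page *page = virt_to_head_page(obj);

	/* Large kmalloc() allocations are plain compound pages, not slab pages */
	if (unlikely(!PageSlab(page)))
		return NULL;	/* freed as compound pages, not via a kmem_cache */

	return page->slab_cache;	/* slab-backed: cache recorded in head page */
}

The quoted patch below applies exactly this kind of dispatch inside the bulk
free path.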
> Index: linux/mm/slub.c
> ===================================================================
> --- linux.orig/mm/slub.c
> +++ linux/mm/slub.c
> @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc
>
>
>  /* Note that interrupts must be enabled when calling this function. */
> -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> +void kmem_cache_free_bulk(size_t size, void **p)

Renamed to kfree_bulk(size_t size, void **p)

>  {
>  	if (WARN_ON(!size))
>  		return;
>
>  	do {
>  		struct detached_freelist df;
> -		struct kmem_cache *s;
> +		struct page *page;
>
> -		/* Support for memcg */
> -		s = cache_from_obj(orig_s, p[size - 1]);
> +		page = virt_to_head_page(p[size - 1]);
>
> -		size = build_detached_freelist(s, size, p, &df);
> +		if (unlikely(!PageSlab(page))) {
> +			BUG_ON(!PageCompound(page));
> +			kfree_hook(p[size - 1]);
> +			__free_kmem_pages(page, compound_order(page));
> +			p[--size] = NULL;
> +			continue;
> +		}
> +
> +		size = build_detached_freelist(page->slab_cache, size, p, &df);
>  		if (unlikely(!df.page))
>  			continue;
>
> -		slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
> +		slab_free(page->slab_cache, df.page, df.freelist,
> +			  df.tail, df.cnt, _RET_IP_);
>  	} while (likely(size));
>  }
>  EXPORT_SYMBOL(kmem_cache_free_bulk);

This specific implementation was too slow, mostly because we call
virt_to_head_page() both in this function and inside
build_detached_freelist(). After integrating it directly into
build_detached_freelist() I'm getting more comparable results.

Results with CONFIG_MEMCG_KMEM disabled, on SLUB (and slab_nomerge for
more accurate results between runs):

bulk - fallback            - kmem_cache_free_bulk  - kfree_bulk
  1  -  58 cycles 14.735 ns -  55 cycles 13.880 ns -  59 cycles 14.843 ns
  2  -  53 cycles 13.298 ns -  32 cycles  8.037 ns -  34 cycles  8.592 ns
  3  -  51 cycles 12.837 ns -  25 cycles  6.442 ns -  27 cycles  6.794 ns
  4  -  50 cycles 12.514 ns -  23 cycles  5.952 ns -  23 cycles  5.958 ns
  8  -  48 cycles 12.097 ns -  20 cycles  5.160 ns -  22 cycles  5.505 ns
 16  -  47 cycles 11.888 ns -  19 cycles  4.900 ns -  19 cycles  4.969 ns
 30  -  47 cycles 11.793 ns -  18 cycles  4.688 ns -  18 cycles  4.682 ns
 32  -  47 cycles 11.926 ns -  18 cycles  4.674 ns -  18 cycles  4.702 ns
 34  -  95 cycles 23.823 ns -  24 cycles  6.068 ns -  24 cycles  6.058 ns
 48  -  81 cycles 20.258 ns -  21 cycles  5.360 ns -  21 cycles  5.338 ns
 64  -  73 cycles 18.414 ns -  20 cycles  5.160 ns -  20 cycles  5.140 ns
128  -  90 cycles 22.563 ns -  27 cycles  6.765 ns -  27 cycles  6.801 ns
158  -  99 cycles 24.831 ns -  30 cycles  7.625 ns -  30 cycles  7.720 ns
250  - 104 cycles 26.173 ns -  37 cycles  9.271 ns -  37 cycles  9.371 ns

As can be seen, the old kmem_cache_free_bulk() is faster than the new
kfree_bulk() (which omits the kmem_cache pointer and needs to derive it
from page->slab_cache). The base (bulk=1) extra cost is 4 cycles, which
then gets amortized as build_detached_freelist() combines objects
belonging to the same page.

This is likely because, with CONFIG_MEMCG_KMEM=n, the compiler can
optimize away the lookup of the kmem_cache structure.

I'll start doing testing with CONFIG_MEMCG_KMEM enabled...

--
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ .
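The tables above come from a micro-benchmark kernel module. A rough sketch of the
measurement pattern behind one cell of such a table (an assumed structure with a
made-up function name, not the actual test module, which also reports ns and runs
far more iterations):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/timex.h>	/* get_cycles() */

/* Average cost in cycles per object for bulk free at a given bulk size. */
static u64 measure_bulk_free(struct kmem_cache *s, size_t bulk, unsigned int rounds)
{
	void **objs;
	cycles_t start, stop;
	u64 total_cycles = 0;
	unsigned int r, done = 0;

	objs = kmalloc_array(bulk, sizeof(void *), GFP_KERNEL);
	if (!objs)
		return 0;

	for (r = 0; r < rounds; r++) {
		if (!kmem_cache_alloc_bulk(s, GFP_KERNEL, bulk, objs))
			break;

		start = get_cycles();
		kmem_cache_free_bulk(s, bulk, objs);
		stop = get_cycles();

		total_cycles += stop - start;
		done++;
	}
	kfree(objs);

	return done ? total_cycles / (done * bulk) : 0;
}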
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-io0-f182.google.com (mail-io0-f182.google.com [209.85.223.182]) by kanga.kvack.org (Postfix) with ESMTP id C622B6B0257 for ; Tue, 15 Dec 2015 07:02:41 -0500 (EST) Received: by mail-io0-f182.google.com with SMTP id o67so16428752iof.3 for ; Tue, 15 Dec 2015 04:02:41 -0800 (PST) Received: from mx1.redhat.com (mx1.redhat.com. [209.132.183.28]) by mx.google.com with ESMTPS id a196si4319871ioe.161.2015.12.15.04.02.40 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 15 Dec 2015 04:02:40 -0800 (PST) Date: Tue, 15 Dec 2015 13:02:35 +0100 From: Jesper Dangaard Brouer Subject: Re: [RFC PATCH V2 8/9] slab: implement bulk free in SLAB allocator Message-ID: <20151215130235.10e92367@redhat.com> In-Reply-To: <20151214161958.1a8edf79@redhat.com> References: <20151208161751.21945.53936.stgit@firesoul> <20151208161903.21945.33876.stgit@firesoul> <20151214161958.1a8edf79@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Christoph Lameter Cc: linux-mm@kvack.org, Vladimir Davydov , Joonsoo Kim , Linus Torvalds , Andrew Morton , brouer@redhat.com On Mon, 14 Dec 2015 16:19:58 +0100 Jesper Dangaard Brouer wrote: > On Wed, 9 Dec 2015 10:06:39 -0600 (CST) Christoph Lameter wrote: > > > From: Christoph Lameter > > Subject: slab bulk api: Remove the kmem_cache parameter from kmem_cache_bulk_free() > > > > It is desirable and necessary to free objects from different kmem_caches. > > It is required in order to support memcg object freeing across different5 > > cgroups. > > > > So drop the pointless parameter and allow freeing of arbitrary lists > > of slab allocated objects. > > > > This patch also does the proper compound page handling so that > > arbitrary objects allocated via kmalloc() can be handled by > > kmem_cache_bulk_free(). > > > > Signed-off-by: Christoph Lameter > > I've modified this patch, to instead introduce a kfree_bulk() and keep > the old behavior of kmem_cache_free_bulk(). This allow us to easier > compare the two impl. approaches. > > [...] > > Index: linux/mm/slub.c > > =================================================================== > > --- linux.orig/mm/slub.c > > +++ linux/mm/slub.c > > @@ -2887,23 +2887,30 @@ static int build_detached_freelist(struc > > > > > > /* Note that interrupts must be enabled when calling this function. 
*/ > > -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) > > +void kmem_cache_free_bulk(size_t size, void **p) > > Renamed to kfree_bulk(size_t size, void **p) > > > { > > if (WARN_ON(!size)) > > return; > > > > do { > > struct detached_freelist df; > > - struct kmem_cache *s; > > + struct page *page; > > > > - /* Support for memcg */ > > - s = cache_from_obj(orig_s, p[size - 1]); > > + page = virt_to_head_page(p[size - 1]); > > > > - size = build_detached_freelist(s, size, p, &df); > > + if (unlikely(!PageSlab(page))) { > > + BUG_ON(!PageCompound(page)); > > + kfree_hook(p[size - 1]); > > + __free_kmem_pages(page, compound_order(page)); > > + p[--size] = NULL; > > + continue; > > + } > > + > > + size = build_detached_freelist(page->slab_cache, size, p, &df); > > if (unlikely(!df.page)) > > continue; > > > > - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); > > + slab_free(page->slab_cache, df.page, df.freelist, > > + df.tail, df.cnt, _RET_IP_); > > } while (likely(size)); > > } > > EXPORT_SYMBOL(kmem_cache_free_bulk); > > This specific implementation was too slow, mostly because we call > virt_to_head_page() both in this function and inside build_detached_freelist(). > > After integrating this directly into build_detached_freelist() I'm > getting more comparative results. > > Results with disabled CONFIG_MEMCG_KMEM, and SLUB (and slab_nomerge for > more accurate results between runs): > > bulk- fallback - kmem_cache_free_bulk - kfree_bulk > 1 - 58 cycles 14.735 ns - 55 cycles 13.880 ns - 59 cycles 14.843 ns > 2 - 53 cycles 13.298 ns - 32 cycles 8.037 ns - 34 cycles 8.592 ns > 3 - 51 cycles 12.837 ns - 25 cycles 6.442 ns - 27 cycles 6.794 ns > 4 - 50 cycles 12.514 ns - 23 cycles 5.952 ns - 23 cycles 5.958 ns > 8 - 48 cycles 12.097 ns - 20 cycles 5.160 ns - 22 cycles 5.505 ns > 16 - 47 cycles 11.888 ns - 19 cycles 4.900 ns - 19 cycles 4.969 ns > 30 - 47 cycles 11.793 ns - 18 cycles 4.688 ns - 18 cycles 4.682 ns > 32 - 47 cycles 11.926 ns - 18 cycles 4.674 ns - 18 cycles 4.702 ns > 34 - 95 cycles 23.823 ns - 24 cycles 6.068 ns - 24 cycles 6.058 ns > 48 - 81 cycles 20.258 ns - 21 cycles 5.360 ns - 21 cycles 5.338 ns > 64 - 73 cycles 18.414 ns - 20 cycles 5.160 ns - 20 cycles 5.140 ns > 128 - 90 cycles 22.563 ns - 27 cycles 6.765 ns - 27 cycles 6.801 ns > 158 - 99 cycles 24.831 ns - 30 cycles 7.625 ns - 30 cycles 7.720 ns > 250 - 104 cycles 26.173 ns - 37 cycles 9.271 ns - 37 cycles 9.371 ns > > As can been seen the old kmem_cache_free_bulk() is faster than the new > kfree_bulk() (which omits the kmem_cache pointer and need to derive it > from the page->slab_cache). The base (bulk=1) extra cost is 4 cycles, > which then gets amortized as build_detached_freelist() combines objects > belonging to same page. > > This is likely because the compiler, with disabled CONFIG_MEMCG_KMEM=n, > can optimize and avoid doing the lookup of the kmem_cache structure. > > I'll start doing testing with CONFIG_MEMCG_KMEM enabled... Enabling CONFIG_MEMCG_KMEM didn't change the performance, likely because memcg_kmem_enabled() uses a static_key_false() construct. 
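For reference, the static-key pattern behind memcg_kmem_enabled() looks roughly
like the following (a minimal sketch with made-up names, not the actual
mm/memcontrol.c code). With the key disabled the branch is patched to fall
straight through, which is why merely compiling in CONFIG_MEMCG_KMEM costs
nothing measurable:

#include <linux/jump_label.h>

static struct static_key demo_kmemcg_active = STATIC_KEY_INIT_FALSE;

static inline bool demo_kmemcg_enabled(void)
{
	/* Compiles to a patched jump/NOP; the false case stays on the fast path */
	return static_key_false(&demo_kmemcg_active);
}

static void demo_free_path(void)
{
	if (demo_kmemcg_enabled()) {
		/* memcg slow path: cache lookup, accounting, etc. (out of line) */
	}
	/* fast path continues; only when a kmemcg user flips the key
	 * (static_key_slow_inc()) is the branch above taken.
	 */
}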
Implemented kfree_bulk() as an inline function that just calls
kmem_cache_free_bulk(NULL, size, p), run with CONFIG_MEMCG_KMEM=y (but no
"user" of memcg):

bulk - fallback            - kmem_cache_free_bulk  - kmem_cache_free_bulk(s=NULL)
  1  -  60 cycles 15.232 ns -  52 cycles 13.124 ns -  58 cycles 14.562 ns
  2  -  54 cycles 13.678 ns -  31 cycles  7.875 ns -  34 cycles  8.509 ns
  3  -  52 cycles 13.112 ns -  25 cycles  6.406 ns -  28 cycles  7.045 ns
  4  -  51 cycles 12.833 ns -  24 cycles  6.036 ns -  24 cycles  6.076 ns
  8  -  49 cycles 12.367 ns -  21 cycles  5.404 ns -  22 cycles  5.522 ns
 16  -  48 cycles 12.144 ns -  19 cycles  4.934 ns -  19 cycles  4.967 ns
 30  -  50 cycles 12.542 ns -  18 cycles  4.685 ns -  18 cycles  4.687 ns
 32  -  48 cycles 12.234 ns -  18 cycles  4.661 ns -  18 cycles  4.662 ns
 34  -  96 cycles 24.204 ns -  24 cycles  6.068 ns -  24 cycles  6.067 ns
 48  -  82 cycles 20.553 ns -  21 cycles  5.410 ns -  21 cycles  5.433 ns
 64  -  74 cycles 18.743 ns -  20 cycles  5.171 ns -  20 cycles  5.179 ns
128  -  91 cycles 22.791 ns -  26 cycles  6.601 ns -  26 cycles  6.600 ns
158  - 100 cycles 25.191 ns -  30 cycles  7.569 ns -  30 cycles  7.617 ns
250  - 106 cycles 26.535 ns -  37 cycles  9.426 ns -  37 cycles  9.427 ns

As can be seen, it is still more costly to get the kmem_cache pointer via the
object's page->slab_cache, even when compiled with CONFIG_MEMCG_KMEM.

But what happens when enabling a "user" of kmemcg, which flips the
static_key_false() branch in memcg_kmem_enabled()? First, notice that the
normal kmem fastpath cost increased: 70 cycles(tsc) 17.640 ns

bulk - fallback            - kmem_cache_free_bulk  - kmem_cache_free_bulk(s=NULL)
  1  -  85 cycles 21.351 ns -  76 cycles 19.086 ns -  74 cycles 18.701 ns
  2  -  79 cycles 19.907 ns -  43 cycles 10.848 ns -  41 cycles 10.467 ns
  3  -  77 cycles 19.310 ns -  32 cycles  8.132 ns -  31 cycles  7.880 ns
  4  -  76 cycles 19.017 ns -  28 cycles  7.195 ns -  28 cycles  7.050 ns
  8  -  77 cycles 19.443 ns -  23 cycles  5.838 ns -  23 cycles  5.782 ns
 16  -  75 cycles 18.815 ns -  20 cycles  5.174 ns -  20 cycles  5.127 ns
 30  -  74 cycles 18.719 ns -  19 cycles  4.870 ns -  19 cycles  4.816 ns
 32  -  75 cycles 18.917 ns -  19 cycles  4.850 ns -  19 cycles  4.800 ns
 34  - 119 cycles 29.878 ns -  25 cycles  6.312 ns -  25 cycles  6.282 ns
 48  - 106 cycles 26.536 ns -  21 cycles  5.471 ns -  21 cycles  5.448 ns
 64  -  98 cycles 24.635 ns -  20 cycles  5.239 ns -  20 cycles  5.224 ns
128  - 114 cycles 28.586 ns -  27 cycles  6.768 ns -  26 cycles  6.736 ns
158  - 124 cycles 31.173 ns -  30 cycles  7.668 ns -  30 cycles  7.611 ns
250  - 129 cycles 32.336 ns -  37 cycles  9.460 ns -  37 cycles  9.468 ns

With kmemcg runtime-enabled, I was expecting to see a bigger win for the case
of kmem_cache_free_bulk(s=NULL). I therefore implemented a function
kfree_bulk_export(), which inlines build_detached_freelist() and avoids
including the memcg code path.
But the result is almost the same:

bulk - fallback            - kmem_cache_free_bulk  - kfree_bulk_export
  1  -  84 cycles 21.221 ns -  77 cycles 19.324 ns -  74 cycles 18.515 ns
  2  -  79 cycles 19.963 ns -  44 cycles 11.120 ns -  41 cycles 10.329 ns
  3  -  77 cycles 19.471 ns -  33 cycles  8.291 ns -  31 cycles  7.771 ns
  4  -  76 cycles 19.191 ns -  28 cycles  7.100 ns -  27 cycles  6.765 ns
  8  -  78 cycles 19.525 ns -  23 cycles  5.781 ns -  22 cycles  5.737 ns
 16  -  75 cycles 18.922 ns -  20 cycles  5.152 ns -  20 cycles  5.092 ns
 30  -  75 cycles 18.812 ns -  19 cycles  4.864 ns -  19 cycles  4.833 ns
 32  -  75 cycles 18.807 ns -  19 cycles  4.852 ns -  23 cycles  5.896 ns
 34  - 119 cycles 29.989 ns -  25 cycles  6.258 ns -  25 cycles  6.411 ns
 48  - 106 cycles 26.677 ns -  28 cycles  7.182 ns -  22 cycles  5.651 ns
 64  -  98 cycles 24.716 ns -  20 cycles  5.245 ns -  21 cycles  5.425 ns
128  - 115 cycles 28.905 ns -  26 cycles  6.748 ns -  27 cycles  6.787 ns
158  - 124 cycles 31.039 ns -  30 cycles  7.604 ns -  30 cycles  7.629 ns
250  - 129 cycles 32.372 ns -  37 cycles  9.475 ns -  37 cycles  9.325 ns

I took a closer look with perf, and it looks like the extra cost from enabling
kmemcg originates from the alloc code path. Specifically, the function call
get_mem_cgroup_from_mm() (9.95%) is the costly part, plus
__memcg_kmem_get_cache() (4.84%) and __memcg_kmem_put_cache() (1.35%).

(Added the current code as a patch below my signature.)

--
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

[PATCH] New API kfree_bulk()
---
 include/linux/slab.h | 14 +++++++++++++
 mm/slab_common.c     |  8 ++++++--
 mm/slub.c            | 52 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2037a861e367..9dd353215c2b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -317,6 +317,20 @@ void kmem_cache_free(struct kmem_cache *, void *);
  */
 void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
 int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+void kfree_bulk_export(size_t, void **);
+
+static __always_inline void kfree_bulk(size_t size, void **p)
+{
+	/* Reusing the call to kmem_cache_free_bulk() allows kfree_bulk to
+	 * use the same code icache
+	 */
+	kmem_cache_free_bulk(NULL, size, p);
+	/*
+	 * Inlining in kfree_bulk_export() generates smaller code size
+	 * than the more generic kmem_cache_free_bulk().
+	 */
+	// kfree_bulk_export(size, p);
+}
 
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3c6a86b4ec25..963c25589949 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -108,8 +108,12 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
 {
 	size_t i;
 
-	for (i = 0; i < nr; i++)
-		kmem_cache_free(s, p[i]);
+	for (i = 0; i < nr; i++) {
+		if (s)
+			kmem_cache_free(s, p[i]);
+		else
+			kfree(p[i]);
+	}
 }
 
 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
diff --git a/mm/slub.c b/mm/slub.c
index 4e4dfc4cdd0c..1675f42f17b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2833,29 +2833,46 @@ struct detached_freelist {
  * synchronization primitive. Look ahead in the array is limited due
  * to performance reasons.
  */
-static int build_detached_freelist(struct kmem_cache **s, size_t size,
-				   void **p, struct detached_freelist *df)
+static inline
+int build_detached_freelist(struct kmem_cache **s, size_t size,
+			    void **p, struct detached_freelist *df)
 {
 	size_t first_skipped_index = 0;
 	int lookahead = 3;
 	void *object;
+	struct page *page;
 
 	/* Always re-init detached_freelist */
 	df->page = NULL;
 
 	do {
 		object = p[--size];
+		/* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
 	} while (!object && size);
 
 	if (!object)
 		return 0;
 
-	/* Support for memcg, compiler can optimize this out */
-	*s = cache_from_obj(*s, object);
+	page = virt_to_head_page(object);
+	if (!*s) {
+		/* Handle kmalloc'ed objects */
+		if (unlikely(!PageSlab(page))) {
+			BUG_ON(!PageCompound(page));
+			kfree_hook(object);
+			__free_kmem_pages(page, compound_order(page));
+			p[size] = NULL; /* mark object processed */
+			return size;
+		}
+		/* Derive kmem_cache from object */
+		*s = page->slab_cache;
+	} else {
+		/* Support for memcg, compiler can optimize this out */
+		*s = cache_from_obj(*s, object);
+	}
 
 	/* Start new detached freelist */
+	df->page = page;
 	set_freepointer(*s, object, NULL);
-	df->page = virt_to_head_page(object);
 	df->tail = object;
 	df->freelist = object;
 	p[size] = NULL; /* mark object processed */
@@ -2906,6 +2923,31 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
+/* Note that interrupts must be enabled when calling this function.
+ * This function can also handle kmem_cache allocated object arrays
+ */
+// Will likely remove this function
+void kfree_bulk_export(size_t size, void **p)
+{
+	if (WARN_ON(!size))
+		return;
+
+	do {
+		struct detached_freelist df;
+		/* NULL here removes code in inlined build_detached_freelist */
+		struct kmem_cache *s = NULL;
+
+		size = build_detached_freelist(&s, size, p, &df);
+		if (unlikely(!df.page))
+			continue;
+
+		slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+//		slab_free(df.page->slab_cache, df.page, df.freelist, df.tail,
+//			  df.cnt, _RET_IP_);
+	} while (likely(size));
+}
+EXPORT_SYMBOL(kfree_bulk_export);
+
 /* Note that interrupts must be enabled when calling this function. */
 int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			  void **p)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ .
Don't email: email@kvack.org