All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-18  6:24 Vasily Averin
  2022-05-18 15:09 ` Shakeel Butt
                   ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-18  6:24 UTC (permalink / raw)
  To: Steven Rostedt, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song
  Cc: kernel, linux-kernel, Ingo Molnar, Andrew Morton, linux-mm,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds boolean "allocated" entry into trace output,
and set it to 'true' for calls used __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
v3:
 1) rework kmem_cache_alloc* tracepoints once again,
    added struct kmem_cache argument into existing templates,
	thanks to Matthew Wilcox
 2) updated the corresponding ding trace_* calls
 3) added boolean "allocated" entry into trace output,
	thanks to Roman
 4) updated patch subject and description

v2:
 1) handle kmem_cache_alloc_node(), thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters.
    NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
        and therefore it was replaced by kmalloc_node.
---
Now kmem tracing output looks like this:

kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
---
 include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
 mm/slab.c                   | 10 +++++-----
 mm/slab_common.c            |  9 ++++-----
 mm/slob.c                   |  8 ++++----
 mm/slub.c                   | 20 +++++++++----------
 5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..5bfeb6f276f1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags,
 		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
 		__field(	int,		node		)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+		__entry->node,
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..e5802445c7d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
 			       cachep->object_size, cachep->size, flags);
 
 	return ret;
@@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(_RET_IP_, ret,
+	trace_kmalloc(_RET_IP_, ret, cachep,
 		      size, cachep->size, flags);
 	return ret;
 }
@@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
 
@@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, cachep,
 			   size, cachep->size,
 			   flags, nodeid);
 	return ret;
@@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	ret = slab_alloc(cachep, NULL, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(caller, ret,
+	trace_kmalloc(caller, ret, cachep,
 		      size, cachep->size, flags);
 
 	return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..a345e8600e00 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@
 #include <asm/page.h>
 #include <linux/memcontrol.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
@@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = kmalloc_order(size, flags, order);
-	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..dbefa0da0dfc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		*m = size;
 		ret = (void *)m + minalign;
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, size + minalign, gfp, node);
 	} else {
 		unsigned int order = get_order(size);
@@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
@@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..9b10591646dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
@@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
-	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
@@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
 				    s->object_size, s->size, gfpflags, node);
 
 	return ret;
@@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, s,
 			   size, s->size, gfpflags, node);
 
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 
 	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
-	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);
 
-		trace_kmalloc_node(_RET_IP_, ret,
+		trace_kmalloc_node(_RET_IP_, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   flags, node);
 
@@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 
 	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc(caller, ret, size, s->size, gfpflags);
+	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
 
 	return ret;
 }
@@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, gfpflags, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   gfpflags, node);
 
@@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
 
 	return ret;
 }
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-18  6:24 [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints Vasily Averin
@ 2022-05-18 15:09 ` Shakeel Butt
  2022-05-18 15:45 ` Vasily Averin
  2022-05-18 20:04   ` Steven Rostedt
  2 siblings, 0 replies; 33+ messages in thread
From: Shakeel Butt @ 2022-05-18 15:09 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Steven Rostedt, Roman Gushchin, Vlastimil Babka, Matthew Wilcox,
	Hyeonggon Yoo, Muchun Song, kernel, LKML, Ingo Molnar,
	Andrew Morton, Linux MM, Joonsoo Kim, David Rientjes,
	Pekka Enberg, Christoph Lameter, Michal Hocko

On Tue, May 17, 2022 at 11:24 PM Vasily Averin <vvs@openvz.org> wrote:
>
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds boolean "allocated" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <vvs@openvz.org>

Acked-by: Shakeel Butt <shakeelb@google.com>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-18  6:24 [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints Vasily Averin
  2022-05-18 15:09 ` Shakeel Butt
@ 2022-05-18 15:45 ` Vasily Averin
  2022-05-18 20:04   ` Steven Rostedt
  2 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-18 15:45 UTC (permalink / raw)
  To: Steven Rostedt, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song
  Cc: kernel, linux-kernel, Ingo Molnar, Andrew Morton, linux-mm,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko

On 5/18/22 09:24, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "allocated" entry into trace output,

not "allocated" but "accounted"

> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-18  6:24 [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints Vasily Averin
@ 2022-05-18 20:04   ` Steven Rostedt
  2022-05-18 15:45 ` Vasily Averin
  2022-05-18 20:04   ` Steven Rostedt
  2 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2022-05-18 20:04 UTC (permalink / raw)
  To: Vasily Averin
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Wed, 18 May 2022 09:24:51 +0300
Vasily Averin <vvs@openvz.org> wrote:

FYI, the subject should be something like: mm/tracing:

Because "tracing:" is reserved for tracing infrastructure updates.

> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "allocated" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated the corresponding ding trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
> Now kmem tracing output looks like this:
> 
> kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
> kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
> kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
> mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
> kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);

Now you could make this even faster in the fast path and save just the
s->flags.

	__entry->sflags = s ? s->flags : 0;

>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")

And then have: "accounted=%s":

		(__entry->gfp_flags & __GFP_ACCOUNT) ||
		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"


-- Steve

>  );
>  

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-18 20:04   ` Steven Rostedt
  0 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2022-05-18 20:04 UTC (permalink / raw)
  To: Vasily Averin
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Wed, 18 May 2022 09:24:51 +0300
Vasily Averin <vvs@openvz.org> wrote:

FYI, the subject should be something like: mm/tracing:

Because "tracing:" is reserved for tracing infrastructure updates.

> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "allocated" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated the corresponding ding trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
> Now kmem tracing output looks like this:
> 
> kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
> kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
> kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
> mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
> kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);

Now you could make this even faster in the fast path and save just the
s->flags.

	__entry->sflags = s ? s->flags : 0;

>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")

And then have: "accounted=%s":

		(__entry->gfp_flags & __GFP_ACCOUNT) ||
		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"


-- Steve

>  );
>  


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-18 20:04   ` Steven Rostedt
@ 2022-05-19 11:35     ` Vasily Averin
  -1 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-19 11:35 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On 5/18/22 23:04, Steven Rostedt wrote:
> On Wed, 18 May 2022 09:24:51 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
> FYI, the subject should be something like: mm/tracing:
> Because "tracing:" is reserved for tracing infrastructure updates.

Thank you for noticing.

>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>  		__entry->bytes_req	= bytes_req;
>>  		__entry->bytes_alloc	= bytes_alloc;
>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>> +					  (s && s->flags & SLAB_ACCOUNT);
> 
> Now you could make this even faster in the fast path and save just the
> s->flags.
> 
> 	__entry->sflags = s ? s->flags : 0;
> 
>>  	),
>>  
>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>  		(void *)__entry->call_site,
>>  		__entry->ptr,
>>  		__entry->bytes_req,
>>  		__entry->bytes_alloc,
>> -		show_gfp_flags(__entry->gfp_flags))
>> +		show_gfp_flags(__entry->gfp_flags),
>> +		__entry->accounted ? "true" : "false")
> 
> And then have: "accounted=%s":
> 
> 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
> 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"

Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
Could you please explain why your variant is faster?

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-19 11:35     ` Vasily Averin
  0 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-19 11:35 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On 5/18/22 23:04, Steven Rostedt wrote:
> On Wed, 18 May 2022 09:24:51 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
> FYI, the subject should be something like: mm/tracing:
> Because "tracing:" is reserved for tracing infrastructure updates.

Thank you for noticing.

>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>  		__entry->bytes_req	= bytes_req;
>>  		__entry->bytes_alloc	= bytes_alloc;
>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>> +					  (s && s->flags & SLAB_ACCOUNT);
> 
> Now you could make this even faster in the fast path and save just the
> s->flags.
> 
> 	__entry->sflags = s ? s->flags : 0;
> 
>>  	),
>>  
>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>  		(void *)__entry->call_site,
>>  		__entry->ptr,
>>  		__entry->bytes_req,
>>  		__entry->bytes_alloc,
>> -		show_gfp_flags(__entry->gfp_flags))
>> +		show_gfp_flags(__entry->gfp_flags),
>> +		__entry->accounted ? "true" : "false")
> 
> And then have: "accounted=%s":
> 
> 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
> 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"

Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
Could you please explain why your variant is faster?

Thank you,
	Vasily Averin


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-19 11:35     ` Vasily Averin
@ 2022-05-19 14:03       ` Steven Rostedt
  -1 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2022-05-19 14:03 UTC (permalink / raw)
  To: Vasily Averin
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Thu, 19 May 2022 14:35:46 +0300
Vasily Averin <vvs@openvz.org> wrote:

> >> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> >>  		__entry->bytes_req	= bytes_req;
> >>  		__entry->bytes_alloc	= bytes_alloc;
> >>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> >> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> >> +					  (s && s->flags & SLAB_ACCOUNT);  
> > 
> > Now you could make this even faster in the fast path and save just the
> > s->flags.
> > 
> > 	__entry->sflags = s ? s->flags : 0;
> >   
> >>  	),
> >>  
> >> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> >> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
> >>  		(void *)__entry->call_site,
> >>  		__entry->ptr,
> >>  		__entry->bytes_req,
> >>  		__entry->bytes_alloc,
> >> -		show_gfp_flags(__entry->gfp_flags))
> >> +		show_gfp_flags(__entry->gfp_flags),
> >> +		__entry->accounted ? "true" : "false")  
> > 
> > And then have: "accounted=%s":
> > 
> > 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
> > 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"  
> 
> Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
> Could you please explain why your variant is faster?

Micro-optimization, grant you, but it is faster because it moves some of
the logic into the slow path (the read side), and takes it out of the fast
path (the write side).

The idea of tracing is to squeeze out every cycle we can to keep the
tracing overhead down.

But it's really up to you if you need that. I'm not going to let this be a
blocker. This is more of an FYI than anything else.

-- Steve

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-19 14:03       ` Steven Rostedt
  0 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2022-05-19 14:03 UTC (permalink / raw)
  To: Vasily Averin
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Thu, 19 May 2022 14:35:46 +0300
Vasily Averin <vvs@openvz.org> wrote:

> >> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> >>  		__entry->bytes_req	= bytes_req;
> >>  		__entry->bytes_alloc	= bytes_alloc;
> >>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> >> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> >> +					  (s && s->flags & SLAB_ACCOUNT);  
> > 
> > Now you could make this even faster in the fast path and save just the
> > s->flags.
> > 
> > 	__entry->sflags = s ? s->flags : 0;
> >   
> >>  	),
> >>  
> >> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> >> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
> >>  		(void *)__entry->call_site,
> >>  		__entry->ptr,
> >>  		__entry->bytes_req,
> >>  		__entry->bytes_alloc,
> >> -		show_gfp_flags(__entry->gfp_flags))
> >> +		show_gfp_flags(__entry->gfp_flags),
> >> +		__entry->accounted ? "true" : "false")  
> > 
> > And then have: "accounted=%s":
> > 
> > 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
> > 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"  
> 
> Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
> Could you please explain why your variant is faster?

Micro-optimization, grant you, but it is faster because it moves some of
the logic into the slow path (the read side), and takes it out of the fast
path (the write side).

The idea of tracing is to squeeze out every cycle we can to keep the
tracing overhead down.

But it's really up to you if you need that. I'm not going to let this be a
blocker. This is more of an FYI than anything else.

-- Steve


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-19 14:03       ` Steven Rostedt
@ 2022-05-19 16:29         ` Vasily Averin
  -1 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-19 16:29 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On 5/19/22 17:03, Steven Rostedt wrote:
> On Thu, 19 May 2022 14:35:46 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>>>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>>>  		__entry->bytes_req	= bytes_req;
>>>>  		__entry->bytes_alloc	= bytes_alloc;
>>>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>>>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>>>> +					  (s && s->flags & SLAB_ACCOUNT);  
>>>
>>> Now you could make this even faster in the fast path and save just the
>>> s->flags.
>>>
>>> 	__entry->sflags = s ? s->flags : 0;
>>>   
>>>>  	),
>>>>  
>>>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>>>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>>>  		(void *)__entry->call_site,
>>>>  		__entry->ptr,
>>>>  		__entry->bytes_req,
>>>>  		__entry->bytes_alloc,
>>>> -		show_gfp_flags(__entry->gfp_flags))
>>>> +		show_gfp_flags(__entry->gfp_flags),
>>>> +		__entry->accounted ? "true" : "false")  
>>>
>>> And then have: "accounted=%s":
>>>
>>> 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
>>> 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"  
>>
>> Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
>> Could you please explain why your variant is faster?
> 
> Micro-optimization, grant you, but it is faster because it moves some of
> the logic into the slow path (the read side), and takes it out of the fast
> path (the write side).
> 
> The idea of tracing is to squeeze out every cycle we can to keep the
> tracing overhead down.
> 
> But it's really up to you if you need that. I'm not going to let this be a
> blocker. This is more of an FYI than anything else.

Frankly speaking I vote for performance with both hands.
However I'm still would like to avoid new sparse warnings.
Christoph Hellwig just recently taught me, "never add '__force' before
thinking hard about them", but in this case I would need to use it three times.

I found that bitwise typecasts can be avoided by using translation unions. 

What do you think about following trick?

diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..f676612ca40f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -882,4 +882,14 @@ void __check_heap_object(const void *ptr, unsigned long n,
 }
 #endif
 
+union gfp_flags_u {
+	unsigned long ulong;
+	gfp_t flags;
+};
+
+union slab_flags_u {
+	unsigned int uint;
+	slab_flags_t sflags;
+};
+
 #endif /* MM_SLAB_H */

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..91632a61e16d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,18 +13,20 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
 		__field(	const void *,	ptr		)
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
-		__field(	unsigned long,	gfp_flags	)
+		__field_struct(	union gfp_flags_u,	gfp	)
+		__field_struct(	union slab_flags_u,	s	)
 	),
 
 	TP_fast_assign(
@@ -32,51 +34,57 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->ptr		= ptr;
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->gfp.flags	= gfp_flags;
+		__entry->s.sflags	= s ? s->flags : 0;
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp.ulong),
+		((__entry->gfp.flags & __GFP_ACCOUNT) ||
+		 (__entry->s.sflags & SLAB_ACCOUNT)) ? "true" : "false")
 );
 
Thank you,
	Vasily Averin

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-19 16:29         ` Vasily Averin
  0 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-19 16:29 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On 5/19/22 17:03, Steven Rostedt wrote:
> On Thu, 19 May 2022 14:35:46 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>>>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>>>  		__entry->bytes_req	= bytes_req;
>>>>  		__entry->bytes_alloc	= bytes_alloc;
>>>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>>>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>>>> +					  (s && s->flags & SLAB_ACCOUNT);  
>>>
>>> Now you could make this even faster in the fast path and save just the
>>> s->flags.
>>>
>>> 	__entry->sflags = s ? s->flags : 0;
>>>   
>>>>  	),
>>>>  
>>>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>>>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>>>  		(void *)__entry->call_site,
>>>>  		__entry->ptr,
>>>>  		__entry->bytes_req,
>>>>  		__entry->bytes_alloc,
>>>> -		show_gfp_flags(__entry->gfp_flags))
>>>> +		show_gfp_flags(__entry->gfp_flags),
>>>> +		__entry->accounted ? "true" : "false")  
>>>
>>> And then have: "accounted=%s":
>>>
>>> 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
>>> 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"  
>>
>> Unfortunately this returns back sparse warnings about bitwise gfp_t and slab_flags_t casts.
>> Could you please explain why your variant is faster?
> 
> Micro-optimization, grant you, but it is faster because it moves some of
> the logic into the slow path (the read side), and takes it out of the fast
> path (the write side).
> 
> The idea of tracing is to squeeze out every cycle we can to keep the
> tracing overhead down.
> 
> But it's really up to you if you need that. I'm not going to let this be a
> blocker. This is more of an FYI than anything else.

Frankly speaking I vote for performance with both hands.
However I'm still would like to avoid new sparse warnings.
Christoph Hellwig just recently taught me, "never add '__force' before
thinking hard about them", but in this case I would need to use it three times.

I found that bitwise typecasts can be avoided by using translation unions. 

What do you think about following trick?

diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..f676612ca40f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -882,4 +882,14 @@ void __check_heap_object(const void *ptr, unsigned long n,
 }
 #endif
 
+union gfp_flags_u {
+	unsigned long ulong;
+	gfp_t flags;
+};
+
+union slab_flags_u {
+	unsigned int uint;
+	slab_flags_t sflags;
+};
+
 #endif /* MM_SLAB_H */

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..91632a61e16d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,18 +13,20 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
 		__field(	const void *,	ptr		)
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
-		__field(	unsigned long,	gfp_flags	)
+		__field_struct(	union gfp_flags_u,	gfp	)
+		__field_struct(	union slab_flags_u,	s	)
 	),
 
 	TP_fast_assign(
@@ -32,51 +34,57 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->ptr		= ptr;
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->gfp.flags	= gfp_flags;
+		__entry->s.sflags	= s ? s->flags : 0;
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp.ulong),
+		((__entry->gfp.flags & __GFP_ACCOUNT) ||
+		 (__entry->s.sflags & SLAB_ACCOUNT)) ? "true" : "false")
 );
 
Thank you,
	Vasily Averin


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-19 16:29         ` Vasily Averin
@ 2022-05-19 16:32           ` Steven Rostedt
  -1 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2022-05-19 16:32 UTC (permalink / raw)
  To: Vasily Averin
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Thu, 19 May 2022 19:29:36 +0300
Vasily Averin <vvs@openvz.org> wrote:

> Frankly speaking I vote for performance with both hands.
> However I'm still would like to avoid new sparse warnings.
> Christoph Hellwig just recently taught me, "never add '__force' before
> thinking hard about them", but in this case I would need to use it three times.
> 
> I found that bitwise typecasts can be avoided by using translation unions. 
> 
> What do you think about following trick?

It's really up to you memory management folks. Although I may need to
update libtraceevent to handle the union case. That may be a bit tricky.

-- Steve

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-19 16:32           ` Steven Rostedt
  0 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2022-05-19 16:32 UTC (permalink / raw)
  To: Vasily Averin
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Thu, 19 May 2022 19:29:36 +0300
Vasily Averin <vvs@openvz.org> wrote:

> Frankly speaking I vote for performance with both hands.
> However I'm still would like to avoid new sparse warnings.
> Christoph Hellwig just recently taught me, "never add '__force' before
> thinking hard about them", but in this case I would need to use it three times.
> 
> I found that bitwise typecasts can be avoided by using translation unions. 
> 
> What do you think about following trick?

It's really up to you memory management folks. Although I may need to
update libtraceevent to handle the union case. That may be a bit tricky.

-- Steve


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-19 16:32           ` Steven Rostedt
@ 2022-05-21 18:32             ` Vasily Averin
  -1 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-21 18:32 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On 5/19/22 19:32, Steven Rostedt wrote:
> On Thu, 19 May 2022 19:29:36 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>> Frankly speaking I vote for performance with both hands.
>> However I'm still would like to avoid new sparse warnings.
>> Christoph Hellwig just recently taught me, "never add '__force' before
>> thinking hard about them", but in this case I would need to use it three times.
>>
>> I found that bitwise typecasts can be avoided by using translation unions. 
>>
>> What do you think about following trick?
> 
> It's really up to you memory management folks. Although I may need to
> update libtraceevent to handle the union case. That may be a bit tricky.

In this case, I would like to keep sub-optimal v3 patch version.
If necessary, we can improve the performance later, and right now
I will send v4 with minor patch description changes and ask
Andrew Morton to include it in the mm tree.

Thank you,
	Vasily Averin


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints
@ 2022-05-21 18:32             ` Vasily Averin
  0 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-21 18:32 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: YoPOhRctb8wwbmY5, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Hyeonggon Yoo, Muchun Song, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On 5/19/22 19:32, Steven Rostedt wrote:
> On Thu, 19 May 2022 19:29:36 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>> Frankly speaking I vote for performance with both hands.
>> However I'm still would like to avoid new sparse warnings.
>> Christoph Hellwig just recently taught me, "never add '__force' before
>> thinking hard about them", but in this case I would need to use it three times.
>>
>> I found that bitwise typecasts can be avoided by using translation unions. 
>>
>> What do you think about following trick?
> 
> It's really up to you memory management folks. Although I may need to
> update libtraceevent to handle the union case. That may be a bit tricky.

In this case, I would like to keep sub-optimal v3 patch version.
If necessary, we can improve the performance later, and right now
I will send v4 with minor patch description changes and ask
Andrew Morton to include it in the mm tree.

Thank you,
	Vasily Averin



^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-21 18:32             ` Vasily Averin
  (?)
@ 2022-05-21 18:36             ` Vasily Averin
  2022-05-22  3:51               ` Hyeonggon Yoo
                                 ` (3 more replies)
  -1 siblings, 4 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-21 18:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kernel, linux-kernel, Steven Rostedt, Ingo Molnar, linux-mm,
	Shakeel Butt, Roman Gushchin, Vlastimil Babka, Matthew Wilcox,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko, Hyeonggon Yoo, Muchun Song

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds boolean "accounted" entry into trace output,
and set it to 'true' for calls used __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
---
v4:
 1) replaced in patch descripion: "accounted" instead of "allocated"
 2) added "Acked-by" from Shakeel,
 3) re-addressed to akpm@

v3:
 1) rework kmem_cache_alloc* tracepoints once again,
    added struct kmem_cache argument into existing templates,
	thanks to Matthew Wilcox
 2) updated according trace_* calls
 3) added boolean "allocated" entry into trace output,
	thanks to Roman
 4) updated patch subject and description

v2:
 1) handle kmem_cache_alloc_node(), thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters.
    NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
        and therefore it was replaced by kmalloc_node.
---
 include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
 mm/slab.c                   | 10 +++++-----
 mm/slab_common.c            |  9 ++++-----
 mm/slob.c                   |  8 ++++----
 mm/slub.c                   | 20 +++++++++----------
 5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..5bfeb6f276f1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags,
 		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
 		__field(	int,		node		)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+		__entry->node,
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..e5802445c7d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
 			       cachep->object_size, cachep->size, flags);
 
 	return ret;
@@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(_RET_IP_, ret,
+	trace_kmalloc(_RET_IP_, ret, cachep,
 		      size, cachep->size, flags);
 	return ret;
 }
@@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
 
@@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, cachep,
 			   size, cachep->size,
 			   flags, nodeid);
 	return ret;
@@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	ret = slab_alloc(cachep, NULL, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(caller, ret,
+	trace_kmalloc(caller, ret, cachep,
 		      size, cachep->size, flags);
 
 	return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..a345e8600e00 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@
 #include <asm/page.h>
 #include <linux/memcontrol.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
@@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = kmalloc_order(size, flags, order);
-	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..dbefa0da0dfc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		*m = size;
 		ret = (void *)m + minalign;
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, size + minalign, gfp, node);
 	} else {
 		unsigned int order = get_order(size);
@@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
@@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..9b10591646dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
@@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
-	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
@@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
 				    s->object_size, s->size, gfpflags, node);
 
 	return ret;
@@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, s,
 			   size, s->size, gfpflags, node);
 
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 
 	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
-	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);
 
-		trace_kmalloc_node(_RET_IP_, ret,
+		trace_kmalloc_node(_RET_IP_, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   flags, node);
 
@@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 
 	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc(caller, ret, size, s->size, gfpflags);
+	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
 
 	return ret;
 }
@@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, gfpflags, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   gfpflags, node);
 
@@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
 
 	return ret;
 }
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-21 18:36             ` [PATCH v4] " Vasily Averin
@ 2022-05-22  3:51               ` Hyeonggon Yoo
  2022-05-22  4:33                 ` Vasily Averin
  2022-05-23 13:12               ` Vlastimil Babka
                                 ` (2 subsequent siblings)
  3 siblings, 1 reply; 33+ messages in thread
From: Hyeonggon Yoo @ 2022-05-22  3:51 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>

May I ask what information do you want to collect
using this patch?

I pointed it in another thread but I'm not sure
printing SLAB_* flags in these tracepoint is good :(

If we decide to do that, it would be better to print
something like:
slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
instead of just printing 'accounted=true/false'. This patch is too
specific to SLAB_ACCOUNT.

And if what you want to know is just total slab memory that is accounted,
what about adding something like  SlabAccounted in /proc/meminfo?

Thanks.

> ---
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||

I doubt someone will pass __GFP_ACCOUNT in gfpflags
when calling kmem_cache_alloc*().


> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmalloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DECLARE_EVENT_CLASS(kmem_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags,
>  		 int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
>  		__field(	int,		node		)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>  		__entry->node		= node;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
>  		show_gfp_flags(__entry->gfp_flags),
> -		__entry->node)
> +		__entry->node,
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  TRACE_EVENT(kfree,
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..e5802445c7d6 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
>  			       cachep->object_size, cachep->size, flags);
>  
>  	return ret;
> @@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
>  	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(_RET_IP_, ret,
> +	trace_kmalloc(_RET_IP_, ret, cachep,
>  		      size, cachep->size, flags);
>  	return ret;
>  }
> @@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
>  {
>  	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
>  				    cachep->object_size, cachep->size,
>  				    flags, nodeid);
>  
> @@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
>  	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, cachep,
>  			   size, cachep->size,
>  			   flags, nodeid);
>  	return ret;
> @@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
>  	ret = slab_alloc(cachep, NULL, flags, size, caller);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(caller, ret,
> +	trace_kmalloc(caller, ret, cachep,
>  		      size, cachep->size, flags);
>  
>  	return ret;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..a345e8600e00 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -25,13 +25,12 @@
>  #include <asm/page.h>
>  #include <linux/memcontrol.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/kmem.h>
> -
>  #include "internal.h"
> -
>  #include "slab.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kmem.h>
> +
>  enum slab_state slab_state;
>  LIST_HEAD(slab_caches);
>  DEFINE_MUTEX(slab_mutex);
> @@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
>  void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
>  {
>  	void *ret = kmalloc_order(size, flags, order);
> -	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
> +	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
>  	return ret;
>  }
>  EXPORT_SYMBOL(kmalloc_order_trace);
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..dbefa0da0dfc 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  		*m = size;
>  		ret = (void *)m + minalign;
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, size + minalign, gfp, node);
>  	} else {
>  		unsigned int order = get_order(size);
> @@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  			gfp |= __GFP_COMP;
>  		ret = slob_new_pages(gfp, order, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << order, gfp, node);
>  	}
>  
> @@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
>  
>  	if (c->size < PAGE_SIZE) {
>  		b = slob_alloc(c->size, flags, c->align, node, 0);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    SLOB_UNITS(c->size) * SLOB_UNIT,
>  					    flags, node);
>  	} else {
>  		b = slob_new_pages(flags, get_order(c->size), node);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    PAGE_SIZE << get_order(c->size),
>  					    flags, node);
>  	}
> diff --git a/mm/slub.c b/mm/slub.c
> index ed5c2c03a47a..9b10591646dd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
>  				s->size, gfpflags);
>  
>  	return ret;
> @@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
>  void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
>  {
>  	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
>  	return ret;
>  }
> @@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
>  				    s->object_size, s->size, gfpflags, node);
>  
>  	return ret;
> @@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, s,
>  			   size, s->size, gfpflags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
> @@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
>  
>  	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
>  
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, flags, node);
>  
> -		trace_kmalloc_node(_RET_IP_, ret,
> +		trace_kmalloc_node(_RET_IP_, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   flags, node);
>  
> @@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  
>  	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
> +	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
>  	ret = slab_alloc(s, NULL, gfpflags, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc(caller, ret, size, s->size, gfpflags);
> +	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
>  
>  	return ret;
>  }
> @@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, gfpflags, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   gfpflags, node);
>  
> @@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
> +	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
>  
>  	return ret;
>  }
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-22  3:51               ` Hyeonggon Yoo
@ 2022-05-22  4:33                 ` Vasily Averin
  2022-05-22  5:19                   ` Hyeonggon Yoo
  2022-05-22 20:09                   ` Steven Rostedt
  0 siblings, 2 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-22  4:33 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On 5/22/22 06:51, Hyeonggon Yoo wrote:
> On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
>> Slab caches marked with SLAB_ACCOUNT force accounting for every
>> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
>> Unfortunately, at the moment this flag is not visible in ftrace output,
>> and this makes it difficult to analyze the accounted allocations.
>>
>> This patch adds boolean "accounted" entry into trace output,
>> and set it to 'true' for calls used __GFP_ACCOUNT flag and
>> for allocations from caches marked with SLAB_ACCOUNT.
>>
>> Signed-off-by: Vasily Averin <vvs@openvz.org>
>> Acked-by: Shakeel Butt <shakeelb@google.com>
> 
> May I ask what information do you want to collect
> using this patch?

I analyze ftrace output to understand which allocations are accounted.
When some userspace operation consume memory, it's important to account
most part of memory (>2/3 of all) to avoid misuse inside memcg-limited
contianers. Otherwise memcg-limited container can consume significant 
portion of host memory, trigger global OOM, wake up OOM-killer and kill
random processes on host.
If memory consumers are accounted, it leads to memcg-OOM only. 

Now kmem tracing output looks like this:

kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true

As you can see, without new field it is quite hard to understand, 
is last allocation accounted.

This analyze helps me to identify most important allocations for given scenario
and enable accounting for selected allocations.

An example of this analyze you can found here:
https://lore.kernel.org/all/d28233ee-bccb-7bc3-c2ec-461fd7f95e6a@openvz.org/

> I pointed it in another thread but I'm not sure
> printing SLAB_* flags in these tracepoint is good :(

I'm not sure I understand your arguments correctly. Could you please
explain your position in more details?

> If we decide to do that, it would be better to print
> something like:
> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
> instead of just printing 'accounted=true/false'. This patch is too
> specific to SLAB_ACCOUNT.

Any extra output degrades performance.
For my task it's not important to know SLAB flags, I just need to understand,
is current allocation accounted or not.

> And if what you want to know is just total slab memory that is accounted,
> what about adding something like  SlabAccounted in /proc/meminfo?

It is not enough for me. I need to have per-process allocation information.

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-22  4:33                 ` Vasily Averin
@ 2022-05-22  5:19                   ` Hyeonggon Yoo
  2022-05-22  5:42                     ` Shakeel Butt
  2022-05-22 18:53                     ` Vasily Averin
  2022-05-22 20:09                   ` Steven Rostedt
  1 sibling, 2 replies; 33+ messages in thread
From: Hyeonggon Yoo @ 2022-05-22  5:19 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On Sun, May 22, 2022 at 07:33:08AM +0300, Vasily Averin wrote:
> On 5/22/22 06:51, Hyeonggon Yoo wrote:
> > On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> >> Slab caches marked with SLAB_ACCOUNT force accounting for every
> >> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> >> Unfortunately, at the moment this flag is not visible in ftrace output,
> >> and this makes it difficult to analyze the accounted allocations.
> >>
> >> This patch adds boolean "accounted" entry into trace output,
> >> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> >> for allocations from caches marked with SLAB_ACCOUNT.
> >>
> >> Signed-off-by: Vasily Averin <vvs@openvz.org>
> >> Acked-by: Shakeel Butt <shakeelb@google.com>
> > 
> > May I ask what information do you want to collect
> > using this patch?
> 
> I analyze ftrace output to understand which allocations are accounted.
> When some userspace operation consume memory, it's important to account
> most part of memory (>2/3 of all) to avoid misuse inside memcg-limited
> contianers. Otherwise memcg-limited container can consume significant 
> portion of host memory, trigger global OOM, wake up OOM-killer and kill
> random processes on host.
> If memory consumers are accounted, it leads to memcg-OOM only. 
> 
> Now kmem tracing output looks like this:
> 
> kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
> kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
> kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
> mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
> kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
> 
> As you can see, without new field it is quite hard to understand, 
> is last allocation accounted.
>
> This analyze helps me to identify most important allocations for given scenario
> and enable accounting for selected allocations.
> 
> An example of this analyze you can found here:
> https://lore.kernel.org/all/d28233ee-bccb-7bc3-c2ec-461fd7f95e6a@openvz.org/
> 

Thank you for detailed explanation. Makes sense to me.

> > If we decide to do that, it would be better to print
> > something like:
> > slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
> > instead of just printing 'accounted=true/false'. This patch is too
> > specific to SLAB_ACCOUNT.
> 
> Any extra output degrades performance.

No strong opinion but just a concern that maybe later someone want add
something similar like 'reclaimable=true/false', 'dma=true/false', ...
and I would prefer more general solution. (especially if we'll not
change tracepoints after release because of backward compatibility)

> For my task it's not important to know SLAB flags, I just need to understand,
> is current allocation accounted or not.

SLAB_ACCOUNT, SLAB_RECLAIM_ACCOUNT, SLAB_DMA, ... etc are SLAB flags.

'if current allocation is accounted or not' depends on SLAB_ACCOUNT
flag is set or not.

Thanks,
Hyeonggon

> > And if what you want to know is just total slab memory that is accounted,
> > what about adding something like  SlabAccounted in /proc/meminfo?
> 
> It is not enough for me. I need to have per-process allocation information.
> 
> Thank you,
> 	Vasily Averin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-22  5:19                   ` Hyeonggon Yoo
@ 2022-05-22  5:42                     ` Shakeel Butt
  2022-05-22 18:53                     ` Vasily Averin
  1 sibling, 0 replies; 33+ messages in thread
From: Shakeel Butt @ 2022-05-22  5:42 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: Vasily Averin, Andrew Morton, kernel, LKML, Steven Rostedt,
	Ingo Molnar, Linux MM, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On Sat, May 21, 2022 at 10:19 PM Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
[...]
>
> No strong opinion but just a concern that maybe later someone want add
> something similar like 'reclaimable=true/false', 'dma=true/false', ...
> and I would prefer more general solution. (especially if we'll not
> change tracepoints after release because of backward compatibility)
>

There is no contract for tracepoints to be stable and can be changed.

> > For my task it's not important to know SLAB flags, I just need to understand,
> > is current allocation accounted or not.
>
> SLAB_ACCOUNT, SLAB_RECLAIM_ACCOUNT, SLAB_DMA, ... etc are SLAB flags.
>
> 'if current allocation is accounted or not' depends on SLAB_ACCOUNT
> flag is set or not.
>

allocation can be accounted due to __GFP_ACCOUNT as well.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-22  5:19                   ` Hyeonggon Yoo
  2022-05-22  5:42                     ` Shakeel Butt
@ 2022-05-22 18:53                     ` Vasily Averin
  1 sibling, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-22 18:53 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On 5/22/22 08:19, Hyeonggon Yoo wrote:
> On Sun, May 22, 2022 at 07:33:08AM +0300, Vasily Averin wrote:
>> On 5/22/22 06:51, Hyeonggon Yoo wrote:
>>> If we decide to do that, it would be better to print
>>> something like:
>>> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
>>> instead of just printing 'accounted=true/false'. This patch is too
>>> specific to SLAB_ACCOUNT.
>>
>> Any extra output degrades performance.
> 
> No strong opinion but just a concern that maybe later someone want add
> something similar like 'reclaimable=true/false', 'dma=true/false', ...
> and I would prefer more general solution. (especially if we'll not
> change tracepoints after release because of backward compatibility)

I would like to quote Steven Rostedt:
https://lore.kernel.org/all/20220515180640.0ae2ead5@gandalf.local.home/
"
> Trace events are not ABI, but if we don't have a strong reason to break it,
> I'd preserve the old order.

Ideally everyone should be using libtraceevent which will parse the format
file for the needed entries.

Nothing (important) should be parsing the raw ascii from the trace files.
It's slow and unreliable. The raw format (trace_pipe_raw) files, along with
libtraceevent will handle fining the fields you are looking for, even if
the fields move around (internally or externally).

Then there's trace-cruncher (a python script that uses libtracefs and
libtraceevent) that will work too.

  https://github.com/vmware/trace-cruncher
"

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-22  4:33                 ` Vasily Averin
  2022-05-22  5:19                   ` Hyeonggon Yoo
@ 2022-05-22 20:09                   ` Steven Rostedt
  2022-05-23  4:03                     ` Vasily Averin
  1 sibling, 1 reply; 33+ messages in thread
From: Steven Rostedt @ 2022-05-22 20:09 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Hyeonggon Yoo, Andrew Morton, kernel, linux-kernel, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On Sun, 22 May 2022 07:33:08 +0300
Vasily Averin <vvs@openvz.org> wrote:

> > slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
> > instead of just printing 'accounted=true/false'. This patch is too
> > specific to SLAB_ACCOUNT.  
> 
> Any extra output degrades performance.
> For my task it's not important to know SLAB flags, I just need to understand,
> is current allocation accounted or not.

If you do save the flags in the event, you can report that on output with
the __print_flags() macro:

 TP_fast_assign(
	[..]
	__entry->sflags = s ? s->flags;
	[..]
 )
 TP_printk("... slab_flags=%s ..",
	[..]
	__print_flags(sflags, "|",
		{ SLAB_CONSISTENCY_CHECKS, "CONSISTENCY_CHECKS" },
		{ SLAB_RED_ZONE, "RED_ZONE" },
		{ SLAB_POISON, "POISON" },
		{ SLAB_HWCACHE_ALIGN, "HWCACHE_ALIGN" },
		{ SLAB_CACHE_DMA, "CACHE_DMA" },
		{ SLAB_CACHE_DMA32, "CACHE_DMA32" },
		{ SLAB_STORE_USER, "STORE_USER" },
		{ SLAB_PANIC, "PANIC" }), ... )


And you get the flag output looking nicely, and all the processing is done
on the reader path.

That's if you find it useful at all.

-- Steve

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-22 20:09                   ` Steven Rostedt
@ 2022-05-23  4:03                     ` Vasily Averin
  0 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-23  4:03 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Hyeonggon Yoo, Andrew Morton, kernel, linux-kernel, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On 5/22/22 23:09, Steven Rostedt wrote:
> On Sun, 22 May 2022 07:33:08 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>>> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
>>> instead of just printing 'accounted=true/false'. This patch is too
>>> specific to SLAB_ACCOUNT.  
>>
>> Any extra output degrades performance.
>> For my task it's not important to know SLAB flags, I just need to understand,
>> is current allocation accounted or not.
> 
> If you do save the flags in the event, you can report that on output with
> the __print_flags() macro:
> 
>  TP_fast_assign(
> 	[..]
> 	__entry->sflags = s ? s->flags;
> 	[..]
>  )
>  TP_printk("... slab_flags=%s ..",
> 	[..]
> 	__print_flags(sflags, "|",
> 		{ SLAB_CONSISTENCY_CHECKS, "CONSISTENCY_CHECKS" },
> 		{ SLAB_RED_ZONE, "RED_ZONE" },
> 		{ SLAB_POISON, "POISON" },
> 		{ SLAB_HWCACHE_ALIGN, "HWCACHE_ALIGN" },
> 		{ SLAB_CACHE_DMA, "CACHE_DMA" },
> 		{ SLAB_CACHE_DMA32, "CACHE_DMA32" },
> 		{ SLAB_STORE_USER, "STORE_USER" },
> 		{ SLAB_PANIC, "PANIC" }), ... )
> 
> 
> And you get the flag output looking nicely, and all the processing is done
> on the reader path.
> 
> That's if you find it useful at all.

Thank you for explanation!
Yes, we can do it however I really doubt that any other slab flags are of interest to anyone.

Btw. in this form slab flags array causes sparse warnings,
because SLAB_* are defined as bitwise slab_flags_t.
This should be translated to unsigned long similarly to gfp_t flags.

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-21 18:36             ` [PATCH v4] " Vasily Averin
  2022-05-22  3:51               ` Hyeonggon Yoo
@ 2022-05-23 13:12               ` Vlastimil Babka
  2022-05-30  7:47                 ` [PATCH v5] " Vasily Averin
  2022-05-25  1:34               ` [PATCH v4] tracing: " Roman Gushchin
  2022-05-25  7:33               ` Hyeonggon Yoo
  3 siblings, 1 reply; 33+ messages in thread
From: Vlastimil Babka @ 2022-05-23 13:12 UTC (permalink / raw)
  To: Vasily Averin, Andrew Morton
  Cc: kernel, linux-kernel, Steven Rostedt, Ingo Molnar, linux-mm,
	Shakeel Butt, Roman Gushchin, Matthew Wilcox, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko,
	Hyeonggon Yoo, Muchun Song

On 5/21/22 20:36, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> ---
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)

I'd prefer the slab tree, given the files it touches and expected conflict
with Hyeonggon Yoo's v3 of kmalloc unification series. BTW the suggested
split of kmalloc/kmem_cache_alloc tracepoints [1] will be useful if
implemented, as we won't have to pass NULL kmem_cache pointers from some of
the kmalloc callers.
I would add it there after 5.19-rc1 to avoid conflict with kmem.h changes in
mm-stable, that should be part of mainline before rc1.

Thanks!

[1] https://lore.kernel.org/all/bbb97e3c-e597-dd6e-e213-55bc1779d901@suse.cz/

> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmalloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DECLARE_EVENT_CLASS(kmem_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags,
>  		 int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
>  		__field(	int,		node		)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>  		__entry->node		= node;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
>  		show_gfp_flags(__entry->gfp_flags),
> -		__entry->node)
> +		__entry->node,
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  TRACE_EVENT(kfree,
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..e5802445c7d6 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
>  			       cachep->object_size, cachep->size, flags);
>  
>  	return ret;
> @@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
>  	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(_RET_IP_, ret,
> +	trace_kmalloc(_RET_IP_, ret, cachep,
>  		      size, cachep->size, flags);
>  	return ret;
>  }
> @@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
>  {
>  	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
>  				    cachep->object_size, cachep->size,
>  				    flags, nodeid);
>  
> @@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
>  	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, cachep,
>  			   size, cachep->size,
>  			   flags, nodeid);
>  	return ret;
> @@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
>  	ret = slab_alloc(cachep, NULL, flags, size, caller);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(caller, ret,
> +	trace_kmalloc(caller, ret, cachep,
>  		      size, cachep->size, flags);
>  
>  	return ret;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..a345e8600e00 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -25,13 +25,12 @@
>  #include <asm/page.h>
>  #include <linux/memcontrol.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/kmem.h>
> -
>  #include "internal.h"
> -
>  #include "slab.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kmem.h>
> +
>  enum slab_state slab_state;
>  LIST_HEAD(slab_caches);
>  DEFINE_MUTEX(slab_mutex);
> @@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
>  void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
>  {
>  	void *ret = kmalloc_order(size, flags, order);
> -	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
> +	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
>  	return ret;
>  }
>  EXPORT_SYMBOL(kmalloc_order_trace);
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..dbefa0da0dfc 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  		*m = size;
>  		ret = (void *)m + minalign;
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, size + minalign, gfp, node);
>  	} else {
>  		unsigned int order = get_order(size);
> @@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  			gfp |= __GFP_COMP;
>  		ret = slob_new_pages(gfp, order, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << order, gfp, node);
>  	}
>  
> @@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
>  
>  	if (c->size < PAGE_SIZE) {
>  		b = slob_alloc(c->size, flags, c->align, node, 0);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    SLOB_UNITS(c->size) * SLOB_UNIT,
>  					    flags, node);
>  	} else {
>  		b = slob_new_pages(flags, get_order(c->size), node);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    PAGE_SIZE << get_order(c->size),
>  					    flags, node);
>  	}
> diff --git a/mm/slub.c b/mm/slub.c
> index ed5c2c03a47a..9b10591646dd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
>  				s->size, gfpflags);
>  
>  	return ret;
> @@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
>  void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
>  {
>  	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
>  	return ret;
>  }
> @@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
>  				    s->object_size, s->size, gfpflags, node);
>  
>  	return ret;
> @@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, s,
>  			   size, s->size, gfpflags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
> @@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
>  
>  	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
>  
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, flags, node);
>  
> -		trace_kmalloc_node(_RET_IP_, ret,
> +		trace_kmalloc_node(_RET_IP_, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   flags, node);
>  
> @@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  
>  	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
> +	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
>  	ret = slab_alloc(s, NULL, gfpflags, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc(caller, ret, size, s->size, gfpflags);
> +	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
>  
>  	return ret;
>  }
> @@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, gfpflags, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   gfpflags, node);
>  
> @@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
> +	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
>  
>  	return ret;
>  }


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-21 18:36             ` [PATCH v4] " Vasily Averin
  2022-05-22  3:51               ` Hyeonggon Yoo
  2022-05-23 13:12               ` Vlastimil Babka
@ 2022-05-25  1:34               ` Roman Gushchin
  2022-05-25  7:33               ` Hyeonggon Yoo
  3 siblings, 0 replies; 33+ messages in thread
From: Roman Gushchin @ 2022-05-25  1:34 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Vlastimil Babka, Matthew Wilcox,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko, Hyeonggon Yoo, Muchun Song

On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> ---
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)

LGTM

Acked-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-21 18:36             ` [PATCH v4] " Vasily Averin
                                 ` (2 preceding siblings ...)
  2022-05-25  1:34               ` [PATCH v4] tracing: " Roman Gushchin
@ 2022-05-25  7:33               ` Hyeonggon Yoo
  2022-05-25  8:24                 ` Vasily Averin
  3 siblings, 1 reply; 33+ messages in thread
From: Hyeonggon Yoo @ 2022-05-25  7:33 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> ---


Maybe I worried a bit much...

I failed to apply it on slab/for-next and v5.18. What tree is it based on?

> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v4] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-25  7:33               ` Hyeonggon Yoo
@ 2022-05-25  8:24                 ` Vasily Averin
  0 siblings, 0 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-25  8:24 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: Andrew Morton, kernel, linux-kernel, Steven Rostedt, Ingo Molnar,
	linux-mm, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko, Muchun Song

On 5/25/22 10:33, Hyeonggon Yoo wrote:
> On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
>> Slab caches marked with SLAB_ACCOUNT force accounting for every
>> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
>> Unfortunately, at the moment this flag is not visible in ftrace output,
>> and this makes it difficult to analyze the accounted allocations.
>>
>> This patch adds boolean "accounted" entry into trace output,
>> and set it to 'true' for calls used __GFP_ACCOUNT flag and
>> for allocations from caches marked with SLAB_ACCOUNT.
>>
>> Signed-off-by: Vasily Averin <vvs@openvz.org>
>> Acked-by: Shakeel Butt <shakeelb@google.com>
>> ---
> 
> 
> Maybe I worried a bit much...
> 
> I failed to apply it on slab/for-next and v5.18. What tree is it based on?

Its context depends on changes in my other patches already included into mm-unstable.

>> v4:
>>  1) replaced in patch descripion: "accounted" instead of "allocated"
>>  2) added "Acked-by" from Shakeel,
>>  3) re-addressed to akpm@
>>
>> v3:
>>  1) rework kmem_cache_alloc* tracepoints once again,
>>     added struct kmem_cache argument into existing templates,
>> 	thanks to Matthew Wilcox
>>  2) updated according trace_* calls
>>  3) added boolean "allocated" entry into trace output,
>> 	thanks to Roman
>>  4) updated patch subject and description
>>
>> v2:
>>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>>     of current cachep->*size parameters.
>>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>>         and therefore it was replaced by kmalloc_node.
>> ---
>>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>>  mm/slab.c                   | 10 +++++-----
>>  mm/slab_common.c            |  9 ++++-----
>>  mm/slob.c                   |  8 ++++----
>>  mm/slub.c                   | 20 +++++++++----------
>>  5 files changed, 47 insertions(+), 38 deletions(-)


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-23 13:12               ` Vlastimil Babka
@ 2022-05-30  7:47                 ` Vasily Averin
  2022-05-30  8:25                   ` Muchun Song
  2022-05-31 11:46                   ` Hyeonggon Yoo
  0 siblings, 2 replies; 33+ messages in thread
From: Vasily Averin @ 2022-05-30  7:47 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: kernel, linux-kernel, Steven Rostedt, Ingo Molnar, Andrew Morton,
	linux-mm, Shakeel Butt, Roman Gushchin, Matthew Wilcox,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko, Hyeonggon Yoo, Muchun Song

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds boolean "accounted" entry into trace output,
and set it to 'true' for calls used __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>

---
v5:
 1) re-based to current upstream (v5.18-11267-gb00ed48bb0a7)
 2) added "Acked-by" from Roman
 3) re-addressed to slab maintaiers

v4:
 1) replaced in patch descripion: "accounted" instead of "allocated"
 2) added "Acked-by" from Shakeel,
 3) re-addressed to akpm@

v3:
 1) rework kmem_cache_alloc* tracepoints once again,
    added struct kmem_cache argument into existing templates,
	thanks to Matthew Wilcox
 2) updated according trace_* calls
 3) added boolean "allocated" entry into trace output,
	thanks to Roman
 4) updated patch subject and description

v2:
 1) handle kmem_cache_alloc_node(), thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters.
    NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
        and therefore it was replaced by kmalloc_node.
---
 include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
 mm/slab.c                   | 10 +++++-----
 mm/slab_common.c            |  9 ++++-----
 mm/slob.c                   |  8 ++++----
 mm/slub.c                   | 20 +++++++++----------
 5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f76668305ac5..8d8736a5b654 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags,
 		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
 		__field(	int,		node		)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+		__entry->node,
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..1f7864ff41e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3478,7 +3478,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
 			       cachep->object_size, cachep->size, flags);
 
 	return ret;
@@ -3567,7 +3567,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(_RET_IP_, ret,
+	trace_kmalloc(_RET_IP_, ret, cachep,
 		      size, cachep->size, flags);
 	return ret;
 }
@@ -3592,7 +3592,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
 
@@ -3611,7 +3611,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, cachep,
 			   size, cachep->size,
 			   flags, nodeid);
 	return ret;
@@ -3694,7 +3694,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	ret = slab_alloc(cachep, NULL, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(caller, ret,
+	trace_kmalloc(caller, ret, cachep,
 		      size, cachep->size, flags);
 
 	return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 77c3adf40e50..6c9aac5d8f4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -26,13 +26,12 @@
 #include <linux/memcontrol.h>
 #include <linux/stackdepot.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
@@ -959,7 +958,7 @@ EXPORT_SYMBOL(kmalloc_order);
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = kmalloc_order(size, flags, order);
-	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..56421fe461c4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -507,7 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		*m = size;
 		ret = (void *)m + minalign;
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, size + minalign, gfp, node);
 	} else {
 		unsigned int order = get_order(size);
@@ -516,7 +516,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
@@ -616,12 +616,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..c323dd916b08 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3228,7 +3228,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
@@ -3251,7 +3251,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
-	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
@@ -3263,7 +3263,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
 				    s->object_size, s->size, gfpflags, node);
 
 	return ret;
@@ -3277,7 +3277,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, s,
 			   size, s->size, gfpflags, node);
 
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4412,7 +4412,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 
 	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
-	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4446,7 +4446,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);
 
-		trace_kmalloc_node(_RET_IP_, ret,
+		trace_kmalloc_node(_RET_IP_, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   flags, node);
 
@@ -4460,7 +4460,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 
 	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4919,7 +4919,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc(caller, ret, size, s->size, gfpflags);
+	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
 
 	return ret;
 }
@@ -4935,7 +4935,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, gfpflags, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   gfpflags, node);
 
@@ -4950,7 +4950,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
 
 	return ret;
 }
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-30  7:47                 ` [PATCH v5] " Vasily Averin
@ 2022-05-30  8:25                   ` Muchun Song
  2022-05-31 11:46                   ` Hyeonggon Yoo
  1 sibling, 0 replies; 33+ messages in thread
From: Muchun Song @ 2022-05-30  8:25 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Vlastimil Babka, kernel, linux-kernel, Steven Rostedt,
	Ingo Molnar, Andrew Morton, linux-mm, Shakeel Butt,
	Roman Gushchin, Matthew Wilcox, Joonsoo Kim, David Rientjes,
	Pekka Enberg, Christoph Lameter, Michal Hocko, Hyeonggon Yoo

On Mon, May 30, 2022 at 10:47:26AM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>

Acked-by: Muchun Song <songmuchun@bytedance.com>

Thanks.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-30  7:47                 ` [PATCH v5] " Vasily Averin
  2022-05-30  8:25                   ` Muchun Song
@ 2022-05-31 11:46                   ` Hyeonggon Yoo
  2022-05-31 16:58                     ` Vasily Averin
  1 sibling, 1 reply; 33+ messages in thread
From: Hyeonggon Yoo @ 2022-05-31 11:46 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Vlastimil Babka, kernel, linux-kernel, Steven Rostedt,
	Ingo Molnar, Andrew Morton, linux-mm, Shakeel Butt,
	Roman Gushchin, Matthew Wilcox, Joonsoo Kim, David Rientjes,
	Pekka Enberg, Christoph Lameter, Michal Hocko, Muchun Song

On Mon, May 30, 2022 at 10:47:26AM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
> 
> ---
> v5:
>  1) re-based to current upstream (v5.18-11267-gb00ed48bb0a7)
>  2) added "Acked-by" from Roman
>  3) re-addressed to slab maintaiers
> 
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
> 

Looks good to me.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

a small comment:
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  

It doesn't make sense for SLOB to print accounted=true because SLOB does
not support object accounting.

>  DEFINE_EVENT(kmem_alloc, kmalloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DECLARE_EVENT_CLASS(kmem_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags,
>  		 int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
>  		__field(	int,		node		)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>  		__entry->node		= node;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v5] tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-31 11:46                   ` Hyeonggon Yoo
@ 2022-05-31 16:58                     ` Vasily Averin
  2022-06-03  3:21                       ` [PATCH mm v6] mm/tracing: " Vasily Averin
  0 siblings, 1 reply; 33+ messages in thread
From: Vasily Averin @ 2022-05-31 16:58 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: Vlastimil Babka, kernel, linux-kernel, Steven Rostedt,
	Ingo Molnar, Andrew Morton, linux-mm, Shakeel Butt,
	Roman Gushchin, Matthew Wilcox, Joonsoo Kim, David Rientjes,
	Pekka Enberg, Christoph Lameter, Michal Hocko, Muchun Song

On 5/31/22 14:46, Hyeonggon Yoo wrote:
> On Mon, May 30, 2022 at 10:47:26AM +0300, Vasily Averin wrote:
> Looks good to me.
> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 
> a small comment:
>>  
>>  	TP_fast_assign(
>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>  		__entry->bytes_req	= bytes_req;
>>  		__entry->bytes_alloc	= bytes_alloc;
>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>> +					  (s && s->flags & SLAB_ACCOUNT);
>>  	),
>>  
> 
> It doesn't make sense for SLOB to print accounted=true because SLOB does
> not support object accounting.

Thank you very much for this comment.
SLAB_ACCOUNT is not defined for SLOB, but __GFP_ACCOUNT really can incorrectly 
set this field to true.
I'll think how to handle this correctly.

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH mm v6] mm/tracing: add 'accounted' entry into output of allocation tracepoints
  2022-05-31 16:58                     ` Vasily Averin
@ 2022-06-03  3:21                       ` Vasily Averin
  2022-06-15  9:41                         ` Vlastimil Babka
  0 siblings, 1 reply; 33+ messages in thread
From: Vasily Averin @ 2022-06-03  3:21 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: kernel, linux-kernel, Andrew Morton, linux-mm, Shakeel Butt,
	Roman Gushchin, Michal Koutný,
	Steven Rostedt, Ingo Molnar, Michal Hocko, Muchun Song,
	Hyeonggon Yoo, Matthew Wilcox, Joonsoo Kim, David Rientjes,
	Pekka Enberg, Christoph Lameter, cgroups

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds boolean "accounted" entry into trace output,
and set it to 'true' for calls used __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.
Set it to 'false' if accounting is disabled in configs.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

---
v6:
 1) subject changed from "tracing:" to "mm/tracing:"
 2) set flag to 'false' if accounting is disabled in configs
 3) added approvals from Muchun Song and Hyeonggon Yoo

v5:
 1) re-based to current upstream (v5.18-11267-gb00ed48bb0a7)
 2) added "Acked-by" from Roman
 3) re-addressed to slab maintaiers

v4:
 1) replaced in patch descripion: "accounted" instead of "allocated"
 2) added "Acked-by" from Shakeel,
 3) re-addressed to akpm@

v3:
 1) rework kmem_cache_alloc* tracepoints once again,
    added struct kmem_cache argument into existing templates,
        thanks to Matthew Wilcox
 2) updated according trace_* calls
 3) added boolean "allocated" entry into trace output,
        thanks to Roman
 4) updated patch subject and description

v2:
 1) handle kmem_cache_alloc_node(), thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters.
    NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
        and therefore it was replaced by kmalloc_node.
---
 include/trace/events/kmem.h | 39 ++++++++++++++++++++++++-------------
 mm/slab.c                   | 10 +++++-----
 mm/slab_common.c            |  9 ++++-----
 mm/slob.c                   |  8 ++++----
 mm/slub.c                   | 20 +++++++++----------
 5 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f76668305ac5..97f60ad82453 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -33,42 +35,47 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->accounted	= IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+					  ((gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT)) : false;
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags,
 		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -77,6 +84,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
 		__field(	int,		node		)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -86,33 +94,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+		__entry->node,
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..1f7864ff41e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3478,7 +3478,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
 			       cachep->object_size, cachep->size, flags);
 
 	return ret;
@@ -3567,7 +3567,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(_RET_IP_, ret,
+	trace_kmalloc(_RET_IP_, ret, cachep,
 		      size, cachep->size, flags);
 	return ret;
 }
@@ -3592,7 +3592,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
 
@@ -3611,7 +3611,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, cachep,
 			   size, cachep->size,
 			   flags, nodeid);
 	return ret;
@@ -3694,7 +3694,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	ret = slab_alloc(cachep, NULL, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(caller, ret,
+	trace_kmalloc(caller, ret, cachep,
 		      size, cachep->size, flags);
 
 	return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 77c3adf40e50..6c9aac5d8f4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -26,13 +26,12 @@
 #include <linux/memcontrol.h>
 #include <linux/stackdepot.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
@@ -959,7 +958,7 @@ EXPORT_SYMBOL(kmalloc_order);
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = kmalloc_order(size, flags, order);
-	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..56421fe461c4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -507,7 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		*m = size;
 		ret = (void *)m + minalign;
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, size + minalign, gfp, node);
 	} else {
 		unsigned int order = get_order(size);
@@ -516,7 +516,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
@@ -616,12 +616,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..c323dd916b08 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3228,7 +3228,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
@@ -3251,7 +3251,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
-	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
@@ -3263,7 +3263,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
 				    s->object_size, s->size, gfpflags, node);
 
 	return ret;
@@ -3277,7 +3277,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, s,
 			   size, s->size, gfpflags, node);
 
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4412,7 +4412,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 
 	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
-	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4446,7 +4446,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);
 
-		trace_kmalloc_node(_RET_IP_, ret,
+		trace_kmalloc_node(_RET_IP_, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   flags, node);
 
@@ -4460,7 +4460,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 
 	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4919,7 +4919,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc(caller, ret, size, s->size, gfpflags);
+	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
 
 	return ret;
 }
@@ -4935,7 +4935,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, gfpflags, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   gfpflags, node);
 
@@ -4950,7 +4950,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
 
 	return ret;
 }
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH mm v6] mm/tracing: add 'accounted' entry into output of allocation tracepoints
  2022-06-03  3:21                       ` [PATCH mm v6] mm/tracing: " Vasily Averin
@ 2022-06-15  9:41                         ` Vlastimil Babka
  0 siblings, 0 replies; 33+ messages in thread
From: Vlastimil Babka @ 2022-06-15  9:41 UTC (permalink / raw)
  To: Vasily Averin
  Cc: kernel, linux-kernel, Andrew Morton, linux-mm, Shakeel Butt,
	Roman Gushchin, Michal Koutný,
	Steven Rostedt, Ingo Molnar, Michal Hocko, Muchun Song,
	Hyeonggon Yoo, Matthew Wilcox, Joonsoo Kim, David Rientjes,
	Pekka Enberg, Christoph Lameter, cgroups

On 6/3/22 05:21, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> Set it to 'false' if accounting is disabled in configs.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
> Acked-by: Muchun Song <songmuchun@bytedance.com>
> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Thanks, added to slab/for-5.20/tracing

> 
> ---
> v6:
>  1) subject changed from "tracing:" to "mm/tracing:"
>  2) set flag to 'false' if accounting is disabled in configs

Looks like you forgot the kmem_alloc_node variant, fixed up locally:

--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -94,8 +94,9 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
                __entry->bytes_alloc    = bytes_alloc;
                __entry->gfp_flags      = (__force unsigned long)gfp_flags;
                __entry->node           = node;
-               __entry->accounted      = (gfp_flags & __GFP_ACCOUNT) ||
-                                         (s && s->flags & SLAB_ACCOUNT);
+               __entry->accounted      = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+                                         ((gfp_flags & __GFP_ACCOUNT) ||
+                                         (s && s->flags & SLAB_ACCOUNT)) : false;
        ),
 
        TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2022-06-15  9:41 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-18  6:24 [PATCH v3] tracing: add 'accounted' entry into output of allocation tracepoints Vasily Averin
2022-05-18 15:09 ` Shakeel Butt
2022-05-18 15:45 ` Vasily Averin
2022-05-18 20:04 ` Steven Rostedt
2022-05-18 20:04   ` Steven Rostedt
2022-05-19 11:35   ` Vasily Averin
2022-05-19 11:35     ` Vasily Averin
2022-05-19 14:03     ` Steven Rostedt
2022-05-19 14:03       ` Steven Rostedt
2022-05-19 16:29       ` Vasily Averin
2022-05-19 16:29         ` Vasily Averin
2022-05-19 16:32         ` Steven Rostedt
2022-05-19 16:32           ` Steven Rostedt
2022-05-21 18:32           ` Vasily Averin
2022-05-21 18:32             ` Vasily Averin
2022-05-21 18:36             ` [PATCH v4] " Vasily Averin
2022-05-22  3:51               ` Hyeonggon Yoo
2022-05-22  4:33                 ` Vasily Averin
2022-05-22  5:19                   ` Hyeonggon Yoo
2022-05-22  5:42                     ` Shakeel Butt
2022-05-22 18:53                     ` Vasily Averin
2022-05-22 20:09                   ` Steven Rostedt
2022-05-23  4:03                     ` Vasily Averin
2022-05-23 13:12               ` Vlastimil Babka
2022-05-30  7:47                 ` [PATCH v5] " Vasily Averin
2022-05-30  8:25                   ` Muchun Song
2022-05-31 11:46                   ` Hyeonggon Yoo
2022-05-31 16:58                     ` Vasily Averin
2022-06-03  3:21                       ` [PATCH mm v6] mm/tracing: " Vasily Averin
2022-06-15  9:41                         ` Vlastimil Babka
2022-05-25  1:34               ` [PATCH v4] tracing: " Roman Gushchin
2022-05-25  7:33               ` Hyeonggon Yoo
2022-05-25  8:24                 ` Vasily Averin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.