linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute
       [not found] <1613543513-11965-1-git-send-email-faiyazm@codeaurora.org>
@ 2021-02-17 11:38 ` Faiyaz Mohammed
       [not found] ` <13df1c88-3848-1969-5424-33a927ec033e@suse.cz>
  1 sibling, 0 replies; 4+ messages in thread
From: Faiyaz Mohammed @ 2021-02-17 11:38 UTC (permalink / raw)
  To: cl, penberg, rientjes, iamjoonsoo.kim, akpm, vbabka, willy,
	linux-kernel, linux-mm
  Cc: vinmenon

+linux-mm, linux-kernel.

On 2/17/2021 12:01 PM, Faiyaz Mohammed wrote:
> Reading the sysfs slab alloc_calls and free_calls files returns the
> available object owners, but the size of these files is limited to
> PAGE_SIZE because of the limitation of sysfs attributes. As a result,
> only partial owner info is returned, which is not sufficient to
> debug/account the slab memory, and the alloc_calls output does not
> match /proc/slabinfo.
> 
> To remove the PAGE_SIZE limitation, convert the sysfs slab
> alloc_calls and free_calls files to bin attributes.
> 
> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
> ---
>  mm/slub.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 63 insertions(+), 21 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index b22a4b1..71cfe3b 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -37,6 +37,9 @@
>  
>  #include <trace/events/kmem.h>
>  
> +#define TRACE_ENTRY_MAX 80
> +#define TRACKS_PER_PAGE  ((PAGE_SIZE - KSYM_SYMBOL_LEN - 100) / TRACE_ENTRY_MAX)
> +
>  #include "internal.h"
>  
>  /*
> @@ -4748,6 +4751,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
>  	struct loc_track t = { 0, 0, NULL };
>  	int node;
>  	struct kmem_cache_node *n;
> +	unsigned int previous_read_count = 0;
>  
>  	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
>  			     GFP_KERNEL)) {
> @@ -4756,6 +4760,11 @@ static int list_locations(struct kmem_cache *s, char *buf,
>  	/* Push back cpu slabs */
>  	flush_all(s);
>  
> +	if (offset != 0)
> +		previous_read_count = (offset / TRACE_ENTRY_MAX);
> +
> +	memset(buf, 0, PAGE_SIZE);
> +
>  	for_each_kmem_cache_node(s, node, n) {
>  		unsigned long flags;
>  		struct page *page;
> @@ -4771,48 +4780,62 @@ static int list_locations(struct kmem_cache *s, char *buf,
>  		spin_unlock_irqrestore(&n->list_lock, flags);
>  	}
>  
> -	for (i = 0; i < t.count; i++) {
> +	for (i = previous_read_count; i < t.count; i++) {
>  		struct location *l = &t.loc[i];
> +		unsigned int cur_len = 0;
>  
> -		len += sysfs_emit_at(buf, len, "%7ld ", l->count);
> +		cur_len += sysfs_emit_at(buf, cur_len + len, "%7ld ", l->count);
>  
>  		if (l->addr)
> -			len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
> +			cur_len += sysfs_emit_at(buf, cur_len + len, "%pS", (void *)l->addr);
>  		else
> -			len += sysfs_emit_at(buf, len, "<not-available>");
> +			cur_len += sysfs_emit_at(buf, cur_len + len, "<not-available>");
>  
>  		if (l->sum_time != l->min_time)
> -			len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
> +			cur_len += sysfs_emit_at(buf, cur_len + len, " age=%ld/%ld/%ld",
>  					     l->min_time,
>  					     (long)div_u64(l->sum_time,
>  							   l->count),
>  					     l->max_time);
>  		else
> -			len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
> +			cur_len += sysfs_emit_at(buf, cur_len + len, " age=%ld", l->min_time);
>  
>  		if (l->min_pid != l->max_pid)
> -			len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
> +			cur_len += sysfs_emit_at(buf, cur_len + len, " pid=%ld-%ld",
>  					     l->min_pid, l->max_pid);
>  		else
> -			len += sysfs_emit_at(buf, len, " pid=%ld",
> +			cur_len += sysfs_emit_at(buf, cur_len + len, " pid=%ld",
>  					     l->min_pid);
>  
>  		if (num_online_cpus() > 1 &&
>  		    !cpumask_empty(to_cpumask(l->cpus)))
> -			len += sysfs_emit_at(buf, len, " cpus=%*pbl",
> +			cur_len += sysfs_emit_at(buf, cur_len + len, " cpus=%*pbl",
>  					     cpumask_pr_args(to_cpumask(l->cpus)));
>  
>  		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
> -			len += sysfs_emit_at(buf, len, " nodes=%*pbl",
> +			cur_len += sysfs_emit_at(buf, cur_len + len, " nodes=%*pbl",
>  					     nodemask_pr_args(&l->nodes));
>  
> +		if (cur_len >= TRACE_ENTRY_MAX)
> +			cur_len -= (cur_len % TRACE_ENTRY_MAX) - 1;
> +		else if (cur_len < TRACE_ENTRY_MAX)
> +			cur_len += TRACE_ENTRY_MAX - (cur_len % TRACE_ENTRY_MAX) - 1;
> +
> +		len += cur_len;
> +
>  		len += sysfs_emit_at(buf, len, "\n");
> +
> +		if (i >= (previous_read_count + TRACKS_PER_PAGE))
> +			break;
> +
>  	}
>  
> -	free_loc_track(&t);
> -	if (!t.count)
> -		len += sysfs_emit_at(buf, len, "No data\n");
> +	if (((previous_read_count > t.count) | (i >= t.count)) && (offset != 0))
> +		len = 0;
> +	else if (!t.count)
> +		len += sprintf(buf, "No data\n");
>  
> +	free_loc_track(&t);
>  	return len;
>  }
>  #endif	/* CONFIG_SLUB_DEBUG */
> @@ -5280,21 +5303,33 @@ static ssize_t validate_store(struct kmem_cache *s,
>  }
>  SLAB_ATTR(validate);
>  
> -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
> +static ssize_t alloc_calls_read(struct file *filp, struct kobject *kobj,
> +				struct bin_attribute *bin_attr, char *buf,
> +					loff_t offset, size_t count)
>  {
> +	struct kmem_cache *s;
> +
> +	s = to_slab(kobj);
>  	if (!(s->flags & SLAB_STORE_USER))
>  		return -ENOSYS;
> -	return list_locations(s, buf, TRACK_ALLOC);
> +
> +	return list_locations(s, buf, offset, TRACK_ALLOC);
>  }
> -SLAB_ATTR_RO(alloc_calls);
> +BIN_ATTR_RO(alloc_calls, 0);
>  
> -static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
> +static ssize_t free_calls_read(struct file *filp, struct kobject *kobj,
> +				struct bin_attribute *bin_attr, char *buf,
> +					loff_t offset, size_t count)
>  {
> +	struct kmem_cache *s;
> +
> +	s = to_slab(kobj);
>  	if (!(s->flags & SLAB_STORE_USER))
>  		return -ENOSYS;
> -	return list_locations(s, buf, TRACK_FREE);
> +
> +	return list_locations(s, buf, offset, TRACK_FREE);
>  }
> -SLAB_ATTR_RO(free_calls);
> +BIN_ATTR_RO(free_calls, 0);
>  #endif /* CONFIG_SLUB_DEBUG */
>  
>  #ifdef CONFIG_FAILSLAB
> @@ -5430,6 +5465,14 @@ STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
>  STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
>  #endif	/* CONFIG_SLUB_STATS */
>  
> +
> +static struct bin_attribute *slab_bin_attrs[] = {
> +#ifdef CONFIG_SLUB_DEBUG
> +	&bin_attr_alloc_calls,
> +	&bin_attr_free_calls,
> +#endif
> +};
> +
>  static struct attribute *slab_attrs[] = {
>  	&slab_size_attr.attr,
>  	&object_size_attr.attr,
> @@ -5458,8 +5501,6 @@ static struct attribute *slab_attrs[] = {
>  	&poison_attr.attr,
>  	&store_user_attr.attr,
>  	&validate_attr.attr,
> -	&alloc_calls_attr.attr,
> -	&free_calls_attr.attr,
>  #endif
>  #ifdef CONFIG_ZONE_DMA
>  	&cache_dma_attr.attr,
> @@ -5505,6 +5546,7 @@ static struct attribute *slab_attrs[] = {
>  
>  static const struct attribute_group slab_attr_group = {
>  	.attrs = slab_attrs,
> +	.bin_attrs = slab_bin_attrs,
>  };
>  
>  static ssize_t slab_attr_show(struct kobject *kobj,
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute
       [not found]   ` <YD45e70b48gyXkIg@kroah.com>
@ 2021-03-19  5:28     ` Faiyaz Mohammed
  2021-03-19  6:54       ` Greg Kroah-Hartman
  2021-03-19  9:40       ` Vlastimil Babka
  0 siblings, 2 replies; 4+ messages in thread
From: Faiyaz Mohammed @ 2021-03-19  5:28 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Vlastimil Babka
  Cc: cl, penberg, rientjes, iamjoonsoo.kim, akpm, willy, vinmenon,
	Peter Zijlstra, linux-mm

Hi,

Sorry for the late response!

On 3/2/2021 6:41 PM, Greg Kroah-Hartman wrote:
> On Tue, Mar 02, 2021 at 01:59:46PM +0100, Vlastimil Babka wrote:
>> On 2/17/21 7:31 AM, Faiyaz Mohammed wrote:
>>> Reading the sys slab alloc_calls, free_calls returns the available object
>>> owners, but the size of this file is limited to PAGE_SIZE
>>> because of the limitation of sysfs attributes, it is returning the
>>> partial owner info, which is not sufficient to debug/account the slab
>>> memory and alloc_calls output is not matching with /proc/slabinfo.
>>>
>>> To remove the PAGE_SIZE limitation converted the sys slab
>>> alloc_calls, free_calls to bin attribute.
>>>
>>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
>>
>> After discussing this with Greg and PeterZ, sysfs should be one value per file,
>> and is just not proper API for this kind of info. We should reimplement at least
>> these clearly debugging "attributes" in debugfs, where they belong, instead of
>> trying to hack around the limitation in sysfs.
> 
> sysfs is _REQUIRED_ to be "one value per file", any kernel code that
> abuses this needs to be fixed up.
> 
> Why are these slab files in sysfs at all anyway?  They all feel like
> debugging stuff to me, why not move everything to debugfs?  Would make
> for a lot less code overall.
> 
Yes, we can move the /sys/kernel/slab/kmemcache/alloc_calls and
/sys/kernel/slab/kmemcache/free_calls implementation to debugfs, but is
it okay to move only alloc_calls and free_calls, or do we have to move
the whole sysfs interface to debugfs?

If we are moving only the alloc_calls/free_calls interface to debugfs, then I
think we can put the data for all slab objects into a single file.

For example: /sys/kernel/debug/slab/alloc_calls, which will print the data
for all slab objects.

Example Output: alloc_list: call_site=__request_region+0xb4/0x2f0
count=228 object_size=128 slab_size=640 slab_name=kmalloc-128

or

We can keep it like the current sysfs interface, with separate
alloc_calls/free_calls traces per kmem cache.

Which one would be better?

> 

Thanks and regards,
Mohammed Faiyaz


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute
  2021-03-19  5:28     ` Faiyaz Mohammed
@ 2021-03-19  6:54       ` Greg Kroah-Hartman
  2021-03-19  9:40       ` Vlastimil Babka
  1 sibling, 0 replies; 4+ messages in thread
From: Greg Kroah-Hartman @ 2021-03-19  6:54 UTC (permalink / raw)
  To: Faiyaz Mohammed
  Cc: Vlastimil Babka, cl, penberg, rientjes, iamjoonsoo.kim, akpm,
	willy, vinmenon, Peter Zijlstra, linux-mm

On Fri, Mar 19, 2021 at 10:58:55AM +0530, Faiyaz Mohammed wrote:
> Hi,
> 
> Sorry for late response!
> 
> On 3/2/2021 6:41 PM, Greg Kroah-Hartman wrote:
> > On Tue, Mar 02, 2021 at 01:59:46PM +0100, Vlastimil Babka wrote:
> >> On 2/17/21 7:31 AM, Faiyaz Mohammed wrote:
> >>> Reading the sys slab alloc_calls, free_calls returns the available object
> >>> owners, but the size of this file is limited to PAGE_SIZE
> >>> because of the limitation of sysfs attributes, it is returning the
> >>> partial owner info, which is not sufficient to debug/account the slab
> >>> memory and alloc_calls output is not matching with /proc/slabinfo.
> >>>
> >>> To remove the PAGE_SIZE limitation converted the sys slab
> >>> alloc_calls, free_calls to bin attribute.
> >>>
> >>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
> >>
> >> After discussing this with Greg and PeterZ, sysfs should be one value per file,
> >> and is just not proper API for this kind of info. We should reimplement at least
> >> these clearly debugging "attributes" in debugfs, where they belong, instead of
> >> trying to hack around the limitation in sysfs.
> > 
> > sysfs is _REQUIRED_ to be "one value per file", any kernel code that
> > abuses this needs to be fixed up.
> > 
> > Why are these slab files in sysfs at all anyway?  They all feel like
> > debugging stuff to me, why not move everything to debugfs?  Would make
> > for a lot less code overall.
> > 
> Yes, we can move the /sys/kernel/slab/kmemcache/alloc_calls and
> /sys/kernel/slab/kmemcache/free_calls implementation to debugfs but is
> it okay to move only alloc_calls and free_calls? or we have to move
> whole sysfs interface to debugfs?.

sysfs files should only have "one value" in them.  Anything that
violates that rule, should be moved to debugfs.

thanks,

greg k-h


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute
  2021-03-19  5:28     ` Faiyaz Mohammed
  2021-03-19  6:54       ` Greg Kroah-Hartman
@ 2021-03-19  9:40       ` Vlastimil Babka
  1 sibling, 0 replies; 4+ messages in thread
From: Vlastimil Babka @ 2021-03-19  9:40 UTC (permalink / raw)
  To: Faiyaz Mohammed, Greg Kroah-Hartman
  Cc: cl, penberg, rientjes, iamjoonsoo.kim, akpm, willy, vinmenon,
	Peter Zijlstra, linux-mm

On 3/19/21 6:28 AM, Faiyaz Mohammed wrote:
> Hi,
> 
> Sorry for late response!
> 
> On 3/2/2021 6:41 PM, Greg Kroah-Hartman wrote:
>> On Tue, Mar 02, 2021 at 01:59:46PM +0100, Vlastimil Babka wrote:
>>> On 2/17/21 7:31 AM, Faiyaz Mohammed wrote:
>>>> Reading the sys slab alloc_calls, free_calls returns the available object
>>>> owners, but the size of this file is limited to PAGE_SIZE
>>>> because of the limitation of sysfs attributes, it is returning the
>>>> partial owner info, which is not sufficient to debug/account the slab
>>>> memory and alloc_calls output is not matching with /proc/slabinfo.
>>>>
>>>> To remove the PAGE_SIZE limitation converted the sys slab
>>>> alloc_calls, free_calls to bin attribute.
>>>>
>>>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
>>>
>>> After discussing this with Greg and PeterZ, sysfs should be one value per file,
>>> and is just not proper API for this kind of info. We should reimplement at least
>>> these clearly debugging "attributes" in debugfs, where they belong, instead of
>>> trying to hack around the limitation in sysfs.
>> 
>> sysfs is _REQUIRED_ to be "one value per file", any kernel code that
>> abuses this needs to be fixed up.
>> 
>> Why are these slab files in sysfs at all anyway?  They all feel like
>> debugging stuff to me, why not move everything to debugfs?  Would make
>> for a lot less code overall.
>> 
> Yes, we can move the /sys/kernel/slab/kmemcache/alloc_calls and
> /sys/kernel/slab/kmemcache/free_calls implementation to debugfs but is
> it okay to move only alloc_calls and free_calls? or we have to move
> whole sysfs interface to debugfs?.

I don't think we need to move everything, just files where it makes sense.

> If we are moving only alloc_calls/free_calls interface to debugfs then I
> think we can add all slab objects data into single file.
> 
> For example: /sys/kernel/debugfs/slab/alloc_calls, which will print all
> slab objects data.
> 
> Example Output: alloc_list: call_site=__request_region+0xb4/0x2f0
> count=228 object_size=128 slab_size=640 slab_name=kmalloc-128

I wouldn't do this, as processing all caches will have a large overhead, and then
somebody interested in a single cache would throw most of the info away.

> or
> 
> We can have just like current sysfs interface, have separate
> alloc_calls/free_calls traces per kmem cache.
> 
> Which one would be better?.
> 
>> 
> 
> Thanks and regards,
> Mohammed Faiyaz
> 



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-03-19  9:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1613543513-11965-1-git-send-email-faiyazm@codeaurora.org>
2021-02-17 11:38 ` [PATCH v2] mm: slub: Convert sys slab alloc_calls, free_calls to bin attribute Faiyaz Mohammed
     [not found] ` <13df1c88-3848-1969-5424-33a927ec033e@suse.cz>
     [not found]   ` <YD45e70b48gyXkIg@kroah.com>
2021-03-19  5:28     ` Faiyaz Mohammed
2021-03-19  6:54       ` Greg Kroah-Hartman
2021-03-19  9:40       ` Vlastimil Babka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).