Linux-mm Archive on lore.kernel.org
From: Wen Yang <wenyang@linux.alibaba.com>
To: Christoph Lameter <cl@linux.com>,
	Pekka Enberg <penberg@kernel.org>,
	David Rientjes <rientjes@google.com>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Xunlei Pang <xlpang@linux.alibaba.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mm/slub: Detach node lock from counting free objects
Date: Sat, 8 Feb 2020 11:03:15 +0800
Message-ID: <5373ce28-c369-4e40-11dd-b269e4d2cb24@linux.alibaba.com> (raw)
In-Reply-To: <20200201031502.92218-1-wenyang@linux.alibaba.com>

Hi,

I would greatly appreciate it if you could give me some feedback on this patch.

--

Best wishes,
Wen


On 2020/2/1 11:15 AM, Wen Yang wrote:
> The lock protecting the node partial list is taken when counting the
> free objects resident in that list. This introduces locking contention
> when pages are moved between the CPU and node partial lists in the
> allocation path on another CPU. As a result, reading "/proc/slabinfo"
> can block slab allocation on another CPU for a while, up to 200ms in
> extreme cases. If the slab objects carry network packets targeting a
> far-end disk array, this causes block IO jitter.
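> 
> For reference, the free-object count currently goes through the
> count_partial() helper, which holds the node's list_lock while walking
> the whole partial list. A simplified sketch of that helper (not
> necessarily the exact upstream code) is:
> 
> 	static unsigned long count_partial(struct kmem_cache_node *n,
> 					   int (*get_count)(struct page *))
> 	{
> 		unsigned long flags;
> 		unsigned long x = 0;
> 		struct page *page;
> 
> 		/* blocks partial-list updates from the allocation path */
> 		spin_lock_irqsave(&n->list_lock, flags);
> 		list_for_each_entry(page, &n->partial, slab_list)
> 			x += get_count(page);
> 		spin_unlock_irqrestore(&n->list_lock, flags);
> 		return x;
> 	}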
>
> This patch fixes the block IO jitter by caching the total number of
> in-use objects per node in advance, so that the value can be retrieved
> on reading "/proc/slabinfo" without taking the node partial list lock.
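> 
> With the cached counter, the slabinfo read side per node reduces to a
> plain atomic read, roughly (illustrative only, mirroring the diff below):
> 
> 	nr_inuse += atomic_long_read(&n->total_inuse);	/* no list_lock taken */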
>
> Signed-off-by: Wen Yang <wenyang@linux.alibaba.com>
> Cc: Christoph Lameter <cl@linux.com>
> Cc: Pekka Enberg <penberg@kernel.org>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Xunlei Pang <xlpang@linux.alibaba.com>
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org
> ---
>   mm/slab.h |  1 +
>   mm/slub.c | 42 +++++++++++++++++++++++++-----------------
>   2 files changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index 7e94700aa78c..27d22837f7ff 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -619,6 +619,7 @@ struct kmem_cache_node {
>   #ifdef CONFIG_SLUB_DEBUG
>   	atomic_long_t nr_slabs;
>   	atomic_long_t total_objects;
> +	atomic_long_t total_inuse;
>   	struct list_head full;
>   #endif
>   #endif
> diff --git a/mm/slub.c b/mm/slub.c
> index 503e11b1c4e1..67640e797550 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1060,7 +1060,8 @@ static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
>   	return atomic_long_read(&n->nr_slabs);
>   }
>   
> -static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
> +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects,
> +				  int inuse)
>   {
>   	struct kmem_cache_node *n = get_node(s, node);
>   
> @@ -1073,14 +1074,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
>   	if (likely(n)) {
>   		atomic_long_inc(&n->nr_slabs);
>   		atomic_long_add(objects, &n->total_objects);
> +		atomic_long_add(inuse, &n->total_inuse);
>   	}
>   }
> -static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
> +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects,
> +				  int inuse)
>   {
>   	struct kmem_cache_node *n = get_node(s, node);
>   
>   	atomic_long_dec(&n->nr_slabs);
>   	atomic_long_sub(objects, &n->total_objects);
> +	atomic_long_sub(inuse, &n->total_inuse);
>   }
>   
>   /* Object debug checks for alloc/free paths */
> @@ -1395,9 +1399,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
>   static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
>   							{ return 0; }
>   static inline void inc_slabs_node(struct kmem_cache *s, int node,
> -							int objects) {}
> +							int objects,
> +							int inuse) {}
>   static inline void dec_slabs_node(struct kmem_cache *s, int node,
> -							int objects) {}
> +							int objects,
> +							int inuse) {}
>   
>   #endif /* CONFIG_SLUB_DEBUG */
>   
> @@ -1708,7 +1714,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
>   	if (!page)
>   		return NULL;
>   
> -	inc_slabs_node(s, page_to_nid(page), page->objects);
> +	inc_slabs_node(s, page_to_nid(page), page->objects, page->inuse);
>   
>   	return page;
>   }
> @@ -1768,7 +1774,9 @@ static void free_slab(struct kmem_cache *s, struct page *page)
>   
>   static void discard_slab(struct kmem_cache *s, struct page *page)
>   {
> -	dec_slabs_node(s, page_to_nid(page), page->objects);
> +	int inuse = page->objects;
> +
> +	dec_slabs_node(s, page_to_nid(page), page->objects, inuse);
>   	free_slab(s, page);
>   }
>   
> @@ -2396,9 +2404,9 @@ static inline int node_match(struct page *page, int node)
>   }
>   
>   #ifdef CONFIG_SLUB_DEBUG
> -static int count_free(struct page *page)
> +static inline unsigned long node_nr_inuse(struct kmem_cache_node *n)
>   {
> -	return page->objects - page->inuse;
> +	return atomic_long_read(&n->total_inuse);
>   }
>   
>   static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
> @@ -2448,14 +2456,14 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
>   	for_each_kmem_cache_node(s, node, n) {
>   		unsigned long nr_slabs;
>   		unsigned long nr_objs;
> -		unsigned long nr_free;
> +		unsigned long nr_inuse;
>   
> -		nr_free  = count_partial(n, count_free);
>   		nr_slabs = node_nr_slabs(n);
>   		nr_objs  = node_nr_objs(n);
> +		nr_inuse = node_nr_inuse(n);
>   
>   		pr_warn("  node %d: slabs: %ld, objs: %ld, free: %ld\n",
> -			node, nr_slabs, nr_objs, nr_free);
> +			node, nr_slabs, nr_objs, nr_objs - nr_inuse);
>   	}
>   #endif
>   }
> @@ -3348,6 +3356,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
>   #ifdef CONFIG_SLUB_DEBUG
>   	atomic_long_set(&n->nr_slabs, 0);
>   	atomic_long_set(&n->total_objects, 0);
> +	atomic_long_set(&n->total_inuse, 0);
>   	INIT_LIST_HEAD(&n->full);
>   #endif
>   }
> @@ -3411,7 +3420,7 @@ static void early_kmem_cache_node_alloc(int node)
>   	page->frozen = 0;
>   	kmem_cache_node->node[node] = n;
>   	init_kmem_cache_node(n);
> -	inc_slabs_node(kmem_cache_node, node, page->objects);
> +	inc_slabs_node(kmem_cache_node, node, page->objects, page->inuse);
>   
>   	/*
>   	 * No locks need to be taken here as it has just been
> @@ -4857,8 +4866,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
>   			if (flags & SO_TOTAL)
>   				x = atomic_long_read(&n->total_objects);
>   			else if (flags & SO_OBJECTS)
> -				x = atomic_long_read(&n->total_objects) -
> -					count_partial(n, count_free);
> +				x = atomic_long_read(&n->total_inuse);
>   			else
>   				x = atomic_long_read(&n->nr_slabs);
>   			total += x;
> @@ -5900,17 +5908,17 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
>   {
>   	unsigned long nr_slabs = 0;
>   	unsigned long nr_objs = 0;
> -	unsigned long nr_free = 0;
> +	unsigned long nr_inuse = 0;
>   	int node;
>   	struct kmem_cache_node *n;
>   
>   	for_each_kmem_cache_node(s, node, n) {
>   		nr_slabs += node_nr_slabs(n);
>   		nr_objs += node_nr_objs(n);
> -		nr_free += count_partial(n, count_free);
> +		nr_inuse += node_nr_inuse(n);
>   	}
>   
> -	sinfo->active_objs = nr_objs - nr_free;
> +	sinfo->active_objs = nr_inuse;
>   	sinfo->num_objs = nr_objs;
>   	sinfo->active_slabs = nr_slabs;
>   	sinfo->num_slabs = nr_slabs;


Thread overview: 12+ messages
2020-02-01  3:15 Wen Yang
2020-02-08  3:03 ` Wen Yang [this message]
2020-02-08 21:41   ` Christopher Lameter
2020-02-12 22:56     ` Andrew Morton
2020-02-14  2:16       ` Christopher Lameter
2020-02-12 22:52 ` Andrew Morton
2020-02-16  4:15   ` Wen Yang
2020-02-18 20:53     ` Roman Gushchin
2020-02-20 13:53       ` Wen Yang
2020-02-20 15:40         ` Roman Gushchin
2020-02-22  6:55           ` Wen Yang
2020-02-24 17:01             ` Roman Gushchin

