From: Minchan Kim <minchan@kernel.org>
To: josef@toxicpanda.com
Cc: hannes@cmpxchg.org, riel@redhat.com, akpm@linux-foundation.org,
linux-mm@kvack.org, kernel-team@fb.com,
Josef Bacik <jbacik@fb.com>
Subject: Re: [PATCH 1/2] mm: use slab size in the slab shrinking ratio calculation
Date: Tue, 13 Jun 2017 14:28:02 +0900 [thread overview]
Message-ID: <20170613052802.GA16061@bbox> (raw)
In-Reply-To: <1496949546-2223-1-git-send-email-jbacik@fb.com>
Hello,
On Thu, Jun 08, 2017 at 03:19:05PM -0400, josef@toxicpanda.com wrote:
> From: Josef Bacik <jbacik@fb.com>
>
> When testing a slab heavy workload I noticed that we often would barely
> reclaim anything at all from slab when kswapd started doing reclaim.
> This is because we use the ratio of nr_scanned / nr_lru to determine how
> much of slab we should reclaim. But in a slab only/mostly workload we
> will not have much page cache to reclaim, and thus our ratio will be
> really low and not at all related to where the memory on the system is.
I want to understand this clearly.
Why is nr_scanned / nr_lru low if the system doesn't have much page cache?
Could you elaborate on it a bit?
Thanks.
> Instead we want to use a ratio of the reclaimable slab to the actual
> reclaimable space on the system. That way if we are slab heavy we work
> harder to reclaim slab.
>
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
> mm/vmscan.c | 71 +++++++++++++++++++++++++++++++++++++------------------------
> 1 file changed, 43 insertions(+), 28 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index f84cdd3..16add44 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -307,8 +307,8 @@ EXPORT_SYMBOL(unregister_shrinker);
>
> static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> struct shrinker *shrinker,
> - unsigned long nr_scanned,
> - unsigned long nr_eligible)
> + unsigned long numerator,
> + unsigned long denominator)
> {
> unsigned long freed = 0;
> unsigned long long delta;
> @@ -333,9 +333,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
>
> total_scan = nr;
> - delta = (4 * nr_scanned) / shrinker->seeks;
> + delta = (4 * numerator) / shrinker->seeks;
> delta *= freeable;
> - do_div(delta, nr_eligible + 1);
> + do_div(delta, denominator + 1);
> total_scan += delta;
> if (total_scan < 0) {
> pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
> @@ -369,7 +369,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> total_scan = freeable * 2;
>
> trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
> - nr_scanned, nr_eligible,
> + numerator, denominator,
> freeable, delta, total_scan);
>
> /*
> @@ -429,8 +429,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> * @gfp_mask: allocation context
> * @nid: node whose slab caches to target
> * @memcg: memory cgroup whose slab caches to target
> - * @nr_scanned: pressure numerator
> - * @nr_eligible: pressure denominator
> + * @numerator: pressure numerator
> + * @denominator: pressure denominator
> *
> * Call the shrink functions to age shrinkable caches.
> *
> @@ -442,20 +442,16 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> * objects from the memory cgroup specified. Otherwise, only unaware
> * shrinkers are called.
> *
> - * @nr_scanned and @nr_eligible form a ratio that indicate how much of
> - * the available objects should be scanned. Page reclaim for example
> - * passes the number of pages scanned and the number of pages on the
> - * LRU lists that it considered on @nid, plus a bias in @nr_scanned
> - * when it encountered mapped pages. The ratio is further biased by
> - * the ->seeks setting of the shrink function, which indicates the
> - * cost to recreate an object relative to that of an LRU page.
> + * @numerator and @denominator form a ratio that indicate how much of
> + * the available objects should be scanned. Global reclaim for example will do
> + * the ratio of reclaimable slab to the lru sizes.
> *
> * Returns the number of reclaimed slab objects.
> */
> static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> struct mem_cgroup *memcg,
> - unsigned long nr_scanned,
> - unsigned long nr_eligible)
> + unsigned long numerator,
> + unsigned long denominator)
> {
> struct shrinker *shrinker;
> unsigned long freed = 0;
> @@ -463,9 +459,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)))
> return 0;
>
> - if (nr_scanned == 0)
> - nr_scanned = SWAP_CLUSTER_MAX;
> -
> if (!down_read_trylock(&shrinker_rwsem)) {
> /*
> * If we would return 0, our callers would understand that we
> @@ -496,7 +489,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
> sc.nid = 0;
>
> - freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
> + freed += do_shrink_slab(&sc, shrinker, numerator, denominator);
> }
>
> up_read(&shrinker_rwsem);
> @@ -2558,12 +2551,34 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
> return true;
> }
>
> +static unsigned long lruvec_reclaimable_pages(struct lruvec *lruvec)
> +{
> + unsigned long nr;
> +
> + nr = lruvec_page_state(lruvec, NR_ACTIVE_FILE) +
> + lruvec_page_state(lruvec, NR_INACTIVE_FILE) +
> + lruvec_page_state(lruvec, NR_ISOLATED_FILE);
> +
> + if (get_nr_swap_pages() > 0)
> + nr += lruvec_page_state(lruvec, NR_ACTIVE_ANON) +
> + lruvec_page_state(lruvec, NR_INACTIVE_ANON) +
> + lruvec_page_state(lruvec, NR_ISOLATED_ANON);
> +
> + return nr;
> +}
> +
> static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> {
> struct reclaim_state *reclaim_state = current->reclaim_state;
> unsigned long nr_reclaimed, nr_scanned;
> + unsigned long greclaim = 1, gslab = 1;
> bool reclaimable = false;
>
> + if (global_reclaim(sc)) {
> + gslab = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
> + greclaim = pgdat_reclaimable_pages(pgdat);
> + }
> +
> do {
> struct mem_cgroup *root = sc->target_mem_cgroup;
> struct mem_cgroup_reclaim_cookie reclaim = {
> @@ -2578,6 +2593,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
>
> memcg = mem_cgroup_iter(root, NULL, &reclaim);
> do {
> + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat,
> + memcg);
> + unsigned long nr_slab, nr_reclaim;
> unsigned long lru_pages;
> unsigned long reclaimed;
> unsigned long scanned;
> @@ -2592,14 +2610,16 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
>
> reclaimed = sc->nr_reclaimed;
> scanned = sc->nr_scanned;
> + nr_slab = lruvec_page_state(lruvec,
> + NR_SLAB_RECLAIMABLE);
> + nr_reclaim = lruvec_reclaimable_pages(lruvec);
>
> shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
> node_lru_pages += lru_pages;
>
> if (memcg)
> shrink_slab(sc->gfp_mask, pgdat->node_id,
> - memcg, sc->nr_scanned - scanned,
> - lru_pages);
> + memcg, nr_slab, nr_reclaim);
>
> /* Record the group's reclaim efficiency */
> vmpressure(sc->gfp_mask, memcg, false,
> @@ -2623,14 +2643,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> }
> } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
>
> - /*
> - * Shrink the slab caches in the same proportion that
> - * the eligible LRU pages were scanned.
> - */
> if (global_reclaim(sc))
> shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
> - sc->nr_scanned - nr_scanned,
> - node_lru_pages);
> + gslab, greclaim);
>
> /*
> * Record the subtree's reclaim efficiency. The reclaimed
> --
> 2.7.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2017-06-13 5:28 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-08 19:19 [PATCH 1/2] mm: use slab size in the slab shrinking ratio calculation josef
2017-06-08 19:19 ` [PATCH 2/2] mm: make kswapd try harder to keep active pages in cache josef
2017-06-13 5:28 ` Minchan Kim [this message]
2017-06-13 12:01 ` [PATCH 1/2] mm: use slab size in the slab shrinking ratio calculation Josef Bacik
2017-06-14 6:40 ` Minchan Kim
2017-06-19 15:11 ` Josef Bacik
2017-06-20 2:46 ` Minchan Kim
2017-06-27 13:59 ` Josef Bacik
2017-06-30 2:17 ` Minchan Kim
2017-06-30 15:03 ` Josef Bacik
2017-07-02 1:58 ` Dave Chinner
2017-07-03 13:52 ` Josef Bacik
2017-07-03 1:33 ` Minchan Kim
2017-07-03 13:50 ` Josef Bacik
2017-07-04 3:01 ` Minchan Kim
2017-07-04 13:21 ` Josef Bacik
2017-07-04 22:57 ` Dave Chinner
2017-07-05 4:59 ` Minchan Kim
2017-07-05 23:58 ` Dave Chinner
2017-07-06 3:56 ` Minchan Kim
2017-07-05 13:33 ` Josef Bacik
2017-07-05 23:30 ` Dave Chinner
2017-07-05 4:43 ` Minchan Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170613052802.GA16061@bbox \
--to=minchan@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=jbacik@fb.com \
--cc=josef@toxicpanda.com \
--cc=kernel-team@fb.com \
--cc=linux-mm@kvack.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).