linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Shakeel Butt <shakeelb@google.com>
To: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Andrey Ryabinin <aryabinin@virtuozzo.com>,
	 Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@suse.com>, Linux MM <linux-mm@kvack.org>,
	 Cgroups <cgroups@vger.kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	 Kernel Team <kernel-team@fb.com>
Subject: Re: [PATCH 02/11] mm: clean up and clarify lruvec lookup procedure
Date: Wed, 6 Nov 2019 18:50:49 -0800	[thread overview]
Message-ID: <CALvZod69GXQt06Ro60nDoYpYqho+ZRvnyLSSuD8stxUTPv-Dpg@mail.gmail.com> (raw)
In-Reply-To: <20190603210746.15800-3-hannes@cmpxchg.org>

On Mon, Jun 3, 2019 at 3:04 PM Johannes Weiner <hannes@cmpxchg.org> wrote:
>
> There is a per-memcg lruvec and a NUMA node lruvec. Which one is being
> used is somewhat confusing right now, and it's easy to make mistakes -
> especially when it comes to global reclaim.
>
> How it works: when memory cgroups are enabled, we always use the
> root_mem_cgroup's per-node lruvecs. When memory cgroups are not
> compiled in or disabled at runtime, we use pgdat->lruvec.
>
> Document that in a comment.
>
> Due to the way the reclaim code is generalized, all lookups use the
> mem_cgroup_lruvec() helper function, and nobody should have to find
> the right lruvec manually right now. But to avoid future mistakes,
> rename the pgdat->lruvec member to pgdat->__lruvec and delete the
> convenience wrapper that suggests it's a commonly accessed member.
>
> While in this area, swap the mem_cgroup_lruvec() argument order. The
> name suggests a memcg operation, yet it takes a pgdat first and a
> memcg second. I have to double

check

> take every time I call this. Fix that.
>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Reviewed-by: Shakeel Butt <shakeelb@google.com>


> ---
>  include/linux/memcontrol.h | 26 +++++++++++++-------------
>  include/linux/mmzone.h     | 15 ++++++++-------
>  mm/memcontrol.c            |  6 +++---
>  mm/page_alloc.c            |  2 +-
>  mm/vmscan.c                |  6 +++---
>  mm/workingset.c            |  8 ++++----
>  6 files changed, 32 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index fa1e8cb1b3e2..fc32cfaebf32 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -382,22 +382,22 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
>  }
>
>  /**
> - * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
> - * @node: node of the wanted lruvec
> + * mem_cgroup_lruvec - get the lru list vector for a memcg & node
>   * @memcg: memcg of the wanted lruvec
> + * @node: node of the wanted lruvec
>   *
> - * Returns the lru list vector holding pages for a given @node or a given
> - * @memcg and @zone. This can be the node lruvec, if the memory controller
> - * is disabled.
> + * Returns the lru list vector holding pages for a given @memcg &
> + * @node combination. This can be the node lruvec, if the memory
> + * controller is disabled.
>   */
> -static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
> -                               struct mem_cgroup *memcg)
> +static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
> +                                              struct pglist_data *pgdat)
>  {
>         struct mem_cgroup_per_node *mz;
>         struct lruvec *lruvec;
>
>         if (mem_cgroup_disabled()) {
> -               lruvec = node_lruvec(pgdat);
> +               lruvec = &pgdat->__lruvec;
>                 goto out;
>         }
>
> @@ -716,7 +716,7 @@ static inline void __mod_lruvec_page_state(struct page *page,
>                 return;
>         }
>
> -       lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
> +       lruvec = mem_cgroup_lruvec(page->mem_cgroup, pgdat);
>         __mod_lruvec_state(lruvec, idx, val);
>  }
>
> @@ -887,16 +887,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
>  {
>  }
>
> -static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
> -                               struct mem_cgroup *memcg)
> +static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
> +                                              struct pglist_data *pgdat)
>  {
> -       return node_lruvec(pgdat);
> +       return &pgdat->__lruvec;
>  }
>
>  static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
>                                                     struct pglist_data *pgdat)
>  {
> -       return &pgdat->lruvec;
> +       return &pgdat->__lruvec;
>  }
>
>  static inline bool mm_match_cgroup(struct mm_struct *mm,
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 427b79c39b3c..95d63a395f40 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -761,7 +761,13 @@ typedef struct pglist_data {
>  #endif
>
>         /* Fields commonly accessed by the page reclaim scanner */
> -       struct lruvec           lruvec;
> +
> +       /*
> +        * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
> +        *
> +        * Use mem_cgroup_lruvec() to look up lruvecs.
> +        */
> +       struct lruvec           __lruvec;
>
>         unsigned long           flags;
>
> @@ -784,11 +790,6 @@ typedef struct pglist_data {
>  #define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
>  #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
>
> -static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
> -{
> -       return &pgdat->lruvec;
> -}
> -
>  static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
>  {
>         return pgdat->node_start_pfn + pgdat->node_spanned_pages;
> @@ -826,7 +827,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
>  #ifdef CONFIG_MEMCG
>         return lruvec->pgdat;
>  #else
> -       return container_of(lruvec, struct pglist_data, lruvec);
> +       return container_of(lruvec, struct pglist_data, __lruvec);
>  #endif
>  }
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c193aef3ba9e..6de8ca735ee2 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1200,7 +1200,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
>         struct lruvec *lruvec;
>
>         if (mem_cgroup_disabled()) {
> -               lruvec = &pgdat->lruvec;
> +               lruvec = &pgdat->__lruvec;
>                 goto out;
>         }
>
> @@ -1518,7 +1518,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
>                 int nid, bool noswap)
>  {
> -       struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
> +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
>
>         if (lruvec_page_state(lruvec, NR_INACTIVE_FILE) ||
>             lruvec_page_state(lruvec, NR_ACTIVE_FILE))
> @@ -3406,7 +3406,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
>  static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
>                                            int nid, unsigned int lru_mask)
>  {
> -       struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
> +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
>         unsigned long nr = 0;
>         enum lru_list lru;
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index a345418b548e..cd8e64e536f7 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6619,7 +6619,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
>
>         pgdat_page_ext_init(pgdat);
>         spin_lock_init(&pgdat->lru_lock);
> -       lruvec_init(node_lruvec(pgdat));
> +       lruvec_init(&pgdat->__lruvec);
>  }
>
>  static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index f396424850aa..853be16ee5e2 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2477,7 +2477,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
>  static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
>                               struct scan_control *sc)
>  {
> -       struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
> +       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
>         unsigned long nr[NR_LRU_LISTS];
>         unsigned long targets[NR_LRU_LISTS];
>         unsigned long nr_to_scan;
> @@ -2988,7 +2988,7 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
>                 unsigned long refaults;
>                 struct lruvec *lruvec;
>
> -               lruvec = mem_cgroup_lruvec(pgdat, memcg);
> +               lruvec = mem_cgroup_lruvec(memcg, pgdat);
>                 refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
>                 lruvec->refaults = refaults;
>         } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
> @@ -3351,7 +3351,7 @@ static void age_active_anon(struct pglist_data *pgdat,
>
>         memcg = mem_cgroup_iter(NULL, NULL, NULL);
>         do {
> -               struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
> +               struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
>
>                 if (inactive_list_is_low(lruvec, false, sc, true))
>                         shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
> diff --git a/mm/workingset.c b/mm/workingset.c
> index e0b4edcb88c8..2aaa70bea99c 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -233,7 +233,7 @@ void *workingset_eviction(struct page *page)
>         VM_BUG_ON_PAGE(page_count(page), page);
>         VM_BUG_ON_PAGE(!PageLocked(page), page);
>
> -       lruvec = mem_cgroup_lruvec(pgdat, memcg);
> +       lruvec = mem_cgroup_lruvec(memcg, pgdat);
>         eviction = atomic_long_inc_return(&lruvec->inactive_age);
>         return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
>  }
> @@ -280,7 +280,7 @@ void workingset_refault(struct page *page, void *shadow)
>         memcg = mem_cgroup_from_id(memcgid);
>         if (!mem_cgroup_disabled() && !memcg)
>                 goto out;
> -       lruvec = mem_cgroup_lruvec(pgdat, memcg);
> +       lruvec = mem_cgroup_lruvec(memcg, pgdat);
>         refault = atomic_long_read(&lruvec->inactive_age);
>         active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
>
> @@ -345,7 +345,7 @@ void workingset_activation(struct page *page)
>         memcg = page_memcg_rcu(page);
>         if (!mem_cgroup_disabled() && !memcg)
>                 goto out;
> -       lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
> +       lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
>         atomic_long_inc(&lruvec->inactive_age);
>  out:
>         rcu_read_unlock();
> @@ -428,7 +428,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
>                 struct lruvec *lruvec;
>                 int i;
>
> -               lruvec = mem_cgroup_lruvec(NODE_DATA(sc->nid), sc->memcg);
> +               lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
>                 for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
>                         pages += lruvec_page_state_local(lruvec,
>                                                          NR_LRU_BASE + i);
> --
> 2.21.0
>


  reply	other threads:[~2019-11-07  2:51 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-03 21:07 [PATCH 00/11] mm: fix page aging across multiple cgroups Johannes Weiner
2019-06-03 21:07 ` [PATCH 01/11] mm: vmscan: move inactive_list_is_low() swap check to the caller Johannes Weiner
2019-11-07  2:50   ` Shakeel Butt
2019-11-08  3:43     ` Andrew Morton
2019-06-03 21:07 ` [PATCH 02/11] mm: clean up and clarify lruvec lookup procedure Johannes Weiner
2019-11-07  2:50   ` Shakeel Butt [this message]
2019-06-03 21:07 ` [PATCH 03/11] mm: vmscan: simplify lruvec_lru_size() Johannes Weiner
2019-11-07  2:51   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 04/11] mm: vmscan: naming fixes: cgroup_reclaim() and writeback_working() Johannes Weiner
2019-11-07  2:51   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 05/11] mm: vmscan: replace shrink_node() loop with a retry jump Johannes Weiner
2019-11-07  2:51   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 06/11] mm: vmscan: turn shrink_node_memcg() into shrink_lruvec() Johannes Weiner
2019-11-07  2:51   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 07/11] mm: vmscan: split shrink_node() into node part and memcgs part Johannes Weiner
2019-11-07  2:51   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 08/11] mm: vmscan: harmonize writeback congestion tracking for nodes & memcgs Johannes Weiner
2019-11-07  2:52   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 09/11] mm: vmscan: move file exhaustion detection to the node level Johannes Weiner
2019-11-07  2:52   ` Shakeel Butt
2019-06-03 21:07 ` [PATCH 10/11] mm: vmscan: detect file thrashing at the reclaim root Johannes Weiner
2019-06-03 21:07 ` [PATCH 11/11] mm: vmscan: enforce inactive:active ratio " Johannes Weiner
2019-11-07  2:50 ` [PATCH 00/11] mm: fix page aging across multiple cgroups Shakeel Butt
2019-11-07 17:45   ` Johannes Weiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CALvZod69GXQt06Ro60nDoYpYqho+ZRvnyLSSuD8stxUTPv-Dpg@mail.gmail.com \
    --to=shakeelb@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=aryabinin@virtuozzo.com \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=surenb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).