From: Wei Xu <weixugc@google.com>
To: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Linux MM <linux-mm@kvack.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Huang Ying <ying.huang@intel.com>,
	Greg Thelen <gthelen@google.com>, Yang Shi <shy828301@gmail.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Brice Goglin <brice.goglin@gmail.com>,
	Michal Hocko <mhocko@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Hesham Almatary <hesham.almatary@huawei.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Alistair Popple <apopple@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Feng Tang <feng.tang@intel.com>,
	Jagdish Gediya <jvgediya@linux.ibm.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	David Rientjes <rientjes@google.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
	Shakeel Butt <shakeelb@google.com>
Subject: Re: [RFC PATCH 2/3] mm/memory-tiers: Use page counter to track toptier memory usage
Date: Tue, 14 Jun 2022 17:30:06 -0700
Message-ID: <CAAPL-u-GgtYy9rNJxnSUA5+PH75g2hvNgt=bnra=fQra-bHk9w@mail.gmail.com>
In-Reply-To: <cefeb63173fa0fac7543315a2abbd4b5a1b25af8.1655242024.git.tim.c.chen@linux.intel.com>

(Resend in plain text. Sorry.)

On Tue, Jun 14, 2022 at 3:26 PM Tim Chen <tim.c.chen@linux.intel.com> wrote:
>
> If we need to restrict toptier memory usage for a cgroup,
> we need to retrieve usage of toptier memory efficiently.
> Add a page counter to track toptier memory usage directly
> so its value can be returned right away.
> ---
>  include/linux/memcontrol.h |  1 +
>  mm/memcontrol.c            | 50 ++++++++++++++++++++++++++++++++------
>  2 files changed, 43 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 9ecead1042b9..b4f727cba1de 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -241,6 +241,7 @@ struct mem_cgroup {
>
>         /* Accounted resources */
>         struct page_counter memory;             /* Both v1 & v2 */
> +       struct page_counter toptier;
>
>         union {
>                 struct page_counter swap;       /* v2 only */
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2f6e95e6d200..2f20ec2712b8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -848,6 +848,23 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
>         __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);
>  }
>
> +static inline void mem_cgroup_charge_toptier(struct mem_cgroup *memcg,
> +                                            int nid,
> +                                            int nr_pages)
> +{
> +       if (!node_is_toptier(nid) || !memcg)
> +               return;
> +
> +       if (nr_pages >= 0) {
> +               page_counter_charge(&memcg->toptier,
> +                               (unsigned long) nr_pages);
> +       } else {
> +               nr_pages = -nr_pages;
> +               page_counter_uncharge(&memcg->toptier,
> +                               (unsigned long) nr_pages);
> +       }
> +}

When we don't know which pages are being charged, we should still
charge the usage to toptier (assuming that toptier always includes the
default tier), e.g. in try_charge_memcg().

The idea is that when no lower-tier memory is in use, memcg->toptier
and memcg->memory should have the same value. Otherwise, it can cause
confusion about where the pages of (memcg->memory - memcg->toptier)
go.
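
For illustration, a rough and untested sketch of what that could look
like (the placement and exact form here are only assumptions, not a
worked-out patch): charge memcg->toptier unconditionally where the
node is not yet known, then correct the counter once the folio's node
is known.

        /*
         * Sketch only: in try_charge_memcg(), next to the existing
         * memcg->memory charge, also charge toptier, since at this
         * point we do not know which node the pages will land on.
         */
        page_counter_charge(&memcg->toptier, nr_pages);

        /*
         * Sketch only: later, once the folio's node is known (e.g. in
         * charge_memcg()), back out the toptier charge if the folio
         * actually sits on a lower-tier node.
         */
        if (!node_is_toptier(folio_nid(folio)))
                page_counter_uncharge(&memcg->toptier, nr_pages);

The error and cancel paths would of course need the same treatment,
but this keeps memcg->toptier equal to memcg->memory whenever no
lower-tier memory is in use, diverging only by the amount actually
placed on lower tiers.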

>  static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
>                                        enum mem_cgroup_events_target target)
>  {
> @@ -3027,6 +3044,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>                 if (!ret) {
>                         page->memcg_data = (unsigned long)objcg |
>                                 MEMCG_DATA_KMEM;
> +                       mem_cgroup_charge_toptier(page_memcg(page),
> +                                       page_to_nid(page), 1 << order);
>                         return 0;
>                 }
>                 obj_cgroup_put(objcg);
> @@ -3050,6 +3069,8 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
>
>         objcg = __folio_objcg(folio);
>         obj_cgroup_uncharge_pages(objcg, nr_pages);
> +       mem_cgroup_charge_toptier(page_memcg(page),
> +                       page_to_nid(page), -nr_pages);
>         folio->memcg_data = 0;
>         obj_cgroup_put(objcg);
>  }
> @@ -3947,13 +3968,10 @@ unsigned long mem_cgroup_memtier_usage(struct mem_cgroup *memcg,
>
>  unsigned long mem_cgroup_toptier_usage(struct mem_cgroup *memcg)
>  {
> -       struct memory_tier *top_tier;
> -
> -       top_tier = list_first_entry(&memory_tiers, struct memory_tier, list);
> -       if (top_tier)
> -               return mem_cgroup_memtier_usage(memcg, top_tier);
> -       else
> +       if (!memcg)
>                 return 0;
> +
> +       return page_counter_read(&memcg->toptier);
>  }
>
>  #endif /* CONFIG_NUMA */
> @@ -5228,11 +5246,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>                 memcg->oom_kill_disable = parent->oom_kill_disable;
>
>                 page_counter_init(&memcg->memory, &parent->memory);
> +               page_counter_init(&memcg->toptier, &parent->toptier);
>                 page_counter_init(&memcg->swap, &parent->swap);
>                 page_counter_init(&memcg->kmem, &parent->kmem);
>                 page_counter_init(&memcg->tcpmem, &parent->tcpmem);
>         } else {
>                 page_counter_init(&memcg->memory, NULL);
> +               page_counter_init(&memcg->toptier, NULL);
>                 page_counter_init(&memcg->swap, NULL);
>                 page_counter_init(&memcg->kmem, NULL);
>                 page_counter_init(&memcg->tcpmem, NULL);
> @@ -5678,6 +5698,8 @@ static int mem_cgroup_move_account(struct page *page,
>         memcg_check_events(to, nid);
>         mem_cgroup_charge_statistics(from, -nr_pages);
>         memcg_check_events(from, nid);
> +       mem_cgroup_charge_toptier(to, nid, nr_pages);
> +       mem_cgroup_charge_toptier(from, nid, -nr_pages);
>         local_irq_enable();
>  out_unlock:
>         folio_unlock(folio);
> @@ -6761,6 +6783,7 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
>
>         local_irq_disable();
>         mem_cgroup_charge_statistics(memcg, nr_pages);
> +       mem_cgroup_charge_toptier(memcg, folio_nid(folio), nr_pages);
>         memcg_check_events(memcg, folio_nid(folio));
>         local_irq_enable();
>  out:
> @@ -6853,6 +6876,7 @@ struct uncharge_gather {
>         unsigned long nr_memory;
>         unsigned long pgpgout;
>         unsigned long nr_kmem;
> +       unsigned long nr_toptier;
>         int nid;
>  };
>
> @@ -6867,6 +6891,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
>
>         if (ug->nr_memory) {
>                 page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
> +               page_counter_uncharge(&ug->memcg->toptier, ug->nr_toptier);
>                 if (do_memsw_account())
>                         page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
>                 if (ug->nr_kmem)
> @@ -6929,12 +6954,18 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
>                 ug->nr_memory += nr_pages;
>                 ug->nr_kmem += nr_pages;
>
> +               if (node_is_toptier(folio_nid(folio)))
> +                       ug->nr_toptier += nr_pages;
> +
>                 folio->memcg_data = 0;
>                 obj_cgroup_put(objcg);
>         } else {
>                 /* LRU pages aren't accounted at the root level */
> -               if (!mem_cgroup_is_root(memcg))
> +               if (!mem_cgroup_is_root(memcg)) {
>                         ug->nr_memory += nr_pages;
> +                       if (node_is_toptier(folio_nid(folio)))
> +                               ug->nr_toptier += nr_pages;
> +               }
>                 ug->pgpgout++;
>
>                 folio->memcg_data = 0;
> @@ -7011,6 +7042,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
>         /* Force-charge the new page. The old one will be freed soon */
>         if (!mem_cgroup_is_root(memcg)) {
>                 page_counter_charge(&memcg->memory, nr_pages);
> +               mem_cgroup_charge_toptier(memcg, folio_nid(new), nr_pages);
>                 if (do_memsw_account())
>                         page_counter_charge(&memcg->memsw, nr_pages);
>         }
> @@ -7231,8 +7263,10 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
>
>         folio->memcg_data = 0;
>
> -       if (!mem_cgroup_is_root(memcg))
> +       if (!mem_cgroup_is_root(memcg)) {
>                 page_counter_uncharge(&memcg->memory, nr_entries);
> +               mem_cgroup_charge_toptier(memcg, folio_nid(folio), -nr_entries);
> +       }
>
>         if (!cgroup_memory_noswap && memcg != swap_memcg) {
>                 if (!mem_cgroup_is_root(swap_memcg))
> --
> 2.35.1
>
>
