All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Huang, Ying" <ying.huang@intel.com>
To: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: linux-mm@kvack.org, akpm@linux-foundation.org,
	Wei Xu <weixugc@google.com>, Yang Shi <shy828301@gmail.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Tim C Chen <tim.c.chen@intel.com>,
	Michal Hocko <mhocko@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Hesham Almatary <hesham.almatary@huawei.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Alistair Popple <apopple@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	jvgediya.oss@gmail.com
Subject: Re: [PATCH v10 6/8] mm/demotion: Add pg_data_t member to track node memory tier details
Date: Tue, 26 Jul 2022 16:02:33 +0800	[thread overview]
Message-ID: <87wnc0uwrq.fsf@yhuang6-desk2.ccr.corp.intel.com> (raw)
In-Reply-To: <20220720025920.1373558-7-aneesh.kumar@linux.ibm.com> (Aneesh Kumar K. V.'s message of "Wed, 20 Jul 2022 08:29:18 +0530")

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> Also update different helpes to use NODE_DATA()->memtier. Since
> node specific memtier can change based on the reassignment of
> NUMA node to a different memory tiers, accessing NODE_DATA()->memtier
> needs to happen under an rcu read lock or memory_tier_lock.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  include/linux/mmzone.h |  3 ++
>  mm/memory-tiers.c      | 65 +++++++++++++++++++++++++++++++++++-------
>  2 files changed, 57 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index aab70355d64f..353812495a70 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -928,6 +928,9 @@ typedef struct pglist_data {
>  	/* Per-node vmstats */
>  	struct per_cpu_nodestat __percpu *per_cpu_nodestats;
>  	atomic_long_t		vm_stat[NR_VM_NODE_STAT_ITEMS];
> +#ifdef CONFIG_NUMA
> +	struct memory_tier __rcu *memtier;
> +#endif
>  } pg_data_t;
>  
>  #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index a8cfe2ca3903..4715f9b96a44 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -138,13 +138,18 @@ static struct memory_tier *find_create_memory_tier(unsigned int perf_level)
>  
>  static struct memory_tier *__node_get_memory_tier(int node)
>  {
> -	struct memory_tier *memtier;
> +	pg_data_t *pgdat;
>  
> -	list_for_each_entry(memtier, &memory_tiers, list) {
> -		if (node_isset(node, memtier->nodelist))
> -			return memtier;
> -	}
> -	return NULL;
> +	pgdat = NODE_DATA(node);
> +	if (!pgdat)
> +		return NULL;
> +	/*
> +	 * Since we hold memory_tier_lock, we can avoid
> +	 * RCU read locks when accessing the details. No
> +	 * parallel updates are possible here.
> +	 */
> +	return rcu_dereference_check(pgdat->memtier,
> +				     lockdep_is_held(&memory_tier_lock));
>  }
>  
>  #ifdef CONFIG_MIGRATION
> @@ -277,6 +282,29 @@ static inline void disable_all_migrate_targets(void) {}
>  static inline void establish_migration_targets(void) {}
>  #endif /* CONFIG_MIGRATION */
>  
> +static void memtier_node_set(int node, struct memory_tier *memtier)
> +{
> +	pg_data_t *pgdat;
> +	struct memory_tier *current_memtier;
> +
> +	pgdat = NODE_DATA(node);
> +	if (!pgdat)
> +		return;
> +	/*
> +	 * Make sure we mark the memtier NULL before we assign the new memory tier
> +	 * to the NUMA node. This make sure that anybody looking at NODE_DATA
> +	 * finds a NULL memtier or the one which is still valid.
> +	 */
> +	current_memtier = rcu_dereference_check(pgdat->memtier,
> +						lockdep_is_held(&memory_tier_lock));
> +	rcu_assign_pointer(pgdat->memtier, NULL);
> +	synchronize_rcu();
> +	if (current_memtier)
> +		node_clear(node, current_memtier->nodelist);

If pgdat->memtier == NULL, we don't need to set it to NULL and call
synchronize_rcu().  That is,

+	current_memtier = rcu_dereference_check(pgdat->memtier,
+						lockdep_is_held(&memory_tier_lock));
+	if (current_memtier) {
+               rcu_assign_pointer(pgdat->memtier, NULL);
+               synchronize_rcu();
+		node_clear(node, current_memtier->nodelist);
+       }

Same for clear_node_memory_tier().

Best Regards,
Huang, Ying

> +	node_set(node, memtier->nodelist);
> +	rcu_assign_pointer(pgdat->memtier, memtier);
> +}
> +
>  static void init_node_memory_tier(int node)
>  {
>  	int perf_level;
> @@ -295,7 +323,7 @@ static void init_node_memory_tier(int node)
>  	if (!memtier) {
>  		perf_level = node_devices[node]->perf_level;
>  		memtier = find_create_memory_tier(perf_level);
> -		node_set(node, memtier->nodelist);
> +		memtier_node_set(node, memtier);
>  	}
>  	establish_migration_targets();
>  	mutex_unlock(&memory_tier_lock);
> @@ -303,12 +331,25 @@ static void init_node_memory_tier(int node)
>  
>  static void clear_node_memory_tier(int node)
>  {
> -	struct memory_tier *memtier;
> +	pg_data_t *pgdat;
> +	struct memory_tier *current_memtier;
> +
> +	pgdat = NODE_DATA(node);
> +	if (!pgdat)
> +		return;
>  
>  	mutex_lock(&memory_tier_lock);
> -	memtier = __node_get_memory_tier(node);
> -	if (memtier) {
> -		node_clear(node, memtier->nodelist);
> +	/*
> +	 * Make sure we mark the memtier NULL before we assign the new memory tier
> +	 * to the NUMA node. This make sure that anybody looking at NODE_DATA
> +	 * finds a NULL memtier or the one which is still valid.
> +	 */
> +	current_memtier = rcu_dereference_check(pgdat->memtier,
> +						lockdep_is_held(&memory_tier_lock));
> +	rcu_assign_pointer(pgdat->memtier, NULL);
> +	synchronize_rcu();
> +	if (current_memtier) {
> +		node_clear(node, current_memtier->nodelist);
>  		establish_migration_targets();
>  	}
>  	mutex_unlock(&memory_tier_lock);
> @@ -383,6 +424,8 @@ static int __init memory_tier_init(void)
>  
>  		if (!node_property->perf_level)
>  			node_property->perf_level = default_memtier_perf_level;
> +
> +		rcu_assign_pointer(NODE_DATA(node)->memtier, memtier);
>  	}
>  	mutex_unlock(&memory_tier_lock);

  reply	other threads:[~2022-07-26  8:02 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-20  2:59 [PATCH v10 0/8] mm/demotion: Memory tiers and demotion Aneesh Kumar K.V
2022-07-20  2:59 ` [PATCH v10 1/8] mm/demotion: Add support for explicit memory tiers Aneesh Kumar K.V
2022-07-26  3:53   ` Huang, Ying
2022-07-26 11:59     ` Aneesh Kumar K V
2022-07-27  1:16       ` Huang, Ying
2022-07-28 17:23         ` Johannes Weiner
2022-07-20  2:59 ` [PATCH v10 2/8] mm/demotion: Move memory demotion related code Aneesh Kumar K.V
2022-07-20  2:59 ` [PATCH v10 3/8] mm/demotion: Add hotplug callbacks to handle new numa node onlined Aneesh Kumar K.V
2022-07-26  4:03   ` Huang, Ying
2022-07-26 12:03     ` Aneesh Kumar K V
2022-07-27  1:53       ` Huang, Ying
2022-07-27  4:38         ` Aneesh Kumar K.V
2022-07-28  6:42           ` Huang, Ying
2022-07-20  2:59 ` [PATCH v10 4/8] mm/demotion/dax/kmem: Set node's performance level to MEMTIER_PERF_LEVEL_PMEM Aneesh Kumar K.V
2022-07-21  6:07   ` kernel test robot
2022-07-25  6:37   ` Huang, Ying
2022-07-25  6:48     ` Aneesh Kumar K V
2022-07-25  8:35       ` Huang, Ying
2022-07-25  8:42         ` Aneesh Kumar K V
2022-07-26  2:13           ` Huang, Ying
2022-07-27  4:31             ` Aneesh Kumar K.V
2022-07-28  6:39               ` Huang, Ying
2022-07-20  2:59 ` [PATCH v10 5/8] mm/demotion: Build demotion targets based on explicit memory tiers Aneesh Kumar K.V
2022-07-20  3:38   ` Aneesh Kumar K.V
2022-07-21  0:02   ` kernel test robot
2022-07-26  7:44   ` Huang, Ying
2022-07-26 12:30     ` Aneesh Kumar K V
2022-07-27  1:40       ` Huang, Ying
2022-07-27  4:35         ` Aneesh Kumar K.V
2022-07-28  6:51           ` Huang, Ying
2022-08-03  3:18         ` Aneesh Kumar K.V
2022-08-04  4:19           ` Huang, Ying
2022-07-20  2:59 ` [PATCH v10 6/8] mm/demotion: Add pg_data_t member to track node memory tier details Aneesh Kumar K.V
2022-07-26  8:02   ` Huang, Ying [this message]
2022-07-20  2:59 ` [PATCH v10 7/8] mm/demotion: Demote pages according to allocation fallback order Aneesh Kumar K.V
2022-07-26  8:24   ` Huang, Ying
2022-07-20  2:59 ` [PATCH v10 8/8] mm/demotion: Update node_is_toptier to work with memory tiers Aneesh Kumar K.V
2022-07-25  8:54   ` Huang, Ying
2022-07-25  8:56     ` Aneesh Kumar K V

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87wnc0uwrq.fsf@yhuang6-desk2.ccr.corp.intel.com \
    --to=ying.huang@intel.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=apopple@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=hannes@cmpxchg.org \
    --cc=hesham.almatary@huawei.com \
    --cc=jvgediya.oss@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=shy828301@gmail.com \
    --cc=tim.c.chen@intel.com \
    --cc=weixugc@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.