linux-kernel.vger.kernel.org archive mirror
* [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
@ 2022-08-25  9:23 Aneesh Kumar K.V
  2022-08-25  9:23 ` [RFC PATCH 2/2] mm/demotion: Expose memory tier " Aneesh Kumar K.V
  2022-08-26  1:50 ` [RFC PATCH 1/2] mm/demotion: Expose memory type " Huang, Ying
  0 siblings, 2 replies; 9+ messages in thread
From: Aneesh Kumar K.V @ 2022-08-25  9:23 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: Wei Xu, Huang Ying, Yang Shi, Davidlohr Bueso, Tim C Chen,
	Michal Hocko, Linux Kernel Mailing List, Hesham Almatary,
	Dave Hansen, Jonathan Cameron, Alistair Popple, Dan Williams,
	Johannes Weiner, jvgediya.oss, Bharata B Rao, Aneesh Kumar K.V

This patch adds /sys/devices/virtual/memtier/ where all memory tier related
details can be found. All allocated memory types will be listed there as
/sys/devices/virtual/memtier/memtypeN/

The nodes which are part of a specific memory type can be listed via
/sys/devices/virtual/memtier/memtypeN/nodes.

The adistance value of a specific memory type can be read via
/sys/devices/virtual/memtier/memtypeN/adistance.

A directory listing looks like:
:/sys/devices/virtual/memtier# tree memtype1
memtype1
├── adistance
├── nodes
├── subsystem -> ../../../../bus/memtier
└── uevent
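
For illustration only (not part of this patch), a minimal sketch of how a
driver could end up with such a memtypeN directory.  It assumes the
alloc_memory_type()/init_node_memory_type() interface from the base series;
MY_DEVICE_ADISTANCE and my_driver_add_node() are made-up names:

#include <linux/memory-tiers.h>

/* Placeholder abstract distance, one chunk slower than default DRAM. */
#define MY_DEVICE_ADISTANCE	(MEMTIER_ADISTANCE_DRAM + MEMTIER_CHUNK_SIZE)

static struct memory_dev_type *my_memtype;

static int my_driver_add_node(int nid)
{
	if (!my_memtype) {
		/* Registers the device, creating /sys/devices/virtual/memtier/memtypeN */
		my_memtype = alloc_memory_type(MY_DEVICE_ADISTANCE);
		if (IS_ERR(my_memtype))
			return PTR_ERR(my_memtype);
	}
	/* Associate the node with this memory type. */
	init_node_memory_type(nid, my_memtype);
	return 0;
}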

Since we will be using struct device to expose details via sysfs, drop struct
kref and use struct device for refcounting the memtype.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/linux/memory-tiers.h |  3 +-
 mm/memory-tiers.c            | 97 +++++++++++++++++++++++++++++++++---
 2 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index ecd865922707..487209a572b2 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -6,6 +6,7 @@
 #include <linux/nodemask.h>
 #include <linux/kref.h>
 #include <linux/mmzone.h>
+#include <linux/device.h>
 /*
  * Each tier cover a abstrace distance chunk size of 128
  */
@@ -28,7 +29,7 @@ struct memory_dev_type {
 	int adistance;
 	/* Nodes of same abstract distance */
 	nodemask_t nodes;
-	struct kref kref;
+	struct device dev;
 };
 
 #ifdef CONFIG_NUMA
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index ba844fe9cc8c..9eef3bd8d134 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -4,6 +4,7 @@
 #include <linux/sysfs.h>
 #include <linux/kobject.h>
 #include <linux/memory.h>
+#include <linux/idr.h>
 #include <linux/memory-tiers.h>
 
 #include "internal.h"
@@ -31,6 +32,15 @@ static DEFINE_MUTEX(memory_tier_lock);
 static LIST_HEAD(memory_tiers);
 static struct memory_dev_type *node_memory_types[MAX_NUMNODES];
 static struct memory_dev_type *default_dram_type;
+
+#define MAX_MEMORY_TYPE_ID	20
+static DEFINE_IDR(memory_type_idr);
+#define to_memory_type(device) container_of(device, struct memory_dev_type, dev)
+static struct bus_type memory_tier_subsys = {
+	.name = "memtier",
+	.dev_name = "memtier",
+};
+
 #ifdef CONFIG_MIGRATION
 static int top_tier_adistance;
 /*
@@ -388,7 +398,7 @@ static inline void __init_node_memory_type(int node, struct memory_dev_type *mem
 {
 	if (!node_memory_types[node]) {
 		node_memory_types[node] = memtype;
-		kref_get(&memtype->kref);
+		get_device(&memtype->dev);
 	}
 }
 
@@ -460,33 +470,87 @@ static bool clear_node_memory_tier(int node)
 	return cleared;
 }
 
-static void release_memtype(struct kref *kref)
+static ssize_t nodes_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
 {
-	struct memory_dev_type *memtype;
+	int ret;
+	struct memory_dev_type *memtype = to_memory_type(dev);
 
-	memtype = container_of(kref, struct memory_dev_type, kref);
+	mutex_lock(&memory_tier_lock);
+	ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&memtype->nodes));
+	mutex_unlock(&memory_tier_lock);
+	return ret;
+}
+static DEVICE_ATTR_RO(nodes);
+
+static ssize_t adistance_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	int ret;
+	struct memory_dev_type *memtype = to_memory_type(dev);
+
+	mutex_lock(&memory_tier_lock);
+	ret = sysfs_emit(buf, "%d\n", memtype->adistance);
+	mutex_unlock(&memory_tier_lock);
+	return ret;
+}
+static DEVICE_ATTR_RO(adistance);
+
+static struct attribute *memtype_dev_attrs[] = {
+	&dev_attr_nodes.attr,
+	&dev_attr_adistance.attr,
+	NULL
+};
+
+static const struct attribute_group memtype_dev_group = {
+	.attrs = memtype_dev_attrs,
+};
+
+static const struct attribute_group *memtype_dev_groups[] = {
+	&memtype_dev_group,
+	NULL
+};
+
+static void memtype_device_release(struct device *dev)
+{
+	struct memory_dev_type *memtype = to_memory_type(dev);
+
+	idr_remove(&memory_type_idr, memtype->dev.id);
 	kfree(memtype);
 }
 
 struct memory_dev_type *alloc_memory_type(int adistance)
 {
+	int ret;
 	struct memory_dev_type *memtype;
 
-	memtype = kmalloc(sizeof(*memtype), GFP_KERNEL);
+	memtype = kzalloc(sizeof(*memtype), GFP_KERNEL);
 	if (!memtype)
 		return ERR_PTR(-ENOMEM);
 
 	memtype->adistance = adistance;
 	INIT_LIST_HEAD(&memtype->tier_sibiling);
 	memtype->nodes  = NODE_MASK_NONE;
-	kref_init(&memtype->kref);
+	memtype->dev.id = idr_alloc(&memory_type_idr, NULL,
+				 1, MAX_MEMORY_TYPE_ID + 1, GFP_KERNEL);
+	memtype->dev.bus = &memory_tier_subsys;
+	memtype->dev.release = memtype_device_release;
+	memtype->dev.groups = memtype_dev_groups;
+	dev_set_name(&memtype->dev, "%s%d", "memtype", memtype->dev.id);
+
+	ret = device_register(&memtype->dev);
+	if (ret) {
+		put_device(&memtype->dev);
+		return ERR_PTR(ret);
+	}
+
 	return memtype;
 }
 EXPORT_SYMBOL_GPL(alloc_memory_type);
 
 void destroy_memory_type(struct memory_dev_type *memtype)
 {
-	kref_put(&memtype->kref, release_memtype);
+	device_unregister(&memtype->dev);
 }
 EXPORT_SYMBOL_GPL(destroy_memory_type);
 
@@ -504,7 +568,7 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
 	mutex_lock(&memory_tier_lock);
 	if (node_memory_types[node] == memtype) {
 		node_memory_types[node] = NULL;
-		kref_put(&memtype->kref, release_memtype);
+		put_device(&memtype->dev);
 	}
 	mutex_unlock(&memory_tier_lock);
 }
@@ -582,6 +646,23 @@ static int __init memory_tier_init(void)
 }
 subsys_initcall(memory_tier_init);
 
+/*
+ * Initialize the sysfs subsystem at core_initcall time so that
+ * other kernel components can call alloc_memory_type()
+ * before memory_tier_init() runs at subsys_initcall time.
+ */
+static int __init memory_tier_sysfs_init(void)
+{
+	int err;
+
+	err = subsys_virtual_register(&memory_tier_subsys, NULL);
+	if (err)
+		return err;
+
+	return 0;
+}
+core_initcall(memory_tier_sysfs_init);
+
 bool numa_demotion_enabled = false;
 
 #ifdef CONFIG_MIGRATION
-- 
2.37.2



* [RFC PATCH 2/2] mm/demotion: Expose memory tier details via sysfs
  2022-08-25  9:23 [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs Aneesh Kumar K.V
@ 2022-08-25  9:23 ` Aneesh Kumar K.V
  2022-08-26  4:31   ` Huang, Ying
  2022-08-26  1:50 ` [RFC PATCH 1/2] mm/demotion: Expose memory type " Huang, Ying
  1 sibling, 1 reply; 9+ messages in thread
From: Aneesh Kumar K.V @ 2022-08-25  9:23 UTC (permalink / raw)
  To: linux-mm, akpm
  Cc: Wei Xu, Huang Ying, Yang Shi, Davidlohr Bueso, Tim C Chen,
	Michal Hocko, Linux Kernel Mailing List, Hesham Almatary,
	Dave Hansen, Jonathan Cameron, Alistair Popple, Dan Williams,
	Johannes Weiner, jvgediya.oss, Bharata B Rao, Aneesh Kumar K.V

All allocated memory tiers will be listed as
/sys/devices/virtual/memtier/memtierN/

Each memtier directory contains symbolic links to the memory types
that are part of that memory tier. A directory hierarchy looks like:

:/sys/devices/virtual/memtier# tree memtier512/
memtier512/
├── memtype1 -> ../memtype1
├── memtype2 -> ../memtype2
├── subsystem -> ../../../../bus/memtier
└── uevent

The nodes which are part of a specific memory type can be listed via
/sys/devices/virtual/memtier/memtypeN/nodes.

The adistance value of a specific memory type can be read via
/sys/devices/virtual/memtier/memtypeN/adistance.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/memory-tiers.c | 62 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 9eef3bd8d134..4005c3124ff0 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -20,6 +20,7 @@ struct memory_tier {
 	 * adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE
 	 */
 	int adistance_start;
+	struct device dev;
 	/* All the nodes that are part of all the lower memory tiers. */
 	nodemask_t lower_tier_mask;
 };
@@ -36,6 +37,7 @@ static struct memory_dev_type *default_dram_type;
 #define MAX_MEMORY_TYPE_ID	20
 static DEFINE_IDR(memory_type_idr);
 #define to_memory_type(device) container_of(device, struct memory_dev_type, dev)
+#define to_memory_tier(device) container_of(device, struct memory_tier, dev)
 static struct bus_type memory_tier_subsys = {
 	.name = "memtier",
 	.dev_name = "memtier",
@@ -103,8 +105,25 @@ static int top_tier_adistance;
 static struct demotion_nodes *node_demotion __read_mostly;
 #endif /* CONFIG_MIGRATION */
 
+static void memory_tier_device_release(struct device *dev)
+{
+	struct memory_tier *tier = to_memory_tier(dev);
+	/*
+	 * synchronize_rcu in clear_node_memory_tier makes sure
+	 * we don't have rcu access to this memory tier.
+	 */
+	kfree(tier);
+}
+
+static void destroy_memory_tier(struct memory_tier *memtier)
+{
+	list_del(&memtier->list);
+	device_unregister(&memtier->dev);
+}
+
 static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
 {
+	int ret;
 	bool found_slot = false;
 	struct memory_tier *memtier, *new_memtier;
 	int adistance = memtype->adistance;
@@ -128,15 +147,14 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
 
 	list_for_each_entry(memtier, &memory_tiers, list) {
 		if (adistance == memtier->adistance_start) {
-			list_add(&memtype->tier_sibiling, &memtier->memory_types);
-			return memtier;
+			goto link_memtype;
 		} else if (adistance < memtier->adistance_start) {
 			found_slot = true;
 			break;
 		}
 	}
 
-	new_memtier = kmalloc(sizeof(struct memory_tier), GFP_KERNEL);
+	new_memtier = kzalloc(sizeof(struct memory_tier), GFP_KERNEL);
 	if (!new_memtier)
 		return ERR_PTR(-ENOMEM);
 
@@ -147,8 +165,30 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
 		list_add_tail(&new_memtier->list, &memtier->list);
 	else
 		list_add_tail(&new_memtier->list, &memory_tiers);
-	list_add(&memtype->tier_sibiling, &new_memtier->memory_types);
-	return new_memtier;
+
+	new_memtier->dev.id = adistance;
+	new_memtier->dev.bus = &memory_tier_subsys;
+	new_memtier->dev.release = memory_tier_device_release;
+
+	ret = device_register(&new_memtier->dev);
+	if (ret) {
+		list_del(&new_memtier->list);
+		put_device(&new_memtier->dev);
+		return ERR_PTR(ret);
+	}
+	memtier = new_memtier;
+
+link_memtype:
+	list_add(&memtype->tier_sibiling, &memtier->memory_types);
+	/*
+	 * Ignore the error below: the driver that created the memory type
+	 * device may have been unloaded, so creating the sysfs link can
+	 * fail.  We continue with the in-memory representation.
+	 */
+	ret = sysfs_create_link(&memtier->dev.kobj,
+				&memtype->dev.kobj, kobject_name(&memtype->dev.kobj));
+
+	return memtier;
 }
 
 static struct memory_tier *__node_get_memory_tier(int node)
@@ -424,16 +464,6 @@ static struct memory_tier *set_node_memory_tier(int node)
 	return memtier;
 }
 
-static void destroy_memory_tier(struct memory_tier *memtier)
-{
-	list_del(&memtier->list);
-	/*
-	 * synchronize_rcu in clear_node_memory_tier makes sure
-	 * we don't have rcu access to this memory tier.
-	 */
-	kfree(memtier);
-}
-
 static bool clear_node_memory_tier(int node)
 {
 	bool cleared = false;
@@ -462,6 +492,8 @@ static bool clear_node_memory_tier(int node)
 		node_clear(node, memtype->nodes);
 		if (nodes_empty(memtype->nodes)) {
 			list_del_init(&memtype->tier_sibiling);
+			sysfs_delete_link(&memtier->dev.kobj,
+					  &memtype->dev.kobj, kobject_name(&memtype->dev.kobj));
 			if (list_empty(&memtier->memory_types))
 				destroy_memory_tier(memtier);
 		}
-- 
2.37.2



* Re: [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
  2022-08-25  9:23 [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs Aneesh Kumar K.V
  2022-08-25  9:23 ` [RFC PATCH 2/2] mm/demotion: Expose memory tier " Aneesh Kumar K.V
@ 2022-08-26  1:50 ` Huang, Ying
  2022-08-26  2:37   ` Aneesh Kumar K V
  1 sibling, 1 reply; 9+ messages in thread
From: Huang, Ying @ 2022-08-26  1:50 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: linux-mm, akpm, Wei Xu, Yang Shi, Davidlohr Bueso, Tim C Chen,
	Michal Hocko, Linux Kernel Mailing List, Hesham Almatary,
	Dave Hansen, Jonathan Cameron, Alistair Popple, Dan Williams,
	Johannes Weiner, jvgediya.oss, Bharata B Rao

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> This patch adds /sys/devices/virtual/memtier/ where all memory tier related
> details can be found. All allocated memory types will be listed there as
> /sys/devices/virtual/memtier/memtypeN/

Another choice is to make memory types and memory tiers system devices.
That is,

/sys/devices/system/memory_type/memory_typeN
/sys/devices/system/memory_tier/memory_tierN

That looks more natural to me.  Because we already have "node" and
"memory" devices there.  Why don't you put memory types and memory tiers
there?

And, I think we shouldn't put "memory_type" in the "memory_tier"
directory.  "memory_type" isn't a part of "memory_tier".

> The nodes which are part of a specific memory type can be listed via
> /sys/devices/system/memtier/memtypeN/nodes.

How about creating links to /sys/devices/system/node/nodeN in
"memory_type"?  But I'm OK with having a "nodes" file too.

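For concreteness, a rough sketch of what creating such links could look
like inside mm/memory-tiers.c; illustration only, and it assumes
node_devices[] from <linux/node.h> is already populated for the node:

/* Called with memory_tier_lock held, after nid is added to memtype->nodes. */
static int memtype_link_node(struct memory_dev_type *memtype, int nid)
{
	return sysfs_create_link(&memtype->dev.kobj,
				 &node_devices[nid]->dev.kobj,
				 kobject_name(&node_devices[nid]->dev.kobj));
}
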
> The adistance value of a specific memory type can be listed via
> /sys/devices/system/memtier/memtypeN/adistance.
>
> A directory listing looks like:
> :/sys/devices/virtual/memtier# tree memtype1
> memtype1
> ├── adistance

Why not just use "abstract_distance"?  This is a user-space interface;
it's better to be intuitive.

> ├── nodes
> ├── subsystem -> ../../../../bus/memtier
> └── uevent
>
> Since we will be using struct device to expose details via sysfs, drop struct
> kref and use struct device for refcounting the memtype.
>

Best Regards,
Huang, Ying


* Re: [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
  2022-08-26  1:50 ` [RFC PATCH 1/2] mm/demotion: Expose memory type " Huang, Ying
@ 2022-08-26  2:37   ` Aneesh Kumar K V
  2022-08-26  8:00     ` Wei Xu
  0 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K V @ 2022-08-26  2:37 UTC (permalink / raw)
  To: Huang, Ying
  Cc: linux-mm, akpm, Wei Xu, Yang Shi, Davidlohr Bueso, Tim C Chen,
	Michal Hocko, Linux Kernel Mailing List, Hesham Almatary,
	Dave Hansen, Jonathan Cameron, Alistair Popple, Dan Williams,
	Johannes Weiner, jvgediya.oss, Bharata B Rao

On 8/26/22 7:20 AM, Huang, Ying wrote:
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> 
>> This patch adds /sys/devices/virtual/memtier/ where all memory tier related
>> details can be found. All allocated memory types will be listed there as
>> /sys/devices/virtual/memtier/memtypeN/
> 
> Another choice is to make memory types and memory tiers system devices.
> That is,
> 
> /sys/devices/system/memory_type/memory_typeN
> /sys/devices/system/memory_tier/memory_tierN
> 

subsys_system_register() documentation says 

 * Do not use this interface for anything new, it exists for compatibility
 * with bad ideas only. New subsystems should use plain subsystems; and
 * add the subsystem-wide attributes should be added to the subsystem
 * directory itself and not some create fake root-device placed in
 * /sys/devices/system/<name>.
 
memtier being a virtual device, I was under the impression that /sys/devices/virtual
is the recommended place. 

> That looks more natural to me.  Because we already have "node" and
> "memory" devices there.  Why don't you put memory types and memory tiers
> there?
> 
> And, I think we shouldn't put "memory_type" in the "memory_tier"
> directory.  "memory_type" isn't a part of "memory_tier".
> 

I was looking at consolidating both memory tiers and memory types into the
same sysfs subsystem.  Your recommendation implies we create two subsystems,
memory_tier and memtype, which I was trying to avoid.  Maybe a generic term
like "memory_tiering" can help consolidate all tiering-related details there?
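
As a concrete sketch of that consolidation, keeping the plain-subsystem
approach from this series, the bus would simply be renamed, e.g.:

static struct bus_type memory_tiering_subsys = {
	.name = "memory_tiering",
	.dev_name = "memory_tiering",
};

so that both memory_tierN and memtypeN devices end up under
/sys/devices/virtual/memory_tiering/.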


>> The nodes which are part of a specific memory type can be listed via
>> /sys/devices/system/memtier/memtypeN/nodes.
> 
> How about create links to /sys/devices/system/node/nodeN in
> "memory_type".  But I'm OK to have "nodes" file too.
> 
>> The adistance value of a specific memory type can be listed via
>> /sys/devices/system/memtier/memtypeN/adistance.
>>
>> A directory listing looks like:
>> :/sys/devices/virtual/memtier# tree memtype1
>> memtype1
>> ├── adistance
> 
> Why not just use "abstract_distance"?  This is user space interface,
> it's better to be intuitive.
> 
>> ├── nodes
>> ├── subsystem -> ../../../../bus/memtier
>> └── uevent
>>
>> Since we will be using struct device to expose details via sysfs, drop struct
>> kref and use struct device for refcounting the memtype.
>>
> 
> Best Regards,
> Huang, Ying



* Re: [RFC PATCH 2/2] mm/demotion: Expose memory tier details via sysfs
  2022-08-25  9:23 ` [RFC PATCH 2/2] mm/demotion: Expose memory tier " Aneesh Kumar K.V
@ 2022-08-26  4:31   ` Huang, Ying
  0 siblings, 0 replies; 9+ messages in thread
From: Huang, Ying @ 2022-08-26  4:31 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: linux-mm, akpm, Wei Xu, Yang Shi, Davidlohr Bueso, Tim C Chen,
	Michal Hocko, Linux Kernel Mailing List, Hesham Almatary,
	Dave Hansen, Jonathan Cameron, Alistair Popple, Dan Williams,
	Johannes Weiner, jvgediya.oss, Bharata B Rao

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> All allocated memory tiers will be listed as
> /sys/devices/virtual/memtier/memtierN/
>
> Each memtier directory contains symbolic link for the memory types
> that are part of the memory tier. A directory hierarchy looks like
>
> :/sys/devices/virtual/memtier# tree memtier512/
> memtier512/

So you suggest using abstract_distance_start as the memory tier ID?  That
will keep the memory tier ID stable unless we change the abstract distance
chunk size or the abstract distance division points.  That is, we have at
least 2 choices here:

1. memory_tier0, memory_tier1, memory_tier2, ...

The ID will start from 0.  This is easy for users to understand.  The
main drawback is that the memory tier ID may change when a NUMA node is
onlined/offlined.  That is, the memory tier ID is relatively unstable.

2. memory_tier<abstract_distance_start1>, memory_tier<abstract_distance_start2>, ...

The IDs will be discontinuous, so they are not as intuitive as 0, 1, 2, ....
The main advantage is that the memory tier ID will not change when a NUMA
node is onlined/offlined.  The ID changes only when we change the abstract
distance chunk size or the abstract distance division points, which should
happen relatively seldom.

Personally, I prefer the 2nd choice too.  But I want to collect opinions
from other people too.

> ├── memtype1 -> ../memtype1
> ├── memtype2 -> ../memtype2

I think abstract_distance_start and abstract_distance_end are key
information of a memory tier too, so we should show them here (a sketch
of such attributes follows after the quoted text below).

> ├── subsystem -> ../../../../bus/memtier
> └── uevent
>
> The nodes which are part of a specific memory type can be listed via
> /sys/devices/system/memtier/memtypeN/nodes.
>
> The adistance value of a specific memory type can be listed via
> /sys/devices/system/memtier/memtypeN/adistance.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
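
A sketch of what such attributes could look like, mirroring the attribute
style of patch 1; this is an illustration only and would still need to be
wired into a dev_groups array for the memory tier device:

static ssize_t abstract_distance_start_show(struct device *dev,
					    struct device_attribute *attr, char *buf)
{
	struct memory_tier *memtier = to_memory_tier(dev);

	return sysfs_emit(buf, "%d\n", memtier->adistance_start);
}
static DEVICE_ATTR_RO(abstract_distance_start);

static ssize_t abstract_distance_end_show(struct device *dev,
					  struct device_attribute *attr, char *buf)
{
	struct memory_tier *memtier = to_memory_tier(dev);

	/* A tier covers adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE. */
	return sysfs_emit(buf, "%d\n", memtier->adistance_start + MEMTIER_CHUNK_SIZE);
}
static DEVICE_ATTR_RO(abstract_distance_end);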

Best Regards,
Huang, Ying

[snip]


* Re: [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
  2022-08-26  2:37   ` Aneesh Kumar K V
@ 2022-08-26  8:00     ` Wei Xu
  2022-08-26  8:05       ` Aneesh Kumar K V
  0 siblings, 1 reply; 9+ messages in thread
From: Wei Xu @ 2022-08-26  8:00 UTC (permalink / raw)
  To: Aneesh Kumar K V
  Cc: Huang, Ying, Linux MM, Andrew Morton, Yang Shi, Davidlohr Bueso,
	Tim C Chen, Michal Hocko, Linux Kernel Mailing List,
	Hesham Almatary, Dave Hansen, Jonathan Cameron, Alistair Popple,
	Dan Williams, Johannes Weiner, jvgediya.oss, Bharata B Rao

On Thu, Aug 25, 2022 at 8:00 PM Aneesh Kumar K V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 8/26/22 7:20 AM, Huang, Ying wrote:
> > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> >
> >> This patch adds /sys/devices/virtual/memtier/ where all memory tier related
> >> details can be found. All allocated memory types will be listed there as
> >> /sys/devices/virtual/memtier/memtypeN/
> >
> > Another choice is to make memory types and memory tiers system devices.
> > That is,
> >
> > /sys/devices/system/memory_type/memory_typeN
> > /sys/devices/system/memory_tier/memory_tierN
> >
>
> subsys_system_register() documentation says
>
>  * Do not use this interface for anything new, it exists for compatibility
>  * with bad ideas only. New subsystems should use plain subsystems; and
>  * add the subsystem-wide attributes should be added to the subsystem
>  * directory itself and not some create fake root-device placed in
>  * /sys/devices/system/<name>.
>
> memtier being a virtual device, I was under the impression that /sys/devices/virtual
> is the recommended place.
>
> > That looks more natural to me.  Because we already have "node" and
> > "memory" devices there.  Why don't you put memory types and memory tiers
> > there?
> >
> > And, I think we shouldn't put "memory_type" in the "memory_tier"
> > directory.  "memory_type" isn't a part of "memory_tier".
> >
>
> I was looking consolidating both memory tier and memory type into the same sysfs subsystem.
> Your recommendation imply we create two subsystem memory_tier and memtype. I was
> trying to avoid that. May be a generic term like "memory_tiering" can help to
> consolidate all tiering related details there?
>

A generic term "memory_tiering" sounds good to me.

Given that this will be a user-facing, stable kernel API, I think we'd
better only add what is most useful for userspace and not mirror the
kernel-internal data structures in this interface.

My understanding is that we haven't fully settled on how to customize
memory tiers from userspace.  So we don't have to show memory_type yet,
which is a kernel data structure at this point.

Userspace does need to know what the memory tiers are and which NUMA
nodes are included in each memory tier.  How about we provide the
"nodelist" interface for each memory tier, as in the original proposal?

Userspace would also like to know which memory tiers/nodes belong to
the top tiers (the promotion targets).  We can provide a "toptiers" or
"toptiers_nodelist" interface to report that.

Both should still be useful even if we decide to add memory_type for
memory tier customization.
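
For concreteness, a minimal sketch of what a per-tier "nodelist" attribute
could look like inside mm/memory-tiers.c, following the nodes_show() pattern
from patch 1 and the struct memory_tier fields from this series; illustration
only, not posted code.  A "toptiers_nodelist" could be derived similarly by
filtering tiers against top_tier_adistance.

static ssize_t nodelist_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	ssize_t ret;
	nodemask_t nmask = NODE_MASK_NONE;
	struct memory_dev_type *memtype;
	struct memory_tier *memtier = to_memory_tier(dev);

	mutex_lock(&memory_tier_lock);
	/* A tier's nodelist is the union of the nodes of its memory types. */
	list_for_each_entry(memtype, &memtier->memory_types, tier_sibiling)
		nodes_or(nmask, nmask, memtype->nodes);
	ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&nmask));
	mutex_unlock(&memory_tier_lock);
	return ret;
}
static DEVICE_ATTR_RO(nodelist);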

> >> The nodes which are part of a specific memory type can be listed via
> >> /sys/devices/system/memtier/memtypeN/nodes.
> >
> > How about create links to /sys/devices/system/node/nodeN in
> > "memory_type".  But I'm OK to have "nodes" file too.
> >
> >> The adistance value of a specific memory type can be listed via
> >> /sys/devices/system/memtier/memtypeN/adistance.
> >>
> >> A directory listing looks like:
> >> :/sys/devices/virtual/memtier# tree memtype1
> >> memtype1
> >> ├── adistance
> >
> > Why not just use "abstract_distance"?  This is user space interface,
> > it's better to be intuitive.
> >
> >> ├── nodes
> >> ├── subsystem -> ../../../../bus/memtier
> >> └── uevent
> >>
> >> Since we will be using struct device to expose details via sysfs, drop struct
> >> kref and use struct device for refcounting the memtype.
> >>
> >
> > Best Regards,
> > Huang, Ying
>


* Re: [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
  2022-08-26  8:00     ` Wei Xu
@ 2022-08-26  8:05       ` Aneesh Kumar K V
  2022-08-26  9:15         ` Wei Xu
  0 siblings, 1 reply; 9+ messages in thread
From: Aneesh Kumar K V @ 2022-08-26  8:05 UTC (permalink / raw)
  To: Wei Xu
  Cc: Huang, Ying, Linux MM, Andrew Morton, Yang Shi, Davidlohr Bueso,
	Tim C Chen, Michal Hocko, Linux Kernel Mailing List,
	Hesham Almatary, Dave Hansen, Jonathan Cameron, Alistair Popple,
	Dan Williams, Johannes Weiner, jvgediya.oss, Bharata B Rao

On 8/26/22 1:30 PM, Wei Xu wrote:
> On Thu, Aug 25, 2022 at 8:00 PM Aneesh Kumar K V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> On 8/26/22 7:20 AM, Huang, Ying wrote:
>>> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>>>
>>>> This patch adds /sys/devices/virtual/memtier/ where all memory tier related
>>>> details can be found. All allocated memory types will be listed there as
>>>> /sys/devices/virtual/memtier/memtypeN/
>>>
>>> Another choice is to make memory types and memory tiers system devices.
>>> That is,
>>>
>>> /sys/devices/system/memory_type/memory_typeN
>>> /sys/devices/system/memory_tier/memory_tierN
>>>
>>
>> subsys_system_register() documentation says
>>
>>  * Do not use this interface for anything new, it exists for compatibility
>>  * with bad ideas only. New subsystems should use plain subsystems; and
>>  * add the subsystem-wide attributes should be added to the subsystem
>>  * directory itself and not some create fake root-device placed in
>>  * /sys/devices/system/<name>.
>>
>> memtier being a virtual device, I was under the impression that /sys/devices/virtual
>> is the recommended place.
>>
>>> That looks more natural to me.  Because we already have "node" and
>>> "memory" devices there.  Why don't you put memory types and memory tiers
>>> there?
>>>
>>> And, I think we shouldn't put "memory_type" in the "memory_tier"
>>> directory.  "memory_type" isn't a part of "memory_tier".
>>>
>>
>> I was looking consolidating both memory tier and memory type into the same sysfs subsystem.
>> Your recommendation imply we create two subsystem memory_tier and memtype. I was
>> trying to avoid that. May be a generic term like "memory_tiering" can help to
>> consolidate all tiering related details there?
>>
> 
> A generic term "memory_tiering" sounds good to me.
> 
> Given that this will be a user-facing, stable kernel API, I think we'd
> better to only add what is most useful for userspace and don't have to
> mirror the kernel internal data structures in this interface.
> 
> My understanding is that we haven't fully settled down on how to
> customize memory tiers from userspace.  So we don't have to show
> memory_type yet, which is a kernel data structure at this point.
> 
> The userspace does need to know what are the memory tiers and which
> NUMA nodes are included in each memory tier.  How about we provide the
> "nodelist" interface for each memory tier as in the original proposal?
> 
> The userspace would also like to know which memory tiers/nodes belong
> to the top tiers (the promotion targets).  We can provide a "toptiers"
> or "toptiers_nodelist" interface to report that.
> 

How about also including abstract distance range of a memory tier?
That will be useful to derive the hierarchy.

> Both should still be useful even if we decide to add memory_type for
> memory tier customization.
> 

-aneesh


* Re: [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
  2022-08-26  8:05       ` Aneesh Kumar K V
@ 2022-08-26  9:15         ` Wei Xu
  2022-08-28 16:20           ` Aneesh Kumar K.V
  0 siblings, 1 reply; 9+ messages in thread
From: Wei Xu @ 2022-08-26  9:15 UTC (permalink / raw)
  To: Aneesh Kumar K V
  Cc: Huang, Ying, Linux MM, Andrew Morton, Yang Shi, Davidlohr Bueso,
	Tim C Chen, Michal Hocko, Linux Kernel Mailing List,
	Hesham Almatary, Dave Hansen, Jonathan Cameron, Alistair Popple,
	Dan Williams, Johannes Weiner, jvgediya.oss, Bharata B Rao

On Fri, Aug 26, 2022 at 1:05 AM Aneesh Kumar K V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 8/26/22 1:30 PM, Wei Xu wrote:
> > On Thu, Aug 25, 2022 at 8:00 PM Aneesh Kumar K V
> > <aneesh.kumar@linux.ibm.com> wrote:
> >>
> >> On 8/26/22 7:20 AM, Huang, Ying wrote:
> >>> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> >>>
> >>>> This patch adds /sys/devices/virtual/memtier/ where all memory tier related
> >>>> details can be found. All allocated memory types will be listed there as
> >>>> /sys/devices/virtual/memtier/memtypeN/
> >>>
> >>> Another choice is to make memory types and memory tiers system devices.
> >>> That is,
> >>>
> >>> /sys/devices/system/memory_type/memory_typeN
> >>> /sys/devices/system/memory_tier/memory_tierN
> >>>
> >>
> >> subsys_system_register() documentation says
> >>
> >>  * Do not use this interface for anything new, it exists for compatibility
> >>  * with bad ideas only. New subsystems should use plain subsystems; and
> >>  * add the subsystem-wide attributes should be added to the subsystem
> >>  * directory itself and not some create fake root-device placed in
> >>  * /sys/devices/system/<name>.
> >>
> >> memtier being a virtual device, I was under the impression that /sys/devices/virtual
> >> is the recommended place.
> >>
> >>> That looks more natural to me.  Because we already have "node" and
> >>> "memory" devices there.  Why don't you put memory types and memory tiers
> >>> there?
> >>>
> >>> And, I think we shouldn't put "memory_type" in the "memory_tier"
> >>> directory.  "memory_type" isn't a part of "memory_tier".
> >>>
> >>
> >> I was looking consolidating both memory tier and memory type into the same sysfs subsystem.
> >> Your recommendation imply we create two subsystem memory_tier and memtype. I was
> >> trying to avoid that. May be a generic term like "memory_tiering" can help to
> >> consolidate all tiering related details there?
> >>
> >
> > A generic term "memory_tiering" sounds good to me.
> >
> > Given that this will be a user-facing, stable kernel API, I think we'd
> > better to only add what is most useful for userspace and don't have to
> > mirror the kernel internal data structures in this interface.
> >
> > My understanding is that we haven't fully settled down on how to
> > customize memory tiers from userspace.  So we don't have to show
> > memory_type yet, which is a kernel data structure at this point.
> >
> > The userspace does need to know what are the memory tiers and which
> > NUMA nodes are included in each memory tier.  How about we provide the
> > "nodelist" interface for each memory tier as in the original proposal?
> >
> > The userspace would also like to know which memory tiers/nodes belong
> > to the top tiers (the promotion targets).  We can provide a "toptiers"
> > or "toptiers_nodelist" interface to report that.
> >
>
> How about also including abstract distance range of a memory tier?
> That will be useful to derive the hierarchy.

With the base abstract distance in the memtier name, do we need to
show the abstract distance range if we don't customize memory tiers?

> > Both should still be useful even if we decide to add memory_type for
> > memory tier customization.
> >
>
> -aneesh


* Re: [RFC PATCH 1/2] mm/demotion: Expose memory type details via sysfs
  2022-08-26  9:15         ` Wei Xu
@ 2022-08-28 16:20           ` Aneesh Kumar K.V
  0 siblings, 0 replies; 9+ messages in thread
From: Aneesh Kumar K.V @ 2022-08-28 16:20 UTC (permalink / raw)
  To: Wei Xu
  Cc: Huang, Ying, Linux MM, Andrew Morton, Yang Shi, Davidlohr Bueso,
	Tim C Chen, Michal Hocko, Linux Kernel Mailing List,
	Hesham Almatary, Dave Hansen, Jonathan Cameron, Alistair Popple,
	Dan Williams, Johannes Weiner, jvgediya.oss, Bharata B Rao

Wei Xu <weixugc@google.com> writes:

>
> On Fri, Aug 26, 2022 at 1:05 AM Aneesh Kumar K V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> On 8/26/22 1:30 PM, Wei Xu wrote:
>> > On Thu, Aug 25, 2022 at 8:00 PM Aneesh Kumar K V
>> > <aneesh.kumar@linux.ibm.com> wrote:
>> >>
>> >> On 8/26/22 7:20 AM, Huang, Ying wrote:
>> >>> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> >>>
>> >>>> This patch adds /sys/devices/virtual/memtier/ where all memory tier related
>> >>>> details can be found. All allocated memory types will be listed there as
>> >>>> /sys/devices/virtual/memtier/memtypeN/
>> >>>
>> >>> Another choice is to make memory types and memory tiers system devices.
>> >>> That is,
>> >>>
>> >>> /sys/devices/system/memory_type/memory_typeN
>> >>> /sys/devices/system/memory_tier/memory_tierN
>> >>>
>> >>
>> >> subsys_system_register() documentation says
>> >>
>> >>  * Do not use this interface for anything new, it exists for compatibility
>> >>  * with bad ideas only. New subsystems should use plain subsystems; and
>> >>  * add the subsystem-wide attributes should be added to the subsystem
>> >>  * directory itself and not some create fake root-device placed in
>> >>  * /sys/devices/system/<name>.
>> >>
>> >> memtier being a virtual device, I was under the impression that /sys/devices/virtual
>> >> is the recommended place.
>> >>
>> >>> That looks more natural to me.  Because we already have "node" and
>> >>> "memory" devices there.  Why don't you put memory types and memory tiers
>> >>> there?
>> >>>
>> >>> And, I think we shouldn't put "memory_type" in the "memory_tier"
>> >>> directory.  "memory_type" isn't a part of "memory_tier".
>> >>>
>> >>
>> >> I was looking consolidating both memory tier and memory type into the same sysfs subsystem.
>> >> Your recommendation imply we create two subsystem memory_tier and memtype. I was
>> >> trying to avoid that. May be a generic term like "memory_tiering" can help to
>> >> consolidate all tiering related details there?
>> >>
>> >
>> > A generic term "memory_tiering" sounds good to me.
>> >
>> > Given that this will be a user-facing, stable kernel API, I think we'd
>> > better to only add what is most useful for userspace and don't have to
>> > mirror the kernel internal data structures in this interface.
>> >
>> > My understanding is that we haven't fully settled down on how to
>> > customize memory tiers from userspace.  So we don't have to show
>> > memory_type yet, which is a kernel data structure at this point.
>> >
>> > The userspace does need to know what are the memory tiers and which
>> > NUMA nodes are included in each memory tier.  How about we provide the
>> > "nodelist" interface for each memory tier as in the original proposal?
>> >
>> > The userspace would also like to know which memory tiers/nodes belong
>> > to the top tiers (the promotion targets).  We can provide a "toptiers"
>> > or "toptiers_nodelist" interface to report that.
>> >
>>
>> How about also including abstract distance range of a memory tier?
>> That will be useful to derive the hierarchy.
>
> With the base abstract distance in the memtier name, do we need to
> show the abstract distance range if we don't customize memory tiers?
>

IMHO it would be simpler to let userspace find the abstract distance by
reading a file rather than by parsing a file name.
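
To make the contrast concrete, a userspace sketch of the parse-the-name
approach, with the memtier512 naming and 128-wide chunks assumed:

#include <stdio.h>

/*
 * Illustration only: derive a tier's abstract distance range by parsing the
 * directory name.  This hard-codes the chunk size, which is exactly the
 * coupling that reading a sysfs file would avoid.
 */
static int memtier_range_from_name(const char *name, int *start, int *end)
{
	if (sscanf(name, "memtier%d", start) != 1)
		return -1;
	*end = *start + 128;
	return 0;
}

Reading attributes such as the abstract_distance_start/_end files sketched
earlier in the thread keeps userspace working even if the chunk size changes.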

-aneesh

