* [PATCH v2] mm/mmzone: Introduce a new macro for_each_node_zonelist()
@ 2022-04-16 13:20 Donghyeok Kim
  2022-04-18 14:05 ` Mike Rapoport
  0 siblings, 1 reply; 3+ messages in thread
From: Donghyeok Kim @ 2022-04-16 13:20 UTC (permalink / raw)
  To: Andrew Morton, Mike Kravetz, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka, Roman Gushchin
  Cc: Ohhoon Kwon, JaeSang Yoo, Wonhyuk Yang, Jiyoup Kim,
	Donghyeok Kim, linux-mm, linux-kernel

Some code uses for_each_zone_zonelist() even when it only needs to
iterate over each node. This commit introduces a new macro,
for_each_node_zonelist(), which iterates over the valid nodes in a
zonelist.

With this new macro, such code can be written in a much simpler form.
In addition, slab/slub can now skip trying to allocate from a node
where a previous attempt already failed.
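
To illustrate the intended usage (a rough sketch, not taken from this
patch: try_node() and the local zonelist/highest_zoneidx variables are
hypothetical caller context), a loop that previously deduplicated nodes
by hand:

	struct zoneref *z;
	struct zone *zone;
	int nid, last_nid = NUMA_NO_NODE;

	for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
		nid = zone_to_nid(zone);
		/* skip further zones belonging to an already-visited node */
		if (nid == last_nid)
			continue;
		last_nid = nid;
		try_node(nid);
	}

can instead walk the nodes directly:

	struct zoneref *z;
	int nid;

	for_each_node_zonelist(nid, z, zonelist, highest_zoneidx)
		try_node(nid);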

Co-developed-by: Ohhoon Kwon <ohkwon1043@gmail.com>
Signed-off-by: Ohhoon Kwon <ohkwon1043@gmail.com>
Signed-off-by: Donghyeok Kim <dthex5d@gmail.com>
---
V1 -> V2: Fix a compile error

 include/linux/mmzone.h | 36 ++++++++++++++++++++++++++++++++++++
 mm/hugetlb.c           | 17 +++++++----------
 mm/mmzone.c            | 17 +++++++++++++++++
 mm/slab.c              |  7 ++-----
 mm/slub.c              |  8 ++++----
 mm/vmscan.c            | 16 ++++++----------
 6 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9aaa04ac862f..cb2ddd0b4c95 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1464,6 +1464,42 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 #define for_each_zone_zonelist(zone, z, zlist, highidx) \
 	for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
 
+
+struct zoneref *next_node_zones_zonelist(struct zoneref *z,
+					int prev_nid,
+					enum zone_type highest_zoneidx,
+					nodemask_t *nodes);
+
+/**
+ * for_each_node_zonelist_nodemask - iterate over nodes in a zonelist that have at least one zone at or below a given zone index and are within a nodemask
+ * @node: The current node in the iterator
+ * @z: First matched zoneref within the current node
+ * @zlist: The zonelist being iterated
+ * @highidx: The zone index of the highest zone to consider
+ * @nodemask: Nodemask allowed by the allocator
+ *
+ * This iterator walks all nodes that have at least one zone at or below the
+ * given zone index and are within the given nodemask.
+ */
+#define for_each_node_zonelist_nodemask(node, z, zlist, highidx, nodemask)		\
+	for (z = first_zones_zonelist(zlist, highidx, nodemask),			\
+			node = zonelist_zone(z) ? zonelist_node_idx(z) : NUMA_NO_NODE;	\
+		zonelist_zone(z);							\
+		z = next_node_zones_zonelist(++z, node, highidx, nodemask),		\
+			node = zonelist_zone(z) ? zonelist_node_idx(z) : NUMA_NO_NODE)
+
+/**
+ * for_each_node_zonelist - iterate over nodes in a zonelist that have at least one zone at or below a given zone index
+ * @node: The current node in the iterator
+ * @z: First matched zoneref within the current node
+ * @zlist: The zonelist being iterated
+ * @highidx: The zone index of the highest zone to consider
+ *
+ * This iterator walks all nodes that have at least one zone at or below the given zone index.
+ */
+#define for_each_node_zonelist(node, z, zlist, highidx) \
+	for_each_node_zonelist_nodemask(node, z, zlist, highidx, NULL)
+
 /* Whether the 'nodes' are all movable nodes */
 static inline bool movable_only_nodes(nodemask_t *nodes)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index daa4bdd6c26c..283f28f1aca8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1157,7 +1157,6 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
 {
 	unsigned int cpuset_mems_cookie;
 	struct zonelist *zonelist;
-	struct zone *zone;
 	struct zoneref *z;
 	int node = NUMA_NO_NODE;
 
@@ -1165,18 +1164,16 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
 
 retry_cpuset:
 	cpuset_mems_cookie = read_mems_allowed_begin();
-	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
+
+	/*
+	 * no need to ask again on the same node. Pool is node rather than
+	 * zone aware
+	 */
+	for_each_node_zonelist_nodemask(node, z, zonelist, gfp_zone(gfp_mask), nmask) {
 		struct page *page;
 
-		if (!cpuset_zone_allowed(zone, gfp_mask))
-			continue;
-		/*
-		 * no need to ask again on the same node. Pool is node rather than
-		 * zone aware
-		 */
-		if (zone_to_nid(zone) == node)
+		if (!cpuset_node_allowed(node, gfp_mask))
 			continue;
-		node = zone_to_nid(zone);
 
 		page = dequeue_huge_page_node_exact(h, node);
 		if (page)
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 68e1511be12d..8b7d6286056e 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -72,6 +72,23 @@ struct zoneref *__next_zones_zonelist(struct zoneref *z,
 	return z;
 }
 
+/* Return the first zoneref at or below highest_zoneidx in the next node (skipping prev_nid) of a zonelist */
+struct zoneref *next_node_zones_zonelist(struct zoneref *z,
+					int prev_nid,
+					enum zone_type highest_zoneidx,
+					nodemask_t *nodes)
+{
+	if (likely(nodes == NULL))
+		while (z->zone && (zonelist_node_idx(z) == prev_nid || zonelist_zone_idx(z) > highest_zoneidx))
+			z++;
+	else
+		while (z->zone && (zonelist_node_idx(z) == prev_nid || zonelist_zone_idx(z) > highest_zoneidx ||
+				!zref_in_nodemask(z, nodes)))
+			z++;
+
+	return z;
+}
+
 void lruvec_init(struct lruvec *lruvec)
 {
 	enum lru_list lru;
diff --git a/mm/slab.c b/mm/slab.c
index a301f266efd1..b374fb88f80e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3077,7 +3077,6 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
 	struct zonelist *zonelist;
 	struct zoneref *z;
-	struct zone *zone;
 	enum zone_type highest_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
 	struct slab *slab;
@@ -3096,10 +3095,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 	 * Look through allowed nodes for objects available
 	 * from existing per node queues.
 	 */
-	for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
-		nid = zone_to_nid(zone);
-
-		if (cpuset_zone_allowed(zone, flags) &&
+	for_each_node_zonelist(nid, z, zonelist, highest_zoneidx) {
+		if (cpuset_node_allowed(nid, flags) &&
 			get_node(cache, nid) &&
 			get_node(cache, nid)->free_objects) {
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/slub.c b/mm/slub.c
index 6dc703488d30..3e8b4aa98b84 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2192,7 +2192,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
 	struct zoneref *z;
-	struct zone *zone;
+	int nid;
 	enum zone_type highest_zoneidx = gfp_zone(flags);
 	void *object;
 	unsigned int cpuset_mems_cookie;
@@ -2222,12 +2222,12 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 	do {
 		cpuset_mems_cookie = read_mems_allowed_begin();
 		zonelist = node_zonelist(mempolicy_slab_node(), flags);
-		for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
+		for_each_node_zonelist(nid, z, zonelist, highest_zoneidx) {
 			struct kmem_cache_node *n;
 
-			n = get_node(s, zone_to_nid(zone));
+			n = get_node(s, nid);
 
-			if (n && cpuset_zone_allowed(zone, flags) &&
+			if (n && cpuset_node_allowed(nid, flags) &&
 					n->nr_partial > s->min_partial) {
 				object = get_partial_node(s, n, ret_slab, flags);
 				if (object) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d4a7d2bd276d..342874d54c45 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6176,9 +6176,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 					  struct scan_control *sc)
 {
 	int initial_priority = sc->priority;
-	pg_data_t *last_pgdat;
+	pg_data_t *pgdat;
 	struct zoneref *z;
-	struct zone *zone;
+	int nid;
 retry:
 	delayacct_freepages_start();
 
@@ -6205,20 +6205,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			sc->may_writepage = 1;
 	} while (--sc->priority >= 0);
 
-	last_pgdat = NULL;
-	for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
+	for_each_node_zonelist_nodemask(nid, z, zonelist, sc->reclaim_idx,
 					sc->nodemask) {
-		if (zone->zone_pgdat == last_pgdat)
-			continue;
-		last_pgdat = zone->zone_pgdat;
+		pgdat = NODE_DATA(nid);
 
-		snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+		snapshot_refaults(sc->target_mem_cgroup, pgdat);
 
 		if (cgroup_reclaim(sc)) {
 			struct lruvec *lruvec;
 
-			lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup,
-						   zone->zone_pgdat);
+			lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 			clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
 		}
 	}
-- 
2.17.1



* Re: [PATCH v2] mm/mmzone: Introduce a new macro for_each_node_zonelist()
  2022-04-16 13:20 [PATCH v2] mm/mmzone: Introduce a new macro for_each_node_zonelist() Donghyeok Kim
@ 2022-04-18 14:05 ` Mike Rapoport
  2022-04-19 12:53   ` David Hildenbrand
  0 siblings, 1 reply; 3+ messages in thread
From: Mike Rapoport @ 2022-04-18 14:05 UTC (permalink / raw)
  To: Donghyeok Kim
  Cc: Andrew Morton, Mike Kravetz, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka, Roman Gushchin,
	Ohhoon Kwon, JaeSang Yoo, Wonhyuk Yang, Jiyoup Kim, linux-mm,
	linux-kernel

On Sat, Apr 16, 2022 at 10:20:35PM +0900, Donghyeok Kim wrote:
> Some code uses for_each_zone_zonelist() even when it only needs to
> iterate over each node. This commit introduces a new macro,
> for_each_node_zonelist(), which iterates over the valid nodes in a
> zonelist.
> 
> With this new macro, such code can be written in a much simpler form.
> In addition, slab/slub can now skip trying to allocate from a node
> where a previous attempt already failed.
> 
> Co-developed-by: Ohhoon Kwon <ohkwon1043@gmail.com>
> Signed-off-by: Ohhoon Kwon <ohkwon1043@gmail.com>
> Signed-off-by: Donghyeok Kim <dthex5d@gmail.com>
> ---
> V1 -> V2: Fix a compile error
> 
>  include/linux/mmzone.h | 36 ++++++++++++++++++++++++++++++++++++
>  mm/hugetlb.c           | 17 +++++++----------
>  mm/mmzone.c            | 17 +++++++++++++++++
>  mm/slab.c              |  7 ++-----
>  mm/slub.c              |  8 ++++----
>  mm/vmscan.c            | 16 ++++++----------
>  6 files changed, 72 insertions(+), 29 deletions(-)

Even counting the comments this does not look like a great simplification.

> 
...

> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index daa4bdd6c26c..283f28f1aca8 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1157,7 +1157,6 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
>  {
>  	unsigned int cpuset_mems_cookie;
>  	struct zonelist *zonelist;
> -	struct zone *zone;
>  	struct zoneref *z;
>  	int node = NUMA_NO_NODE;
>  
> @@ -1165,18 +1164,16 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
>  
>  retry_cpuset:
>  	cpuset_mems_cookie = read_mems_allowed_begin();
> -	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
> +
> +	/*
> +	 * no need to ask again on the same node. Pool is node rather than
> +	 * zone aware
> +	 */
> +	for_each_node_zonelist_nodemask(node, z, zonelist, gfp_zone(gfp_mask), nmask) {
>  		struct page *page;
>  
> -		if (!cpuset_zone_allowed(zone, gfp_mask))
> -			continue;
> -		/*
> -		 * no need to ask again on the same node. Pool is node rather than
> -		 * zone aware
> -		 */
> -		if (zone_to_nid(zone) == node)
> +		if (!cpuset_node_allowed(node, gfp_mask))
>  			continue;
> -		node = zone_to_nid(zone);

The actual change here seems to be the deletion of the node assignment. I'd
say it's not worth the churn.

>  
>  		page = dequeue_huge_page_node_exact(h, node);
>  		if (page)

...

> diff --git a/mm/slab.c b/mm/slab.c
> index a301f266efd1..b374fb88f80e 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3077,7 +3077,6 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
>  {
>  	struct zonelist *zonelist;
>  	struct zoneref *z;
> -	struct zone *zone;
>  	enum zone_type highest_zoneidx = gfp_zone(flags);
>  	void *obj = NULL;
>  	struct slab *slab;
> @@ -3096,10 +3095,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
>  	 * Look through allowed nodes for objects available
>  	 * from existing per node queues.
>  	 */
> -	for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
> -		nid = zone_to_nid(zone);
> -
> -		if (cpuset_zone_allowed(zone, flags) &&
> +	for_each_node_zonelist(nid, z, zonelist, highest_zoneidx) {
> +		if (cpuset_node_allowed(nid, flags) &&

Same here.

>  			get_node(cache, nid) &&
>  			get_node(cache, nid)->free_objects) {
>  				obj = ____cache_alloc_node(cache,
> diff --git a/mm/slub.c b/mm/slub.c
> index 6dc703488d30..3e8b4aa98b84 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2192,7 +2192,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
>  #ifdef CONFIG_NUMA
>  	struct zonelist *zonelist;
>  	struct zoneref *z;
> -	struct zone *zone;
> +	int nid;
>  	enum zone_type highest_zoneidx = gfp_zone(flags);
>  	void *object;
>  	unsigned int cpuset_mems_cookie;
> @@ -2222,12 +2222,12 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
>  	do {
>  		cpuset_mems_cookie = read_mems_allowed_begin();
>  		zonelist = node_zonelist(mempolicy_slab_node(), flags);
> -		for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
> +		for_each_node_zonelist(nid, z, zonelist, highest_zoneidx) {
>  			struct kmem_cache_node *n;
>  
> -			n = get_node(s, zone_to_nid(zone));
> +			n = get_node(s, nid);
>  
> -			if (n && cpuset_zone_allowed(zone, flags) &&
> +			if (n && cpuset_node_allowed(nid, flags) &&
>  					n->nr_partial > s->min_partial) {

And the changes here look rather like just using a different iterator variable.

>  				object = get_partial_node(s, n, ret_slab, flags);
>  				if (object) {
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index d4a7d2bd276d..342874d54c45 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -6176,9 +6176,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
>  					  struct scan_control *sc)
>  {
>  	int initial_priority = sc->priority;
> -	pg_data_t *last_pgdat;
> +	pg_data_t *pgdat;
>  	struct zoneref *z;
> -	struct zone *zone;
> +	int nid;
>  retry:
>  	delayacct_freepages_start();
>  
> @@ -6205,20 +6205,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
>  			sc->may_writepage = 1;
>  	} while (--sc->priority >= 0);
>  
> -	last_pgdat = NULL;
> -	for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
> +	for_each_node_zonelist_nodemask(nid, z, zonelist, sc->reclaim_idx,
>  					sc->nodemask) {
> -		if (zone->zone_pgdat == last_pgdat)
> -			continue;
> -		last_pgdat = zone->zone_pgdat;
> +		pgdat = NODE_DATA(nid);

I don't think this simplification is compelling enough to justify all the
changes.

>  
> -		snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
> +		snapshot_refaults(sc->target_mem_cgroup, pgdat);
>  
>  		if (cgroup_reclaim(sc)) {
>  			struct lruvec *lruvec;
>  
> -			lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup,
> -						   zone->zone_pgdat);
> +			lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
>  			clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
>  		}
>  	}
> -- 
> 2.17.1
> 
> 

-- 
Sincerely yours,
Mike.


* Re: [PATCH v2] mm/mmzone: Introduce a new macro for_each_node_zonelist()
  2022-04-18 14:05 ` Mike Rapoport
@ 2022-04-19 12:53   ` David Hildenbrand
  0 siblings, 0 replies; 3+ messages in thread
From: David Hildenbrand @ 2022-04-19 12:53 UTC (permalink / raw)
  To: Mike Rapoport, Donghyeok Kim
  Cc: Andrew Morton, Mike Kravetz, Christoph Lameter, Pekka Enberg,
	David Rientjes, Joonsoo Kim, Vlastimil Babka, Roman Gushchin,
	Ohhoon Kwon, JaeSang Yoo, Wonhyuk Yang, Jiyoup Kim, linux-mm,
	linux-kernel

On 18.04.22 16:05, Mike Rapoport wrote:
> On Sat, Apr 16, 2022 at 10:20:35PM +0900, Donghyeok Kim wrote:
>> Some code uses for_each_zone_zonelist() even when it only needs to
>> iterate over each node. This commit introduces a new macro,
>> for_each_node_zonelist(), which iterates over the valid nodes in a
>> zonelist.
>>
>> With this new macro, such code can be written in a much simpler form.
>> In addition, slab/slub can now skip trying to allocate from a node
>> where a previous attempt already failed.
>>
>> Co-developed-by: Ohhoon Kwon <ohkwon1043@gmail.com>
>> Signed-off-by: Ohhoon Kwon <ohkwon1043@gmail.com>
>> Signed-off-by: Donghyeok Kim <dthex5d@gmail.com>
>> ---
>> V1 -> V2: Fix a compile error
>>
>>  include/linux/mmzone.h | 36 ++++++++++++++++++++++++++++++++++++
>>  mm/hugetlb.c           | 17 +++++++----------
>>  mm/mmzone.c            | 17 +++++++++++++++++
>>  mm/slab.c              |  7 ++-----
>>  mm/slub.c              |  8 ++++----
>>  mm/vmscan.c            | 16 ++++++----------
>>  6 files changed, 72 insertions(+), 29 deletions(-)
> 
> Even counting the comments this does not look like a great simplification.
> 

I tend to agree.


-- 
Thanks,

David / dhildenb


