linux-mm.kvack.org archive mirror
* [PATCH 0/3] Some small improvements for memblock.
@ 2023-01-13  8:26 Peng Zhang
  2023-01-13  8:26 ` [PATCH 1/3] memblock: Make a boundary tighter in memblock_add_range() Peng Zhang
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Peng Zhang @ 2023-01-13  8:26 UTC (permalink / raw)
  To: rppt, akpm; +Cc: linux-mm, linux-kernel, Peng Zhang

Hi,
I found some small optimizations while reading the memblock code.
Please help review them. Thanks.

Peng Zhang (3):
  memblock: Make a boundary tighter in memblock_add_range().
  memblock: Make finding index faster when modify regions.
  memblock: Avoid useless checks in memblock_merge_regions().

 mm/memblock.c | 71 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 17 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] memblock: Make a boundary tighter in memblock_add_range().
  2023-01-13  8:26 [PATCH 0/3] Some small improvements for memblock Peng Zhang
@ 2023-01-13  8:26 ` Peng Zhang
  2023-01-13  8:26 ` [PATCH 2/3] memblock: Make finding index faster when modify regions Peng Zhang
  2023-01-13  8:26 ` [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions() Peng Zhang
  2 siblings, 0 replies; 11+ messages in thread
From: Peng Zhang @ 2023-01-13  8:26 UTC (permalink / raw)
  To: rppt, akpm; +Cc: linux-mm, linux-kernel, Peng Zhang

When type->cnt * 2 + 1 is less than or equal to type->max, there are
already enough empty regions to insert into, so the boundary check can
be relaxed from '<' to '<='.
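
To sanity-check the bound with made-up numbers (not taken from a real
configuration): suppose type->cnt = 3 and type->max = 7. In the worst
case the new range overlaps all three existing regions and has to be
split around them, requiring at most type->cnt + 1 = 4 insertions;
3 existing + 4 new = 7 regions still fits in type->max. Since
3 * 2 + 1 = 7 <= 7, the relaxed check lets this case insert directly in
the first pass instead of taking the slower two-pass path.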

Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
---
 mm/memblock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 511d4783dcf1..6eedcfc5dcc1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -601,11 +601,11 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	/*
 	 * The worst case is when new range overlaps all existing regions,
 	 * then we'll need type->cnt + 1 empty regions in @type. So if
-	 * type->cnt * 2 + 1 is less than type->max, we know
+	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
 	 * that there is enough empty regions in @type, and we can insert
 	 * regions directly.
 	 */
-	if (type->cnt * 2 + 1 < type->max)
+	if (type->cnt * 2 + 1 <= type->max)
 		insert = true;
 
 repeat:
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] memblock: Make finding index faster when modify regions.
  2023-01-13  8:26 [PATCH 0/3] Some small improvements for memblock Peng Zhang
  2023-01-13  8:26 ` [PATCH 1/3] memblock: Make a boundary tighter in memblock_add_range() Peng Zhang
@ 2023-01-13  8:26 ` Peng Zhang
  2023-01-15 14:02   ` Mike Rapoport
  2023-01-13  8:26 ` [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions() Peng Zhang
  2 siblings, 1 reply; 11+ messages in thread
From: Peng Zhang @ 2023-01-13  8:26 UTC (permalink / raw)
  To: rppt, akpm; +Cc: linux-mm, linux-kernel, Peng Zhang

Since the regions are kept sorted by base address, we can use a binary
search to find the first index that needs to be modified in
memblock_add_range() and memblock_isolate_range(). This may be faster
when there are many regions. So implement a binary search helper and a
new macro to walk regions starting from a given index.
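
To illustrate the helper's semantics with an invented layout (not from
a real boot): given regions sorted by base,

  regions[0] = [0x1000, 0x2000)
  regions[1] = [0x3000, 0x4000)
  regions[2] = [0x5000, 0x6000)

memblock_lower_bound_region(type, 0x2800) returns 1, because region 0
ends at or below 0x2800 while region 1 does not, so a walk for a range
starting at 0x2800 can begin at index 1 instead of scanning from 0.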

Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
---
 mm/memblock.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 6eedcfc5dcc1..cb92770ac22e 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -149,6 +149,11 @@ static __refdata struct memblock_type *memblock_memory = &memblock.memory;
 	     i < memblock_type->cnt;					\
 	     i++, rgn = &memblock_type->regions[i])
 
+#define for_each_memblock_type_start(i, start, memblock_type, rgn)	\
+	for (i = start, rgn = &memblock_type->regions[i];		\
+	     i < memblock_type->cnt;					\
+	     i++, rgn = &memblock_type->regions[i])
+
 #define memblock_dbg(fmt, ...)						\
 	do {								\
 		if (memblock_debug)					\
@@ -181,6 +186,24 @@ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, p
 	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
 }
 
+/*
+ * Binary search for the first region not to the left of @base.
+ */
+static unsigned long __init_memblock memblock_lower_bound_region(struct memblock_type *type,
+								 phys_addr_t base)
+{
+	unsigned long idx, low_idx = 0, high_idx = type->cnt;
+
+	while (low_idx < high_idx) {
+		idx = (low_idx + high_idx) >> 1;
+		if (type->regions[idx].base + type->regions[idx].size <= base)
+			low_idx = idx + 1;
+		else
+			high_idx = idx;
+	}
+	return low_idx;
+}
+
 bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 					phys_addr_t base, phys_addr_t size)
 {
@@ -581,7 +604,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	bool insert = false;
 	phys_addr_t obase = base;
 	phys_addr_t end = base + memblock_cap_size(base, &size);
-	int idx, nr_new;
+	int idx, start_idx, nr_new;
 	struct memblock_region *rgn;
 
 	if (!size)
@@ -607,6 +630,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	 */
 	if (type->cnt * 2 + 1 <= type->max)
 		insert = true;
+	start_idx = memblock_lower_bound_region(type, base);
 
 repeat:
 	/*
@@ -617,7 +641,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	base = obase;
 	nr_new = 0;
 
-	for_each_memblock_type(idx, type, rgn) {
+	for_each_memblock_type_start(idx, start_idx, type, rgn) {
 		phys_addr_t rbase = rgn->base;
 		phys_addr_t rend = rbase + rgn->size;
 
@@ -737,7 +761,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 					int *start_rgn, int *end_rgn)
 {
 	phys_addr_t end = base + memblock_cap_size(base, &size);
-	int idx;
+	int idx, start_idx;
 	struct memblock_region *rgn;
 
 	*start_rgn = *end_rgn = 0;
@@ -750,7 +774,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 		if (memblock_double_array(type, base, size) < 0)
 			return -ENOMEM;
 
-	for_each_memblock_type(idx, type, rgn) {
+	start_idx = memblock_lower_bound_region(type, base);
+	for_each_memblock_type_start(idx, start_idx, type, rgn) {
 		phys_addr_t rbase = rgn->base;
 		phys_addr_t rend = rbase + rgn->size;
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions().
  2023-01-13  8:26 [PATCH 0/3] Some small improvements for memblock Peng Zhang
  2023-01-13  8:26 ` [PATCH 1/3] memblock: Make a boundary tighter in memblock_add_range() Peng Zhang
  2023-01-13  8:26 ` [PATCH 2/3] memblock: Make finding index faster when modify regions Peng Zhang
@ 2023-01-13  8:26 ` Peng Zhang
  2023-01-17  7:31   ` Peng Zhang
  2023-01-19 13:04   ` Mike Rapoport
  2 siblings, 2 replies; 11+ messages in thread
From: Peng Zhang @ 2023-01-13  8:26 UTC (permalink / raw)
  To: rppt, akpm; +Cc: linux-mm, linux-kernel, Peng Zhang

memblock_merge_regions() is called after regions have been modified in
order to merge neighboring compatible regions. It checks all regions,
but most of those checks are useless.

Most of the time we only insert or modify one or a few regions, so we
don't need to check every region: only the changed regions can have
become mergeable, while unrelated regions cannot. So this patch adds
two parameters to memblock_merge_regions() to indicate the lower and
upper boundary of the range to scan.
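
As an example with invented indices: if memblock_add_range() inserts
two new regions at indices 5 and 6, it records start_rgn = 5 and
end_rgn = 7, so memblock_merge_regions() only compares the pairs
(i, i + 1) for i from start_rgn - 1 = 4 (to catch a merge with the
region just before the first insertion) up to end_rgn - 1 (with
end_rgn first clamped to cnt - 1); regions outside that window were
not touched and cannot have become mergeable.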

Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
---
 mm/memblock.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index cb92770ac22e..e19eb08efc73 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -523,15 +523,18 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 /**
  * memblock_merge_regions - merge neighboring compatible regions
  * @type: memblock type to scan
- *
- * Scan @type and merge neighboring compatible regions.
+ * @start_rgn: start scanning from (@start_rgn - 1)
+ * @end_rgn: end scanning at (@end_rgn - 1)
+ * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
  */
-static void __init_memblock memblock_merge_regions(struct memblock_type *type)
+static void __init_memblock memblock_merge_regions(struct memblock_type *type,
+						   int start_rgn,
+						   int end_rgn)
 {
-	int i = 0;
+	int i = max(start_rgn - 1, 0);
 
-	/* cnt never goes below 1 */
-	while (i < type->cnt - 1) {
+	end_rgn = min(end_rgn, (int)type->cnt - 1);
+	while (i < end_rgn) {
 		struct memblock_region *this = &type->regions[i];
 		struct memblock_region *next = &type->regions[i + 1];
 
@@ -548,6 +551,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
 		/* move forward from next + 1, index of which is i + 2 */
 		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
 		type->cnt--;
+		end_rgn--;
 	}
 }
 
@@ -604,7 +608,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	bool insert = false;
 	phys_addr_t obase = base;
 	phys_addr_t end = base + memblock_cap_size(base, &size);
-	int idx, start_idx, nr_new;
+	int idx, start_idx, nr_new, start_rgn = -1, end_rgn;
 	struct memblock_region *rgn;
 
 	if (!size)
@@ -659,10 +663,14 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 #endif
 			WARN_ON(flags != rgn->flags);
 			nr_new++;
-			if (insert)
+			if (insert) {
+				if (start_rgn == -1)
+					start_rgn = idx;
+				end_rgn = idx + 1;
 				memblock_insert_region(type, idx++, base,
 						       rbase - base, nid,
 						       flags);
+			}
 		}
 		/* area below @rend is dealt with, forget about it */
 		base = min(rend, end);
@@ -671,9 +679,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	/* insert the remaining portion */
 	if (base < end) {
 		nr_new++;
-		if (insert)
+		if (insert) {
+			if (start_rgn == -1)
+				start_rgn = idx;
+			end_rgn = idx + 1;
 			memblock_insert_region(type, idx, base, end - base,
 					       nid, flags);
+		}
 	}
 
 	if (!nr_new)
@@ -690,7 +702,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 		insert = true;
 		goto repeat;
 	} else {
-		memblock_merge_regions(type);
+		memblock_merge_regions(type, start_rgn, end_rgn);
 		return 0;
 	}
 }
@@ -927,7 +939,7 @@ static int __init_memblock memblock_setclr_flag(phys_addr_t base,
 			r->flags &= ~flag;
 	}
 
-	memblock_merge_regions(type);
+	memblock_merge_regions(type, start_rgn, end_rgn);
 	return 0;
 }
 
@@ -1300,7 +1312,7 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 	for (i = start_rgn; i < end_rgn; i++)
 		memblock_set_region_node(&type->regions[i], nid);
 
-	memblock_merge_regions(type);
+	memblock_merge_regions(type, start_rgn, end_rgn);
 #endif
 	return 0;
 }
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] memblock: Make finding index faster when modify regions.
  2023-01-13  8:26 ` [PATCH 2/3] memblock: Make finding index faster when modify regions Peng Zhang
@ 2023-01-15 14:02   ` Mike Rapoport
  2023-01-16  3:17     ` Peng Zhang
  0 siblings, 1 reply; 11+ messages in thread
From: Mike Rapoport @ 2023-01-15 14:02 UTC (permalink / raw)
  To: Peng Zhang; +Cc: akpm, linux-mm, linux-kernel

Hi,

On Fri, Jan 13, 2023 at 04:26:58PM +0800, Peng Zhang wrote:
> We can use binary search to find the index to modify regions in
> memblock_add_range() and memblock_isolate_range(). Because the
> arrangement of regions is ordered. It may be faster when there are
> many regions. So implemented a binary search and a new macro to walk
> regions.

Did you see a measurable speedup with this optimization?
I'm not in favor of micro-optimizations that complicate code.

> Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
> ---
>  mm/memblock.c | 33 +++++++++++++++++++++++++++++----
>  1 file changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 6eedcfc5dcc1..cb92770ac22e 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -149,6 +149,11 @@ static __refdata struct memblock_type *memblock_memory = &memblock.memory;
>  	     i < memblock_type->cnt;					\
>  	     i++, rgn = &memblock_type->regions[i])
>  
> +#define for_each_memblock_type_start(i, start, memblock_type, rgn)	\
> +	for (i = start, rgn = &memblock_type->regions[i];		\
> +	     i < memblock_type->cnt;					\
> +	     i++, rgn = &memblock_type->regions[i])
> +
>  #define memblock_dbg(fmt, ...)						\
>  	do {								\
>  		if (memblock_debug)					\
> @@ -181,6 +186,24 @@ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, p
>  	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
>  }
>  
> +/*
> + * Binary search for the first region not to the left of @base.
> + */
> +static unsigned long __init_memblock memblock_lower_bound_region(struct memblock_type *type,
> +								 phys_addr_t base)
> +{
> +	unsigned long idx, low_idx = 0, high_idx = type->cnt;
> +
> +	while (low_idx < high_idx) {
> +		idx = (low_idx + high_idx) >> 1;
> +		if (type->regions[idx].base + type->regions[idx].size <= base)
> +			low_idx = idx + 1;
> +		else
> +			high_idx = idx;
> +	}
> +	return low_idx;
> +}
> +
>  bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
>  					phys_addr_t base, phys_addr_t size)
>  {
> @@ -581,7 +604,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  	bool insert = false;
>  	phys_addr_t obase = base;
>  	phys_addr_t end = base + memblock_cap_size(base, &size);
> -	int idx, nr_new;
> +	int idx, start_idx, nr_new;
>  	struct memblock_region *rgn;
>  
>  	if (!size)
> @@ -607,6 +630,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  	 */
>  	if (type->cnt * 2 + 1 <= type->max)
>  		insert = true;
> +	start_idx = memblock_lower_bound_region(type, base);
>  
>  repeat:
>  	/*
> @@ -617,7 +641,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  	base = obase;
>  	nr_new = 0;
>  
> -	for_each_memblock_type(idx, type, rgn) {
> +	for_each_memblock_type_start(idx, start_idx, type, rgn) {
>  		phys_addr_t rbase = rgn->base;
>  		phys_addr_t rend = rbase + rgn->size;
>  
> @@ -737,7 +761,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
>  					int *start_rgn, int *end_rgn)
>  {
>  	phys_addr_t end = base + memblock_cap_size(base, &size);
> -	int idx;
> +	int idx, start_idx;
>  	struct memblock_region *rgn;
>  
>  	*start_rgn = *end_rgn = 0;
> @@ -750,7 +774,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
>  		if (memblock_double_array(type, base, size) < 0)
>  			return -ENOMEM;
>  
> -	for_each_memblock_type(idx, type, rgn) {
> +	start_idx = memblock_lower_bound_region(type, base);
> +	for_each_memblock_type_start(idx, start_idx, type, rgn) {
>  		phys_addr_t rbase = rgn->base;
>  		phys_addr_t rend = rbase + rgn->size;
>  
> -- 
> 2.20.1
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] memblock: Make finding index faster when modify regions.
  2023-01-15 14:02   ` Mike Rapoport
@ 2023-01-16  3:17     ` Peng Zhang
  2023-01-16  7:37       ` Mike Rapoport
  0 siblings, 1 reply; 11+ messages in thread
From: Peng Zhang @ 2023-01-16  3:17 UTC (permalink / raw)
  To: Mike Rapoport; +Cc: akpm, linux-mm, linux-kernel



On 2023/1/15 22:02, Mike Rapoport wrote:
> Hi,
> 
> On Fri, Jan 13, 2023 at 04:26:58PM +0800, Peng Zhang wrote:
>> We can use binary search to find the index to modify regions in
>> memblock_add_range() and memblock_isolate_range(). Because the
>> arrangement of regions is ordered. It may be faster when there are
>> many regions. So implemented a binary search and a new macro to walk
>> regions.
> 
> Did you see a measurable speedup with this optimization?
> I'm not in favor of micro-optimizations that complicate code.
>
Thank you for your reply. I haven't measured this patch yet;
theoretically this small optimization might be difficult to observe.
If you think this patch complicates the code, you can ignore it.

The three patches are independent and can be applied separately. The
logic of the third patch is very simple and does not complicate the
code. I tested it with the default qemu configuration: the total number
of iterations in memblock_merge_regions() is reduced from more than one
thousand to more than one hundred, and that is with only a small number
of regions. Could you consider the third patch?

Sincerely yours,
Peng.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] memblock: Make finding index faster when modify regions.
  2023-01-16  3:17     ` Peng Zhang
@ 2023-01-16  7:37       ` Mike Rapoport
  2023-01-16  8:40         ` Peng Zhang
  0 siblings, 1 reply; 11+ messages in thread
From: Mike Rapoport @ 2023-01-16  7:37 UTC (permalink / raw)
  To: Peng Zhang; +Cc: akpm, linux-mm, linux-kernel

On Mon, Jan 16, 2023 at 11:17:40AM +0800, Peng Zhang wrote:
> 
> 
> 在 2023/1/15 22:02, Mike Rapoport 写道:
> > Hi,
> > 
> > On Fri, Jan 13, 2023 at 04:26:58PM +0800, Peng Zhang wrote:
> > > We can use binary search to find the index to modify regions in
> > > memblock_add_range() and memblock_isolate_range(). Because the
> > > arrangement of regions is ordered. It may be faster when there are
> > > many regions. So implemented a binary search and a new macro to walk
> > > regions.
> > 
> > Did you see a measurable speedup with this optimization?
> > I'm not in favor of micro-optimizations that complicate code.
> > 
> Thank you for your reply. I haven't measured this patch yet, theoretically
> this small optimization might be difficult to observe.
> If you think this patch complicates the code, you can ignore this patch.
> 
> These three patches are independent and they can be applied independently.
> The logic of the third patch is very simple. It will not complicate the
> code. It is tested by the default configuration of qemu. The total number of
> iterations of memblock_merge_regions() in the third patch is reduced from
> more than one thousand to more than one hundred, this is only in the case of
> a small number of regions. Can you consider the third patch?

Can you please send the numbers and show how did you obtained them?

> Sincerely yours,
> Peng.

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] memblock: Make finding index faster when modify regions.
  2023-01-16  7:37       ` Mike Rapoport
@ 2023-01-16  8:40         ` Peng Zhang
  2023-01-19 13:00           ` Mike Rapoport
  0 siblings, 1 reply; 11+ messages in thread
From: Peng Zhang @ 2023-01-16  8:40 UTC (permalink / raw)
  To: Mike Rapoport; +Cc: akpm, linux-mm, linux-kernel



On 2023/1/16 15:37, Mike Rapoport wrote:
> On Mon, Jan 16, 2023 at 11:17:40AM +0800, Peng Zhang wrote:
>>
>>
>> 在 2023/1/15 22:02, Mike Rapoport 写道:
>>> Hi,
>>>
>>> On Fri, Jan 13, 2023 at 04:26:58PM +0800, Peng Zhang wrote:
>>>> We can use binary search to find the index to modify regions in
>>>> memblock_add_range() and memblock_isolate_range(). Because the
>>>> arrangement of regions is ordered. It may be faster when there are
>>>> many regions. So implemented a binary search and a new macro to walk
>>>> regions.
>>>
>>> Did you see a measurable speedup with this optimization?
>>> I'm not in favor of micro-optimizations that complicate code.
>>>
>> Thank you for your reply. I haven't measured this patch yet, theoretically
>> this small optimization might be difficult to observe.
>> If you think this patch complicates the code, you can ignore this patch.
>>
>> These three patches are independent and they can be applied independently.
>> The logic of the third patch is very simple. It will not complicate the
>> code. It is tested by the default configuration of qemu. The total number of
>> iterations of memblock_merge_regions() in the third patch is reduced from
>> more than one thousand to more than one hundred, this is only in the case of
>> a small number of regions. Can you consider the third patch?
> 
> Can you please send the numbers and show how did you obtained them?
> 
>> Sincerely yours,
>> Peng.
> 
I obtained the numbers like this:

void memblock_merge_regions(struct memblock_type *type)
{
	static int iteration_count = 0;
	static int max_nr_regions = 0;

	max_nr_regions = max(max_nr_regions, (int)type->cnt);
	...
	while (...) {
		iteration_count++;
		...
	}
	pr_info("iteration_count: %d max_nr_regions %d",
		iteration_count, max_nr_regions);
}

Boot the kernel with qemu.
The following numbers are the last output.

master branch:
iteration_count: 1762 max_nr_regions 41

patched:
iteration_count: 182 max_nr_regions 41

If max_nr_regions is larger, the difference will be even bigger.

Thanks.

Sincerely yours,
Peng


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions().
  2023-01-13  8:26 ` [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions() Peng Zhang
@ 2023-01-17  7:31   ` Peng Zhang
  2023-01-19 13:04   ` Mike Rapoport
  1 sibling, 0 replies; 11+ messages in thread
From: Peng Zhang @ 2023-01-17  7:31 UTC (permalink / raw)
  To: rppt; +Cc: linux-mm, linux-kernel, akpm

Hi, Mike

What's your opinion of this patch?

Sincerely yours,
Peng.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] memblock: Make finding index faster when modify regions.
  2023-01-16  8:40         ` Peng Zhang
@ 2023-01-19 13:00           ` Mike Rapoport
  0 siblings, 0 replies; 11+ messages in thread
From: Mike Rapoport @ 2023-01-19 13:00 UTC (permalink / raw)
  To: Peng Zhang; +Cc: akpm, linux-mm, linux-kernel

On Mon, Jan 16, 2023 at 04:40:47PM +0800, Peng Zhang wrote:
> 
> 
> 在 2023/1/16 15:37, Mike Rapoport 写道:
> > On Mon, Jan 16, 2023 at 11:17:40AM +0800, Peng Zhang wrote:
> > > 
> > > 
> > > 在 2023/1/15 22:02, Mike Rapoport 写道:
> > > > Hi,
> > > > 
> > > > On Fri, Jan 13, 2023 at 04:26:58PM +0800, Peng Zhang wrote:
> > > > > We can use binary search to find the index to modify regions in
> > > > > memblock_add_range() and memblock_isolate_range(). Because the
> > > > > arrangement of regions is ordered. It may be faster when there are
> > > > > many regions. So implemented a binary search and a new macro to walk
> > > > > regions.
> > > > 
> > > > Did you see a measurable speedup with this optimization?
> > > > I'm not in favor of micro-optimizations that complicate code.
> > > > 
> > > Thank you for your reply. I haven't measured this patch yet, theoretically
> > > this small optimization might be difficult to observe.
> > > If you think this patch complicates the code, you can ignore this patch.
> > > 
> > > These three patches are independent and they can be applied independently.
> > > The logic of the third patch is very simple. It will not complicate the
> > > code. It is tested by the default configuration of qemu. The total number of
> > > iterations of memblock_merge_regions() in the third patch is reduced from
> > > more than one thousand to more than one hundred, this is only in the case of
> > > a small number of regions. Can you consider the third patch?
> > 
> > Can you please send the numbers and show how did you obtained them?
> > 
> > > Sincerely yours,
> > > Peng.
> > 
> I obtained the numbers like this:
> 
> void memblock_merge_regions(struct memblock_type *type) {
> 	static int iteration_count = 0;
> 	static int max_nr_regions = 0;
> 
> 	max_nr_regions = max(max_nr_regions, (int)type->cnt);
> 	...
> 	while () {
> 		iteration_count++;
> 		...
> 	}
> 	pr_info("iteration_count: %d max_nr_regions %d", iteration_count,
> max_nr_regions);
> }
> 
> Boot the kernel by qemu.
> The folowing numbers is the last output.
> 
> master branch:
> iteration_count: 1762 max_nr_regions 41
> 
> patched:
> iteration_count: 182 max_nr_regions 41

The numbers that indicate what speedup your patch achieves should be a
part of the changelog. It would be great if you could test it on a real
machine and measure the actual time saved by your changes.

> If max_nr_regions is larger, the difference will be more.
> 
> Thanks.
> 
> Sincerely yours,
> Peng

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions().
  2023-01-13  8:26 ` [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions() Peng Zhang
  2023-01-17  7:31   ` Peng Zhang
@ 2023-01-19 13:04   ` Mike Rapoport
  1 sibling, 0 replies; 11+ messages in thread
From: Mike Rapoport @ 2023-01-19 13:04 UTC (permalink / raw)
  To: Peng Zhang; +Cc: akpm, linux-mm, linux-kernel

On Fri, Jan 13, 2023 at 04:26:59PM +0800, Peng Zhang wrote:
> memblock_merge_regions() is called after regions have been modified to
> merge the neighboring compatible regions. That will check all regions
> but most checks is useless.
> 
> Most of the time we only insert one or a few new regions, or modify one
> or a few regions. At this time, we don't need to check all regions. We
> only need to check the changed regions, because other not related
> regions cannot be merged. So this patch add two parameters to
> memblock_merge_regions() to indicate the lower and upper boundary to scan.
> 
> Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
> ---
>  mm/memblock.c | 36 ++++++++++++++++++++++++------------
>  1 file changed, 24 insertions(+), 12 deletions(-)
> 
> diff --git a/mm/memblock.c b/mm/memblock.c
> index cb92770ac22e..e19eb08efc73 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -523,15 +523,18 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
>  /**
>   * memblock_merge_regions - merge neighboring compatible regions
>   * @type: memblock type to scan
> - *
> - * Scan @type and merge neighboring compatible regions.
> + * @start_rgn: start scanning from (@start_rgn - 1)
> + * @end_rgn: end scanning at (@end_rgn - 1)
> + * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
>   */
> -static void __init_memblock memblock_merge_regions(struct memblock_type *type)
> +static void __init_memblock memblock_merge_regions(struct memblock_type *type,
> +						   int start_rgn,
> +						   int end_rgn)

Make start_rgn and end_rgn unsigned longs and ...

>  {
> -	int i = 0;
> +	int i = max(start_rgn - 1, 0);
  
... open code max() as 

	if (start_rgn)
		i = start_rgn;

> -	/* cnt never goes below 1 */
> -	while (i < type->cnt - 1) {
> +	end_rgn = min(end_rgn, (int)type->cnt - 1);

... and drop the casting here.

> +	while (i < end_rgn) {
>  		struct memblock_region *this = &type->regions[i];
>  		struct memblock_region *next = &type->regions[i + 1];
>  
> @@ -548,6 +551,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
>  		/* move forward from next + 1, index of which is i + 2 */
>  		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
>  		type->cnt--;
> +		end_rgn--;
>  	}
>  }
>  
> @@ -604,7 +608,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  	bool insert = false;
>  	phys_addr_t obase = base;
>  	phys_addr_t end = base + memblock_cap_size(base, &size);
> -	int idx, start_idx, nr_new;
> +	int idx, start_idx, nr_new, start_rgn = -1, end_rgn;
>  	struct memblock_region *rgn;
>  
>  	if (!size)
> @@ -659,10 +663,14 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  #endif
>  			WARN_ON(flags != rgn->flags);
>  			nr_new++;
> -			if (insert)
> +			if (insert) {
> +				if (start_rgn == -1)

you can initialize start_rgn with 0 and use if(!start_rgn) here.

> +					start_rgn = idx;
> +				end_rgn = idx + 1;
>  				memblock_insert_region(type, idx++, base,
>  						       rbase - base, nid,
>  						       flags);
> +			}
>  		}
>  		/* area below @rend is dealt with, forget about it */
>  		base = min(rend, end);
> @@ -671,9 +679,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  	/* insert the remaining portion */
>  	if (base < end) {
>  		nr_new++;
> -		if (insert)
> +		if (insert) {
> +			if (start_rgn == -1)

Ditto.

> +				start_rgn = idx;
> +			end_rgn = idx + 1;
>  			memblock_insert_region(type, idx, base, end - base,
>  					       nid, flags);
> +		}
>  	}
>  
>  	if (!nr_new)
> @@ -690,7 +702,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
>  		insert = true;
>  		goto repeat;
>  	} else {
> -		memblock_merge_regions(type);
> +		memblock_merge_regions(type, start_rgn, end_rgn);
>  		return 0;
>  	}
>  }
> @@ -927,7 +939,7 @@ static int __init_memblock memblock_setclr_flag(phys_addr_t base,
>  			r->flags &= ~flag;
>  	}
>  
> -	memblock_merge_regions(type);
> +	memblock_merge_regions(type, start_rgn, end_rgn);
>  	return 0;
>  }
>  
> @@ -1300,7 +1312,7 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
>  	for (i = start_rgn; i < end_rgn; i++)
>  		memblock_set_region_node(&type->regions[i], nid);
>  
> -	memblock_merge_regions(type);
> +	memblock_merge_regions(type, start_rgn, end_rgn);
>  #endif
>  	return 0;
>  }
> -- 
> 2.20.1
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread

Thread overview: 11+ messages
2023-01-13  8:26 [PATCH 0/3] Some small improvements for memblock Peng Zhang
2023-01-13  8:26 ` [PATCH 1/3] memblock: Make a boundary tighter in memblock_add_range() Peng Zhang
2023-01-13  8:26 ` [PATCH 2/3] memblock: Make finding index faster when modify regions Peng Zhang
2023-01-15 14:02   ` Mike Rapoport
2023-01-16  3:17     ` Peng Zhang
2023-01-16  7:37       ` Mike Rapoport
2023-01-16  8:40         ` Peng Zhang
2023-01-19 13:00           ` Mike Rapoport
2023-01-13  8:26 ` [PATCH 3/3] memblock: Avoid useless checks in memblock_merge_regions() Peng Zhang
2023-01-17  7:31   ` Peng Zhang
2023-01-19 13:04   ` Mike Rapoport
