linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump
@ 2019-04-03  3:05 Chen Zhou
  2019-04-03  3:05 ` [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G Chen Zhou
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Chen Zhou @ 2019-04-03  3:05 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, akpm, rppt, ard.biesheuvel,
	takahiro.akashi
  Cc: linux-arm-kernel, linux-kernel, kexec, linux-mm, wangkefeng.wang,
	Chen Zhou

When crashkernel is reserved above 4G in memory, kernel should reserve
some amount of low memory for swiotlb and some DMA buffers. So there may
be two crash kernel regions, one is below 4G, the other is above 4G.

Crash dump kernel reads more than one crash kernel regions via a dtb
property under node /chosen,
linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>.

Besides, we need to modify kexec-tools:
  arm64: support more than one crash kernel regions

Chen Zhou (3):
  arm64: kdump: support reserving crashkernel above 4G
  arm64: kdump: support more than one crash kernel regions
  kdump: update Documentation about crashkernel on arm64

 Documentation/admin-guide/kernel-parameters.txt |   4 +-
 arch/arm64/kernel/setup.c                       |   3 +
 arch/arm64/mm/init.c                            | 108 ++++++++++++++++++++----
 include/linux/memblock.h                        |   1 +
 mm/memblock.c                                   |  40 +++++++++
 5 files changed, 139 insertions(+), 17 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G
  2019-04-03  3:05 [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Chen Zhou
@ 2019-04-03  3:05 ` Chen Zhou
  2019-04-04 14:46   ` Mike Rapoport
  2019-04-03  3:05 ` [PATCH 2/3] arm64: kdump: support more than one crash kernel regions Chen Zhou
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Chen Zhou @ 2019-04-03  3:05 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, akpm, rppt, ard.biesheuvel,
	takahiro.akashi
  Cc: linux-arm-kernel, linux-kernel, kexec, linux-mm, wangkefeng.wang,
	Chen Zhou

When crashkernel is reserved above 4G in memory, kernel should
reserve some amount of low memory for swiotlb and some DMA buffers.

Kernel would try to allocate at least 256M below 4G automatically
as x86_64 if crashkernel is above 4G. Meanwhile, support
crashkernel=X,[high,low] in arm64.

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
---
 arch/arm64/kernel/setup.c |  3 ++
 arch/arm64/mm/init.c      | 71 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 413d566..82cd9a0 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -243,6 +243,9 @@ static void __init request_standard_resources(void)
 			request_resource(res, &kernel_data);
 #ifdef CONFIG_KEXEC_CORE
 		/* Userspace will find "Crash kernel" region in /proc/iomem. */
+		if (crashk_low_res.end && crashk_low_res.start >= res->start &&
+		    crashk_low_res.end <= res->end)
+			request_resource(res, &crashk_low_res);
 		if (crashk_res.end && crashk_res.start >= res->start &&
 		    crashk_res.end <= res->end)
 			request_resource(res, &crashk_res);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 6bc1350..ceb2a25 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -64,6 +64,57 @@ EXPORT_SYMBOL(memstart_addr);
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
+static int __init reserve_crashkernel_low(void)
+{
+	unsigned long long base, low_base = 0, low_size = 0;
+	unsigned long total_low_mem;
+	int ret;
+
+	total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
+
+	/* crashkernel=Y,low */
+	ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
+	if (ret) {
+		/*
+		 * two parts from lib/swiotlb.c:
+		 * -swiotlb size: user-specified with swiotlb= or default.
+		 *
+		 * -swiotlb overflow buffer: now hardcoded to 32k. We round it
+		 * to 8M for other buffers that may need to stay low too. Also
+		 * make sure we allocate enough extra low memory so that we
+		 * don't run out of DMA buffers for 32-bit devices.
+		 */
+		low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
+	} else {
+		/* passed with crashkernel=0,low ? */
+		if (!low_size)
+			return 0;
+	}
+
+	low_base = memblock_find_in_range(0, 1ULL << 32, low_size, SZ_2M);
+	if (!low_base) {
+		pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
+				(unsigned long)(low_size >> 20));
+		return -ENOMEM;
+	}
+
+	ret = memblock_reserve(low_base, low_size);
+	if (ret) {
+		pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
+		return ret;
+	}
+
+	pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
+			(unsigned long)(low_size >> 20),
+			(unsigned long)(low_base >> 20),
+			(unsigned long)(total_low_mem >> 20));
+
+	crashk_low_res.start = low_base;
+	crashk_low_res.end   = low_base + low_size - 1;
+
+	return 0;
+}
+
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
  *
@@ -74,19 +125,28 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_base, crash_size;
+	bool high = false;
 	int ret;
 
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 				&crash_size, &crash_base);
 	/* no crashkernel= or invalid value specified */
-	if (ret || !crash_size)
-		return;
+	if (ret || !crash_size) {
+		/* crashkernel=X,high */
+		ret = parse_crashkernel_high(boot_command_line, memblock_phys_mem_size(),
+				&crash_size, &crash_base);
+		if (ret || !crash_size)
+			return;
+		high = true;
+	}
 
 	crash_size = PAGE_ALIGN(crash_size);
 
 	if (crash_base == 0) {
 		/* Current arm64 boot protocol requires 2MB alignment */
-		crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+		crash_base = memblock_find_in_range(0,
+				high ? memblock_end_of_DRAM()
+				: ARCH_LOW_ADDRESS_LIMIT,
 				crash_size, SZ_2M);
 		if (crash_base == 0) {
 			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -112,6 +172,11 @@ static void __init reserve_crashkernel(void)
 	}
 	memblock_reserve(crash_base, crash_size);
 
+	if (crash_base >= SZ_4G && reserve_crashkernel_low()) {
+		memblock_free(crash_base, crash_size);
+		return;
+	}
+
 	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 		crash_base, crash_base + crash_size, crash_size >> 20);
 
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-03  3:05 [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Chen Zhou
  2019-04-03  3:05 ` [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G Chen Zhou
@ 2019-04-03  3:05 ` Chen Zhou
  2019-04-03 11:29   ` Mike Rapoport
  2019-04-03  3:05 ` [PATCH 3/3] kdump: update Documentation about crashkernel on arm64 Chen Zhou
  2019-04-09  5:20 ` [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Bhupesh Sharma
  3 siblings, 1 reply; 16+ messages in thread
From: Chen Zhou @ 2019-04-03  3:05 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, akpm, rppt, ard.biesheuvel,
	takahiro.akashi
  Cc: linux-arm-kernel, linux-kernel, kexec, linux-mm, wangkefeng.wang,
	Chen Zhou

After commit (arm64: kdump: support reserving crashkernel above 4G),
there may be two crash kernel regions, one is below 4G, the other is
above 4G.

Crash dump kernel reads more than one crash kernel regions via a dtb
property under node /chosen,
linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
---
 arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
 include/linux/memblock.h |  1 +
 mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ceb2a25..769c77a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
+# define CRASH_MAX_USABLE_RANGES        2
+
 static int __init reserve_crashkernel_low(void)
 {
 	unsigned long long base, low_base = 0, low_size = 0;
@@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
 		const char *uname, int depth, void *data)
 {
 	struct memblock_region *usablemem = data;
-	const __be32 *reg;
-	int len;
+	const __be32 *reg, *endp;
+	int len, nr = 0;
 
 	if (depth != 1 || strcmp(uname, "chosen") != 0)
 		return 0;
@@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
 	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
 		return 1;
 
-	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
-	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+	endp = reg + (len / sizeof(__be32));
+	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+		if (++nr >= CRASH_MAX_USABLE_RANGES)
+			break;
+	}
 
 	return 1;
 }
 
 static void __init fdt_enforce_memory_region(void)
 {
-	struct memblock_region reg = {
-		.size = 0,
-	};
-
-	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
-
-	if (reg.size)
-		memblock_cap_memory_range(reg.base, reg.size);
+	int i, cnt = 0;
+	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
+
+	memset(regs, 0, sizeof(regs));
+	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
+
+	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
+		if (regs[i].size)
+			cnt++;
+		else
+			break;
+	if (cnt)
+		memblock_cap_memory_ranges(regs, cnt);
 }
 
 void __init arm64_memblock_init(void)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 47e3c06..aeade34 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -446,6 +446,7 @@ phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
 void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
+void memblock_cap_memory_ranges(struct memblock_region *regs, int cnt);
 void memblock_mem_limit_remove_map(phys_addr_t limit);
 bool memblock_is_memory(phys_addr_t addr);
 bool memblock_is_map_memory(phys_addr_t addr);
diff --git a/mm/memblock.c b/mm/memblock.c
index 28fa8926..1a7f4ee7c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1697,6 +1697,46 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
 			base + size, PHYS_ADDR_MAX);
 }
 
+void __init memblock_cap_memory_ranges(struct memblock_region *regs, int cnt)
+{
+	int start_rgn[INIT_MEMBLOCK_REGIONS], end_rgn[INIT_MEMBLOCK_REGIONS];
+	int i, j, ret, nr = 0;
+
+	for (i = 0; i < cnt; i++) {
+		ret = memblock_isolate_range(&memblock.memory, regs[i].base,
+				regs[i].size, &start_rgn[i], &end_rgn[i]);
+		if (ret)
+			break;
+		nr++;
+	}
+	if (!nr)
+		return;
+
+	/* remove all the MAP regions */
+	for (i = memblock.memory.cnt - 1; i >= end_rgn[nr - 1]; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	for (i = nr - 1; i > 0; i--)
+		for (j = start_rgn[i] - 1; j >= end_rgn[i - 1]; j--)
+			if (!memblock_is_nomap(&memblock.memory.regions[j]))
+				memblock_remove_region(&memblock.memory, j);
+
+	for (i = start_rgn[0] - 1; i >= 0; i--)
+		if (!memblock_is_nomap(&memblock.memory.regions[i]))
+			memblock_remove_region(&memblock.memory, i);
+
+	/* truncate the reserved regions */
+	memblock_remove_range(&memblock.reserved, 0, regs[0].base);
+
+	for (i = nr - 1; i > 0; i--)
+		memblock_remove_range(&memblock.reserved,
+				regs[i].base, regs[i - 1].base + regs[i - 1].size);
+
+	memblock_remove_range(&memblock.reserved,
+			regs[nr - 1].base + regs[nr - 1].size, PHYS_ADDR_MAX);
+}
+
 void __init memblock_mem_limit_remove_map(phys_addr_t limit)
 {
 	phys_addr_t max_addr;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/3] kdump: update Documentation about crashkernel on arm64
  2019-04-03  3:05 [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Chen Zhou
  2019-04-03  3:05 ` [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G Chen Zhou
  2019-04-03  3:05 ` [PATCH 2/3] arm64: kdump: support more than one crash kernel regions Chen Zhou
@ 2019-04-03  3:05 ` Chen Zhou
  2019-04-09  5:20 ` [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Bhupesh Sharma
  3 siblings, 0 replies; 16+ messages in thread
From: Chen Zhou @ 2019-04-03  3:05 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, akpm, rppt, ard.biesheuvel,
	takahiro.akashi
  Cc: linux-arm-kernel, linux-kernel, kexec, linux-mm, wangkefeng.wang,
	Chen Zhou

Now we support crashkernel=X,[high,low] on arm64, update the
Documentation.

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 27a5f8c..6772f4f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -715,14 +715,14 @@
 			Documentation/kdump/kdump.txt for an example.
 
 	crashkernel=size[KMG],high
-			[KNL, x86_64] range could be above 4G. Allow kernel
+			[KNL, x86_64, arm64] range could be above 4G. Allow kernel
 			to allocate physical memory region from top, so could
 			be above 4G if system have more than 4G ram installed.
 			Otherwise memory region will be allocated below 4G, if
 			available.
 			It will be ignored if crashkernel=X is specified.
 	crashkernel=size[KMG],low
-			[KNL, x86_64] range under 4G. When crashkernel=X,high
+			[KNL, x86_64, arm64] range under 4G. When crashkernel=X,high
 			is passed, kernel could allocate physical memory region
 			above 4G, that cause second kernel crash on system
 			that require some amount of low memory, e.g. swiotlb
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-03  3:05 ` [PATCH 2/3] arm64: kdump: support more than one crash kernel regions Chen Zhou
@ 2019-04-03 11:29   ` Mike Rapoport
  2019-04-03 13:51     ` Chen Zhou
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Rapoport @ 2019-04-03 11:29 UTC (permalink / raw)
  To: Chen Zhou
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
> After commit (arm64: kdump: support reserving crashkernel above 4G),
> there may be two crash kernel regions, one is below 4G, the other is
> above 4G.
> 
> Crash dump kernel reads more than one crash kernel regions via a dtb
> property under node /chosen,
> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> ---
>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
>  include/linux/memblock.h |  1 +
>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 66 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index ceb2a25..769c77a 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  
>  #ifdef CONFIG_KEXEC_CORE
> +# define CRASH_MAX_USABLE_RANGES        2
> +
>  static int __init reserve_crashkernel_low(void)
>  {
>  	unsigned long long base, low_base = 0, low_size = 0;
> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>  		const char *uname, int depth, void *data)
>  {
>  	struct memblock_region *usablemem = data;
> -	const __be32 *reg;
> -	int len;
> +	const __be32 *reg, *endp;
> +	int len, nr = 0;
>  
>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
>  		return 0;
> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
>  		return 1;
>  
> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
> +	endp = reg + (len / sizeof(__be32));
> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
> +
> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
> +			break;
> +	}
>  
>  	return 1;
>  }
>  
>  static void __init fdt_enforce_memory_region(void)
>  {
> -	struct memblock_region reg = {
> -		.size = 0,
> -	};
> -
> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
> -
> -	if (reg.size)
> -		memblock_cap_memory_range(reg.base, reg.size);
> +	int i, cnt = 0;
> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
> +
> +	memset(regs, 0, sizeof(regs));
> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
> +
> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
> +		if (regs[i].size)
> +			cnt++;
> +		else
> +			break;
> +	if (cnt)
> +		memblock_cap_memory_ranges(regs, cnt);

Why not simply call memblock_cap_memory_range() for each region?

>  }
>  
>  void __init arm64_memblock_init(void)
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index 47e3c06..aeade34 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -446,6 +446,7 @@ phys_addr_t memblock_start_of_DRAM(void);
>  phys_addr_t memblock_end_of_DRAM(void);
>  void memblock_enforce_memory_limit(phys_addr_t memory_limit);
>  void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
> +void memblock_cap_memory_ranges(struct memblock_region *regs, int cnt);
>  void memblock_mem_limit_remove_map(phys_addr_t limit);
>  bool memblock_is_memory(phys_addr_t addr);
>  bool memblock_is_map_memory(phys_addr_t addr);
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 28fa8926..1a7f4ee7c 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1697,6 +1697,46 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
>  			base + size, PHYS_ADDR_MAX);
>  }
>  
> +void __init memblock_cap_memory_ranges(struct memblock_region *regs, int cnt)
> +{
> +	int start_rgn[INIT_MEMBLOCK_REGIONS], end_rgn[INIT_MEMBLOCK_REGIONS];
> +	int i, j, ret, nr = 0;
> +
> +	for (i = 0; i < cnt; i++) {
> +		ret = memblock_isolate_range(&memblock.memory, regs[i].base,
> +				regs[i].size, &start_rgn[i], &end_rgn[i]);
> +		if (ret)
> +			break;
> +		nr++;
> +	}
> +	if (!nr)
> +		return;
> +
> +	/* remove all the MAP regions */
> +	for (i = memblock.memory.cnt - 1; i >= end_rgn[nr - 1]; i--)
> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
> +			memblock_remove_region(&memblock.memory, i);
> +
> +	for (i = nr - 1; i > 0; i--)
> +		for (j = start_rgn[i] - 1; j >= end_rgn[i - 1]; j--)
> +			if (!memblock_is_nomap(&memblock.memory.regions[j]))
> +				memblock_remove_region(&memblock.memory, j);
> +
> +	for (i = start_rgn[0] - 1; i >= 0; i--)
> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
> +			memblock_remove_region(&memblock.memory, i);
> +
> +	/* truncate the reserved regions */
> +	memblock_remove_range(&memblock.reserved, 0, regs[0].base);
> +
> +	for (i = nr - 1; i > 0; i--)
> +		memblock_remove_range(&memblock.reserved,
> +				regs[i].base, regs[i - 1].base + regs[i - 1].size);
> +
> +	memblock_remove_range(&memblock.reserved,
> +			regs[nr - 1].base + regs[nr - 1].size, PHYS_ADDR_MAX);
> +}
> +
>  void __init memblock_mem_limit_remove_map(phys_addr_t limit)
>  {
>  	phys_addr_t max_addr;
> -- 
> 2.7.4
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-03 11:29   ` Mike Rapoport
@ 2019-04-03 13:51     ` Chen Zhou
  2019-04-04 14:44       ` Mike Rapoport
  0 siblings, 1 reply; 16+ messages in thread
From: Chen Zhou @ 2019-04-03 13:51 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi Mike,

On 2019/4/3 19:29, Mike Rapoport wrote:
> On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
>> After commit (arm64: kdump: support reserving crashkernel above 4G),
>> there may be two crash kernel regions, one is below 4G, the other is
>> above 4G.
>>
>> Crash dump kernel reads more than one crash kernel regions via a dtb
>> property under node /chosen,
>> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> ---
>>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
>>  include/linux/memblock.h |  1 +
>>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 66 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index ceb2a25..769c77a 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  
>>  #ifdef CONFIG_KEXEC_CORE
>> +# define CRASH_MAX_USABLE_RANGES        2
>> +
>>  static int __init reserve_crashkernel_low(void)
>>  {
>>  	unsigned long long base, low_base = 0, low_size = 0;
>> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>  		const char *uname, int depth, void *data)
>>  {
>>  	struct memblock_region *usablemem = data;
>> -	const __be32 *reg;
>> -	int len;
>> +	const __be32 *reg, *endp;
>> +	int len, nr = 0;
>>  
>>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
>>  		return 0;
>> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
>>  		return 1;
>>  
>> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
>> +	endp = reg + (len / sizeof(__be32));
>> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
>> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
>> +
>> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
>> +			break;
>> +	}
>>  
>>  	return 1;
>>  }
>>  
>>  static void __init fdt_enforce_memory_region(void)
>>  {
>> -	struct memblock_region reg = {
>> -		.size = 0,
>> -	};
>> -
>> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
>> -
>> -	if (reg.size)
>> -		memblock_cap_memory_range(reg.base, reg.size);
>> +	int i, cnt = 0;
>> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
>> +
>> +	memset(regs, 0, sizeof(regs));
>> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
>> +
>> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
>> +		if (regs[i].size)
>> +			cnt++;
>> +		else
>> +			break;
>> +	if (cnt)
>> +		memblock_cap_memory_ranges(regs, cnt);
> 
> Why not simply call memblock_cap_memory_range() for each region?

Function memblock_cap_memory_range() removes all memory type ranges except specified range.
So if we call memblock_cap_memory_range() for each region simply, there will be no usable-memory
on kdump capture kernel.

Thanks,
Chen Zhou

> 
>>  }
>>  
>>  void __init arm64_memblock_init(void)
>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>> index 47e3c06..aeade34 100644
>> --- a/include/linux/memblock.h
>> +++ b/include/linux/memblock.h
>> @@ -446,6 +446,7 @@ phys_addr_t memblock_start_of_DRAM(void);
>>  phys_addr_t memblock_end_of_DRAM(void);
>>  void memblock_enforce_memory_limit(phys_addr_t memory_limit);
>>  void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
>> +void memblock_cap_memory_ranges(struct memblock_region *regs, int cnt);
>>  void memblock_mem_limit_remove_map(phys_addr_t limit);
>>  bool memblock_is_memory(phys_addr_t addr);
>>  bool memblock_is_map_memory(phys_addr_t addr);
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index 28fa8926..1a7f4ee7c 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1697,6 +1697,46 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
>>  			base + size, PHYS_ADDR_MAX);
>>  }
>>  
>> +void __init memblock_cap_memory_ranges(struct memblock_region *regs, int cnt)
>> +{
>> +	int start_rgn[INIT_MEMBLOCK_REGIONS], end_rgn[INIT_MEMBLOCK_REGIONS];
>> +	int i, j, ret, nr = 0;
>> +
>> +	for (i = 0; i < cnt; i++) {
>> +		ret = memblock_isolate_range(&memblock.memory, regs[i].base,
>> +				regs[i].size, &start_rgn[i], &end_rgn[i]);
>> +		if (ret)
>> +			break;
>> +		nr++;
>> +	}
>> +	if (!nr)
>> +		return;
>> +
>> +	/* remove all the MAP regions */
>> +	for (i = memblock.memory.cnt - 1; i >= end_rgn[nr - 1]; i--)
>> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
>> +			memblock_remove_region(&memblock.memory, i);
>> +
>> +	for (i = nr - 1; i > 0; i--)
>> +		for (j = start_rgn[i] - 1; j >= end_rgn[i - 1]; j--)
>> +			if (!memblock_is_nomap(&memblock.memory.regions[j]))
>> +				memblock_remove_region(&memblock.memory, j);
>> +
>> +	for (i = start_rgn[0] - 1; i >= 0; i--)
>> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
>> +			memblock_remove_region(&memblock.memory, i);
>> +
>> +	/* truncate the reserved regions */
>> +	memblock_remove_range(&memblock.reserved, 0, regs[0].base);
>> +
>> +	for (i = nr - 1; i > 0; i--)
>> +		memblock_remove_range(&memblock.reserved,
>> +				regs[i].base, regs[i - 1].base + regs[i - 1].size);
>> +
>> +	memblock_remove_range(&memblock.reserved,
>> +			regs[nr - 1].base + regs[nr - 1].size, PHYS_ADDR_MAX);
>> +}
>> +
>>  void __init memblock_mem_limit_remove_map(phys_addr_t limit)
>>  {
>>  	phys_addr_t max_addr;
>> -- 
>> 2.7.4
>>
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-03 13:51     ` Chen Zhou
@ 2019-04-04 14:44       ` Mike Rapoport
  2019-04-05  2:17         ` Chen Zhou
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Rapoport @ 2019-04-04 14:44 UTC (permalink / raw)
  To: Chen Zhou
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi,

On Wed, Apr 03, 2019 at 09:51:27PM +0800, Chen Zhou wrote:
> Hi Mike,
> 
> On 2019/4/3 19:29, Mike Rapoport wrote:
> > On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
> >> After commit (arm64: kdump: support reserving crashkernel above 4G),
> >> there may be two crash kernel regions, one is below 4G, the other is
> >> above 4G.
> >>
> >> Crash dump kernel reads more than one crash kernel regions via a dtb
> >> property under node /chosen,
> >> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
> >>
> >> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> >> ---
> >>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
> >>  include/linux/memblock.h |  1 +
> >>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
> >>  3 files changed, 66 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >> index ceb2a25..769c77a 100644
> >> --- a/arch/arm64/mm/init.c
> >> +++ b/arch/arm64/mm/init.c
> >> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
> >>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>  
> >>  #ifdef CONFIG_KEXEC_CORE
> >> +# define CRASH_MAX_USABLE_RANGES        2
> >> +
> >>  static int __init reserve_crashkernel_low(void)
> >>  {
> >>  	unsigned long long base, low_base = 0, low_size = 0;
> >> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
> >>  		const char *uname, int depth, void *data)
> >>  {
> >>  	struct memblock_region *usablemem = data;
> >> -	const __be32 *reg;
> >> -	int len;
> >> +	const __be32 *reg, *endp;
> >> +	int len, nr = 0;
> >>  
> >>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
> >>  		return 0;
> >> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
> >>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
> >>  		return 1;
> >>  
> >> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> >> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
> >> +	endp = reg + (len / sizeof(__be32));
> >> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
> >> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> >> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
> >> +
> >> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
> >> +			break;
> >> +	}
> >>  
> >>  	return 1;
> >>  }
> >>  
> >>  static void __init fdt_enforce_memory_region(void)
> >>  {
> >> -	struct memblock_region reg = {
> >> -		.size = 0,
> >> -	};
> >> -
> >> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
> >> -
> >> -	if (reg.size)
> >> -		memblock_cap_memory_range(reg.base, reg.size);
> >> +	int i, cnt = 0;
> >> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
> >> +
> >> +	memset(regs, 0, sizeof(regs));
> >> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
> >> +
> >> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
> >> +		if (regs[i].size)
> >> +			cnt++;
> >> +		else
> >> +			break;
> >> +	if (cnt)
> >> +		memblock_cap_memory_ranges(regs, cnt);
> > 
> > Why not simply call memblock_cap_memory_range() for each region?
> 
> Function memblock_cap_memory_range() removes all memory type ranges except specified range.
> So if we call memblock_cap_memory_range() for each region simply, there will be no usable-memory
> on kdump capture kernel.

Thanks for the clarification.
I still think that memblock_cap_memory_ranges() is overly complex. 

How about doing something like this:

Cap the memory range for [min(regs[*].start, max(regs[*].end)] and then
removing the range in the middle?
 
> Thanks,
> Chen Zhou
> 
> > 
> >>  }
> >>  
> >>  void __init arm64_memblock_init(void)
> >> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> >> index 47e3c06..aeade34 100644
> >> --- a/include/linux/memblock.h
> >> +++ b/include/linux/memblock.h
> >> @@ -446,6 +446,7 @@ phys_addr_t memblock_start_of_DRAM(void);
> >>  phys_addr_t memblock_end_of_DRAM(void);
> >>  void memblock_enforce_memory_limit(phys_addr_t memory_limit);
> >>  void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
> >> +void memblock_cap_memory_ranges(struct memblock_region *regs, int cnt);
> >>  void memblock_mem_limit_remove_map(phys_addr_t limit);
> >>  bool memblock_is_memory(phys_addr_t addr);
> >>  bool memblock_is_map_memory(phys_addr_t addr);
> >> diff --git a/mm/memblock.c b/mm/memblock.c
> >> index 28fa8926..1a7f4ee7c 100644
> >> --- a/mm/memblock.c
> >> +++ b/mm/memblock.c
> >> @@ -1697,6 +1697,46 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
> >>  			base + size, PHYS_ADDR_MAX);
> >>  }
> >>  
> >> +void __init memblock_cap_memory_ranges(struct memblock_region *regs, int cnt)
> >> +{
> >> +	int start_rgn[INIT_MEMBLOCK_REGIONS], end_rgn[INIT_MEMBLOCK_REGIONS];
> >> +	int i, j, ret, nr = 0;
> >> +
> >> +	for (i = 0; i < cnt; i++) {
> >> +		ret = memblock_isolate_range(&memblock.memory, regs[i].base,
> >> +				regs[i].size, &start_rgn[i], &end_rgn[i]);
> >> +		if (ret)
> >> +			break;
> >> +		nr++;
> >> +	}
> >> +	if (!nr)
> >> +		return;
> >> +
> >> +	/* remove all the MAP regions */
> >> +	for (i = memblock.memory.cnt - 1; i >= end_rgn[nr - 1]; i--)
> >> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
> >> +			memblock_remove_region(&memblock.memory, i);
> >> +
> >> +	for (i = nr - 1; i > 0; i--)
> >> +		for (j = start_rgn[i] - 1; j >= end_rgn[i - 1]; j--)
> >> +			if (!memblock_is_nomap(&memblock.memory.regions[j]))
> >> +				memblock_remove_region(&memblock.memory, j);
> >> +
> >> +	for (i = start_rgn[0] - 1; i >= 0; i--)
> >> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
> >> +			memblock_remove_region(&memblock.memory, i);
> >> +
> >> +	/* truncate the reserved regions */
> >> +	memblock_remove_range(&memblock.reserved, 0, regs[0].base);
> >> +
> >> +	for (i = nr - 1; i > 0; i--)
> >> +		memblock_remove_range(&memblock.reserved,
> >> +				regs[i].base, regs[i - 1].base + regs[i - 1].size);
> >> +
> >> +	memblock_remove_range(&memblock.reserved,
> >> +			regs[nr - 1].base + regs[nr - 1].size, PHYS_ADDR_MAX);
> >> +}
> >> +
> >>  void __init memblock_mem_limit_remove_map(phys_addr_t limit)
> >>  {
> >>  	phys_addr_t max_addr;
> >> -- 
> >> 2.7.4
> >>
> > 
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G
  2019-04-03  3:05 ` [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G Chen Zhou
@ 2019-04-04 14:46   ` Mike Rapoport
  2019-04-05  3:03     ` Chen Zhou
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Rapoport @ 2019-04-04 14:46 UTC (permalink / raw)
  To: Chen Zhou
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi,

On Wed, Apr 03, 2019 at 11:05:44AM +0800, Chen Zhou wrote:
> When crashkernel is reserved above 4G in memory, kernel should
> reserve some amount of low memory for swiotlb and some DMA buffers.
> 
> Kernel would try to allocate at least 256M below 4G automatically
> as x86_64 if crashkernel is above 4G. Meanwhile, support
> crashkernel=X,[high,low] in arm64.
> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> ---
>  arch/arm64/kernel/setup.c |  3 ++
>  arch/arm64/mm/init.c      | 71 +++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 71 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 413d566..82cd9a0 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -243,6 +243,9 @@ static void __init request_standard_resources(void)
>  			request_resource(res, &kernel_data);
>  #ifdef CONFIG_KEXEC_CORE
>  		/* Userspace will find "Crash kernel" region in /proc/iomem. */
> +		if (crashk_low_res.end && crashk_low_res.start >= res->start &&
> +		    crashk_low_res.end <= res->end)
> +			request_resource(res, &crashk_low_res);
>  		if (crashk_res.end && crashk_res.start >= res->start &&
>  		    crashk_res.end <= res->end)
>  			request_resource(res, &crashk_res);
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 6bc1350..ceb2a25 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -64,6 +64,57 @@ EXPORT_SYMBOL(memstart_addr);
>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  
>  #ifdef CONFIG_KEXEC_CORE
> +static int __init reserve_crashkernel_low(void)
> +{
> +	unsigned long long base, low_base = 0, low_size = 0;
> +	unsigned long total_low_mem;
> +	int ret;
> +
> +	total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
> +
> +	/* crashkernel=Y,low */
> +	ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
> +	if (ret) {
> +		/*
> +		 * two parts from lib/swiotlb.c:
> +		 * -swiotlb size: user-specified with swiotlb= or default.
> +		 *
> +		 * -swiotlb overflow buffer: now hardcoded to 32k. We round it
> +		 * to 8M for other buffers that may need to stay low too. Also
> +		 * make sure we allocate enough extra low memory so that we
> +		 * don't run out of DMA buffers for 32-bit devices.
> +		 */
> +		low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
> +	} else {
> +		/* passed with crashkernel=0,low ? */
> +		if (!low_size)
> +			return 0;
> +	}
> +
> +	low_base = memblock_find_in_range(0, 1ULL << 32, low_size, SZ_2M);
> +	if (!low_base) {
> +		pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
> +				(unsigned long)(low_size >> 20));
> +		return -ENOMEM;
> +	}
> +
> +	ret = memblock_reserve(low_base, low_size);
> +	if (ret) {
> +		pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
> +		return ret;
> +	}
> +
> +	pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
> +			(unsigned long)(low_size >> 20),
> +			(unsigned long)(low_base >> 20),
> +			(unsigned long)(total_low_mem >> 20));
> +
> +	crashk_low_res.start = low_base;
> +	crashk_low_res.end   = low_base + low_size - 1;
> +
> +	return 0;
> +}
> +
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel
>   *
> @@ -74,19 +125,28 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  static void __init reserve_crashkernel(void)
>  {
>  	unsigned long long crash_base, crash_size;
> +	bool high = false;
>  	int ret;
>  
>  	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>  				&crash_size, &crash_base);
>  	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> +	if (ret || !crash_size) {
> +		/* crashkernel=X,high */
> +		ret = parse_crashkernel_high(boot_command_line, memblock_phys_mem_size(),
> +				&crash_size, &crash_base);
> +		if (ret || !crash_size)
> +			return;
> +		high = true;
> +	}
>  
>  	crash_size = PAGE_ALIGN(crash_size);
>  
>  	if (crash_base == 0) {
>  		/* Current arm64 boot protocol requires 2MB alignment */
> -		crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
> +		crash_base = memblock_find_in_range(0,
> +				high ? memblock_end_of_DRAM()
> +				: ARCH_LOW_ADDRESS_LIMIT,
>  				crash_size, SZ_2M);
>  		if (crash_base == 0) {
>  			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> @@ -112,6 +172,11 @@ static void __init reserve_crashkernel(void)
>  	}
>  	memblock_reserve(crash_base, crash_size);
>  
> +	if (crash_base >= SZ_4G && reserve_crashkernel_low()) {
> +		memblock_free(crash_base, crash_size);
> +		return;
> +	}
> +

This very reminds what x86 does. Any chance some of the code can be reused
rather than duplicated?

>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>  		crash_base, crash_base + crash_size, crash_size >> 20);
>  
> -- 
> 2.7.4
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-04 14:44       ` Mike Rapoport
@ 2019-04-05  2:17         ` Chen Zhou
  2019-04-05  3:47           ` Chen Zhou
  0 siblings, 1 reply; 16+ messages in thread
From: Chen Zhou @ 2019-04-05  2:17 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi Mike,

On 2019/4/4 22:44, Mike Rapoport wrote:
> Hi,
> 
> On Wed, Apr 03, 2019 at 09:51:27PM +0800, Chen Zhou wrote:
>> Hi Mike,
>>
>> On 2019/4/3 19:29, Mike Rapoport wrote:
>>> On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
>>>> After commit (arm64: kdump: support reserving crashkernel above 4G),
>>>> there may be two crash kernel regions, one is below 4G, the other is
>>>> above 4G.
>>>>
>>>> Crash dump kernel reads more than one crash kernel regions via a dtb
>>>> property under node /chosen,
>>>> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
>>>>
>>>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>>>> ---
>>>>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
>>>>  include/linux/memblock.h |  1 +
>>>>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 66 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>> index ceb2a25..769c77a 100644
>>>> --- a/arch/arm64/mm/init.c
>>>> +++ b/arch/arm64/mm/init.c
>>>> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
>>>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>  
>>>>  #ifdef CONFIG_KEXEC_CORE
>>>> +# define CRASH_MAX_USABLE_RANGES        2
>>>> +
>>>>  static int __init reserve_crashkernel_low(void)
>>>>  {
>>>>  	unsigned long long base, low_base = 0, low_size = 0;
>>>> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>>>  		const char *uname, int depth, void *data)
>>>>  {
>>>>  	struct memblock_region *usablemem = data;
>>>> -	const __be32 *reg;
>>>> -	int len;
>>>> +	const __be32 *reg, *endp;
>>>> +	int len, nr = 0;
>>>>  
>>>>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
>>>>  		return 0;
>>>> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>>>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
>>>>  		return 1;
>>>>  
>>>> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>>>> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
>>>> +	endp = reg + (len / sizeof(__be32));
>>>> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
>>>> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>>>> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
>>>> +
>>>> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
>>>> +			break;
>>>> +	}
>>>>  
>>>>  	return 1;
>>>>  }
>>>>  
>>>>  static void __init fdt_enforce_memory_region(void)
>>>>  {
>>>> -	struct memblock_region reg = {
>>>> -		.size = 0,
>>>> -	};
>>>> -
>>>> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
>>>> -
>>>> -	if (reg.size)
>>>> -		memblock_cap_memory_range(reg.base, reg.size);
>>>> +	int i, cnt = 0;
>>>> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
>>>> +
>>>> +	memset(regs, 0, sizeof(regs));
>>>> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
>>>> +
>>>> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
>>>> +		if (regs[i].size)
>>>> +			cnt++;
>>>> +		else
>>>> +			break;
>>>> +	if (cnt)
>>>> +		memblock_cap_memory_ranges(regs, cnt);
>>>
>>> Why not simply call memblock_cap_memory_range() for each region?
>>
>> Function memblock_cap_memory_range() removes all memory type ranges except specified range.
>> So if we call memblock_cap_memory_range() for each region simply, there will be no usable-memory
>> on kdump capture kernel.
> 
> Thanks for the clarification.
> I still think that memblock_cap_memory_ranges() is overly complex. 
> 
> How about doing something like this:
> 
> Cap the memory range for [min(regs[*].start, max(regs[*].end)] and then
> removing the range in the middle?

Yes, that would be ok. But that would do one more memblock_cap_memory_range operation.
That is, if there are n regions, we need to do (n + 1) operations, which doesn't seem to
matter.

I agree with you, your idea is better.

Thanks,
Chen Zhou

>  
>> Thanks,
>> Chen Zhou
>>
>>>
>>>>  }
>>>>  
>>>>  void __init arm64_memblock_init(void)
>>>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>>>> index 47e3c06..aeade34 100644
>>>> --- a/include/linux/memblock.h
>>>> +++ b/include/linux/memblock.h
>>>> @@ -446,6 +446,7 @@ phys_addr_t memblock_start_of_DRAM(void);
>>>>  phys_addr_t memblock_end_of_DRAM(void);
>>>>  void memblock_enforce_memory_limit(phys_addr_t memory_limit);
>>>>  void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
>>>> +void memblock_cap_memory_ranges(struct memblock_region *regs, int cnt);
>>>>  void memblock_mem_limit_remove_map(phys_addr_t limit);
>>>>  bool memblock_is_memory(phys_addr_t addr);
>>>>  bool memblock_is_map_memory(phys_addr_t addr);
>>>> diff --git a/mm/memblock.c b/mm/memblock.c
>>>> index 28fa8926..1a7f4ee7c 100644
>>>> --- a/mm/memblock.c
>>>> +++ b/mm/memblock.c
>>>> @@ -1697,6 +1697,46 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
>>>>  			base + size, PHYS_ADDR_MAX);
>>>>  }
>>>>  
>>>> +void __init memblock_cap_memory_ranges(struct memblock_region *regs, int cnt)
>>>> +{
>>>> +	int start_rgn[INIT_MEMBLOCK_REGIONS], end_rgn[INIT_MEMBLOCK_REGIONS];
>>>> +	int i, j, ret, nr = 0;
>>>> +
>>>> +	for (i = 0; i < cnt; i++) {
>>>> +		ret = memblock_isolate_range(&memblock.memory, regs[i].base,
>>>> +				regs[i].size, &start_rgn[i], &end_rgn[i]);
>>>> +		if (ret)
>>>> +			break;
>>>> +		nr++;
>>>> +	}
>>>> +	if (!nr)
>>>> +		return;
>>>> +
>>>> +	/* remove all the MAP regions */
>>>> +	for (i = memblock.memory.cnt - 1; i >= end_rgn[nr - 1]; i--)
>>>> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
>>>> +			memblock_remove_region(&memblock.memory, i);
>>>> +
>>>> +	for (i = nr - 1; i > 0; i--)
>>>> +		for (j = start_rgn[i] - 1; j >= end_rgn[i - 1]; j--)
>>>> +			if (!memblock_is_nomap(&memblock.memory.regions[j]))
>>>> +				memblock_remove_region(&memblock.memory, j);
>>>> +
>>>> +	for (i = start_rgn[0] - 1; i >= 0; i--)
>>>> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
>>>> +			memblock_remove_region(&memblock.memory, i);
>>>> +
>>>> +	/* truncate the reserved regions */
>>>> +	memblock_remove_range(&memblock.reserved, 0, regs[0].base);
>>>> +
>>>> +	for (i = nr - 1; i > 0; i--)
>>>> +		memblock_remove_range(&memblock.reserved,
>>>> +				regs[i].base, regs[i - 1].base + regs[i - 1].size);
>>>> +
>>>> +	memblock_remove_range(&memblock.reserved,
>>>> +			regs[nr - 1].base + regs[nr - 1].size, PHYS_ADDR_MAX);
>>>> +}
>>>> +
>>>>  void __init memblock_mem_limit_remove_map(phys_addr_t limit)
>>>>  {
>>>>  	phys_addr_t max_addr;
>>>> -- 
>>>> 2.7.4
>>>>
>>>
>>
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G
  2019-04-04 14:46   ` Mike Rapoport
@ 2019-04-05  3:03     ` Chen Zhou
  0 siblings, 0 replies; 16+ messages in thread
From: Chen Zhou @ 2019-04-05  3:03 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang


Hi Mike,

On 2019/4/4 22:46, Mike Rapoport wrote:
> Hi,
> 
> On Wed, Apr 03, 2019 at 11:05:44AM +0800, Chen Zhou wrote:
>> When crashkernel is reserved above 4G in memory, kernel should
>> reserve some amount of low memory for swiotlb and some DMA buffers.
>>
>> Kernel would try to allocate at least 256M below 4G automatically
>> as x86_64 if crashkernel is above 4G. Meanwhile, support
>> crashkernel=X,[high,low] in arm64.
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> ---
>>  arch/arm64/kernel/setup.c |  3 ++
>>  arch/arm64/mm/init.c      | 71 +++++++++++++++++++++++++++++++++++++++++++++--
>>  2 files changed, 71 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>> index 413d566..82cd9a0 100644
>> --- a/arch/arm64/kernel/setup.c
>> +++ b/arch/arm64/kernel/setup.c
>> @@ -243,6 +243,9 @@ static void __init request_standard_resources(void)
>>  			request_resource(res, &kernel_data);
>>  #ifdef CONFIG_KEXEC_CORE
>>  		/* Userspace will find "Crash kernel" region in /proc/iomem. */
>> +		if (crashk_low_res.end && crashk_low_res.start >= res->start &&
>> +		    crashk_low_res.end <= res->end)
>> +			request_resource(res, &crashk_low_res);
>>  		if (crashk_res.end && crashk_res.start >= res->start &&
>>  		    crashk_res.end <= res->end)
>>  			request_resource(res, &crashk_res);
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 6bc1350..ceb2a25 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -64,6 +64,57 @@ EXPORT_SYMBOL(memstart_addr);
>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  
>>  #ifdef CONFIG_KEXEC_CORE
>> +static int __init reserve_crashkernel_low(void)
>> +{
>> +	unsigned long long base, low_base = 0, low_size = 0;
>> +	unsigned long total_low_mem;
>> +	int ret;
>> +
>> +	total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
>> +
>> +	/* crashkernel=Y,low */
>> +	ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
>> +	if (ret) {
>> +		/*
>> +		 * two parts from lib/swiotlb.c:
>> +		 * -swiotlb size: user-specified with swiotlb= or default.
>> +		 *
>> +		 * -swiotlb overflow buffer: now hardcoded to 32k. We round it
>> +		 * to 8M for other buffers that may need to stay low too. Also
>> +		 * make sure we allocate enough extra low memory so that we
>> +		 * don't run out of DMA buffers for 32-bit devices.
>> +		 */
>> +		low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
>> +	} else {
>> +		/* passed with crashkernel=0,low ? */
>> +		if (!low_size)
>> +			return 0;
>> +	}
>> +
>> +	low_base = memblock_find_in_range(0, 1ULL << 32, low_size, SZ_2M);
>> +	if (!low_base) {
>> +		pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
>> +				(unsigned long)(low_size >> 20));
>> +		return -ENOMEM;
>> +	}
>> +
>> +	ret = memblock_reserve(low_base, low_size);
>> +	if (ret) {
>> +		pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
>> +		return ret;
>> +	}
>> +
>> +	pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
>> +			(unsigned long)(low_size >> 20),
>> +			(unsigned long)(low_base >> 20),
>> +			(unsigned long)(total_low_mem >> 20));
>> +
>> +	crashk_low_res.start = low_base;
>> +	crashk_low_res.end   = low_base + low_size - 1;
>> +
>> +	return 0;
>> +}
>> +
>>  /*
>>   * reserve_crashkernel() - reserves memory for crash kernel
>>   *
>> @@ -74,19 +125,28 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  static void __init reserve_crashkernel(void)
>>  {
>>  	unsigned long long crash_base, crash_size;
>> +	bool high = false;
>>  	int ret;
>>  
>>  	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>>  				&crash_size, &crash_base);
>>  	/* no crashkernel= or invalid value specified */
>> -	if (ret || !crash_size)
>> -		return;
>> +	if (ret || !crash_size) {
>> +		/* crashkernel=X,high */
>> +		ret = parse_crashkernel_high(boot_command_line, memblock_phys_mem_size(),
>> +				&crash_size, &crash_base);
>> +		if (ret || !crash_size)
>> +			return;
>> +		high = true;
>> +	}
>>  
>>  	crash_size = PAGE_ALIGN(crash_size);
>>  
>>  	if (crash_base == 0) {
>>  		/* Current arm64 boot protocol requires 2MB alignment */
>> -		crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
>> +		crash_base = memblock_find_in_range(0,
>> +				high ? memblock_end_of_DRAM()
>> +				: ARCH_LOW_ADDRESS_LIMIT,
>>  				crash_size, SZ_2M);
>>  		if (crash_base == 0) {
>>  			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>> @@ -112,6 +172,11 @@ static void __init reserve_crashkernel(void)
>>  	}
>>  	memblock_reserve(crash_base, crash_size);
>>  
>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low()) {
>> +		memblock_free(crash_base, crash_size);
>> +		return;
>> +	}
>> +
> 
> This very reminds what x86 does. Any chance some of the code can be reused
> rather than duplicated?

As i said in the comment, i transport reserve_crashkernel_low() from x86_64. There are minor
differences. In arm64, we don't need to do insert_resource(), we do request_resource()
in request_standard_resources() later.

How about doing like this:

move common reserve_crashkernel_low() code into kernel/kexec_core.c.
and do in x86 like this:
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -573,9 +573,12 @@ static void __init reserve_crashkernel(void)
                return;
        }

-       if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
-               memblock_free(crash_base, crash_size);
-               return;
+       if (crash_base >= (1ULL << 32)) {
+               if (reserve_crashkernel_low()) {
+                       memblock_free(crash_base, crash_size);
+                       return;
+               } else
+                       insert_resource(&iomem_resource, &crashk_low_res);
        }

> 
>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>  
>> -- 
>> 2.7.4
>>
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-05  2:17         ` Chen Zhou
@ 2019-04-05  3:47           ` Chen Zhou
  2019-04-08  6:57             ` Mike Rapoport
  0 siblings, 1 reply; 16+ messages in thread
From: Chen Zhou @ 2019-04-05  3:47 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi Mike,

On 2019/4/5 10:17, Chen Zhou wrote:
> Hi Mike,
> 
> On 2019/4/4 22:44, Mike Rapoport wrote:
>> Hi,
>>
>> On Wed, Apr 03, 2019 at 09:51:27PM +0800, Chen Zhou wrote:
>>> Hi Mike,
>>>
>>> On 2019/4/3 19:29, Mike Rapoport wrote:
>>>> On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
>>>>> After commit (arm64: kdump: support reserving crashkernel above 4G),
>>>>> there may be two crash kernel regions, one is below 4G, the other is
>>>>> above 4G.
>>>>>
>>>>> Crash dump kernel reads more than one crash kernel regions via a dtb
>>>>> property under node /chosen,
>>>>> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
>>>>>
>>>>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>>>>> ---
>>>>>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
>>>>>  include/linux/memblock.h |  1 +
>>>>>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 66 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>>> index ceb2a25..769c77a 100644
>>>>> --- a/arch/arm64/mm/init.c
>>>>> +++ b/arch/arm64/mm/init.c
>>>>> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
>>>>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>>  
>>>>>  #ifdef CONFIG_KEXEC_CORE
>>>>> +# define CRASH_MAX_USABLE_RANGES        2
>>>>> +
>>>>>  static int __init reserve_crashkernel_low(void)
>>>>>  {
>>>>>  	unsigned long long base, low_base = 0, low_size = 0;
>>>>> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>>>>  		const char *uname, int depth, void *data)
>>>>>  {
>>>>>  	struct memblock_region *usablemem = data;
>>>>> -	const __be32 *reg;
>>>>> -	int len;
>>>>> +	const __be32 *reg, *endp;
>>>>> +	int len, nr = 0;
>>>>>  
>>>>>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
>>>>>  		return 0;
>>>>> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>>>>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
>>>>>  		return 1;
>>>>>  
>>>>> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>>>>> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
>>>>> +	endp = reg + (len / sizeof(__be32));
>>>>> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
>>>>> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>>>>> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
>>>>> +
>>>>> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
>>>>> +			break;
>>>>> +	}
>>>>>  
>>>>>  	return 1;
>>>>>  }
>>>>>  
>>>>>  static void __init fdt_enforce_memory_region(void)
>>>>>  {
>>>>> -	struct memblock_region reg = {
>>>>> -		.size = 0,
>>>>> -	};
>>>>> -
>>>>> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
>>>>> -
>>>>> -	if (reg.size)
>>>>> -		memblock_cap_memory_range(reg.base, reg.size);
>>>>> +	int i, cnt = 0;
>>>>> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
>>>>> +
>>>>> +	memset(regs, 0, sizeof(regs));
>>>>> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
>>>>> +
>>>>> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
>>>>> +		if (regs[i].size)
>>>>> +			cnt++;
>>>>> +		else
>>>>> +			break;
>>>>> +	if (cnt)
>>>>> +		memblock_cap_memory_ranges(regs, cnt);
>>>>
>>>> Why not simply call memblock_cap_memory_range() for each region?
>>>
>>> Function memblock_cap_memory_range() removes all memory type ranges except specified range.
>>> So if we call memblock_cap_memory_range() for each region simply, there will be no usable-memory
>>> on kdump capture kernel.
>>
>> Thanks for the clarification.
>> I still think that memblock_cap_memory_ranges() is overly complex. 
>>
>> How about doing something like this:
>>
>> Cap the memory range for [min(regs[*].start, max(regs[*].end)] and then
>> removing the range in the middle?
> 
> Yes, that would be ok. But that would do one more memblock_cap_memory_range operation.
> That is, if there are n regions, we need to do (n + 1) operations, which doesn't seem to
> matter.
> 
> I agree with you, your idea is better.
> 
> Thanks,
> Chen Zhou

Sorry, just ignore my previous reply, I got that wrong.

I think it carefully, we can cap the memory range for [min(regs[*].start, max(regs[*].end)]
firstly. But how to remove the middle ranges, we still can't use memblock_cap_memory_range()
directly and the extra remove operation may be complex.

For more than one regions, i think add a new memblock_cap_memory_ranges() may be better.
Besides, memblock_cap_memory_ranges() is also applicable for one region.

How about replace memblock_cap_memory_range() with memblock_cap_memory_ranges()?

Thanks,
Chen Zhou

> 
>>  
>>> Thanks,
>>> Chen Zhou
>>>
>>>>
>>>>>  }
>>>>>  
>>>>>  void __init arm64_memblock_init(void)
>>>>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>>>>> index 47e3c06..aeade34 100644
>>>>> --- a/include/linux/memblock.h
>>>>> +++ b/include/linux/memblock.h
>>>>> @@ -446,6 +446,7 @@ phys_addr_t memblock_start_of_DRAM(void);
>>>>>  phys_addr_t memblock_end_of_DRAM(void);
>>>>>  void memblock_enforce_memory_limit(phys_addr_t memory_limit);
>>>>>  void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size);
>>>>> +void memblock_cap_memory_ranges(struct memblock_region *regs, int cnt);
>>>>>  void memblock_mem_limit_remove_map(phys_addr_t limit);
>>>>>  bool memblock_is_memory(phys_addr_t addr);
>>>>>  bool memblock_is_map_memory(phys_addr_t addr);
>>>>> diff --git a/mm/memblock.c b/mm/memblock.c
>>>>> index 28fa8926..1a7f4ee7c 100644
>>>>> --- a/mm/memblock.c
>>>>> +++ b/mm/memblock.c
>>>>> @@ -1697,6 +1697,46 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
>>>>>  			base + size, PHYS_ADDR_MAX);
>>>>>  }
>>>>>  
>>>>> +void __init memblock_cap_memory_ranges(struct memblock_region *regs, int cnt)
>>>>> +{
>>>>> +	int start_rgn[INIT_MEMBLOCK_REGIONS], end_rgn[INIT_MEMBLOCK_REGIONS];
>>>>> +	int i, j, ret, nr = 0;
>>>>> +
>>>>> +	for (i = 0; i < cnt; i++) {
>>>>> +		ret = memblock_isolate_range(&memblock.memory, regs[i].base,
>>>>> +				regs[i].size, &start_rgn[i], &end_rgn[i]);
>>>>> +		if (ret)
>>>>> +			break;
>>>>> +		nr++;
>>>>> +	}
>>>>> +	if (!nr)
>>>>> +		return;
>>>>> +
>>>>> +	/* remove all the MAP regions */
>>>>> +	for (i = memblock.memory.cnt - 1; i >= end_rgn[nr - 1]; i--)
>>>>> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
>>>>> +			memblock_remove_region(&memblock.memory, i);
>>>>> +
>>>>> +	for (i = nr - 1; i > 0; i--)
>>>>> +		for (j = start_rgn[i] - 1; j >= end_rgn[i - 1]; j--)
>>>>> +			if (!memblock_is_nomap(&memblock.memory.regions[j]))
>>>>> +				memblock_remove_region(&memblock.memory, j);
>>>>> +
>>>>> +	for (i = start_rgn[0] - 1; i >= 0; i--)
>>>>> +		if (!memblock_is_nomap(&memblock.memory.regions[i]))
>>>>> +			memblock_remove_region(&memblock.memory, i);
>>>>> +
>>>>> +	/* truncate the reserved regions */
>>>>> +	memblock_remove_range(&memblock.reserved, 0, regs[0].base);
>>>>> +
>>>>> +	for (i = nr - 1; i > 0; i--)
>>>>> +		memblock_remove_range(&memblock.reserved,
>>>>> +				regs[i].base, regs[i - 1].base + regs[i - 1].size);
>>>>> +
>>>>> +	memblock_remove_range(&memblock.reserved,
>>>>> +			regs[nr - 1].base + regs[nr - 1].size, PHYS_ADDR_MAX);
>>>>> +}
>>>>> +
>>>>>  void __init memblock_mem_limit_remove_map(phys_addr_t limit)
>>>>>  {
>>>>>  	phys_addr_t max_addr;
>>>>> -- 
>>>>> 2.7.4
>>>>>
>>>>
>>>
>>
> 
> 
> .
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-05  3:47           ` Chen Zhou
@ 2019-04-08  6:57             ` Mike Rapoport
  2019-04-08  8:39               ` Chen Zhou
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Rapoport @ 2019-04-08  6:57 UTC (permalink / raw)
  To: Chen Zhou
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi,

On Fri, Apr 05, 2019 at 11:47:27AM +0800, Chen Zhou wrote:
> Hi Mike,
> 
> On 2019/4/5 10:17, Chen Zhou wrote:
> > Hi Mike,
> > 
> > On 2019/4/4 22:44, Mike Rapoport wrote:
> >> Hi,
> >>
> >> On Wed, Apr 03, 2019 at 09:51:27PM +0800, Chen Zhou wrote:
> >>> Hi Mike,
> >>>
> >>> On 2019/4/3 19:29, Mike Rapoport wrote:
> >>>> On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
> >>>>> After commit (arm64: kdump: support reserving crashkernel above 4G),
> >>>>> there may be two crash kernel regions, one is below 4G, the other is
> >>>>> above 4G.
> >>>>>
> >>>>> Crash dump kernel reads more than one crash kernel regions via a dtb
> >>>>> property under node /chosen,
> >>>>> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
> >>>>>
> >>>>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> >>>>> ---
> >>>>>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
> >>>>>  include/linux/memblock.h |  1 +
> >>>>>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
> >>>>>  3 files changed, 66 insertions(+), 12 deletions(-)
> >>>>>
> >>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >>>>> index ceb2a25..769c77a 100644
> >>>>> --- a/arch/arm64/mm/init.c
> >>>>> +++ b/arch/arm64/mm/init.c
> >>>>> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
> >>>>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>>>>  
> >>>>>  #ifdef CONFIG_KEXEC_CORE
> >>>>> +# define CRASH_MAX_USABLE_RANGES        2
> >>>>> +
> >>>>>  static int __init reserve_crashkernel_low(void)
> >>>>>  {
> >>>>>  	unsigned long long base, low_base = 0, low_size = 0;
> >>>>> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
> >>>>>  		const char *uname, int depth, void *data)
> >>>>>  {
> >>>>>  	struct memblock_region *usablemem = data;
> >>>>> -	const __be32 *reg;
> >>>>> -	int len;
> >>>>> +	const __be32 *reg, *endp;
> >>>>> +	int len, nr = 0;
> >>>>>  
> >>>>>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
> >>>>>  		return 0;
> >>>>> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
> >>>>>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
> >>>>>  		return 1;
> >>>>>  
> >>>>> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> >>>>> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
> >>>>> +	endp = reg + (len / sizeof(__be32));
> >>>>> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
> >>>>> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> >>>>> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
> >>>>> +
> >>>>> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
> >>>>> +			break;
> >>>>> +	}
> >>>>>  
> >>>>>  	return 1;
> >>>>>  }
> >>>>>  
> >>>>>  static void __init fdt_enforce_memory_region(void)
> >>>>>  {
> >>>>> -	struct memblock_region reg = {
> >>>>> -		.size = 0,
> >>>>> -	};
> >>>>> -
> >>>>> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
> >>>>> -
> >>>>> -	if (reg.size)
> >>>>> -		memblock_cap_memory_range(reg.base, reg.size);
> >>>>> +	int i, cnt = 0;
> >>>>> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
> >>>>> +
> >>>>> +	memset(regs, 0, sizeof(regs));
> >>>>> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
> >>>>> +
> >>>>> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
> >>>>> +		if (regs[i].size)
> >>>>> +			cnt++;
> >>>>> +		else
> >>>>> +			break;
> >>>>> +	if (cnt)
> >>>>> +		memblock_cap_memory_ranges(regs, cnt);
> >>>>
> >>>> Why not simply call memblock_cap_memory_range() for each region?
> >>>
> >>> Function memblock_cap_memory_range() removes all memory type ranges except specified range.
> >>> So if we call memblock_cap_memory_range() for each region simply, there will be no usable-memory
> >>> on kdump capture kernel.
> >>
> >> Thanks for the clarification.
> >> I still think that memblock_cap_memory_ranges() is overly complex. 
> >>
> >> How about doing something like this:
> >>
> >> Cap the memory range for [min(regs[*].start, max(regs[*].end)] and then
> >> removing the range in the middle?
> > 
> > Yes, that would be ok. But that would do one more memblock_cap_memory_range operation.
> > That is, if there are n regions, we need to do (n + 1) operations, which doesn't seem to
> > matter.
> > 
> > I agree with you, your idea is better.
> > 
> > Thanks,
> > Chen Zhou
> 
> Sorry, just ignore my previous reply, I got that wrong.
> 
> I think it carefully, we can cap the memory range for [min(regs[*].start, max(regs[*].end)]
> firstly. But how to remove the middle ranges, we still can't use memblock_cap_memory_range()
> directly and the extra remove operation may be complex.
> 
> For more than one regions, i think add a new memblock_cap_memory_ranges() may be better.
> Besides, memblock_cap_memory_ranges() is also applicable for one region.
> 
> How about replace memblock_cap_memory_range() with memblock_cap_memory_ranges()?

arm64 is the only user of both MEMBLOCK_NOMAP and memblock_cap_memory_range()
and I don't expect other architectures will use these interfaces.
It seems that capping the memory for arm64 crash kernel the way I've
suggested can be implemented in fdt_enforce_memory_region(). If we'd ever
need such functionality elsewhere or CRASH_MAX_USABLE_RANGES will need to
grow we'll rethink the solution.
 
> Thanks,
> Chen Zhou

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-08  6:57             ` Mike Rapoport
@ 2019-04-08  8:39               ` Chen Zhou
  2019-04-08 15:38                 ` Chen Zhou
  0 siblings, 1 reply; 16+ messages in thread
From: Chen Zhou @ 2019-04-08  8:39 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: catalin.marinas, will.deacon, akpm, ard.biesheuvel,
	takahiro.akashi, linux-arm-kernel, linux-kernel, kexec, linux-mm,
	wangkefeng.wang

Hi Mike,

On 2019/4/8 14:57, Mike Rapoport wrote:
> Hi,
> 
> On Fri, Apr 05, 2019 at 11:47:27AM +0800, Chen Zhou wrote:
>> Hi Mike,
>>
>> On 2019/4/5 10:17, Chen Zhou wrote:
>>> Hi Mike,
>>>
>>> On 2019/4/4 22:44, Mike Rapoport wrote:
>>>> Hi,
>>>>
>>>> On Wed, Apr 03, 2019 at 09:51:27PM +0800, Chen Zhou wrote:
>>>>> Hi Mike,
>>>>>
>>>>> On 2019/4/3 19:29, Mike Rapoport wrote:
>>>>>> On Wed, Apr 03, 2019 at 11:05:45AM +0800, Chen Zhou wrote:
>>>>>>> After commit (arm64: kdump: support reserving crashkernel above 4G),
>>>>>>> there may be two crash kernel regions, one is below 4G, the other is
>>>>>>> above 4G.
>>>>>>>
>>>>>>> Crash dump kernel reads more than one crash kernel regions via a dtb
>>>>>>> property under node /chosen,
>>>>>>> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>
>>>>>>>
>>>>>>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>>>>>>> ---
>>>>>>>  arch/arm64/mm/init.c     | 37 +++++++++++++++++++++++++------------
>>>>>>>  include/linux/memblock.h |  1 +
>>>>>>>  mm/memblock.c            | 40 ++++++++++++++++++++++++++++++++++++++++
>>>>>>>  3 files changed, 66 insertions(+), 12 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>>>>> index ceb2a25..769c77a 100644
>>>>>>> --- a/arch/arm64/mm/init.c
>>>>>>> +++ b/arch/arm64/mm/init.c
>>>>>>> @@ -64,6 +64,8 @@ EXPORT_SYMBOL(memstart_addr);
>>>>>>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>>>>  
>>>>>>>  #ifdef CONFIG_KEXEC_CORE
>>>>>>> +# define CRASH_MAX_USABLE_RANGES        2
>>>>>>> +
>>>>>>>  static int __init reserve_crashkernel_low(void)
>>>>>>>  {
>>>>>>>  	unsigned long long base, low_base = 0, low_size = 0;
>>>>>>> @@ -346,8 +348,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>>>>>>  		const char *uname, int depth, void *data)
>>>>>>>  {
>>>>>>>  	struct memblock_region *usablemem = data;
>>>>>>> -	const __be32 *reg;
>>>>>>> -	int len;
>>>>>>> +	const __be32 *reg, *endp;
>>>>>>> +	int len, nr = 0;
>>>>>>>  
>>>>>>>  	if (depth != 1 || strcmp(uname, "chosen") != 0)
>>>>>>>  		return 0;
>>>>>>> @@ -356,22 +358,33 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
>>>>>>>  	if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
>>>>>>>  		return 1;
>>>>>>>  
>>>>>>> -	usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>>>>>>> -	usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
>>>>>>> +	endp = reg + (len / sizeof(__be32));
>>>>>>> +	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
>>>>>>> +		usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
>>>>>>> +		usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
>>>>>>> +
>>>>>>> +		if (++nr >= CRASH_MAX_USABLE_RANGES)
>>>>>>> +			break;
>>>>>>> +	}
>>>>>>>  
>>>>>>>  	return 1;
>>>>>>>  }
>>>>>>>  
>>>>>>>  static void __init fdt_enforce_memory_region(void)
>>>>>>>  {
>>>>>>> -	struct memblock_region reg = {
>>>>>>> -		.size = 0,
>>>>>>> -	};
>>>>>>> -
>>>>>>> -	of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
>>>>>>> -
>>>>>>> -	if (reg.size)
>>>>>>> -		memblock_cap_memory_range(reg.base, reg.size);
>>>>>>> +	int i, cnt = 0;
>>>>>>> +	struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
>>>>>>> +
>>>>>>> +	memset(regs, 0, sizeof(regs));
>>>>>>> +	of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
>>>>>>> +
>>>>>>> +	for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
>>>>>>> +		if (regs[i].size)
>>>>>>> +			cnt++;
>>>>>>> +		else
>>>>>>> +			break;
>>>>>>> +	if (cnt)
>>>>>>> +		memblock_cap_memory_ranges(regs, cnt);
>>>>>>
>>>>>> Why not simply call memblock_cap_memory_range() for each region?
>>>>>
>>>>> Function memblock_cap_memory_range() removes all memory type ranges except specified range.
>>>>> So if we call memblock_cap_memory_range() for each region simply, there will be no usable-memory
>>>>> on kdump capture kernel.
>>>>
>>>> Thanks for the clarification.
>>>> I still think that memblock_cap_memory_ranges() is overly complex. 
>>>>
>>>> How about doing something like this:
>>>>
>>>> Cap the memory range for [min(regs[*].start, max(regs[*].end)] and then
>>>> removing the range in the middle?
>>>
>>> Yes, that would be ok. But that would do one more memblock_cap_memory_range operation.
>>> That is, if there are n regions, we need to do (n + 1) operations, which doesn't seem to
>>> matter.
>>>
>>> I agree with you, your idea is better.
>>>
>>> Thanks,
>>> Chen Zhou
>>
>> Sorry, just ignore my previous reply, I got that wrong.
>>
>> I think it carefully, we can cap the memory range for [min(regs[*].start, max(regs[*].end)]
>> firstly. But how to remove the middle ranges, we still can't use memblock_cap_memory_range()
>> directly and the extra remove operation may be complex.
>>
>> For more than one regions, i think add a new memblock_cap_memory_ranges() may be better.
>> Besides, memblock_cap_memory_ranges() is also applicable for one region.
>>
>> How about replace memblock_cap_memory_range() with memblock_cap_memory_ranges()?
> 
> arm64 is the only user of both MEMBLOCK_NOMAP and memblock_cap_memory_range()
> and I don't expect other architectures will use these interfaces.
> It seems that capping the memory for arm64 crash kernel the way I've
> suggested can be implemented in fdt_enforce_memory_region(). If we'd ever
> need such functionality elsewhere or CRASH_MAX_USABLE_RANGES will need to
> grow we'll rethink the solution.

Ok, i will implement that in fdt_enforce_memory_region() in next version.
And we will support at most two crash kernel regions now.

Thanks,
Chen Zhou

>  
>> Thanks,
>> Chen Zhou
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] arm64: kdump: support more than one crash kernel regions
  2019-04-08  8:39               ` Chen Zhou
@ 2019-04-08 15:38                 ` Chen Zhou
  0 siblings, 0 replies; 16+ messages in thread
From: Chen Zhou @ 2019-04-08 15:38 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: wangkefeng.wang, ard.biesheuvel, catalin.marinas, will.deacon,
	linux-kernel, linux-mm, takahiro.akashi, akpm, kexec,
	linux-arm-kernel

Hi Mike,

On 2019/4/8 16:39, Chen Zhou wrote:

>>>
>>> Sorry, just ignore my previous reply, I got that wrong.
>>>
>>> I think it carefully, we can cap the memory range for [min(regs[*].start, max(regs[*].end)]
>>> firstly. But how to remove the middle ranges, we still can't use memblock_cap_memory_range()
>>> directly and the extra remove operation may be complex.
>>>
>>> For more than one regions, i think add a new memblock_cap_memory_ranges() may be better.
>>> Besides, memblock_cap_memory_ranges() is also applicable for one region.
>>>
>>> How about replace memblock_cap_memory_range() with memblock_cap_memory_ranges()?
>>
>> arm64 is the only user of both MEMBLOCK_NOMAP and memblock_cap_memory_range()
>> and I don't expect other architectures will use these interfaces.
>> It seems that capping the memory for arm64 crash kernel the way I've
>> suggested can be implemented in fdt_enforce_memory_region(). If we'd ever
>> need such functionality elsewhere or CRASH_MAX_USABLE_RANGES will need to
>> grow we'll rethink the solution.
> 
> Ok, i will implement that in fdt_enforce_memory_region() in next version.
> And we will support at most two crash kernel regions now.
> 
> Thanks,
> Chen Zhou
> 

I implement that in fdt_enforce_memory_region() simply as below.
You have a look at if it is the way you suggested.

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f9fa5f8..52bd69db 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -65,6 +65,11 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;

 #ifdef CONFIG_KEXEC_CORE

+/* at most two crash kernel regions, low_region and high_region */
+#define CRASH_MAX_USABLE_RANGES        2
+#define LOW_REGION_IDX                 0
+#define HIGH_REGION_IDX                        1
+
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
  *
@@ -296,8 +301,8 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
                const char *uname, int depth, void *data)
 {
        struct memblock_region *usablemem = data;
-       const __be32 *reg;
-       int len;
+       const __be32 *reg, *endp;
+       int len, nr = 0;

        if (depth != 1 || strcmp(uname, "chosen") != 0)
                return 0;
@@ -306,22 +311,62 @@ static int __init early_init_dt_scan_usablemem(unsigned long node,
        if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
                return 1;

-       usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
-       usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
+       endp = reg + (len / sizeof(__be32));
+       while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+               usablemem[nr].base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+               usablemem[nr].size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+               if (++nr >= CRASH_MAX_USABLE_RANGES)
+                       break;
+       }

        return 1;
 }

 static void __init fdt_enforce_memory_region(void)
 {
-       struct memblock_region reg = {
-               .size = 0,
-       };
+       int i, cnt = 0;
+       struct memblock_region regs[CRASH_MAX_USABLE_RANGES];
+
+       memset(regs, 0, sizeof(regs));
+       of_scan_flat_dt(early_init_dt_scan_usablemem, regs);
+
+       for (i = 0; i < CRASH_MAX_USABLE_RANGES; i++)
+               if (regs[i].size)
+                       cnt++;
+               else
+                       break;
+
+       if (cnt - 1 == LOW_REGION_IDX)
+               memblock_cap_memory_range(regs[LOW_REGION_IDX].base,
+                               regs[LOW_REGION_IDX].size);
+       else if (cnt - 1 == HIGH_REGION_IDX) {
+               /*
+                * Two crash kernel regions, cap the memory range
+                * [regs[LOW_REGION_IDX].base, regs[HIGH_REGION_IDX].end]
+                * and then remove the memory range in the middle.
+                */
+               int start_rgn, end_rgn, i, ret;
+               phys_addr_t mid_base, mid_size;
+
+               mid_base = regs[LOW_REGION_IDX].base + regs[LOW_REGION_IDX].size;
+               mid_size = regs[HIGH_REGION_IDX].base - mid_base;
+               ret = memblock_isolate_range(&memblock.memory, mid_base, mid_size,
+                               &start_rgn, &end_rgn);

-       of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
+               if (ret)
+                       return;

-       if (reg.size)
-               memblock_cap_memory_range(reg.base, reg.size);
+               memblock_cap_memory_range(regs[LOW_REGION_IDX].base,
+                               regs[HIGH_REGION_IDX].base - regs[LOW_REGION_IDX].base +
+                               regs[HIGH_REGION_IDX].size);
+               for (i = end_rgn - 1; i >= start_rgn; i--) {
+                       if (!memblock_is_nomap(&memblock.memory.regions[i]))
+                               memblock_remove_region(&memblock.memory, i);
+               }
+               memblock_remove_range(&memblock.reserved, mid_base,
+                               mid_base + mid_size);
+       }
 }

 void __init arm64_memblock_init(void)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 294d5d8..787d252 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -110,9 +110,15 @@ void memblock_discard(void);

 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
                                   phys_addr_t size, phys_addr_t align);
+void memblock_remove_region(struct memblock_type *type, unsigned long r);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
+int memblock_isolate_range(struct memblock_type *type,
+                                       phys_addr_t base, phys_addr_t size,
+                                       int *start_rgn, int *end_rgn);
+int memblock_remove_range(struct memblock_type *type,
+                                       phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
diff --git a/mm/memblock.c b/mm/memblock.c
index e7665cf..7130c3a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -357,7 +357,7 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
        return ret;
 }

-static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
+void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
        type->total_size -= type->regions[r].size;
        memmove(&type->regions[r], &type->regions[r + 1],
@@ -724,7 +724,7 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
  * Return:
  * 0 on success, -errno on failure.
  */
-static int __init_memblock memblock_isolate_range(struct memblock_type *type,
+int __init_memblock memblock_isolate_range(struct memblock_type *type,
                                        phys_addr_t base, phys_addr_t size,
                                        int *start_rgn, int *end_rgn)
 {
@@ -784,7 +784,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
        return 0;
 }

-static int __init_memblock memblock_remove_range(struct memblock_type *type,
+int __init_memblock memblock_remove_range(struct memblock_type *type,
                                          phys_addr_t base, phys_addr_t size)
 {
        int start_rgn, end_rgn;


Thanks,
Chen Zhou



> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> .
> 


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump
  2019-04-03  3:05 [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Chen Zhou
                   ` (2 preceding siblings ...)
  2019-04-03  3:05 ` [PATCH 3/3] kdump: update Documentation about crashkernel on arm64 Chen Zhou
@ 2019-04-09  5:20 ` Bhupesh Sharma
  2019-04-09  9:07   ` Chen Zhou
  3 siblings, 1 reply; 16+ messages in thread
From: Bhupesh Sharma @ 2019-04-09  5:20 UTC (permalink / raw)
  To: Chen Zhou, catalin.marinas, will.deacon, akpm, rppt,
	ard.biesheuvel, takahiro.akashi
  Cc: wangkefeng.wang, kexec, linux-kernel, linux-mm, linux-arm-kernel

Hi Chen,

Thanks for the patchset.

Before I review the patches in detail, I have a couple of generic 
queries. Please see them in-line:

On 04/03/2019 11:05 AM, Chen Zhou wrote:
> When crashkernel is reserved above 4G in memory, kernel should reserve
> some amount of low memory for swiotlb and some DMA buffers. So there may
> be two crash kernel regions, one is below 4G, the other is above 4G.
> 
> Crash dump kernel reads more than one crash kernel regions via a dtb
> property under node /chosen,
> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>.
> 
> Besides, we need to modify kexec-tools:
>    arm64: support more than one crash kernel regions
> 
> Chen Zhou (3):
>    arm64: kdump: support reserving crashkernel above 4G
>    arm64: kdump: support more than one crash kernel regions
>    kdump: update Documentation about crashkernel on arm64
> 
>   Documentation/admin-guide/kernel-parameters.txt |   4 +-
>   arch/arm64/kernel/setup.c                       |   3 +
>   arch/arm64/mm/init.c                            | 108 ++++++++++++++++++++----
>   include/linux/memblock.h                        |   1 +
>   mm/memblock.c                                   |  40 +++++++++
>   5 files changed, 139 insertions(+), 17 deletions(-)

I am wondering about the use-case for the same. I remember normally 
fedora-based arm64 systems can do well with a maximum crashkernel size 
of <=512MB reserved below the 4G boundary.

So, do you mean that for your use-case (may be a huawei board based 
setup?), you need:

- more than 512MB of crashkernel size, or
- you want to split the crashkernel reservation across the 4GB boundary 
irrespective of the crashkernel size value.

Thanks,
Bhupesh


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump
  2019-04-09  5:20 ` [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Bhupesh Sharma
@ 2019-04-09  9:07   ` Chen Zhou
  0 siblings, 0 replies; 16+ messages in thread
From: Chen Zhou @ 2019-04-09  9:07 UTC (permalink / raw)
  To: Bhupesh Sharma, catalin.marinas, will.deacon, akpm, rppt,
	ard.biesheuvel, takahiro.akashi
  Cc: wangkefeng.wang, kexec, linux-kernel, linux-mm, linux-arm-kernel

Hi Bhupesh,

On 2019/4/9 13:20, Bhupesh Sharma wrote:
> Hi Chen,
> 
> Thanks for the patchset.
> 
> Before I review the patches in detail, I have a couple of generic queries. Please see them in-line:
> 
> On 04/03/2019 11:05 AM, Chen Zhou wrote:
>> When crashkernel is reserved above 4G in memory, kernel should reserve
>> some amount of low memory for swiotlb and some DMA buffers. So there may
>> be two crash kernel regions, one is below 4G, the other is above 4G.
>>
>> Crash dump kernel reads more than one crash kernel regions via a dtb
>> property under node /chosen,
>> linux,usable-memory-range = <BASE1 SIZE1 [BASE2 SIZE2]>.
>>
>> Besides, we need to modify kexec-tools:
>>    arm64: support more than one crash kernel regions
>>
>> Chen Zhou (3):
>>    arm64: kdump: support reserving crashkernel above 4G
>>    arm64: kdump: support more than one crash kernel regions
>>    kdump: update Documentation about crashkernel on arm64
>>
>>   Documentation/admin-guide/kernel-parameters.txt |   4 +-
>>   arch/arm64/kernel/setup.c                       |   3 +
>>   arch/arm64/mm/init.c                            | 108 ++++++++++++++++++++----
>>   include/linux/memblock.h                        |   1 +
>>   mm/memblock.c                                   |  40 +++++++++
>>   5 files changed, 139 insertions(+), 17 deletions(-)
> 
> I am wondering about the use-case for the same. I remember normally fedora-based arm64 systems can do well with a maximum crashkernel size of <=512MB reserved below the 4G boundary.
> 
> So, do you mean that for your use-case (may be a huawei board based setup?), you need:
> 
> - more than 512MB of crashkernel size, or
> - you want to split the crashkernel reservation across the 4GB boundary irrespective of the crashkernel size value.
> 
> Thanks,
> Bhupesh
> 
> 
> .
> 

I do this based on below reasons.

1. ARM64 kdump support crashkernel=Y[@X], but now it seems unusable if X is specified above 4GB.
2. There are some cases we couldn't reserve 512MB crashkernel below 4G successfully if there is
no continous 512MB system RAM below 4GB. In this case, we need to reserve crashkernel above 4GB.
3. As the memory increases, the bitmap_size in makedumpfile may also increases, we need more memory
in kdump capture kernel for kernel dump.

Thanks,
Chen Zhou



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-04-09  9:08 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-03  3:05 [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Chen Zhou
2019-04-03  3:05 ` [PATCH 1/3] arm64: kdump: support reserving crashkernel above 4G Chen Zhou
2019-04-04 14:46   ` Mike Rapoport
2019-04-05  3:03     ` Chen Zhou
2019-04-03  3:05 ` [PATCH 2/3] arm64: kdump: support more than one crash kernel regions Chen Zhou
2019-04-03 11:29   ` Mike Rapoport
2019-04-03 13:51     ` Chen Zhou
2019-04-04 14:44       ` Mike Rapoport
2019-04-05  2:17         ` Chen Zhou
2019-04-05  3:47           ` Chen Zhou
2019-04-08  6:57             ` Mike Rapoport
2019-04-08  8:39               ` Chen Zhou
2019-04-08 15:38                 ` Chen Zhou
2019-04-03  3:05 ` [PATCH 3/3] kdump: update Documentation about crashkernel on arm64 Chen Zhou
2019-04-09  5:20 ` [PATCH 0/3] support reserving crashkernel above 4G on arm64 kdump Bhupesh Sharma
2019-04-09  9:07   ` Chen Zhou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).