All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
@ 2019-04-08 23:10 Junichi Nomura
  2019-04-10 17:14 ` Borislav Petkov
  2019-06-06 19:22 ` [tip:x86/boot] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels tip-bot for Junichi Nomura
  0 siblings, 2 replies; 48+ messages in thread
From: Junichi Nomura @ 2019-04-08 23:10 UTC (permalink / raw)
  To: Borislav Petkov, Dave Young, Chao Fan, Baoquan He
  Cc: Kairui Song, x86, kexec, linux-kernel

Commit 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in
boot_params") broke kexec boot on EFI systems.  efi_get_rsdp_addr()
in the early parsing code tries to find the RSDP in the EFI table, but
that crashes because the table address is virtual when the kernel
was booted by kexec.

In the case of kexec, the physical address of the EFI tables is provided
via efi_setup_data in boot_params, which is set up by kexec(1).

Factor out the table parsing code and use different pointers depending
on whether the kernel is booted by kexec or not.

Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Baoquan He <bhe@redhat.com>
Tested-by: Chao Fan <fanc.fnst@cn.fujitsu.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Young <dyoung@redhat.com>

--
Original post:
  https://lore.kernel.org/lkml/20190322110342.GA16202@jeru.linux.bs1.fc.nec.co.jp/

v2: Added comments above __efi_get_rsdp_addr() and kexec_get_rsdp_addr() 

v3: Properly ifdef out 64bit-only kexec code to avoid 32bit build warnings

v4:
 - Make sure to avoid efi_get_rsdp_addr() when kexec setup_data exists
   even if the data is invalid.
 - Return instead of hang if systab is 0 in kexec_get_rsdp_addr().
 - Check 32bit EFI loader signature in the case of kexec as well.
 - Factor out EFI-related boot_params handling into efi_read_boot_params() to
   avoid duplication between efi_get_rsdp_addr() and kexec_get_rsdp_addr().

The patch was tested on 3 different models of EFI-booted physical machines
for both normal kexec and panic kexec.

There is a report that a similar problem still happens even with this patch:
  https://lore.kernel.org/lkml/20190404140809.GA7789@dhcp-128-65.nay.redhat.com/

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad5..2bc8dca 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,71 +44,80 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+#ifdef CONFIG_EFI
+static unsigned long kexec_efi_setup_data;
+static unsigned long efi_systab;
+static bool efi_booted;
+static bool efi_64;
+
+static unsigned long efi_get_kexec_setup_data_addr(void)
 {
-	acpi_physical_address rsdp_addr = 0;
+#if defined(CONFIG_X86_64)
+	struct setup_data *data;
+	u64 pa_data;
+
+	pa_data = boot_params->hdr.setup_data;
+	while (pa_data) {
+		data = (struct setup_data *) pa_data;
+		if (data->type == SETUP_EFI)
+			return pa_data + sizeof(struct setup_data);
+		pa_data = data->next;
+	}
+#endif
+	return 0;
+}
 
-#ifdef CONFIG_EFI
-	unsigned long systab, systab_tables, config_tables;
-	unsigned int nr_tables;
+static void efi_read_boot_params(void)
+{
+	struct efi_setup_data *esd;
 	struct efi_info *ei;
-	bool efi_64;
-	int size, i;
 	char *sig;
 
+	kexec_efi_setup_data = efi_get_kexec_setup_data_addr();
+
 	ei = &boot_params->efi_info;
 	sig = (char *)&ei->efi_loader_signature;
 
 	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
 		efi_64 = true;
+		efi_booted = true;
 	} else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
 		efi_64 = false;
+		efi_booted = true;
 	} else {
 		debug_putstr("Wrong EFI loader signature.\n");
-		return 0;
+		return;
 	}
 
 	/* Get systab from boot params. */
 #ifdef CONFIG_X86_64
-	systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+	efi_systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
 #else
 	if (ei->efi_systab_hi || ei->efi_memmap_hi) {
 		debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n");
-		return 0;
+		return;
 	}
-	systab = ei->efi_systab;
+	efi_systab = ei->efi_systab;
 #endif
-	if (!systab)
-		error("EFI system table not found.");
-
-	/* Handle EFI bitness properly */
-	if (efi_64) {
-		efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab;
-
-		config_tables	= stbl->tables;
-		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_64_t);
-	} else {
-		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
-
-		config_tables	= stbl->tables;
-		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_32_t);
-	}
+}
 
-	if (!config_tables)
-		error("EFI config tables not found.");
+/*
+ * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables)
+{
+	acpi_physical_address rsdp_addr = 0;
+	int i;
 
 	/* Get EFI tables from systab. */
 	for (i = 0; i < nr_tables; i++) {
 		acpi_physical_address table;
 		efi_guid_t guid;
 
-		config_tables += size;
-
 		if (efi_64) {
-			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
+			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
 
 			guid  = tbl->guid;
 			table = tbl->table;
@@ -118,7 +127,7 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 				return 0;
 			}
 		} else {
-			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
+			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
 
 			guid  = tbl->guid;
 			table = tbl->table;
@@ -129,9 +138,88 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
 			return table;
 	}
-#endif
+
 	return rsdp_addr;
 }
+#endif
+
+/*
+ * EFI/kexec support is only added for 64bit. So we don't have to
+ * care 32bit case.
+ */
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+	struct efi_setup_data *esd;
+	unsigned int nr_tables;
+
+	if (!efi_booted || !kexec_efi_setup_data)
+		return 0;
+
+	esd = (struct efi_setup_data *) kexec_efi_setup_data;
+
+	if (!esd->tables) {
+		debug_putstr("Wrong kexec SETUP_EFI data.\n");
+		return 0;
+	}
+
+	if (!efi_systab) {
+		debug_putstr("EFI system table not found in kexec boot_params.");
+		return 0;
+	}
+
+	/* Handle EFI bitness properly */
+	if (efi_64) {
+		efi_system_table_64_t *stbl = (efi_system_table_64_t *)efi_systab;
+
+		nr_tables	= stbl->nr_tables;
+	} else {
+		efi_system_table_32_t *stbl = (efi_system_table_32_t *)efi_systab;
+
+		nr_tables	= stbl->nr_tables;
+	}
+
+	return __efi_get_rsdp_addr((unsigned long) esd->tables, nr_tables);
+#else
+	return 0;
+#endif
+}
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+	unsigned long config_tables;
+	unsigned int nr_tables;
+
+	efi_read_boot_params();
+
+	if (!efi_booted || kexec_efi_setup_data)
+		return 0;
+
+	if (!efi_systab)
+		error("EFI system table not found.");
+
+	/* Handle EFI bitness properly */
+	if (efi_64) {
+		efi_system_table_64_t *stbl = (efi_system_table_64_t *)efi_systab;
+
+		config_tables	= stbl->tables;
+		nr_tables	= stbl->nr_tables;
+	} else {
+		efi_system_table_32_t *stbl = (efi_system_table_32_t *)efi_systab;
+
+		config_tables	= stbl->tables;
+		nr_tables	= stbl->nr_tables;
+	}
+
+	if (!config_tables)
+		error("EFI config tables not found.");
+
+	return __efi_get_rsdp_addr(config_tables, nr_tables);
+#else
+	return 0;
+#endif
+}
 
 static u8 compute_checksum(u8 *buffer, u32 length)
 {
@@ -224,6 +312,9 @@ acpi_physical_address get_rsdp_addr(void)
 		pa = efi_get_rsdp_addr();
 
 	if (!pa)
+		pa = kexec_get_rsdp_addr();
+
+	if (!pa)
 		pa = bios_get_rsdp_addr();
 
 	return pa;

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-08 23:10 [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
@ 2019-04-10 17:14 ` Borislav Petkov
  2019-04-10 23:34   ` Junichi Nomura
  2019-06-06 19:22 ` [tip:x86/boot] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels tip-bot for Junichi Nomura
  1 sibling, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-10 17:14 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Mon, Apr 08, 2019 at 11:10:17PM +0000, Junichi Nomura wrote:
> Commit 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in
> boot_params") broke kexec boot on EFI systems.  efi_get_rsdp_addr()
> in the early parsing code tries to search RSDP from EFI table but
> that will crash because the table address is virtual when the kernel
> was booted by kexec.
> 
> In the case of kexec, physical address of EFI tables is provided
> via efi_setup_data in boot_params, which is set up by kexec(1).
> 
> Factor out the table parsing code and use different pointers depending
> on whether the kernel is booted by kexec or not.
> 
> Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
> Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
> Acked-by: Baoquan He <bhe@redhat.com>
> Tested-by: Chao Fan <fanc.fnst@cn.fujitsu.com>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Dave Young <dyoung@redhat.com>
> 
> --
> Original post:
>   https://lore.kernel.org/lkml/20190322110342.GA16202@jeru.linux.bs1.fc.nec.co.jp/
> 
> v2: Added comments above __efi_get_rsdp_addr() and kexec_get_rsdp_addr() 
> 
> v3: Properly ifdef out 64bit-only kexec code to avoid 32bit build warnings
> 
> v4:
>  - Make sure to avoid efi_get_rsdp_addr() when kexec setup_data exists
>    even if the data is invalid.
>  - Return instead of hang if systab is 0 in kexec_get_rsdp_addr().
>  - Check 32bit EFI loader signature in the case of kexec as well.
>  - Factor out EFI-related boot_params handling into efi_read_boot_params() to
>    avoid duplication between efi_get_rsdp_addr() and kexec_get_rsdp_addr().
> 
> The patch was tested on 3 different models of EFI-booted physical machines
> for both normal kexec and panic kexec.
> 
> There is a report, that similar problem still happens even with this patch:
>   https://lore.kernel.org/lkml/20190404140809.GA7789@dhcp-128-65.nay.redhat.com/
> 
> diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
> index 0ef4ad5..2bc8dca 100644
> --- a/arch/x86/boot/compressed/acpi.c
> +++ b/arch/x86/boot/compressed/acpi.c
> @@ -44,71 +44,80 @@ static acpi_physical_address get_acpi_rsdp(void)
>  	return addr;
>  }
>  
> -/* Search EFI system tables for RSDP. */
> -static acpi_physical_address efi_get_rsdp_addr(void)
> +#ifdef CONFIG_EFI
> +static unsigned long kexec_efi_setup_data;
> +static unsigned long efi_systab;
> +static bool efi_booted;
> +static bool efi_64;
> +
> +static unsigned long efi_get_kexec_setup_data_addr(void)
>  {
> -	acpi_physical_address rsdp_addr = 0;
> +#if defined(CONFIG_X86_64)
> +	struct setup_data *data;
> +	u64 pa_data;
> +
> +	pa_data = boot_params->hdr.setup_data;
> +	while (pa_data) {
> +		data = (struct setup_data *) pa_data;
> +		if (data->type == SETUP_EFI)
> +			return pa_data + sizeof(struct setup_data);
> +		pa_data = data->next;
> +	}
> +#endif
> +	return 0;
> +}
>  
> -#ifdef CONFIG_EFI
> -	unsigned long systab, systab_tables, config_tables;
> -	unsigned int nr_tables;
> +static void efi_read_boot_params(void)
> +{
> +	struct efi_setup_data *esd;
>  	struct efi_info *ei;
> -	bool efi_64;
> -	int size, i;
>  	char *sig;
>  
> +	kexec_efi_setup_data = efi_get_kexec_setup_data_addr();

Why is that written here and tested in another function?!?

> +
>  	ei = &boot_params->efi_info;
>  	sig = (char *)&ei->efi_loader_signature;
>  
>  	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
>  		efi_64 = true;
> +		efi_booted = true;

What is that ugliness for? Have you heard of functions returning values?

This patch has gone all downhill.

> +/*
> + * EFI/kexec support is only added for 64bit. So we don't have to
> + * care 32bit case.
> + */
> +static acpi_physical_address kexec_get_rsdp_addr(void)
> +{
> +#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
> +	struct efi_setup_data *esd;
> +	unsigned int nr_tables;
> +
> +	if (!efi_booted || !kexec_efi_setup_data)
> +		return 0;
> +
> +	esd = (struct efi_setup_data *) kexec_efi_setup_data;
> +
> +	if (!esd->tables) {
> +		debug_putstr("Wrong kexec SETUP_EFI data.\n");
> +		return 0;
> +	}
> +
> +	if (!efi_systab) {
> +		debug_putstr("EFI system table not found in kexec boot_params.");
> +		return 0;
> +	}
> +
> +	/* Handle EFI bitness properly */
> +	if (efi_64) {
> +		efi_system_table_64_t *stbl = (efi_system_table_64_t *)efi_systab;
> +
> +		nr_tables	= stbl->nr_tables;
> +	} else {
> +		efi_system_table_32_t *stbl = (efi_system_table_32_t *)efi_systab;
> +
> +		nr_tables	= stbl->nr_tables;
> +	}
> +
> +	return __efi_get_rsdp_addr((unsigned long) esd->tables, nr_tables);
> +#else
> +	return 0;
> +#endif
> +}
> +
> +static acpi_physical_address efi_get_rsdp_addr(void)
> +{
> +#ifdef CONFIG_EFI
> +	unsigned long config_tables;
> +	unsigned int nr_tables;
> +
> +	efi_read_boot_params();

Why do you read boot params here?

No, no, no.

First you do

	efi_get_rsdp_addr()

if you cannot get an address, you

	- parse boot params
	- then parse EFI tables from the address the kexeced kernel received

No intermixing of code paths and assigning variables in one function and
using them in another.

You were on the right track with v3...

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-10 17:14 ` Borislav Petkov
@ 2019-04-10 23:34   ` Junichi Nomura
  2019-04-11  8:09     ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-10 23:34 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/11/19 2:14 AM, Borislav Petkov wrote:
> On Mon, Apr 08, 2019 at 11:10:17PM +0000, Junichi Nomura wrote:
>> -#ifdef CONFIG_EFI
>> -	unsigned long systab, systab_tables, config_tables;
>> -	unsigned int nr_tables;
>> +static void efi_read_boot_params(void)
>> +{
>> +	struct efi_setup_data *esd;
>>  	struct efi_info *ei;
>> -	bool efi_64;
>> -	int size, i;
>>  	char *sig;
>>  
>> +	kexec_efi_setup_data = efi_get_kexec_setup_data_addr();
> 
> Why is that written here and tested in another function?!?

Both efi_get_rsdp_addr() and kexec_get_rsdp_addr() need to check
the result of efi_get_kexec_setup_data_addr(); the former to check
whether to exit early, the latter to use the address of the tables.
I thought it's better to store the result instead of calling twice.

>> +
>>  	ei = &boot_params->efi_info;
>>  	sig = (char *)&ei->efi_loader_signature;
>>  
>>  	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
>>  		efi_64 = true;
>> +		efi_booted = true;
> 
> What is that ugliness for? Have you heard of functions returning values?

Same as above. I didn't want to do signature check twice, in
efi_get_rsdp_addr() and kexec_get_rsdp_addr().
Also, the signature check has 2 return values, whether it was 32bit
or 64bit, and whether the signature was valid or not.
I could return one of them via pointer passed parameter but I thought
it's a little bit ugly.  Or I could encode them as something like
EFI_SIGNATURE_64, EFI_SIGNATURE_32, and EFI_SIGNATURE_INVALID.
But I'm not sure it's good to introduce such a thing just for here.

>> +static acpi_physical_address efi_get_rsdp_addr(void)
>> +{
>> +#ifdef CONFIG_EFI
>> +	unsigned long config_tables;
>> +	unsigned int nr_tables;
>> +
>> +	efi_read_boot_params();
> 
> Why do you read boot params here?
> 
> No, no, no.
> 
> First you do
> 
> 	efi_get_rsdp_addr()
> 
> if you cannot get an address, you

But efi_get_rsdp_addr() needs to check whether the kernel was
kexec booted to avoid accessing invalid EFI table address.
efi_get_kexec_setup_data_addr() is the only method I know
to check if it was kexec-booted.

> 	- parse boot params
> 	- then parse EFI tables from the address the kexeced kernel received
> 
> No intermixing of code paths and assigning variables in one function and
> using them in another.

Yeah, I don't like that. But if we are to handle 32bit EFI case,
efi_get_rsdp_addr() and kexec_get_rsdp_addr() become full of
duplication.

> You were on the right track with v3...

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-10 23:34   ` Junichi Nomura
@ 2019-04-11  8:09     ` Borislav Petkov
  2019-04-11  8:16       ` Junichi Nomura
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-11  8:09 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Wed, Apr 10, 2019 at 11:34:51PM +0000, Junichi Nomura wrote:
> But efi_get_rsdp_addr() needs to check whether the kernel was
> kexec booted to avoid accessing invalid EFI table address.
> efi_get_kexec_setup_data_addr() is the only method I know
> to check if it was kexec-booted.

Your v3 had the right approach - you first check if you can get the
address as a kexec-ed kernel. If you do, you use that one and continue
the normal path.

If you don't, you fall back to efi_get_rsdp_addr() and get it directly
from EFI.

And then carve out the functionality you need to call multiple times in
helper functions like __efi_get_rsdp_addr().

Why doesn't that work anymore?

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  8:09     ` Borislav Petkov
@ 2019-04-11  8:16       ` Junichi Nomura
  2019-04-11  8:37         ` Borislav Petkov
  2019-04-11  8:42         ` Baoquan He
  0 siblings, 2 replies; 48+ messages in thread
From: Junichi Nomura @ 2019-04-11  8:16 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/11/19 5:09 PM, Borislav Petkov wrote:
> On Wed, Apr 10, 2019 at 11:34:51PM +0000, Junichi Nomura wrote:
>> But efi_get_rsdp_addr() needs to check whether the kernel was
>> kexec booted to avoid accessing invalid EFI table address.
>> efi_get_kexec_setup_data_addr() is the only method I know
>> to check if it was kexec-booted.
> 
> Your v3 had the right approach - you first check if you can get the
> address as a kexec-ed kernel. If you do, you use that one and continue
> the normal path.
> 
> If you don't, you fall back to efi_get_rsdp_addr() and get it directly
> from EFI.
> 
> And then carve out the functionality you need to call multiple times in
> helper functions like __efi_get_rsdp_addr().
> 
> Why doesn't that work anymore?

kexec_get_rsdp_addr() might fail on a kexec-booted kernel, e.g. if the
setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
will hit the problem of accessing an invalid table pointer again.

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  8:16       ` Junichi Nomura
@ 2019-04-11  8:37         ` Borislav Petkov
  2019-04-11  9:13           ` Junichi Nomura
  2019-04-11  8:42         ` Baoquan He
  1 sibling, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-11  8:37 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Thu, Apr 11, 2019 at 08:16:45AM +0000, Junichi Nomura wrote:
> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
> setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
> will hit the problem of accessing invalid table pointer again.

Then you need to do this:

	if (kexeced kernel) {
		addr = kexec_get_rsdp_addr();
		if (!addr) {
			/* cannot get address */
			return -1;
		}

		return addr;
	}

and the calling function get_rsdp_addr() must check the return value and
if it is not 0, return immediately.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  8:16       ` Junichi Nomura
  2019-04-11  8:37         ` Borislav Petkov
@ 2019-04-11  8:42         ` Baoquan He
  2019-04-11  9:14           ` Junichi Nomura
  1 sibling, 1 reply; 48+ messages in thread
From: Baoquan He @ 2019-04-11  8:42 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Borislav Petkov, Dave Young, Chao Fan, Kairui Song, x86, kexec,
	linux-kernel

On 04/11/19 at 08:16am, Junichi Nomura wrote:
> On 4/11/19 5:09 PM, Borislav Petkov wrote:
> > On Wed, Apr 10, 2019 at 11:34:51PM +0000, Junichi Nomura wrote:
> >> But efi_get_rsdp_addr() needs to check whether the kernel was
> >> kexec booted to avoid accessing invalid EFI table address.
> >> efi_get_kexec_setup_data_addr() is the only method I know
> >> to check if it was kexec-booted.
> > 
> > Your v3 had the right approach - you first check if you can get the
> > address as a kexec-ed kernel. If you do, you use that one and continue
> > the normal path.
> > 
> > If you don't, you fall back to efi_get_rsdp_addr() and get it directly
> > from EFI.
> > 
> > And then carve out the functionality you need to call multiple times in
> > helper functions like __efi_get_rsdp_addr().
> > 
> > Why doesn't that work anymore?
> 
> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
> setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
> will hit the problem of accessing invalid table pointer again.

Seems you are trying to address Dave Young's comment in 
http://lkml.kernel.org/r/20190404073233.GC5708@dhcp-128-65.nay.redhat.com

We may need to discuss and clarify whether those are doable. E.g. for the
first comment: if we don't hang at the line of code below, what is
returning 0 for? Can the kexec boot still be saved, or do we just reset to
firmware?

	error("EFI system table not found in kexec boot_params.")

That may need to be clarified first, before going further and rearranging
the patch. That can ease the work, I guess.

Personal opinion.

Thanks
Baoquan

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  8:37         ` Borislav Petkov
@ 2019-04-11  9:13           ` Junichi Nomura
  2019-04-11  9:21             ` Boris Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-11  9:13 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/11/19 5:37 PM, Borislav Petkov wrote:
> On Thu, Apr 11, 2019 at 08:16:45AM +0000, Junichi Nomura wrote:
>> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
>> setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
>> will hit the problem of accessing invalid table pointer again.
> 
> Then you need to do this:
> 
> 	if (kexeced kernel) {
> 		addr = kexec_get_rsdp_addr();
> 		if (!addr) {
> 			/* cannot get address */
> 			return -1;
> 		}
> 
> 		return addr;
> 	}
> 
> and the calling function get_rsdp_addr() must check the return value and
> if it is not 0, return immediately.

Do you mean making get_rsdp_addr() like this?

acpi_physical_address get_rsdp_addr(void)
{
        acpi_physical_address pa;
+       struct efi_setup_data *esd;

        pa = get_acpi_rsdp();

        if (!pa)
                pa = boot_params->acpi_rsdp_addr;

+       esd = (struct efi_setup_data *) efi_get_kexec_setup_data_addr();
+       if (esd)
+		return kexec_get_rsdp_addr(esd);

        if (!pa)
                pa = efi_get_rsdp_addr();



-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  8:42         ` Baoquan He
@ 2019-04-11  9:14           ` Junichi Nomura
  2019-04-12  0:23             ` Baoquan He
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-11  9:14 UTC (permalink / raw)
  To: Baoquan He
  Cc: Borislav Petkov, Dave Young, Chao Fan, Kairui Song, x86, kexec,
	linux-kernel

On 4/11/19 5:42 PM, Baoquan He wrote:
> On 04/11/19 at 08:16am, Junichi Nomura wrote:
>> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
>> setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
>> will hit the problem of accessing invalid table pointer again.
> 
> Seems you are trying to address Dave Young's comment in 
> http://lkml.kernel.org/r/20190404073233.GC5708@dhcp-128-65.nay.redhat.com

Right. His "In case kexec_get_rsdp_addr failed.." comment.

> We may need discuss and make clear if those are doable. E.g the first
> comment, if not hang by below line of code, returning 0 for what? Can
> kexec still be saved, or just reset to firmware?
> 
> 	error("EFI system table not found in kexec boot_params.")

If we return 0 and also don't hang in the rest of get_rsdp_addr(),
it just works the same way as v5.0 and earlier kernels do.

The failure cases in kexec_get_rsdp_addr() are the following:
1. efi_setup_data is invalid
2. loader signature is invalid
3. EFI systab is not found in boot_params
4. RSDP is not found by parsing tables pointed to by efi_setup_data

I think all of them are critical for EFI boot, so one option could be
that we never return failure in kexec_get_rsdp_addr() and just hang.
But hanging at this very early stage of boot may make the problem
harder to investigate once it happens. Even earlyprintk is not working yet.
So the other option is returning 0 to defer the crash to a later stage.

> It may need be clarified firstly, then go further to rearrange patch.
> That can ease the work, I guess.
> 
> Personal opinion.

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  9:13           ` Junichi Nomura
@ 2019-04-11  9:21             ` Boris Petkov
  2019-04-11  9:32               ` Junichi Nomura
  0 siblings, 1 reply; 48+ messages in thread
From: Boris Petkov @ 2019-04-11  9:21 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On April 11, 2019 11:13:03 AM GMT+02:00, Junichi Nomura <j-nomura@ce.jp.nec.com> wrote:
>On 4/11/19 5:37 PM, Borislav Petkov wrote:
>> On Thu, Apr 11, 2019 at 08:16:45AM +0000, Junichi Nomura wrote:
>>> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
>>> setup_data was invalid. In such a case, falling back to
>efi_get_rsdp_addr()
>>> will hit the problem of accessing invalid table pointer again.
>> 
>> Then you need to do this:
>> 
>> 	if (kexeced kernel) {
>> 		addr = kexec_get_rsdp_addr();
>> 		if (!addr) {
>> 			/* cannot get address */
>> 			return -1;
>> 		}
>> 
>> 		return addr;
>> 	}
>> 
>> and the calling function get_rsdp_addr() must check the return value
>and
>> if it is not 0, return immediately.
>
>Do you mean making get_rsdp_addr() like this?

Does that look like what I've typed above?

-- 
Sent from a small device: formatting sux and brevity is inevitable.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  9:21             ` Boris Petkov
@ 2019-04-11  9:32               ` Junichi Nomura
  2019-04-11  9:40                 ` Boris Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-11  9:32 UTC (permalink / raw)
  To: Boris Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/11/19 6:21 PM, Boris Petkov wrote:
> On April 11, 2019 11:13:03 AM GMT+02:00, Junichi Nomura <j-nomura@ce.jp.nec.com> wrote:
>> On 4/11/19 5:37 PM, Borislav Petkov wrote:
>>> On Thu, Apr 11, 2019 at 08:16:45AM +0000, Junichi Nomura wrote:
>>>> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
>>>> setup_data was invalid. In such a case, falling back to
>> efi_get_rsdp_addr()
>>>> will hit the problem of accessing invalid table pointer again.
>>>
>>> Then you need to do this:
>>>
>>> 	if (kexeced kernel) {
>>> 		addr = kexec_get_rsdp_addr();
>>> 		if (!addr) {
>>> 			/* cannot get address */
>>> 			return -1;
>>> 		}
>>>
>>> 		return addr;
>>> 	}
>>>
>>> and the calling function get_rsdp_addr() must check the return value
>> and
>>> if it is not 0, return immediately.
>>
>> Do you mean making get_rsdp_addr() like this?
> 
> Does that look like what I've typed above?

No. But that's why I asked the question. Do you mean putting
the above code in efi_get_rsdp_addr()?

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  9:32               ` Junichi Nomura
@ 2019-04-11  9:40                 ` Boris Petkov
  2019-04-11 12:58                   ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Boris Petkov @ 2019-04-11  9:40 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On April 11, 2019 11:32:59 AM GMT+02:00, Junichi Nomura <j-nomura@ce.jp.nec.com> wrote:
>No. But that's why I asked the question. Do you mean putting
>the above code in efi_get_rsdp_addr()?

I'll do what I mean when I get back later.

-- 
Sent from a small device: formatting sux and brevity is inevitable.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  9:40                 ` Boris Petkov
@ 2019-04-11 12:58                   ` Borislav Petkov
  2019-04-12  2:54                     ` Junichi Nomura
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-11 12:58 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

Something like this based on your v3. I still need to figure out a
reliable way to check in kexec_get_rsdp_addr() whether we're a kexec-ed
kernel but other than that, it should look something like this:

---
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad55b29b..039d91258171 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -8,6 +8,8 @@
 #include <linux/efi.h>
 #include <asm/efi.h>
 
+#define EFI_SETUP_DATA_INVALID -1ULL
+
 /*
  * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
  * for termination.
@@ -44,17 +46,108 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+
+	pa_data = boot_params->hdr.setup_data;
+	while (pa_data) {
+		data = (struct setup_data *) pa_data;
+		if (data->type == SETUP_EFI)
+			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+		pa_data = data->next;
+	}
+	return NULL;
+}
+
+#ifdef CONFIG_EFI
+/*
+ * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+		    bool efi_64)
 {
 	acpi_physical_address rsdp_addr = 0;
+	int i;
+
+	/* Get EFI tables from systab. */
+	for (i = 0; i < nr_tables; i++) {
+		acpi_physical_address table;
+		efi_guid_t guid;
+
+		if (efi_64) {
+			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+
+			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+				return 0;
+			}
+		} else {
+			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+		}
+
+		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+			rsdp_addr = table;
+		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+			return table;
+	}
+
+	return rsdp_addr;
+}
+#endif
+
+/* EFI/kexec support is 64-bit only. */
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+	efi_system_table_64_t *systab;
+	struct efi_setup_data *esd;
+	struct efi_info *ei;
+	char *sig;
+
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return 0;
+
+	esd = get_kexec_setup_data_addr();
+	if (!esd)
+		return EFI_SETUP_DATA_INVALID;
 
+	if (!esd->tables) {
+		debug_putstr("Wrong kexec SETUP_EFI data.\n");
+		return 0;
+	}
+
+	ei = &boot_params->efi_info;
+	sig = (char *)&ei->efi_loader_signature;
+	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+		debug_putstr("Wrong kexec EFI loader signature.\n");
+		return 0;
+	}
+
+	/* Get systab from boot params. */
+	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+	if (!systab)
+		error("EFI system table not found in kexec boot_params.");
+
+	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
 #ifdef CONFIG_EFI
-	unsigned long systab, systab_tables, config_tables;
+	unsigned long systab, config_tables;
 	unsigned int nr_tables;
 	struct efi_info *ei;
 	bool efi_64;
-	int size, i;
 	char *sig;
 
 	ei = &boot_params->efi_info;
@@ -88,49 +181,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_64_t);
 	} else {
 		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_32_t);
 	}
 
 	if (!config_tables)
 		error("EFI config tables not found.");
 
-	/* Get EFI tables from systab. */
-	for (i = 0; i < nr_tables; i++) {
-		acpi_physical_address table;
-		efi_guid_t guid;
-
-		config_tables += size;
-
-		if (efi_64) {
-			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-
-			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-				return 0;
-			}
-		} else {
-			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-		}
-
-		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-			rsdp_addr = table;
-		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-			return table;
-	}
+	return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+	return 0;
 #endif
-	return rsdp_addr;
 }
 
 static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +284,12 @@ acpi_physical_address get_rsdp_addr(void)
 	if (!pa)
 		pa = boot_params->acpi_rsdp_addr;
 
+	if (!pa)
+		pa = kexec_get_rsdp_addr();
+
+	if (pa == EFI_SETUP_DATA_INVALID)
+		return 0;
+
 	if (!pa)
 		pa = efi_get_rsdp_addr();
 


-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11  9:14           ` Junichi Nomura
@ 2019-04-12  0:23             ` Baoquan He
  2019-04-15  7:46               ` Dave Young
  0 siblings, 1 reply; 48+ messages in thread
From: Baoquan He @ 2019-04-12  0:23 UTC (permalink / raw)
  To: Junichi Nomura, Dave Young
  Cc: Borislav Petkov, Chao Fan, Kairui Song, x86, kexec, linux-kernel

On 04/11/19 at 09:14am, Junichi Nomura wrote:
> On 4/11/19 5:42 PM, Baoquan He wrote:
> > On 04/11/19 at 08:16am, Junichi Nomura wrote:
> >> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
> >> setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
> >> will hit the problem of accessing invalid table pointer again.
> > 
> > Seems you are trying to address Dave Young's comment in 
> > http://lkml.kernel.org/r/20190404073233.GC5708@dhcp-128-65.nay.redhat.com
> 
> Right. His "In case kexec_get_rsdp_addr failed.." comment.
> 
> > We may need discuss and make clear if those are doable. E.g the first
> > comment, if not hang by below line of code, returning 0 for what? Can
> > kexec still be saved, or just reset to firmware?
> > 
> > 	error("EFI system table not found in kexec boot_params.")
> 
> If we return 0 and also don't hang in the rest of get_rsdp_addr(),
> it just work as the same way as v5.0 and earlier kernel do.
> 
> Failure cases in kexec_get_rsdp_addr() are followings:
> 1. efi_setup_data is invalid
> 2. loader signature is invalid
> 3. EFI systab is not found in boot_params
> 4. RSDP is not found by parsing tables pointed to by efi_setup_data
> 
> I think all of them are critical for EFI boot, so one option could be
> we never return failure in kexec_get_rsdp_addr() and just hang.
> But hanging in this very early stage of boot may make the problem
> harder to investigate once happens. Even earlyprintk is not working yet.
> So the other option is returning 0 to defer the crash for later stage.

OK, I got the point, thanks. So it is deferred to the late stage, KASLR
may not avoid those memory region which is marked as hotpluggable in
SRAT. Kernel can boot up, but doesn't function well on hotplug stuff.
In this case, people don't know why it happened. We are still blind.

Seems early console in efi is the problem, but not kexec or hotplug. I
am fine to hang, or make it continue booting for now.

Hi Dave, 

Is it possible to fix the efi early console issue? I mean the
feasibility, I believe it won't be easy. Ask this because not only this
issue encountered, any other issue could be triggered during boot
decompressing stage. If efi has this problem, we can't debug them
either.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-11 12:58                   ` Borislav Petkov
@ 2019-04-12  2:54                     ` Junichi Nomura
  2019-04-12  8:49                       ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-12  2:54 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/11/19 9:58 PM, Borislav Petkov wrote:
> Something like this based on your v3. I still need to figure out a
> reliable way to check in kexec_get_rsdp_addr() whether we're a kexec-ed
> kernel but other than that, it should look something like this:

Thank you.

> +static struct efi_setup_data *get_kexec_setup_data_addr(void)
> +{
> +	struct setup_data *data;
> +	u64 pa_data;
> +
> +	pa_data = boot_params->hdr.setup_data;
> +	while (pa_data) {
> +		data = (struct setup_data *) pa_data;
> +		if (data->type == SETUP_EFI)
> +			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
> +
> +		pa_data = data->next;
> +	}
> +	return NULL;
> +}

Without #ifdef CONFIG_X86_64, I got compiler warnings on 32bit build
about casting u64 to pointer.

> +/* EFI/kexec support is 64-bit only. */
> +static acpi_physical_address kexec_get_rsdp_addr(void)
> +{

We need #ifdef CONFIG_EFI to avoid build failure about undefined
__efi_get_rsdp_addr().

> +	efi_system_table_64_t *systab;
> +	struct efi_setup_data *esd;
> +	struct efi_info *ei;
> +	char *sig;
> +
> +	if (!IS_ENABLED(CONFIG_X86_64))
> +		return 0;
> +
> +	esd = get_kexec_setup_data_addr();
> +	if (!esd)
> +		return EFI_SETUP_DATA_INVALID;
>  
> +	if (!esd->tables) {
> +		debug_putstr("Wrong kexec SETUP_EFI data.\n");
> +		return 0;
> +	}

I think that should be the other way around:

	esd = get_kexec_setup_data_addr();
	if (!esd) // the kernel is not kexec booted
		return 0;

	if (!esd->tables) {  // kexec booted but data is invalid
		debug_putstr("Wrong kexec SETUP_EFI data.\n");
		return EFI_SETUP_DATA_INVALID;
	}

And other error returns in kexec_get_rsdp_addr() should also be
EFI_SETUP_DATA_INVALID.

> +	/* Get systab from boot params. */
> +	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));

Though there is !IS_ENABLED(CONFIG_X86_64), compiler still sees this
code and emits warning on 32bit build.

To fix 32bit build warnings, non-EFI build errors and return
values from kexec_get_rsdp_addr(), the patch becomes:

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad5..715a8b0 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -8,6 +8,8 @@
 #include <linux/efi.h>
 #include <asm/efi.h>
 
+#define EFI_SETUP_DATA_INVALID -1ULL
+
 /*
  * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
  * for termination.
@@ -44,17 +46,114 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+	struct setup_data *data;
+	u64 pa_data;
+
+	pa_data = boot_params->hdr.setup_data;
+	while (pa_data) {
+		data = (struct setup_data *) pa_data;
+		if (data->type == SETUP_EFI)
+			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+		pa_data = data->next;
+	}
+#endif
+	return NULL;
+}
+
+#ifdef CONFIG_EFI
+/*
+ * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+		    bool efi_64)
+{
+	acpi_physical_address rsdp_addr = 0;
+	int i;
+
+	/* Get EFI tables from systab. */
+	for (i = 0; i < nr_tables; i++) {
+		acpi_physical_address table;
+		efi_guid_t guid;
+
+		if (efi_64) {
+			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+
+			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+				return 0;
+			}
+		} else {
+			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+		}
+
+		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+			rsdp_addr = table;
+		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+			return table;
+	}
+
+	return rsdp_addr;
+}
+#endif
+
+/* EFI/kexec support is 64-bit only. */
+static acpi_physical_address kexec_get_rsdp_addr(void)
 {
 	acpi_physical_address rsdp_addr = 0;
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+	efi_system_table_64_t *systab;
+	struct efi_setup_data *esd;
+	struct efi_info *ei;
+	char *sig;
+
+	esd = get_kexec_setup_data_addr();
+	if (!esd)
+		return 0;
 
+	if (!esd->tables) {
+		debug_putstr("Wrong kexec SETUP_EFI data.\n");
+		return EFI_SETUP_DATA_INVALID;
+	}
+
+	ei = &boot_params->efi_info;
+	sig = (char *)&ei->efi_loader_signature;
+	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+		debug_putstr("Wrong kexec EFI loader signature.\n");
+		return EFI_SETUP_DATA_INVALID;
+	}
+
+	/* Get systab from boot params. */
+	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+	if (!systab)
+		error("EFI system table not found in kexec boot_params.");
+
+	rsdp_addr = __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+	if (!rsdp_addr)
+		return EFI_SETUP_DATA_INVALID;
+#endif
+
+	return rsdp_addr;
+}
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
 #ifdef CONFIG_EFI
-	unsigned long systab, systab_tables, config_tables;
+	unsigned long systab, config_tables;
 	unsigned int nr_tables;
 	struct efi_info *ei;
 	bool efi_64;
-	int size, i;
 	char *sig;
 
 	ei = &boot_params->efi_info;
@@ -88,49 +187,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_64_t);
 	} else {
 		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_32_t);
 	}
 
 	if (!config_tables)
 		error("EFI config tables not found.");
 
-	/* Get EFI tables from systab. */
-	for (i = 0; i < nr_tables; i++) {
-		acpi_physical_address table;
-		efi_guid_t guid;
-
-		config_tables += size;
-
-		if (efi_64) {
-			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-
-			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-				return 0;
-			}
-		} else {
-			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-		}
-
-		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-			rsdp_addr = table;
-		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-			return table;
-	}
+	return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+	return 0;
 #endif
-	return rsdp_addr;
 }
 
 static u8 compute_checksum(u8 *buffer, u32 length)
@@ -221,6 +291,12 @@ acpi_physical_address get_rsdp_addr(void)
 		pa = boot_params->acpi_rsdp_addr;
 
 	if (!pa)
+		pa = kexec_get_rsdp_addr();
+
+	if (pa == EFI_SETUP_DATA_INVALID)
+		return 0;
+
+	if (!pa)
 		pa = efi_get_rsdp_addr();
 
 	if (!pa)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c56..e98f8cf 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -379,6 +379,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	debug_putaddr(output);
 	debug_putaddr(output_len);
 	debug_putaddr(kernel_total_size);
+	debug_putaddr(boot_params->acpi_rsdp_addr);
 
 #ifdef CONFIG_X86_64
 	/* Report address of 32-bit trampoline */

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-12  2:54                     ` Junichi Nomura
@ 2019-04-12  8:49                       ` Borislav Petkov
  2019-04-12 13:35                         ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-12  8:49 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Fri, Apr 12, 2019 at 02:54:17AM +0000, Junichi Nomura wrote:
> Without #ifdef CONFIG_X86_64, I got compiler warnings on 32bit build
> about casting u64 to pointer.

Yah, stupid ifdeffery.

> We need #ifdef CONFIG_EFI to avoid build failure about undefined
> __efi_get_rsdp_addr().

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index c3020e2d8f67..4b1d4a0a4269 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -46,7 +46,6 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-#ifdef CONFIG_EFI
 /*
  * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
  * ACPI_TABLE_GUID are found, take the former, which has more features.
@@ -56,6 +55,8 @@ __efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
 		    bool efi_64)
 {
 	acpi_physical_address rsdp_addr = 0;
+
+#ifdef CONFIG_EFI
 	int i;
 
 	/* Get EFI tables from systab. */
@@ -85,10 +86,9 @@ __efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
 		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
 			return table;
 	}
-
+#endif
 	return rsdp_addr;
 }
-#endif
 
 /* EFI/kexec support is 64-bit only. */
 #ifdef CONFIG_X86_64
---

> I think that should be the other way around:

No, it shouldn't.

kexec_get_rsdp_addr() must do:

	if (!kexec_kernel)
		return 0;

        esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
        if (!esd)
                return EFI_SETUP_DATA_INVALID;

	...

Now I need to go figure out whether there's a reliable way to know in
the kexec kernel that it *is* a kexec kernel.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-12  8:49                       ` Borislav Petkov
@ 2019-04-12 13:35                         ` Borislav Petkov
  2019-04-15  7:01                           ` Junichi Nomura
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-12 13:35 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Fri, Apr 12, 2019 at 10:49:56AM +0200, Borislav Petkov wrote:
> Now I need to go figure out whether there's a reliable way to know in
> the kexec kernel that it *is* a kexec kernel.

Actually, thinking about this more, we don't need to know whether the
kernel was kexeced or not. Why?

Because if it is kexec'ed, kexec(1) passes the required info in
setup_data. Now, if for whatever reason the kexec'ed kernel fails to
parse that EFI info and get the systab to figure out the RSDP, then it
doesn't have any other choice but fail booting.

Because there's no way it can figure out where the EFI runtime has been
mapped and recover by finding the RSDP from there.

So I think we're perfectly fine with the old approach:

        if (!pa)
                pa = kexec_get_rsdp_addr();

        if (!pa)
                pa = efi_get_rsdp_addr();


-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-12 13:35                         ` Borislav Petkov
@ 2019-04-15  7:01                           ` Junichi Nomura
  2019-04-15  9:07                             ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-15  7:01 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/12/19 10:35 PM, Borislav Petkov wrote:
> On Fri, Apr 12, 2019 at 10:49:56AM +0200, Borislav Petkov wrote:
>> Now I need to go figure out whether there's a reliable way to know in
>> the kexec kernel that it *is* a kexec kernel.
> 
> Actually, thinking about this more, we don't need to know whether the
> kernel was kexeced or not. Why?
> 
> Because if it is kexec'ed, kexec(1) passes the required info in
> setup_data. Now, if for whatever reason the kexec'ed kernel fails to
> parse that EFI info and get the systab to figure out the RSDP, then it
> doesn't have any other choice but fail booting.
> 
> Because there's no way it can figure out where the EFI runtime has been
> mapped and recover by finding the RSDP from there.
> 
> So I think we're perfectly fine with the old approach:
> 
>         if (!pa)
>                 pa = kexec_get_rsdp_addr();
> 
>         if (!pa)
>                 pa = efi_get_rsdp_addr();

OK. Then I'll go back to v3 and make sure to hang when
something is wrong during kexec boot on EFI system.

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-12  0:23             ` Baoquan He
@ 2019-04-15  7:46               ` Dave Young
  0 siblings, 0 replies; 48+ messages in thread
From: Dave Young @ 2019-04-15  7:46 UTC (permalink / raw)
  To: Baoquan He
  Cc: Junichi Nomura, Borislav Petkov, Chao Fan, Kairui Song, x86,
	kexec, linux-kernel

On 04/12/19 at 08:23am, Baoquan He wrote:
> On 04/11/19 at 09:14am, Junichi Nomura wrote:
> > On 4/11/19 5:42 PM, Baoquan He wrote:
> > > On 04/11/19 at 08:16am, Junichi Nomura wrote:
> > >> kexec_get_rsdp_addr() might fail on kexec-booted kernel, e.g. if the
> > >> setup_data was invalid. In such a case, falling back to efi_get_rsdp_addr()
> > >> will hit the problem of accessing invalid table pointer again.
> > > 
> > > Seems you are trying to address Dave Young's comment in 
> > > http://lkml.kernel.org/r/20190404073233.GC5708@dhcp-128-65.nay.redhat.com
> > 
> > Right. His "In case kexec_get_rsdp_addr failed.." comment.
> > 
> > > We may need discuss and make clear if those are doable. E.g the first
> > > comment, if not hang by below line of code, returning 0 for what? Can
> > > kexec still be saved, or just reset to firmware?
> > > 
> > > 	error("EFI system table not found in kexec boot_params.")
> > 
> > If we return 0 and also don't hang in the rest of get_rsdp_addr(),
> > it just work as the same way as v5.0 and earlier kernel do.
> > 
> > Failure cases in kexec_get_rsdp_addr() are followings:
> > 1. efi_setup_data is invalid
> > 2. loader signature is invalid
> > 3. EFI systab is not found in boot_params
> > 4. RSDP is not found by parsing tables pointed to by efi_setup_data
> > 
> > I think all of them are critical for EFI boot, so one option could be
> > we never return failure in kexec_get_rsdp_addr() and just hang.
> > But hanging in this very early stage of boot may make the problem
> > harder to investigate once happens. Even earlyprintk is not working yet.
> > So the other option is returning 0 to defer the crash for later stage.
> 
> OK, I got the point, thanks. So it is deferred to the late stage, KASLR
> may not avoid those memory region which is marked as hotpluggable in
> SRAT. Kernel can boot up, but doesn't function well on hotplug stuff.
> In this case, people don't know why it happened. We are still blind.
> 
> Seems early console in efi is the problem, but not kexec or hotplug. I
> am fine to hang, or make it continue booting for now.
> 
> Hi Dave, 
> 
> Is it possible to fix the efi early console issue? I mean the
> feasibility, I believe it won't be easy. Ask this because not only this
> issue encountered, any other issue could be triggered during boot
> decompressing stage. If efi has this problem, we can't debug them
> either.

For normal boot, it may be doable to use some boot services, e.g. some
graphics protocols that the EFI firmware provides.

But for kexec, it is different because it becomes virtual mode, boot
services are not available, and the kernel takes over the mode setting etc.
The early framebuffer may be usable, or may not; it is not reliable.

Thanks
Dave

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-15  7:01                           ` Junichi Nomura
@ 2019-04-15  9:07                             ` Borislav Petkov
  2019-04-15 10:25                               ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-15  9:07 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Mon, Apr 15, 2019 at 07:01:54AM +0000, Junichi Nomura wrote:
> OK. Then I'll go back to v3 and make sure to hang when
> something is wrong during kexec boot on EFI system.

No need - I have it here locally. I'll clean it up and post it for
review.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-15  9:07                             ` Borislav Petkov
@ 2019-04-15 10:25                               ` Borislav Petkov
  2019-04-15 23:00                                 ` Junichi Nomura
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-15 10:25 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Mon, Apr 15, 2019 at 11:07:17AM +0200, Borislav Petkov wrote:
> On Mon, Apr 15, 2019 at 07:01:54AM +0000, Junichi Nomura wrote:
> > OK. Then I'll go back to v3 and make sure to hang when
> > something is wrong during kexec boot on EFI system.
> 
> No need - I have it here locally. I'll clean it up and post it for
> review.

Here it is. Ok, not ok?

---
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad55b29b..089639a8a384 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+/*
+ * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+		    bool efi_64)
 {
 	acpi_physical_address rsdp_addr = 0;
 
 #ifdef CONFIG_EFI
-	unsigned long systab, systab_tables, config_tables;
+	int i;
+
+	/* Get EFI tables from systab. */
+	for (i = 0; i < nr_tables; i++) {
+		acpi_physical_address table;
+		efi_guid_t guid;
+
+		if (efi_64) {
+			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+
+			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+				return 0;
+			}
+		} else {
+			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+		}
+
+		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+			rsdp_addr = table;
+		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+			return table;
+	}
+#endif
+	return rsdp_addr;
+}
+
+/* EFI/kexec support is 64-bit only. */
+#ifdef CONFIG_X86_64
+static struct efi_setup_data * get_kexec_setup_data_addr(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+
+	pa_data = boot_params->hdr.setup_data;
+	while (pa_data) {
+		data = (struct setup_data *)pa_data;
+		if (data->type == SETUP_EFI)
+			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+		pa_data = data->next;
+	}
+	return NULL;
+}
+
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+	efi_system_table_64_t *systab;
+	struct efi_setup_data *esd;
+	struct efi_info *ei;
+	char *sig;
+
+	esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
+	if (!esd)
+		return 0;
+
+	if (!esd->tables) {
+		debug_putstr("Wrong kexec SETUP_EFI data.\n");
+		return 0;
+	}
+
+	ei = &boot_params->efi_info;
+	sig = (char *)&ei->efi_loader_signature;
+	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+		debug_putstr("Wrong kexec EFI loader signature.\n");
+		return 0;
+	}
+
+	/* Get systab from boot params. */
+	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+	if (!systab)
+		error("EFI system table not found in kexec boot_params.");
+
+	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+#else
+static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
+#endif /* CONFIG_X86_64 */
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+	unsigned long systab, config_tables;
 	unsigned int nr_tables;
 	struct efi_info *ei;
 	bool efi_64;
-	int size, i;
 	char *sig;
 
 	ei = &boot_params->efi_info;
@@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_64_t);
 	} else {
 		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_32_t);
 	}
 
 	if (!config_tables)
 		error("EFI config tables not found.");
 
-	/* Get EFI tables from systab. */
-	for (i = 0; i < nr_tables; i++) {
-		acpi_physical_address table;
-		efi_guid_t guid;
-
-		config_tables += size;
-
-		if (efi_64) {
-			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-
-			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-				return 0;
-			}
-		} else {
-			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-		}
-
-		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-			rsdp_addr = table;
-		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-			return table;
-	}
+	return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+	return 0;
 #endif
-	return rsdp_addr;
 }
 
 static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
 	if (!pa)
 		pa = boot_params->acpi_rsdp_addr;
 
+	/*
+	 * Try to get EFI data from setup_data. This can happen when we're a
+	 * kexec'ed kernel and kexec(1) has passed all the required EFI info to
+	 * us.
+	 */
+	if (!pa)
+		pa = kexec_get_rsdp_addr();
+
 	if (!pa)
 		pa = efi_get_rsdp_addr();

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-15 10:25                               ` Borislav Petkov
@ 2019-04-15 23:00                                 ` Junichi Nomura
  2019-04-15 23:14                                   ` Junichi Nomura
  2019-04-16  9:40                                   ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Borislav Petkov
  0 siblings, 2 replies; 48+ messages in thread
From: Junichi Nomura @ 2019-04-15 23:00 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/15/19 7:25 PM, Borislav Petkov wrote:
> On Mon, Apr 15, 2019 at 11:07:17AM +0200, Borislav Petkov wrote:
>> On Mon, Apr 15, 2019 at 07:01:54AM +0000, Junichi Nomura wrote:
>>> OK. Then I'll go back to v3 and make sure to hang when
>>> something is wrong during kexec boot on EFI system.
>>
>> No need - I have it here locally. I'll clean it up and post it for
>> review.
> 
> Here it is. Ok, not ok?

Thank you.  Basically ok.
I put some comments below about whether to hang or return.

> +static acpi_physical_address kexec_get_rsdp_addr(void)
> +{
> +	efi_system_table_64_t *systab;
> +	struct efi_setup_data *esd;
> +	struct efi_info *ei;
> +	char *sig;
> +
> +	esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
> +	if (!esd)
> +		return 0;
> +
> +	if (!esd->tables) {
> +		debug_putstr("Wrong kexec SETUP_EFI data.\n");
> +		return 0;
> +	}

I thought we should hang here instead of return so that we
don't run into efi_get_rsdp_addr() in case of kexec.

> +	ei = &boot_params->efi_info;
> +	sig = (char *)&ei->efi_loader_signature;
> +	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
> +		debug_putstr("Wrong kexec EFI loader signature.\n");
> +		return 0;
> +	}

Same here.

> +	/* Get systab from boot params. */
> +	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
> +	if (!systab)
> +		error("EFI system table not found in kexec boot_params.");
> +
> +	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);

Same here when __efi_get_rsdp_addr() returns 0.

I'm fine with either way, though.

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-15 23:00                                 ` Junichi Nomura
@ 2019-04-15 23:14                                   ` Junichi Nomura
  2019-04-16  9:45                                     ` Borislav Petkov
  2019-04-16  9:40                                   ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Borislav Petkov
  1 sibling, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-15 23:14 UTC (permalink / raw)
  To: Borislav Petkov, Dave Young, Chao Fan, Baoquan He
  Cc: Kairui Song, x86, kexec, linux-kernel

On 4/16/19 8:00 AM, Junichi Nomura wrote:
> On 4/15/19 7:25 PM, Borislav Petkov wrote:
>> On Mon, Apr 15, 2019 at 11:07:17AM +0200, Borislav Petkov wrote:
>>> On Mon, Apr 15, 2019 at 07:01:54AM +0000, Junichi Nomura wrote:
>>>> OK. Then I'll go back to v3 and make sure to hang when
>>>> something is wrong during kexec boot on EFI system.
>>>
>>> No need - I have it here locally. I'll clean it up and post it for
>>> review.
>>
>> Here it is. Ok, not ok?
> 
> Thank you.  Basically ok.
> I put some comments below about whether to hang or return.
> 
>> +static acpi_physical_address kexec_get_rsdp_addr(void)
>> +{
>> +	efi_system_table_64_t *systab;
>> +	struct efi_setup_data *esd;
>> +	struct efi_info *ei;
>> +	char *sig;
>> +
>> +	esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
>> +	if (!esd)
>> +		return 0;
>> +
>> +	if (!esd->tables) {
>> +		debug_putstr("Wrong kexec SETUP_EFI data.\n");
>> +		return 0;
>> +	}
> 
> I thought we should hang here instead of return so that we
> don't run into efi_get_rsdp_addr() in case of kexec.
> 
>> +	ei = &boot_params->efi_info;
>> +	sig = (char *)&ei->efi_loader_signature;
>> +	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
>> +		debug_putstr("Wrong kexec EFI loader signature.\n");
>> +		return 0;
>> +	}
> 
> Same here.

One more question just for clarification.

I see kexec is only supported on 64bit kernel. But are we sure
we don't need to support kexec on EFI32 + 64bit kernel?

I don't have such an environment and as far as I tried with OVMF i386
and KVM guest, that combination doesn't work reliably even with v5.0.
So I suppose people don't care.

>> +	/* Get systab from boot params. */
>> +	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
>> +	if (!systab)
>> +		error("EFI system table not found in kexec boot_params.");
>> +
>> +	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
> 
> Same here when __efi_get_rsdp_addr() returns 0.
> 
> I'm fine with either way, though.

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-15 23:00                                 ` Junichi Nomura
  2019-04-15 23:14                                   ` Junichi Nomura
@ 2019-04-16  9:40                                   ` Borislav Petkov
  2019-04-16  9:52                                     ` [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Borislav Petkov
  2019-04-16 22:44                                     ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
  1 sibling, 2 replies; 48+ messages in thread
From: Borislav Petkov @ 2019-04-16  9:40 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Mon, Apr 15, 2019 at 11:00:25PM +0000, Junichi Nomura wrote:
> I thought we should hang here instead of return so that we
> don't run into efi_get_rsdp_addr() in case of kexec.

Hanging that early without debug output is not very friendly to
debuggers, methinks.

> > +	ei = &boot_params->efi_info;
> > +	sig = (char *)&ei->efi_loader_signature;
> > +	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
> > +		debug_putstr("Wrong kexec EFI loader signature.\n");
> > +		return 0;
> > +	}
> 
> Same here.

Ditto.

You get the idea.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-15 23:14                                   ` Junichi Nomura
@ 2019-04-16  9:45                                     ` Borislav Petkov
  2019-04-16 23:09                                       ` kexec crash on OVMF i386 + x86_64 kernel (Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel) Junichi Nomura
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-16  9:45 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Mon, Apr 15, 2019 at 11:14:34PM +0000, Junichi Nomura wrote:
> I see kexec is only supported on 64bit kernel. But are we sure
> we don't need to support kexec on EFI32 + 64bit kernel?
> 
> I don't have such an environment and as far as I tried with OVMF i386
> and KVM guest, that combination doesn't work reliably even with v5.0.

What does that mean exactly?

If it can be fixed, we can try to.

> So I suppose people don't care.

There's that.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-16  9:40                                   ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Borislav Petkov
@ 2019-04-16  9:52                                     ` Borislav Petkov
  2019-04-16 10:02                                       ` Ingo Molnar
                                                         ` (2 more replies)
  2019-04-16 22:44                                     ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
  1 sibling, 3 replies; 48+ messages in thread
From: Borislav Petkov @ 2019-04-16  9:52 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

I'll queue the below in the next days if there are no more complaints:

---
From: Junichi Nomura <j-nomura@ce.jp.nec.com>

Commit

  3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")

broke kexec boot on EFI systems. efi_get_rsdp_addr() in the early
parsing code tries to search RSDP from the EFI tables but that will
crash because the table address is virtual when the kernel was booted by
kexec (set_virtual_address_map() has run in the first kernel and cannot
be run again in the second kernel).

In the case of kexec, the physical address of EFI tables is provided via
efi_setup_data in boot_params, which is set up by kexec(1).

Factor out the table parsing code and use different pointers depending
on whether the kernel is booted by kexec or not.

 [ bp: Massage. ]

Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Chao Fan <fanc.fnst@cn.fujitsu.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Young <dyoung@redhat.com>
Link: https://lkml.kernel.org/r/20190408231011.GA5402@jeru.linux.bs1.fc.nec.co.jp
---
 arch/x86/boot/compressed/acpi.c | 143 ++++++++++++++++++++++++--------
 1 file changed, 107 insertions(+), 36 deletions(-)

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad55b29b..8cecce1ac0cd 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+/*
+ * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+		    bool efi_64)
 {
 	acpi_physical_address rsdp_addr = 0;
 
 #ifdef CONFIG_EFI
-	unsigned long systab, systab_tables, config_tables;
+	int i;
+
+	/* Get EFI tables from systab. */
+	for (i = 0; i < nr_tables; i++) {
+		acpi_physical_address table;
+		efi_guid_t guid;
+
+		if (efi_64) {
+			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+
+			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+				return 0;
+			}
+		} else {
+			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+		}
+
+		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+			rsdp_addr = table;
+		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+			return table;
+	}
+#endif
+	return rsdp_addr;
+}
+
+/* EFI/kexec support is 64-bit only. */
+#ifdef CONFIG_X86_64
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+
+	pa_data = boot_params->hdr.setup_data;
+	while (pa_data) {
+		data = (struct setup_data *)pa_data;
+		if (data->type == SETUP_EFI)
+			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+		pa_data = data->next;
+	}
+	return NULL;
+}
+
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+	efi_system_table_64_t *systab;
+	struct efi_setup_data *esd;
+	struct efi_info *ei;
+	char *sig;
+
+	esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
+	if (!esd)
+		return 0;
+
+	if (!esd->tables) {
+		debug_putstr("Wrong kexec SETUP_EFI data.\n");
+		return 0;
+	}
+
+	ei = &boot_params->efi_info;
+	sig = (char *)&ei->efi_loader_signature;
+	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+		debug_putstr("Wrong kexec EFI loader signature.\n");
+		return 0;
+	}
+
+	/* Get systab from boot params. */
+	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+	if (!systab)
+		error("EFI system table not found in kexec boot_params.");
+
+	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+#else
+static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
+#endif /* CONFIG_X86_64 */
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+	unsigned long systab, config_tables;
 	unsigned int nr_tables;
 	struct efi_info *ei;
 	bool efi_64;
-	int size, i;
 	char *sig;
 
 	ei = &boot_params->efi_info;
@@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_64_t);
 	} else {
 		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_32_t);
 	}
 
 	if (!config_tables)
 		error("EFI config tables not found.");
 
-	/* Get EFI tables from systab. */
-	for (i = 0; i < nr_tables; i++) {
-		acpi_physical_address table;
-		efi_guid_t guid;
-
-		config_tables += size;
-
-		if (efi_64) {
-			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-
-			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-				return 0;
-			}
-		} else {
-			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-		}
-
-		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-			rsdp_addr = table;
-		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-			return table;
-	}
+	return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+	return 0;
 #endif
-	return rsdp_addr;
 }
 
 static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
 	if (!pa)
 		pa = boot_params->acpi_rsdp_addr;
 
+	/*
+	 * Try to get EFI data from setup_data. This can happen when we're a
+	 * kexec'ed kernel and kexec(1) has passed all the required EFI info to
+	 * us.
+	 */
+	if (!pa)
+		pa = kexec_get_rsdp_addr();
+
 	if (!pa)
 		pa = efi_get_rsdp_addr();
 
-- 
2.21.0


-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-16  9:52                                     ` [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Borislav Petkov
@ 2019-04-16 10:02                                       ` Ingo Molnar
  2019-04-16 10:31                                         ` Borislav Petkov
  2019-04-16 11:41                                       ` Dave Young
  2019-04-19  8:34                                       ` [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it Kairui Song
  2 siblings, 1 reply; 48+ messages in thread
From: Ingo Molnar @ 2019-04-16 10:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Junichi Nomura, Dave Young, Chao Fan, Baoquan He, Kairui Song,
	x86, kexec, linux-kernel


* Borislav Petkov <bp@alien8.de> wrote:

> I'll queue the below in the next days if there are no more complaints:

Just a minor style nit, this was inherited from existing code:

> +			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
> +			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;

You might want to update that to the canonical form of:

> +			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
> +			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;

Looks good otherwise.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-16 10:02                                       ` Ingo Molnar
@ 2019-04-16 10:31                                         ` Borislav Petkov
  0 siblings, 0 replies; 48+ messages in thread
From: Borislav Petkov @ 2019-04-16 10:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Junichi Nomura, Dave Young, Chao Fan, Baoquan He, Kairui Song,
	x86, kexec, linux-kernel

On Tue, Apr 16, 2019 at 12:02:26PM +0200, Ingo Molnar wrote:
> 
> * Borislav Petkov <bp@alien8.de> wrote:
> 
> > I'll queue the below in the next days if there are no more complaints:
> 
> Just a minor style nit, this was inherited from existing code:
> 
> > +			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
> > +			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
> 
> You might want to update that to the canonical form of:
> 
> > +			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
> > +			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
> 
> Looks good otherwise.

Fixed, thanks.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-16  9:52                                     ` [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Borislav Petkov
  2019-04-16 10:02                                       ` Ingo Molnar
@ 2019-04-16 11:41                                       ` Dave Young
  2019-04-16 13:22                                         ` Borislav Petkov
  2019-04-19  8:34                                       ` [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it Kairui Song
  2 siblings, 1 reply; 48+ messages in thread
From: Dave Young @ 2019-04-16 11:41 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Junichi Nomura, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On 04/16/19 at 11:52am, Borislav Petkov wrote:
> I'll queue the below in the next days if there are no more complaints:

As for the kexec breakage, even with the V3 patch, kexec still hangs on
a Lenovo T420 laptop.  Kairui also reproduced the problem. So can we
wait a few days to see if we can make some progress to find the cause?

> 
> ---
> From: Junichi Nomura <j-nomura@ce.jp.nec.com>
> 
> Commit
> 
>   3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
> 
> broke kexec boot on EFI systems. efi_get_rsdp_addr() in the early
> parsing code tries to search RSDP from the EFI tables but that will
> crash because the table address is virtual when the kernel was booted by
> kexec (set_virtual_address_map() has run in the first kernel and cannot
> be run again in the second kernel).
> 
> In the case of kexec, the physical address of EFI tables is provided via
> efi_setup_data in boot_params, which is set up by kexec(1).
> 
> Factor out the table parsing code and use different pointers depending
> on whether the kernel is booted by kexec or not.
> 
>  [ bp: Massage. ]
> 
> Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
> Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Cc: Chao Fan <fanc.fnst@cn.fujitsu.com>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Dave Young <dyoung@redhat.com>
> Link: https://lkml.kernel.org/r/20190408231011.GA5402@jeru.linux.bs1.fc.nec.co.jp
> ---
>  arch/x86/boot/compressed/acpi.c | 143 ++++++++++++++++++++++++--------
>  1 file changed, 107 insertions(+), 36 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
> index 0ef4ad55b29b..8cecce1ac0cd 100644
> --- a/arch/x86/boot/compressed/acpi.c
> +++ b/arch/x86/boot/compressed/acpi.c
> @@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
>  	return addr;
>  }
>  
> -/* Search EFI system tables for RSDP. */
> -static acpi_physical_address efi_get_rsdp_addr(void)
> +/*
> + * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
> + * ACPI_TABLE_GUID are found, take the former, which has more features.
> + */
> +static acpi_physical_address
> +__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
> +		    bool efi_64)
>  {
>  	acpi_physical_address rsdp_addr = 0;
>  
>  #ifdef CONFIG_EFI
> -	unsigned long systab, systab_tables, config_tables;
> +	int i;
> +
> +	/* Get EFI tables from systab. */
> +	for (i = 0; i < nr_tables; i++) {
> +		acpi_physical_address table;
> +		efi_guid_t guid;
> +
> +		if (efi_64) {
> +			efi_config_table_64_t *tbl = (efi_config_table_64_t *) config_tables + i;
> +
> +			guid  = tbl->guid;
> +			table = tbl->table;
> +
> +			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
> +				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
> +				return 0;
> +			}
> +		} else {
> +			efi_config_table_32_t *tbl = (efi_config_table_32_t *) config_tables + i;
> +
> +			guid  = tbl->guid;
> +			table = tbl->table;
> +		}
> +
> +		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
> +			rsdp_addr = table;
> +		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
> +			return table;
> +	}
> +#endif
> +	return rsdp_addr;
> +}
> +
> +/* EFI/kexec support is 64-bit only. */
> +#ifdef CONFIG_X86_64
> +static struct efi_setup_data *get_kexec_setup_data_addr(void)
> +{
> +	struct setup_data *data;
> +	u64 pa_data;
> +
> +	pa_data = boot_params->hdr.setup_data;
> +	while (pa_data) {
> +		data = (struct setup_data *)pa_data;
> +		if (data->type == SETUP_EFI)
> +			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
> +
> +		pa_data = data->next;
> +	}
> +	return NULL;
> +}
> +
> +static acpi_physical_address kexec_get_rsdp_addr(void)
> +{
> +	efi_system_table_64_t *systab;
> +	struct efi_setup_data *esd;
> +	struct efi_info *ei;
> +	char *sig;
> +
> +	esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
> +	if (!esd)
> +		return 0;
> +
> +	if (!esd->tables) {
> +		debug_putstr("Wrong kexec SETUP_EFI data.\n");
> +		return 0;
> +	}
> +
> +	ei = &boot_params->efi_info;
> +	sig = (char *)&ei->efi_loader_signature;
> +	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
> +		debug_putstr("Wrong kexec EFI loader signature.\n");
> +		return 0;
> +	}
> +
> +	/* Get systab from boot params. */
> +	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
> +	if (!systab)
> +		error("EFI system table not found in kexec boot_params.");
> +
> +	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
> +}
> +#else
> +static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
> +#endif /* CONFIG_X86_64 */
> +
> +static acpi_physical_address efi_get_rsdp_addr(void)
> +{
> +#ifdef CONFIG_EFI
> +	unsigned long systab, config_tables;
>  	unsigned int nr_tables;
>  	struct efi_info *ei;
>  	bool efi_64;
> -	int size, i;
>  	char *sig;
>  
>  	ei = &boot_params->efi_info;
> @@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
>  
>  		config_tables	= stbl->tables;
>  		nr_tables	= stbl->nr_tables;
> -		size		= sizeof(efi_config_table_64_t);
>  	} else {
>  		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
>  
>  		config_tables	= stbl->tables;
>  		nr_tables	= stbl->nr_tables;
> -		size		= sizeof(efi_config_table_32_t);
>  	}
>  
>  	if (!config_tables)
>  		error("EFI config tables not found.");
>  
> -	/* Get EFI tables from systab. */
> -	for (i = 0; i < nr_tables; i++) {
> -		acpi_physical_address table;
> -		efi_guid_t guid;
> -
> -		config_tables += size;
> -
> -		if (efi_64) {
> -			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
> -
> -			guid  = tbl->guid;
> -			table = tbl->table;
> -
> -			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
> -				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
> -				return 0;
> -			}
> -		} else {
> -			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
> -
> -			guid  = tbl->guid;
> -			table = tbl->table;
> -		}
> -
> -		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
> -			rsdp_addr = table;
> -		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
> -			return table;
> -	}
> +	return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
> +#else
> +	return 0;
>  #endif
> -	return rsdp_addr;
>  }
>  
>  static u8 compute_checksum(u8 *buffer, u32 length)
> @@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
>  	if (!pa)
>  		pa = boot_params->acpi_rsdp_addr;
>  
> +	/*
> +	 * Try to get EFI data from setup_data. This can happen when we're a
> +	 * kexec'ed kernel and kexec(1) has passed all the required EFI info to
> +	 * us.
> +	 */
> +	if (!pa)
> +		pa = kexec_get_rsdp_addr();
> +
>  	if (!pa)
>  		pa = efi_get_rsdp_addr();
>  
> -- 
> 2.21.0
> 
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-16 11:41                                       ` Dave Young
@ 2019-04-16 13:22                                         ` Borislav Petkov
  2019-04-17  1:38                                           ` Dave Young
  0 siblings, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-16 13:22 UTC (permalink / raw)
  To: Dave Young
  Cc: Junichi Nomura, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On Tue, Apr 16, 2019 at 07:41:33PM +0800, Dave Young wrote:
> On 04/16/19 at 11:52am, Borislav Petkov wrote:
> > I'll queue the below in the next days if there are no more complaints:
> 
> As for the kexec breakage, even with the V3 patch, kexec still hangs on
> a Lenovo T420 laptop.  Kairui also reproduced the problem. So can we
> > wait a few days to see if we can make some progress to find the cause?

How is applying this patch going to change anything?

I was told that the breakage is there even without it...

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-16  9:40                                   ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Borislav Petkov
  2019-04-16  9:52                                     ` [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Borislav Petkov
@ 2019-04-16 22:44                                     ` Junichi Nomura
  2019-04-17  7:02                                       ` Dave Young
  1 sibling, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-16 22:44 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/16/19 6:40 PM, Borislav Petkov wrote:
> On Mon, Apr 15, 2019 at 11:00:25PM +0000, Junichi Nomura wrote:
>> I thought we should hang here instead of return so that we
>> don't run into efi_get_rsdp_addr() in case of kexec.
> 
> Hanging that early without debug output is not very friendly to
> debuggers, methinks.

Right. But if we could move get_rsdp_addr() after console_init(),
you get debug output.

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* kexec crash on OVMF i386 + x86_64 kernel (Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel)
  2019-04-16  9:45                                     ` Borislav Petkov
@ 2019-04-16 23:09                                       ` Junichi Nomura
  2019-04-17  5:14                                         ` Dave Young
  0 siblings, 1 reply; 48+ messages in thread
From: Junichi Nomura @ 2019-04-16 23:09 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On 4/16/19 6:45 PM, Borislav Petkov wrote:
> On Mon, Apr 15, 2019 at 11:14:34PM +0000, Junichi Nomura wrote:
>> I see kexec is only supported on 64bit kernel. But are we sure
>> we don't need to support kexec on EFI32 + 64bit kernel?
>>
>> I don't have such an environment and as far as I tried with OVMF i386
>> and KVM guest, that combination doesn't work reliably even with v5.0.
> 
> What does that mean exactly?
> 
> If it can be fixed, we can try to.

When I do kexec on OVMF i386 + x86_64 kernel, 1st kexec seems to work.
But 2nd kexec (i.e. kexec from kexec-booted system) causes kernel
crash during boot like this:

[   69.907176] kexec_core: Starting new kernel
early console in extract_kernel
input_data: 0x000000003e7a73b1
input_len: 0x00000000004464c8
output: 0x000000003d600000
output_len: 0x00000000015c7248
kernel_total_size: 0x000000000142c000
trampoline_32bit: 0x000000000009d000
booted via startup_64()
Physical KASLR using RDRAND RDTSC...
Virtual KASLR using RDRAND RDTSC...

Decompressing Linux... Parsing ELF... Performing relocations... done.
Booting the kernel.
[    0.000000] Linux version 5.0.0-dirty (root@vm76) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-28) (GCC)) #2 SMP Mon Apr 8 04:42:45 EDT 2019
[    0.000000] Command line: root=UUID=6bea2b7b-e6cc-4dba-ac79-be6530d348f5 ro console=tty0 console=ttyS0,115200n8 no_timer_check net.ifnames=0 crashkernel=auto LANG=en_US.UTF-8 earlyprintk=serial,ttyS0,115200 kexec kexec
[    0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[    0.000000] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[    0.000000] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, using 'standard' format.
[    0.000000] BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000100-0x000000000009ffff] usable
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000003ed74fff] usable
[    0.000000] BIOS-e820: [mem 0x000000003ed75000-0x000000003ee86fff] reserved
[    0.000000] BIOS-e820: [mem 0x000000003ee87000-0x000000003ff06fff] usable
[    0.000000] BIOS-e820: [mem 0x000000003ff07000-0x000000003ff5efff] reserved
[    0.000000] BIOS-e820: [mem 0x000000003ff5f000-0x000000003ff66fff] ACPI data
[    0.000000] BIOS-e820: [mem 0x000000003ff67000-0x000000003ff6afff] ACPI NVS
[    0.000000] BIOS-e820: [mem 0x000000003ff6b000-0x000000003ffcffff] usable
[    0.000000] BIOS-e820: [mem 0x000000003ffd0000-0x000000003ffeffff] reserved
[    0.000000] BIOS-e820: [mem 0x000000003fff0000-0x000000003fffffff] usable
[    0.000000] BIOS-e820: [mem 0x00000000ffe00000-0x00000000ffffffff] reserved
[    0.000000] printk: bootconsole [earlyser0] enabled
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] DMI not present or invalid.
[    0.000000] Hypervisor detected: KVM
[    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[    0.000000] kvm-clock: cpu 0, msr 2238e001, primary cpu clock
[    0.000001] kvm-clock: using sched offset of 100318497884 cycles
[    0.001055] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
[    0.004086] tsc: Detected 2399.998 MHz processor
[    0.005147] last_pfn = 0x40000 max_arch_pfn = 0x400000000
[    0.006234] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT  
Memory KASLR using RDRAND RDTSC...
[    0.008079] x2apic: enabled by BIOS, switching to x2apic ops
[    0.020284] RAMDISK: [mem 0x3b8da000-0x3d5fffff]
[    0.021169] ACPI: Early table checksum verification disabled
[    0.022280] ACPI BIOS Error (bug): A valid RSDP was not found (20181213/tbxfroot-210)
[    0.023755] No NUMA configuration found
[    0.024461] Faking a node at [mem 0x0000000000000000-0x000000003fffffff]
[    0.025746] NODE_DATA(0) allocated [mem 0x3ffa6000-0x3ffcffff]
[    0.027098] crashkernel: memory value expected
[    0.027918] Zone ranges:
[    0.028384]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
[    0.029553]   DMA32    [mem 0x0000000001000000-0x000000003fffffff]
[    0.030688]   Normal   empty
[    0.031217]   Device   empty
[    0.031741] Movable zone start for each node
[    0.032525] Early memory node ranges
[    0.033212]   node   0: [mem 0x0000000000001000-0x000000000009ffff]
[    0.034377]   node   0: [mem 0x0000000000100000-0x000000003ed74fff]
[    0.035520]   node   0: [mem 0x000000003ee87000-0x000000003ff06fff]
[    0.036663]   node   0: [mem 0x000000003ff6b000-0x000000003ffcffff]
[    0.037840]   node   0: [mem 0x000000003fff0000-0x000000003fffffff]
[    0.039012] Zeroed struct page in unavailable ranges: 503 pages
[    0.039013] Initmem setup node 0 [mem 0x0000000000001000-0x000000003fffffff]
[    0.044319] BUG: unable to handle kernel paging request at ffffffffff5fd020
[    0.045637] #PF error: [normal kernel read fault]
[    0.046501] PGD 2200e067 P4D 2200e067 PUD 22010067 PMD 22011067 PTE 0
[    0.047682] Oops: 0000 [#1] SMP
[    0.048258] CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-dirty #2
[    0.049419] RIP: 0010:native_apic_mem_read+0x3/0x10
[    0.050328] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
[    0.053749] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
[    0.054703] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
[    0.056009] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
[    0.057346] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
[    0.058667] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
[    0.059969] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
[    0.061313] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
[    0.062812] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.063865] CR2: ffffffffff5fd020 CR3: 000000002200d000 CR4: 00000000000406b0
[    0.065222] Call Trace:
[    0.065670]  ? read_apic_id+0x19/0x30
[    0.066347]  init_apic_mappings+0x7a/0x129
[    0.067096]  setup_arch+0xb67/0xc19
[    0.067729]  start_kernel+0x6b/0x4e3
[    0.068386]  x86_64_start_reservations+0x24/0x26
[    0.069230]  x86_64_start_kernel+0x6f/0x72
[    0.069974]  secondary_startup_64+0xa4/0xb0
[    0.070739] Modules linked in:
[    0.071297] CR2: ffffffffff5fd020
[    0.071901] random: get_random_bytes called from print_oops_end_marker+0x3f/0x60 with crng_init=0
[    0.073567] ---[ end trace 2cc66932e568af60 ]---
[    0.074427] RIP: 0010:native_apic_mem_read+0x3/0x10
[    0.075320] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
[    0.078755] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
[    0.079741] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
[    0.081050] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
[    0.082355] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
[    0.083687] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
[    0.084996] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
[    0.086296] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
[    0.087805] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.088855] CR2: ffffffffff5fd020 CR3: 000000002200d000 CR4: 00000000000406b0
[    0.090167] Kernel panic - not syncing: Fatal exception
[    0.091160] BUG: unable to handle kernel paging request at ffffffffff5fd030
[    0.092438] #PF error: [normal kernel read fault]
[    0.093301] PGD 2200e067 P4D 2200e067 PUD 22010067 PMD 22011067 PTE 0
[    0.094480] Oops: 0000 [#2] SMP
[    0.095094] CPU: 0 PID: 0 Comm: swapper Tainted: G      D           5.0.0-dirty #2
[    0.096478] RIP: 0010:native_apic_mem_read+0x3/0x10
[    0.097367] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
[    0.100833] RSP: 0000:ffffffff88003aa8 EFLAGS: 00010002
[    0.101792] RAX: ffffffff87248840 RBX: 0000000000000046 RCX: 0000000000000000
[    0.103130] RDX: 0000000000000080 RSI: 0000000000002000 RDI: 0000000000000030
[    0.104433] RBP: ffffffff88003ac0 R08: 0000000000000001 R09: 0000000000000080
[    0.105733] R10: ffffffff88160ca0 R11: ffffffff8818a428 R12: 0000000000000000
[    0.107070] R13: 0000000000000046 R14: ffffffff88013740 R15: 000000000000000b
[    0.108382] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
[    0.109872] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.110926] CR2: ffffffffff5fd030 CR3: 000000002200d000 CR4: 00000000000406b0
[    0.112265] Call Trace:
[    0.112712]  ? clear_local_APIC+0x37/0x2f0
[    0.113463]  disable_local_APIC+0x22/0x60
[    0.114200]  native_stop_other_cpus+0xc8/0x160
[    0.115048]  panic+0x11a/0x2a8
[    0.115606]  oops_end+0xc1/0xd0
[    0.116188]  no_context+0x1eb/0x550
[    0.116826]  __bad_area_nosemaphore.constprop.30+0x50/0x1d0
[    0.117852]  bad_area_nosemaphore+0x13/0x20
[    0.118618]  do_kern_addr_fault+0x5c/0x90
[    0.119387]  __do_page_fault+0x382/0x440
[    0.120109]  ? memmap_init_zone+0x8f/0x22d
[    0.120851]  do_page_fault+0x32/0x120
[    0.121521]  page_fault+0x1e/0x30
[    0.122128] RIP: 0010:native_apic_mem_read+0x3/0x10
[    0.123045] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
[    0.126481] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
[    0.127502] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
[    0.128807] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
[    0.130161] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
[    0.131464] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
[    0.132771] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
[    0.134081]  ? native_apic_mem_write+0x10/0x10
[    0.134892]  ? read_apic_id+0x19/0x30
[    0.135564]  init_apic_mappings+0x7a/0x129
[    0.136316]  setup_arch+0xb67/0xc19
[    0.136954]  start_kernel+0x6b/0x4e3
[    0.137656]  x86_64_start_reservations+0x24/0x26
[    0.138576]  x86_64_start_kernel+0x6f/0x72
[    0.139329]  secondary_startup_64+0xa4/0xb0
[    0.140096] Modules linked in:
[    0.140653] CR2: ffffffffff5fd030
[    0.141259] ---[ end trace 2cc66932e568af61 ]---
[    0.142102] RIP: 0010:native_apic_mem_read+0x3/0x10
[    0.142992] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
[    0.146520] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
[    0.147533] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
[    0.148845] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
[    0.150191] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
[    0.151492] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
[    0.152796] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
[    0.154103] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
[    0.155579] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.156625] CR2: ffffffffff5fd030 CR3: 000000002200d000 CR4: 00000000000406b0
[    0.157967] Kernel panic - not syncing: Fatal exception
<repeating panic>


Libvirt configuration of the VM looks like this:

  <os>
    <type arch='x86_64' machine='pc'>hvm</type>
    <loader readonly='yes' type='pflash'>/usr/share/edk2.git/ovmf-ia32/OVMF_CODE-pure-efi.fd</loader>
    <nvram template='/usr/share/edk2.git/ovmf-ia32/OVMF_VARS-pure-efi.fd'>/var/lib/libvirt/qemu/nvram/vm76_VARS-32.fd</nvram>
    <kernel>/var/lib/libvirt/boot/vmlinuz-5.0.0-dirty</kernel>
    <initrd>/var/lib/libvirt/boot/initramfs-5.0.0-dirty.img</initrd>
    <cmdline>root=UUID=6bea2b7b-e6cc-4dba-ac79-be6530d348f5 ro console=tty0 console=ttyS0,115200n8 no_timer_check net.ifnames=0 crashkernel=auto LANG=en_US.UTF-8 earlyprintk=serial,ttyS0,115200</cmdline>
    <boot dev='hd'/>
  </os>

-- 
Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-16 13:22                                         ` Borislav Petkov
@ 2019-04-17  1:38                                           ` Dave Young
  2019-04-17  4:57                                             ` Dave Young
  2019-04-17  8:22                                             ` Borislav Petkov
  0 siblings, 2 replies; 48+ messages in thread
From: Dave Young @ 2019-04-17  1:38 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Junichi Nomura, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On 04/16/19 at 03:22pm, Borislav Petkov wrote:
> On Tue, Apr 16, 2019 at 07:41:33PM +0800, Dave Young wrote:
> > On 04/16/19 at 11:52am, Borislav Petkov wrote:
> > > I'll queue the below in the next days if there are no more complaints:
> > 
> > As for the kexec breakage, even with the V3 patch, kexec still hangs on
> > a Lenovo T420 laptop.  Kairui also reproduced the problem. So can we
> > wait a few days see if we can make some progress to find the cause?
> 
> How is applying this patch going to change anything?
> 
> I was told that the breakage is there even without it...

Without this patch, the bug happens in the efi_get_rsdp.. function. This
patch tries to fix that by adding kexec_get.., but the newly introduced
kexec_* function does not work on some laptops, so it is not a 100% good
fix. I hoped we could get it working for all known issues.  But if we
cannot do that, e.g. within one week, we can go with this version and
leave the laptop issue as a known issue.

But if you want to apply it now, I think that is fine as well. The laptop
issue is probably Lenovo firmware specific; we can pass the RSDP on the
cmdline or in boot params via kexec-tools, so it should work with a new
kexec-tools.

Thanks
Dave

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-17  1:38                                           ` Dave Young
@ 2019-04-17  4:57                                             ` Dave Young
  2019-04-17  6:00                                               ` Kairui Song
  2019-04-17  8:22                                             ` Borislav Petkov
  1 sibling, 1 reply; 48+ messages in thread
From: Dave Young @ 2019-04-17  4:57 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Junichi Nomura, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On 04/17/19 at 09:38am, Dave Young wrote:
> On 04/16/19 at 03:22pm, Borislav Petkov wrote:
> > On Tue, Apr 16, 2019 at 07:41:33PM +0800, Dave Young wrote:
> > > On 04/16/19 at 11:52am, Borislav Petkov wrote:
> > > > I'll queue the below in the next days if there are no more complaints:
> > > 
> > > As for the kexec breakage, even with the V3 patch, kexec still hangs on
> > > a Lenovo T420 laptop.  Kairui also reproduced the problem. So can we
> > > wait a few days see if we can make some progress to find the cause?
> > 
> > How is applying this patch going to change anything?
> > 
> > I was told that the breakage is there even without it...
> 
> Without this patch, the bug happens in the efi_get_rsdp.. function, this
> patch tries to fix that by adding kexec_get.. but the new introduced
> kexec_* function does not work on some laptops, so it is not a 100% good
> fix, I hoped we can get it working for all known issues.  But if we can
> not do it eg. within one week we can go with this version and leave the
> laptop issue as a known issue.
> 

Latest debugging status:

Kexec boot works when some code is commented out as below, so the guid
cmp (memcmp) caused a system reset; still need to find out why:

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index d9f9abd63c68..13e7a23ae94c 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -95,10 +95,12 @@ __efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
 			table = tbl->table;
 		}
 
+/*
 		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
 			rsdp_addr = table;
 		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
 			return table;
+*/
 	}
 
 	return rsdp_addr;
@@ -291,9 +293,10 @@ acpi_physical_address get_rsdp_addr(void)
 	if (!pa)
 		pa = kexec_get_rsdp_addr();
 
+/*
 	if (!pa)
 		pa = efi_get_rsdp_addr();
-
+*/
 	if (!pa)
 		pa = bios_get_rsdp_addr();
 

Thanks
Dave

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: kexec crash on OVMF i386 + x86_64 kernel (Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel)
  2019-04-16 23:09                                       ` kexec crash on OVMF i386 + x86_64 kernel (Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel) Junichi Nomura
@ 2019-04-17  5:14                                         ` Dave Young
  2019-04-17 17:57                                           ` Prakhya, Sai Praneeth
  0 siblings, 1 reply; 48+ messages in thread
From: Dave Young @ 2019-04-17  5:14 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Borislav Petkov, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel, linux-efi, Ard Biesheuvel, Sai Praneeth Prakhya

Added efi people.

I remember previously Sai did some efi32 tests for kexec, but I'm not
sure if he tested EFI32 + 64bit kernel.

Kexec status is not certain because I'm not sure anyone has tested and
reported issues for that.

On 04/16/19 at 11:09pm, Junichi Nomura wrote:
> On 4/16/19 6:45 PM, Borislav Petkov wrote:
> > On Mon, Apr 15, 2019 at 11:14:34PM +0000, Junichi Nomura wrote:
> >> I see kexec is only supported on 64bit kernel. But are we sure
> >> we don't need to support kexec on EFI32 + 64bit kernel?
> >>
> >> I don't have such an environment and as far as I tried with OVMF i386
> >> and KVM guest, that combination doesn't work reliably even with v5.0.
> > 
> > What does that mean exactly?
> > 
> > If it can be fixed, we can try to.
> 
> When I do kexec on OVMF i386 + x86_64 kernel, 1st kexec seems to work.
> But 2nd kexec (i.e. kexec from kexec-booted system) causes kernel
> crash during boot like this:
> 
> [   69.907176] kexec_core: Starting new kernel
> early console in extract_kernel
> input_data: 0x000000003e7a73b1
> input_len: 0x00000000004464c8
> output: 0x000000003d600000
> output_len: 0x00000000015c7248
> kernel_total_size: 0x000000000142c000
> trampoline_32bit: 0x000000000009d000
> booted via startup_64()
> Physical KASLR using RDRAND RDTSC...
> Virtual KASLR using RDRAND RDTSC...
> 
> Decompressing Linux... Parsing ELF... Performing relocations... done.
> Booting the kernel.
> [    0.000000] Linux version 5.0.0-dirty (root@vm76) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-28) (GCC)) #2 SMP Mon Apr 8 04:42:45 EDT 2019
> [    0.000000] Command line: root=UUID=6bea2b7b-e6cc-4dba-ac79-be6530d348f5 ro console=tty0 console=ttyS0,115200n8 no_timer_check net.ifnames=0 crashkernel=auto LANG=en_US.UTF-8 earlyprintk=serial,ttyS0,115200 kexec kexec
> [    0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
> [    0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
> [    0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
> [    0.000000] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
> [    0.000000] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, using 'standard' format.
> [    0.000000] BIOS-provided physical RAM map:
> [    0.000000] BIOS-e820: [mem 0x0000000000000100-0x000000000009ffff] usable
> [    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000003ed74fff] usable
> [    0.000000] BIOS-e820: [mem 0x000000003ed75000-0x000000003ee86fff] reserved
> [    0.000000] BIOS-e820: [mem 0x000000003ee87000-0x000000003ff06fff] usable
> [    0.000000] BIOS-e820: [mem 0x000000003ff07000-0x000000003ff5efff] reserved
> [    0.000000] BIOS-e820: [mem 0x000000003ff5f000-0x000000003ff66fff] ACPI data
> [    0.000000] BIOS-e820: [mem 0x000000003ff67000-0x000000003ff6afff] ACPI NVS
> [    0.000000] BIOS-e820: [mem 0x000000003ff6b000-0x000000003ffcffff] usable
> [    0.000000] BIOS-e820: [mem 0x000000003ffd0000-0x000000003ffeffff] reserved
> [    0.000000] BIOS-e820: [mem 0x000000003fff0000-0x000000003fffffff] usable
> [    0.000000] BIOS-e820: [mem 0x00000000ffe00000-0x00000000ffffffff] reserved
> [    0.000000] printk: bootconsole [earlyser0] enabled
> [    0.000000] NX (Execute Disable) protection: active
> [    0.000000] DMI not present or invalid.
> [    0.000000] Hypervisor detected: KVM
> [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
> [    0.000000] kvm-clock: cpu 0, msr 2238e001, primary cpu clock
> [    0.000001] kvm-clock: using sched offset of 100318497884 cycles
> [    0.001055] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
> [    0.004086] tsc: Detected 2399.998 MHz processor
> [    0.005147] last_pfn = 0x40000 max_arch_pfn = 0x400000000
> [    0.006234] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT  
> Memory KASLR using RDRAND RDTSC...
> [    0.008079] x2apic: enabled by BIOS, switching to x2apic ops
> [    0.020284] RAMDISK: [mem 0x3b8da000-0x3d5fffff]
> [    0.021169] ACPI: Early table checksum verification disabled
> [    0.022280] ACPI BIOS Error (bug): A valid RSDP was not found (20181213/tbxfroot-210)
> [    0.023755] No NUMA configuration found
> [    0.024461] Faking a node at [mem 0x0000000000000000-0x000000003fffffff]
> [    0.025746] NODE_DATA(0) allocated [mem 0x3ffa6000-0x3ffcffff]
> [    0.027098] crashkernel: memory value expected
> [    0.027918] Zone ranges:
> [    0.028384]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
> [    0.029553]   DMA32    [mem 0x0000000001000000-0x000000003fffffff]
> [    0.030688]   Normal   empty
> [    0.031217]   Device   empty
> [    0.031741] Movable zone start for each node
> [    0.032525] Early memory node ranges
> [    0.033212]   node   0: [mem 0x0000000000001000-0x000000000009ffff]
> [    0.034377]   node   0: [mem 0x0000000000100000-0x000000003ed74fff]
> [    0.035520]   node   0: [mem 0x000000003ee87000-0x000000003ff06fff]
> [    0.036663]   node   0: [mem 0x000000003ff6b000-0x000000003ffcffff]
> [    0.037840]   node   0: [mem 0x000000003fff0000-0x000000003fffffff]
> [    0.039012] Zeroed struct page in unavailable ranges: 503 pages
> [    0.039013] Initmem setup node 0 [mem 0x0000000000001000-0x000000003fffffff]
> [    0.044319] BUG: unable to handle kernel paging request at ffffffffff5fd020
> [    0.045637] #PF error: [normal kernel read fault]
> [    0.046501] PGD 2200e067 P4D 2200e067 PUD 22010067 PMD 22011067 PTE 0
> [    0.047682] Oops: 0000 [#1] SMP
> [    0.048258] CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-dirty #2
> [    0.049419] RIP: 0010:native_apic_mem_read+0x3/0x10
> [    0.050328] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
> [    0.053749] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
> [    0.054703] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
> [    0.056009] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
> [    0.057346] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
> [    0.058667] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
> [    0.059969] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
> [    0.061313] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
> [    0.062812] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    0.063865] CR2: ffffffffff5fd020 CR3: 000000002200d000 CR4: 00000000000406b0
> [    0.065222] Call Trace:
> [    0.065670]  ? read_apic_id+0x19/0x30
> [    0.066347]  init_apic_mappings+0x7a/0x129
> [    0.067096]  setup_arch+0xb67/0xc19
> [    0.067729]  start_kernel+0x6b/0x4e3
> [    0.068386]  x86_64_start_reservations+0x24/0x26
> [    0.069230]  x86_64_start_kernel+0x6f/0x72
> [    0.069974]  secondary_startup_64+0xa4/0xb0
> [    0.070739] Modules linked in:
> [    0.071297] CR2: ffffffffff5fd020
> [    0.071901] random: get_random_bytes called from print_oops_end_marker+0x3f/0x60 with crng_init=0
> [    0.073567] ---[ end trace 2cc66932e568af60 ]---
> [    0.074427] RIP: 0010:native_apic_mem_read+0x3/0x10
> [    0.075320] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
> [    0.078755] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
> [    0.079741] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
> [    0.081050] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
> [    0.082355] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
> [    0.083687] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
> [    0.084996] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
> [    0.086296] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
> [    0.087805] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    0.088855] CR2: ffffffffff5fd020 CR3: 000000002200d000 CR4: 00000000000406b0
> [    0.090167] Kernel panic - not syncing: Fatal exception
> [    0.091160] BUG: unable to handle kernel paging request at ffffffffff5fd030
> [    0.092438] #PF error: [normal kernel read fault]
> [    0.093301] PGD 2200e067 P4D 2200e067 PUD 22010067 PMD 22011067 PTE 0
> [    0.094480] Oops: 0000 [#2] SMP
> [    0.095094] CPU: 0 PID: 0 Comm: swapper Tainted: G      D           5.0.0-dirty #2
> [    0.096478] RIP: 0010:native_apic_mem_read+0x3/0x10
> [    0.097367] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
> [    0.100833] RSP: 0000:ffffffff88003aa8 EFLAGS: 00010002
> [    0.101792] RAX: ffffffff87248840 RBX: 0000000000000046 RCX: 0000000000000000
> [    0.103130] RDX: 0000000000000080 RSI: 0000000000002000 RDI: 0000000000000030
> [    0.104433] RBP: ffffffff88003ac0 R08: 0000000000000001 R09: 0000000000000080
> [    0.105733] R10: ffffffff88160ca0 R11: ffffffff8818a428 R12: 0000000000000000
> [    0.107070] R13: 0000000000000046 R14: ffffffff88013740 R15: 000000000000000b
> [    0.108382] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
> [    0.109872] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    0.110926] CR2: ffffffffff5fd030 CR3: 000000002200d000 CR4: 00000000000406b0
> [    0.112265] Call Trace:
> [    0.112712]  ? clear_local_APIC+0x37/0x2f0
> [    0.113463]  disable_local_APIC+0x22/0x60
> [    0.114200]  native_stop_other_cpus+0xc8/0x160
> [    0.115048]  panic+0x11a/0x2a8
> [    0.115606]  oops_end+0xc1/0xd0
> [    0.116188]  no_context+0x1eb/0x550
> [    0.116826]  __bad_area_nosemaphore.constprop.30+0x50/0x1d0
> [    0.117852]  bad_area_nosemaphore+0x13/0x20
> [    0.118618]  do_kern_addr_fault+0x5c/0x90
> [    0.119387]  __do_page_fault+0x382/0x440
> [    0.120109]  ? memmap_init_zone+0x8f/0x22d
> [    0.120851]  do_page_fault+0x32/0x120
> [    0.121521]  page_fault+0x1e/0x30
> [    0.122128] RIP: 0010:native_apic_mem_read+0x3/0x10
> [    0.123045] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
> [    0.126481] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
> [    0.127502] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
> [    0.128807] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
> [    0.130161] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
> [    0.131464] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
> [    0.132771] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
> [    0.134081]  ? native_apic_mem_write+0x10/0x10
> [    0.134892]  ? read_apic_id+0x19/0x30
> [    0.135564]  init_apic_mappings+0x7a/0x129
> [    0.136316]  setup_arch+0xb67/0xc19
> [    0.136954]  start_kernel+0x6b/0x4e3
> [    0.137656]  x86_64_start_reservations+0x24/0x26
> [    0.138576]  x86_64_start_kernel+0x6f/0x72
> [    0.139329]  secondary_startup_64+0xa4/0xb0
> [    0.140096] Modules linked in:
> [    0.140653] CR2: ffffffffff5fd030
> [    0.141259] ---[ end trace 2cc66932e568af61 ]---
> [    0.142102] RIP: 0010:native_apic_mem_read+0x3/0x10
> [    0.142992] Code: 00 00 e8 20 3a 2b 00 48 89 d8 5b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 ff 48 89 e5 89 b7 00 d0 5f ff 5d c3 66 90 55 89 ff <8b> 87 00 d0 5f ff 48 89 e5 5d c3 66 90 e8 7b 8a 5b 00 55 b8 01 00
> [    0.146520] RSP: 0000:ffffffff88003e38 EFLAGS: 00010002
> [    0.147533] RAX: ffffffff87248840 RBX: 000000003fe09000 RCX: 0000000000000000
> [    0.148845] RDX: ffffffff88003e30 RSI: 000000000000f800 RDI: 0000000000000020
> [    0.150191] RBP: ffffffff88003e48 R08: 0000000000000000 R09: 0000000000000000
> [    0.151492] R10: 00000000000000ff R11: 0000000000000000 R12: 0000000001d254d6
> [    0.152796] R13: 000000003d600000 R14: 0000000000000000 R15: 0000000000000000
> [    0.154103] FS:  0000000000000000(0000) GS:ffffffff88173000(0000) knlGS:0000000000000000
> [    0.155579] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    0.156625] CR2: ffffffffff5fd030 CR3: 000000002200d000 CR4: 00000000000406b0
> [    0.157967] Kernel panic - not syncing: Fatal exception
> <repeating panic>
> 
> 
> Libvirt configuration of the VM looks like this:
> 
>   <os>
>     <type arch='x86_64' machine='pc'>hvm</type>
>     <loader readonly='yes' type='pflash'>/usr/share/edk2.git/ovmf-ia32/OVMF_CODE-pure-efi.fd</loader>
>     <nvram template='/usr/share/edk2.git/ovmf-ia32/OVMF_VARS-pure-efi.fd'>/var/lib/libvirt/qemu/nvram/vm76_VARS-32.fd</nvram>
>     <kernel>/var/lib/libvirt/boot/vmlinuz-5.0.0-dirty</kernel>
>     <initrd>/var/lib/libvirt/boot/initramfs-5.0.0-dirty.img</initrd>
>     <cmdline>root=UUID=6bea2b7b-e6cc-4dba-ac79-be6530d348f5 ro console=tty0 console=ttyS0,115200n8 no_timer_check net.ifnames=0 crashkernel=auto LANG=en_US.UTF-8 earlyprintk=serial,ttyS0,115200</cmdline>
>     <boot dev='hd'/>
>   </os>
> 
> -- 
> Jun'ichi Nomura, NEC Corporation / NEC Solution Innovators, Ltd.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-17  4:57                                             ` Dave Young
@ 2019-04-17  6:00                                               ` Kairui Song
  2019-04-17  7:08                                                 ` Dave Young
  0 siblings, 1 reply; 48+ messages in thread
From: Kairui Song @ 2019-04-17  6:00 UTC (permalink / raw)
  To: Dave Young
  Cc: Borislav Petkov, Junichi Nomura, Chao Fan, Baoquan He, x86,
	kexec, linux-kernel

On Wed, Apr 17, 2019 at 12:57 PM Dave Young <dyoung@redhat.com> wrote:
>
> On 04/17/19 at 09:38am, Dave Young wrote:
> > On 04/16/19 at 03:22pm, Borislav Petkov wrote:
> > > On Tue, Apr 16, 2019 at 07:41:33PM +0800, Dave Young wrote:
> > > > On 04/16/19 at 11:52am, Borislav Petkov wrote:
> > > > > I'll queue the below in the next days if there are no more complaints:
> > > >
> > > > As for the kexec breakage, even with the V3 patch, kexec still hangs on
> > > > a Lenovo T420 laptop.  Kairui also reproduced the problem. So can we
> > > > wait a few days see if we can make some progress to find the cause?
> > >
> > > How is applying this patch going to change anything?
> > >
> > > I was told that the breakage is there even without it...
> >
> > Without this patch, the bug happens in the efi_get_rsdp.. function, this
> > patch tries to fix that by adding kexec_get.. but the new introduced
> > kexec_* function does not work on some laptops, so it is not a 100% good
> > fix, I hoped we can get it working for all known issues.  But if we can
> > not do it eg. within one week we can go with this version and leave the
> > laptop issue as a known issue.
> >
>
> Latest debugging status:
>
> Kexec boot works with commenting out some code like below, so the guid
> cmp (memcmp) caused a system reset), still need to find out why:
>
> diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
> index d9f9abd63c68..13e7a23ae94c 100644
> --- a/arch/x86/boot/compressed/acpi.c
> +++ b/arch/x86/boot/compressed/acpi.c
> @@ -95,10 +95,12 @@ __efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
>                         table = tbl->table;
>                 }
>
> +/*
>                 if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
>                         rsdp_addr = table;
>                 else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
>                         return table;
> +*/
>         }
>
>         return rsdp_addr;
> @@ -291,9 +293,10 @@ acpi_physical_address get_rsdp_addr(void)
>         if (!pa)
>                 pa = kexec_get_rsdp_addr();
>
> +/*
>         if (!pa)
>                 pa = efi_get_rsdp_addr();
> -
> +*/
>         if (!pa)
>                 pa = bios_get_rsdp_addr();
>
>

Hi Dave, for this case I think it's just because GCC finds that the
loop does nothing, optimizes out the whole loop in
__efi_get_rsdp_addr, and no longer reads the actual nr_tables value.

I can fix the boot error on T420 with your patch, but if I add
anything, like a hardcoded assignment of the right value to
acpi_rsdp in the loop, it will reset the machine. But setting acpi_rsdp
to the right initial value outside the loop works fine.
If the loop condition were false, there should be no difference
between just commenting out the lines you mentioned and adding an assignment.
Otherwise it just assigns the value multiple times, which is not very
reasonable but shouldn't fail.

And the generated ASM code I inspected suggests the same thing.
So accessing the systab memory is still the cause of the system reset on
certain machines.

-- 
Best Regards,
Kairui Song

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-16 22:44                                     ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
@ 2019-04-17  7:02                                       ` Dave Young
  2019-04-17  8:54                                         ` Borislav Petkov
  0 siblings, 1 reply; 48+ messages in thread
From: Dave Young @ 2019-04-17  7:02 UTC (permalink / raw)
  To: Junichi Nomura
  Cc: Borislav Petkov, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On 04/16/19 at 10:44pm, Junichi Nomura wrote:
> On 4/16/19 6:40 PM, Borislav Petkov wrote:
> > On Mon, Apr 15, 2019 at 11:00:25PM +0000, Junichi Nomura wrote:
> >> I thought we should hang here instead of return so that we
> >> don't run into efi_get_rsdp_addr() in case of kexec.
> > 
> > Hanging that early without debug output is not very friendly to
> > debuggers, methinks.
> 
> Right. But if we could move get_rsdp_addr() after console_init(),
> you get debug output.
> 

Moving it after console_init looks better. I finally set up IME as a serial
console, so I can see debug messages even for kexec reboot now.

How about move it after console_init, and at the same time skip
efi_get_rsdp_addr in case kexec?

Thanks
Dave

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-17  6:00                                               ` Kairui Song
@ 2019-04-17  7:08                                                 ` Dave Young
  0 siblings, 0 replies; 48+ messages in thread
From: Dave Young @ 2019-04-17  7:08 UTC (permalink / raw)
  To: Kairui Song
  Cc: Borislav Petkov, Junichi Nomura, Chao Fan, Baoquan He, x86,
	kexec, linux-kernel

On 04/17/19 at 02:00pm, Kairui Song wrote:
> On Wed, Apr 17, 2019 at 12:57 PM Dave Young <dyoung@redhat.com> wrote:
> >
> > On 04/17/19 at 09:38am, Dave Young wrote:
> > > On 04/16/19 at 03:22pm, Borislav Petkov wrote:
> > > > On Tue, Apr 16, 2019 at 07:41:33PM +0800, Dave Young wrote:
> > > > > On 04/16/19 at 11:52am, Borislav Petkov wrote:
> > > > > > I'll queue the below in the next days if there are no more complaints:
> > > > >
> > > > > As for the kexec breakage, even with the V3 patch, kexec still hangs on
> > > > > a Lenovo T420 laptop.  Kairui also reproduced the problem. So can we
> > > > > wait a few days see if we can make some progress to find the cause?
> > > >
> > > > How is applying this patch going to change anything?
> > > >
> > > > I was told that the breakage is there even without it...
> > >
> > > Without this patch, the bug happens in the efi_get_rsdp.. function, this
> > > patch tries to fix that by adding kexec_get.. but the new introduced
> > > kexec_* function does not work on some laptops, so it is not a 100% good
> > > fix, I hoped we can get it working for all known issues.  But if we can
> > > not do it eg. within one week we can go with this version and leave the
> > > laptop issue as a known issue.
> > >
> >
> > Latest debugging status:
> >
> > Kexec boot works with commenting out some code like below, so the guid
> > cmp (memcmp) caused a system reset), still need to find out why:
> >
> > diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
> > index d9f9abd63c68..13e7a23ae94c 100644
> > --- a/arch/x86/boot/compressed/acpi.c
> > +++ b/arch/x86/boot/compressed/acpi.c
> > @@ -95,10 +95,12 @@ __efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
> >                         table = tbl->table;
> >                 }
> >
> > +/*
> >                 if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
> >                         rsdp_addr = table;
> >                 else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
> >                         return table;
> > +*/
> >         }
> >
> >         return rsdp_addr;
> > @@ -291,9 +293,10 @@ acpi_physical_address get_rsdp_addr(void)
> >         if (!pa)
> >                 pa = kexec_get_rsdp_addr();
> >
> > +/*
> >         if (!pa)
> >                 pa = efi_get_rsdp_addr();
> > -
> > +*/
> >         if (!pa)
> >                 pa = bios_get_rsdp_addr();
> >
> >
> 
> Hi Dave, for this case I think it's just because GCC will found the
> loop does nothing, and optimize out the whole loop in
> __efi_get_rsdp_addr and will no longer read the actual nr_table value.
> 
> I can fix the boot error on T420 with your patch, but if I add
> anything, like a hardcode value assignment with the right value for
> acpi_rsdp in the loop, it will reset the machine. But set acpi_rsdp
> with a right initial value out side the loop works fine.
> If the loop condition is false, then there should be no difference
> between just comment out the line you mentioned and add an assignment.
> Else it just assign the value multiple times, not very reasonable but
> shouldn't fail.
> 
> And, I inspected the generated ASM code also suggest the same thing.
> So still, access the systab memory is the cause of the system reset on
> certain machines.

Makes sense, my previous debugging also pointed to some systab access.
Probably some early page table mess-up.

Thanks
Dave

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-17  1:38                                           ` Dave Young
  2019-04-17  4:57                                             ` Dave Young
@ 2019-04-17  8:22                                             ` Borislav Petkov
  2019-04-18  1:24                                               ` Dave Young
  1 sibling, 1 reply; 48+ messages in thread
From: Borislav Petkov @ 2019-04-17  8:22 UTC (permalink / raw)
  To: Dave Young
  Cc: Junichi Nomura, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On Wed, Apr 17, 2019 at 09:38:38AM +0800, Dave Young wrote:
> But if you want to apply it now, I think it is fine as well, probably
> the laptop issue is lenovo firmware specific, we can add rsdp in cmdline
> or boot params via kexec-tools so it should be good by using new kexec-tools.

I'm not in a hurry to apply it - if you guys wanna debug this further,
sure, we can wait and then tag all the fixes for stable so that they can
get backported.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-17  7:02                                       ` Dave Young
@ 2019-04-17  8:54                                         ` Borislav Petkov
  2019-04-17  9:02                                           ` Borislav Petkov
  2019-04-17 10:31                                           ` Chao Fan
  0 siblings, 2 replies; 48+ messages in thread
From: Borislav Petkov @ 2019-04-17  8:54 UTC (permalink / raw)
  To: Dave Young, Junichi Nomura
  Cc: Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Wed, Apr 17, 2019 at 03:02:50PM +0800, Dave Young wrote:
> How about move it after console_init,

Sounds ok to me. That's still before KASLR gets setup and should work
for Chao's movable regions too.

> and at the same time skip efi_get_rsdp_addr in case kexec?

If kexec_get_rsdp_addr() gets a correct address, efi_get_rsdp_addr()
will be skipped.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-17  8:54                                         ` Borislav Petkov
@ 2019-04-17  9:02                                           ` Borislav Petkov
  2019-04-17 10:31                                           ` Chao Fan
  1 sibling, 0 replies; 48+ messages in thread
From: Borislav Petkov @ 2019-04-17  9:02 UTC (permalink / raw)
  To: Dave Young, Junichi Nomura
  Cc: Chao Fan, Baoquan He, Kairui Song, x86, kexec, linux-kernel

On Wed, Apr 17, 2019 at 10:54:34AM +0200, Borislav Petkov wrote:
> On Wed, Apr 17, 2019 at 03:02:50PM +0800, Dave Young wrote:
> > How about move it after console_init,
> 
> Sounds ok to me. That's still before KASLR gets setup and should work
> for Chao's movable regions too.

---
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..24e65a0f756d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	/* Clear flags intended for solely in-kernel use. */
 	boot_params->hdr.loadflags &= ~KASLR_FLAG;
 
-	/* Save RSDP address for later use. */
-	boot_params->acpi_rsdp_addr = get_rsdp_addr();
-
 	sanitize_boot_params(boot_params);
 
 	if (boot_params->screen_info.orig_video_mode == 7) {
@@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	cols = boot_params->screen_info.orig_video_cols;
 
 	console_init();
+
+	/*
+	 * Save RSDP address for later use. Have this after console_init()
+	 * so that early debugging output from the RSDP parsing code can be
+	 * collected.
+	 */
+	boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
 	debug_putstr("early console in extract_kernel\n");
 
 	free_mem_ptr     = heap;	/* Heap */

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel
  2019-04-17  8:54                                         ` Borislav Petkov
  2019-04-17  9:02                                           ` Borislav Petkov
@ 2019-04-17 10:31                                           ` Chao Fan
  1 sibling, 0 replies; 48+ messages in thread
From: Chao Fan @ 2019-04-17 10:31 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dave Young, Junichi Nomura, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On Wed, Apr 17, 2019 at 10:54:34AM +0200, Borislav Petkov wrote:
>On Wed, Apr 17, 2019 at 03:02:50PM +0800, Dave Young wrote:
>> How about move it after console_init,
>
>Sounds ok to me. That's still before KASLR gets setup and should work
>for Chao's movable regions too.

Yes, when debugging, I also put it after console_init().

Thanks,
Chao Fan

>
>> and at the same time skip efi_get_rsdp_addr in case kexec?
>
>If kexec_get_rsdp_addr() gets a correct address, efi_get_rsdp_addr()
>will be skipped.
>
>-- 
>Regards/Gruss,
>    Boris.
>
>Good mailing practices for 400: avoid top-posting and trim the reply.
>
>



^ permalink raw reply	[flat|nested] 48+ messages in thread

* RE: kexec crash on OVMF i386 + x86_64 kernel (Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel)
  2019-04-17  5:14                                         ` Dave Young
@ 2019-04-17 17:57                                           ` Prakhya, Sai Praneeth
  0 siblings, 0 replies; 48+ messages in thread
From: Prakhya, Sai Praneeth @ 2019-04-17 17:57 UTC (permalink / raw)
  To: Dave Young, Junichi Nomura
  Cc: Borislav Petkov, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel, linux-efi, Ard Biesheuvel

> Added efi people.
> 
> I remember previously Sai did some efi32 tests for kexec, but I'm not sure if he
> tested EFI32 + 64bit kernel.
> 
> Kexec status is not certain because I'm not sure anyone tesed and reported
> issues for that.

No.. I didn't test kexec on EFI32 + 64bit kernel and I also haven't tested kexec'ing from 
kexec'd kernel.

Regards,
Sai

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-17  8:22                                             ` Borislav Petkov
@ 2019-04-18  1:24                                               ` Dave Young
  0 siblings, 0 replies; 48+ messages in thread
From: Dave Young @ 2019-04-18  1:24 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Junichi Nomura, Chao Fan, Baoquan He, Kairui Song, x86, kexec,
	linux-kernel

On 04/17/19 at 10:22am, Borislav Petkov wrote:
> On Wed, Apr 17, 2019 at 09:38:38AM +0800, Dave Young wrote:
> > But if you want to apply it now, I think it is fine as well, probably
> > the laptop issue is lenovo firmware specific, we can add rsdp in cmdline
> > or boot params via kexec-tools so it should be good by using new kexec-tools.
> 
> I'm not in a hurry to apply it - if you guys wanna debug this further,
> sure, we can wait and then tag all the fixes for stable so that they can
> get backported.

Thanks!  According to our debugging this indeed needs amending.  We tried
mapping the EFI systab/ACPI table area with some hardcoded changes, and
the test passed.  This means the previous tests people have done probably
just happened to work; maybe the early page table happened to cover the memory
areas in their cases.  We noticed that in VM guests and on some other machines
the systab always falls into memory under 2G, but the systab of the T420 sits
at 0xda......, which is in the 2G ~ 4G area.

Kairui is trying to work on some fix so that it can be folded into the
current reviewed patch.

Thanks
Dave

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it
  2019-04-16  9:52                                     ` [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Borislav Petkov
  2019-04-16 10:02                                       ` Ingo Molnar
  2019-04-16 11:41                                       ` Dave Young
@ 2019-04-19  8:34                                       ` Kairui Song
  2019-04-19  8:58                                         ` Baoquan He
  2 siblings, 1 reply; 48+ messages in thread
From: Kairui Song @ 2019-04-19  8:34 UTC (permalink / raw)
  To: linux-kernel
  Cc: Borislav Petkov, Junichi Nomura, Dave Young, Chao Fan,
	Baoquan He, Kairui Song, x86, kexec

The previous patch "x86/boot: Use efi_setup_data for searching RSDP on
kexec-ed kernels" always resets some machines. This is a follow-up to
that patch.

The reason is that, by default, the systab region is not mapped by the
identity mapping provided by kexec. So the kernel accesses an unmapped
memory region, which causes a fault. But as kexec tends to pad the mapped
region up to PUD or PMD size, the systab could be included in
the mapping by accident, so it worked on some machines, but that is
fragile and breaks easily.

There are two approaches to fix it. One is to detect whether the systab is
mapped, and avoid reading it if not. The other is to ensure the region is
mapped, either by checking and mapping the systab in the first kernel before
kexec, or by mapping the systab in early code before reading it.

Mapping in the early code should cover every case (otherwise booting from an
older kernel would also fail). This patch is a draft implementation of that.

This adds a helper (add_identity_map_pgd) which can be used to add
extra identity mappings at a very early stage, and calls it before reading
the systab. There should be no need to unmap it as the early page table will
be discarded later.

Some refactoring is included, which introduces a lot of changes: it
moves some page table related code from kaslr_64.c to pgtable_64.c. If
the approach works out, I could prepare separate cleanup patches.

Signed-off-by: Kairui Song <kasong@redhat.com>
---
 arch/x86/boot/compressed/acpi.c       |   5 +
 arch/x86/boot/compressed/kaslr_64.c   | 109 +--------------------
 arch/x86/boot/compressed/misc.c       |   2 +
 arch/x86/boot/compressed/pgtable.h    |  11 +++
 arch/x86/boot/compressed/pgtable_64.c | 131 +++++++++++++++++++++++++-
 arch/x86/include/asm/boot.h           |   8 +-
 6 files changed, 156 insertions(+), 110 deletions(-)

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 8cecce1ac0cd..a513b0f9bfda 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -2,6 +2,7 @@
 #define BOOT_CTYPE_H
 #include "misc.h"
 #include "error.h"
+#include "pgtable.h"
 #include "../string.h"
 
 #include <linux/numa.h>
@@ -134,6 +135,10 @@ static acpi_physical_address kexec_get_rsdp_addr(void)
 	if (!systab)
 		error("EFI system table not found in kexec boot_params.");
 
+	add_identity_map_pgd((unsigned long)systab,
+			     (unsigned long)systab + sizeof(*systab),
+			     early_boot_top_pgt);
+
 	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
 }
 #else
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c
index 748456c365f4..ec7093e192bf 100644
--- a/arch/x86/boot/compressed/kaslr_64.c
+++ b/arch/x86/boot/compressed/kaslr_64.c
@@ -8,121 +8,21 @@
  * Copyright (C)      2016  Kees Cook
  */
 
-/*
- * Since we're dealing with identity mappings, physical and virtual
- * addresses are the same, so override these defines which are ultimately
- * used by the headers in misc.h.
- */
-#define __pa(x)  ((unsigned long)(x))
-#define __va(x)  ((void *)((unsigned long)(x)))
-
-/* No PAGE_TABLE_ISOLATION support needed either: */
-#undef CONFIG_PAGE_TABLE_ISOLATION
-
 #include "misc.h"
-
-/* These actually do the work of building the kernel identity maps. */
-#include <asm/init.h>
-#include <asm/pgtable.h>
-/* Use the static base for this part of the boot process */
-#undef __PAGE_OFFSET
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#include "../../mm/ident_map.c"
+#include "pgtable.h"
 
 /* Used by pgtable.h asm code to force instruction serialization. */
 unsigned long __force_order;
 
-/* Used to track our page table allocation area. */
-struct alloc_pgt_data {
-	unsigned char *pgt_buf;
-	unsigned long pgt_buf_size;
-	unsigned long pgt_buf_offset;
-};
-
-/*
- * Allocates space for a page table entry, using struct alloc_pgt_data
- * above. Besides the local callers, this is used as the allocation
- * callback in mapping_info below.
- */
-static void *alloc_pgt_page(void *context)
-{
-	struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
-	unsigned char *entry;
-
-	/* Validate there is space available for a new page. */
-	if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
-		debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
-		debug_putaddr(pages->pgt_buf_offset);
-		debug_putaddr(pages->pgt_buf_size);
-		return NULL;
-	}
-
-	entry = pages->pgt_buf + pages->pgt_buf_offset;
-	pages->pgt_buf_offset += PAGE_SIZE;
-
-	return entry;
-}
-
-/* Used to track our allocated page tables. */
-static struct alloc_pgt_data pgt_data;
-
 /* The top level page table entry pointer. */
 static unsigned long top_level_pgt;
 
-phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
-
-/*
- * Mapping information structure passed to kernel_ident_mapping_init().
- * Due to relocation, pointers must be assigned at run time not build time.
- */
-static struct x86_mapping_info mapping_info;
-
 /* Locates and clears a region for a new top level page table. */
 void initialize_identity_maps(void)
 {
-	/* If running as an SEV guest, the encryption mask is required. */
-	set_sev_encryption_mask();
-
-	/* Exclude the encryption mask from __PHYSICAL_MASK */
-	physical_mask &= ~sme_me_mask;
-
-	/* Init mapping_info with run-time function/buffer pointers. */
-	mapping_info.alloc_pgt_page = alloc_pgt_page;
-	mapping_info.context = &pgt_data;
-	mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
-	mapping_info.kernpg_flag = _KERNPG_TABLE;
-
-	/*
-	 * It should be impossible for this not to already be true,
-	 * but since calling this a second time would rewind the other
-	 * counters, let's just make sure this is reset too.
-	 */
-	pgt_data.pgt_buf_offset = 0;
-
-	/*
-	 * If we came here via startup_32(), cr3 will be _pgtable already
-	 * and we must append to the existing area instead of entirely
-	 * overwriting it.
-	 *
-	 * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
-	 * the top-level page table is allocated separately.
-	 *
-	 * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
-	 * cases. On 4-level paging it's equal to 'top_level_pgt'.
-	 */
-	top_level_pgt = read_cr3_pa();
-	if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
-		debug_putstr("booted via startup_32()\n");
-		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
-		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
-		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
-	} else {
-		debug_putstr("booted via startup_64()\n");
-		pgt_data.pgt_buf = _pgtable;
-		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
-		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+	top_level_pgt = early_boot_top_pgt;
+	if ((p4d_t *)top_level_pgt != (p4d_t *)_pgtable)
 		top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
-	}
 }
 
 /*
@@ -141,8 +41,7 @@ void add_identity_map(unsigned long start, unsigned long size)
 		return;
 
 	/* Build the mapping. */
-	kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
-				  start, end);
+	add_identity_map_pgd(start, end, top_level_pgt);
 }
 
 /*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..6b3548080d15 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -345,6 +345,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	const unsigned long kernel_total_size = VO__end - VO__text;
 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
 
+	initialize_pgtable_alloc();
+
 	/* Retain x86 boot parameters pointer passed from startup_32/64. */
 	boot_params = rmode;
 
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 6ff7e81b5628..443df2b65fbf 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -16,5 +16,16 @@ extern unsigned long *trampoline_32bit;
 
 extern void trampoline_32bit_src(void *return_ptr);
 
+extern struct alloc_pgt_data pgt_data;
+
+extern unsigned long early_boot_top_pgt;
+
+void *alloc_pgt_page(void *context);
+
+int add_identity_map_pgd(unsigned long pstart,
+			 unsigned long pend, unsigned long pgd);
+
+void initialize_pgtable_alloc(void);
+
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8debf7aeb4c..cd36cf9e6a5c 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,9 +1,30 @@
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))
+
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
+#include "misc.h"
+#include "pgtable.h"
+#include "../string.h"
+
 #include <linux/efi.h>
 #include <asm/e820/types.h>
 #include <asm/processor.h>
 #include <asm/efi.h>
-#include "pgtable.h"
-#include "../string.h"
+
+/* For handling early ident mapping */
+#include <asm/init.h>
+#include <asm/pgtable.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#include "../../mm/ident_map.c"
 
 /*
  * __force_order is used by special_insns.h asm code to force instruction
@@ -14,6 +35,28 @@
  */
 unsigned long __force_order;
 
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+	unsigned char *pgt_buf;
+	unsigned long pgt_buf_size;
+	unsigned long pgt_buf_offset;
+};
+
+/* Used to track our allocated page tables. */
+struct alloc_pgt_data pgt_data;
+
+/* Track the first loaded boot page table. */
+unsigned long early_boot_top_pgt;
+
+phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
+
+/*
+ * Mapping information structure passed to kernel_ident_mapping_init().
+ * Due to relocation, pointers must be assigned at run time not build time.
+ */
+static struct x86_mapping_info mapping_info;
+
+/* For handling trampoline. */
 #define BIOS_START_MIN		0x20000U	/* 128K, less than this is insane */
 #define BIOS_START_MAX		0x9f000U	/* 640K, absolute maximum */
 
@@ -202,3 +245,87 @@ void cleanup_trampoline(void *pgtable)
 	/* Restore trampoline memory */
 	memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
 }
+
+/*
+ * Allocates space for a page table entry, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+void *alloc_pgt_page(void *context)
+{
+	struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+	unsigned char *entry;
+
+	/* Validate there is space available for a new page. */
+	if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+		debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+		debug_putaddr(pages->pgt_buf_offset);
+		debug_putaddr(pages->pgt_buf_size);
+		return NULL;
+	}
+
+	entry = pages->pgt_buf + pages->pgt_buf_offset;
+	pages->pgt_buf_offset += PAGE_SIZE;
+
+	return entry;
+}
+
+/* Locates and clears a region used for updating or creating page tables. */
+void initialize_pgtable_alloc(void)
+{
+	/* If running as an SEV guest, the encryption mask is required. */
+	set_sev_encryption_mask();
+
+	/* Exclude the encryption mask from __PHYSICAL_MASK */
+	physical_mask &= ~sme_me_mask;
+
+	/* Init mapping_info with run-time function/buffer pointers. */
+	mapping_info.alloc_pgt_page = alloc_pgt_page;
+	mapping_info.context = &pgt_data;
+	mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
+	mapping_info.kernpg_flag = _KERNPG_TABLE;
+
+	/*
+	 * It should be impossible for this not to already be true,
+	 * but since calling this a second time would rewind the other
+	 * counters, let's just make sure this is reset too.
+	 */
+	pgt_data.pgt_buf_offset = 0;
+
+	/*
+	 * If we came here via startup_32(), cr3 will be _pgtable already
+	 * and we must append to the existing area instead of entirely
+	 * overwriting it.
+	 *
+	 * With 5-level paging, we use '_pgtable' to allocate the p4d page
+	 * table, the top-level page table is allocated separately.
+	 *
+	 * p4d_offset(early_boot_top_pgt, 0) would cover both the 4- and 5-level
+	 * cases. On 4-level paging it's equal to 'early_boot_top_pgt'.
+	 */
+
+	early_boot_top_pgt = read_cr3_pa();
+	early_boot_top_pgt = (unsigned long)p4d_offset(
+			(pgd_t *)early_boot_top_pgt, 0);
+	if ((p4d_t *)early_boot_top_pgt == (p4d_t *)_pgtable) {
+		debug_putstr("booted via startup_32()\n");
+		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+	} else {
+		debug_putstr("booted via startup_64()\n");
+		pgt_data.pgt_buf = _pgtable;
+		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+	}
+}
+
+/*
+ * Helper for mapping an extra memory region at a very early stage,
+ * before the actual kernel is extracted and executed.
+ */
+int add_identity_map_pgd(unsigned long pstart, unsigned long pend,
+			 unsigned long pgd)
+{
+	kernel_ident_mapping_init(&mapping_info, (pgd_t *)pgd, pstart, pend);
+}
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 680c320363db..fb37eb98b65d 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -33,6 +33,8 @@
 #ifdef CONFIG_X86_64
 # define BOOT_STACK_SIZE	0x4000
 
+/* Reserve one page for possible extra mapping requirement */
+# define BOOT_EXTRA_PGT_SIZE	(1*4096)
 # define BOOT_INIT_PGT_SIZE	(6*4096)
 # ifdef CONFIG_RANDOMIZE_BASE
 /*
@@ -43,12 +45,12 @@
  * Total is 19 pages.
  */
 #  ifdef CONFIG_X86_VERBOSE_BOOTUP
-#   define BOOT_PGT_SIZE	(19*4096)
+#   define BOOT_PGT_SIZE	((19 * 4096) + BOOT_EXTRA_PGT_SIZE)
 #  else /* !CONFIG_X86_VERBOSE_BOOTUP */
-#   define BOOT_PGT_SIZE	(17*4096)
+#   define BOOT_PGT_SIZE	((17 * 4096) + BOOT_EXTRA_PGT_SIZE)
 #  endif
 # else /* !CONFIG_RANDOMIZE_BASE */
-#  define BOOT_PGT_SIZE		BOOT_INIT_PGT_SIZE
+#  define BOOT_PGT_SIZE		(BOOT_INIT_PGT_SIZE + BOOT_EXTRA_PGT_SIZE)
 # endif
 
 #else /* !CONFIG_X86_64 */
-- 
2.20.1


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it
  2019-04-19  8:34                                       ` [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it Kairui Song
@ 2019-04-19  8:58                                         ` Baoquan He
  2019-04-19  9:39                                           ` Kairui Song
  0 siblings, 1 reply; 48+ messages in thread
From: Baoquan He @ 2019-04-19  8:58 UTC (permalink / raw)
  To: Kairui Song
  Cc: linux-kernel, Borislav Petkov, Junichi Nomura, Dave Young,
	Chao Fan, x86, kexec

On 04/19/19 at 04:34pm, Kairui Song wrote:
>  /* Locates and clears a region for a new top level page table. */
>  void initialize_identity_maps(void)
>  {
> -	/* If running as an SEV guest, the encryption mask is required. */
> -	set_sev_encryption_mask();
> -
> -	/* Exclude the encryption mask from __PHYSICAL_MASK */
> -	physical_mask &= ~sme_me_mask;
> -
> -	/* Init mapping_info with run-time function/buffer pointers. */
> -	mapping_info.alloc_pgt_page = alloc_pgt_page;
> -	mapping_info.context = &pgt_data;
> -	mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
> -	mapping_info.kernpg_flag = _KERNPG_TABLE;
> -
> -	/*
> -	 * It should be impossible for this not to already be true,
> -	 * but since calling this a second time would rewind the other
> -	 * counters, let's just make sure this is reset too.
> -	 */
> -	pgt_data.pgt_buf_offset = 0;
> -
> -	/*
> -	 * If we came here via startup_32(), cr3 will be _pgtable already
> -	 * and we must append to the existing area instead of entirely
> -	 * overwriting it.
> -	 *
> -	 * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
> -	 * the top-level page table is allocated separately.
> -	 *
> -	 * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
> -	 * cases. On 4-level paging it's equal to 'top_level_pgt'.
> -	 */
> -	top_level_pgt = read_cr3_pa();
> -	if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
> -		debug_putstr("booted via startup_32()\n");
> -		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
> -		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
> -		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> -	} else {
> -		debug_putstr("booted via startup_64()\n");
> -		pgt_data.pgt_buf = _pgtable;
> -		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
> -		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> +	top_level_pgt = early_boot_top_pgt;
> +	if ((p4d_t *)top_level_pgt != (p4d_t *)_pgtable)
>  		top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);

Kairui, will you make a patchset to include these changes separately
later on? I don't get the purpose of these code changes. E.g. here, I
don't know why you introduce a new variable early_boot_top_pgt, and
allocate the page table, even though they have already been done in the
old initialize_identity_maps().

Thanks
Baoquan

> -	}
>  }
>  
>  /*
> @@ -141,8 +41,7 @@ void add_identity_map(unsigned long start, unsigned long size)
>  		return;
>  
>  	/* Build the mapping. */
> -	kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
> -				  start, end);
> +	add_identity_map_pgd(start, end, top_level_pgt);
>  }
>  
>  /*
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index c0d6c560df69..6b3548080d15 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -345,6 +345,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
>  	const unsigned long kernel_total_size = VO__end - VO__text;
>  	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
>  
> +	initialize_pgtable_alloc();
> +
>  	/* Retain x86 boot parameters pointer passed from startup_32/64. */
>  	boot_params = rmode;
>  
> diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
> index 6ff7e81b5628..443df2b65fbf 100644
> --- a/arch/x86/boot/compressed/pgtable.h
> +++ b/arch/x86/boot/compressed/pgtable.h
> @@ -16,5 +16,16 @@ extern unsigned long *trampoline_32bit;
>  
>  extern void trampoline_32bit_src(void *return_ptr);
>  
> +extern struct alloc_pgt_data pgt_data;
> +
> +extern unsigned long early_boot_top_pgt;
> +
> +void *alloc_pgt_page(void *context);
> +
> +int add_identity_map_pgd(unsigned long pstart,
> +			 unsigned long pend, unsigned long pgd);
> +
> +void initialize_pgtable_alloc(void);
> +
>  #endif /* __ASSEMBLER__ */
>  #endif /* BOOT_COMPRESSED_PAGETABLE_H */
> diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> index f8debf7aeb4c..cd36cf9e6a5c 100644
> --- a/arch/x86/boot/compressed/pgtable_64.c
> +++ b/arch/x86/boot/compressed/pgtable_64.c
> @@ -1,9 +1,30 @@
> +/*
> + * Since we're dealing with identity mappings, physical and virtual
> + * addresses are the same, so override these defines which are ultimately
> + * used by the headers in misc.h.
> + */
> +#define __pa(x)  ((unsigned long)(x))
> +#define __va(x)  ((void *)((unsigned long)(x)))
> +
> +/* No PAGE_TABLE_ISOLATION support needed either: */
> +#undef CONFIG_PAGE_TABLE_ISOLATION
> +
> +#include "misc.h"
> +#include "pgtable.h"
> +#include "../string.h"
> +
>  #include <linux/efi.h>
>  #include <asm/e820/types.h>
>  #include <asm/processor.h>
>  #include <asm/efi.h>
> -#include "pgtable.h"
> -#include "../string.h"
> +
> +/* For handling early ident mapping */
> +#include <asm/init.h>
> +#include <asm/pgtable.h>
> +/* Use the static base for this part of the boot process */
> +#undef __PAGE_OFFSET
> +#define __PAGE_OFFSET __PAGE_OFFSET_BASE
> +#include "../../mm/ident_map.c"
>  
>  /*
>   * __force_order is used by special_insns.h asm code to force instruction
> @@ -14,6 +35,28 @@
>   */
>  unsigned long __force_order;
>  
> +/* Used to track our page table allocation area. */
> +struct alloc_pgt_data {
> +	unsigned char *pgt_buf;
> +	unsigned long pgt_buf_size;
> +	unsigned long pgt_buf_offset;
> +};
> +
> +/* Used to track our allocated page tables. */
> +struct alloc_pgt_data pgt_data;
> +
> +/* Track the first loaded boot page table. */
> +unsigned long early_boot_top_pgt;
> +
> +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
> +
> +/*
> + * Mapping information structure passed to kernel_ident_mapping_init().
> + * Due to relocation, pointers must be assigned at run time not build time.
> + */
> +static struct x86_mapping_info mapping_info;
> +
> +/* For handling trampoline. */
>  #define BIOS_START_MIN		0x20000U	/* 128K, less than this is insane */
>  #define BIOS_START_MAX		0x9f000U	/* 640K, absolute maximum */
>  
> @@ -202,3 +245,87 @@ void cleanup_trampoline(void *pgtable)
>  	/* Restore trampoline memory */
>  	memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
>  }
> +
> +/*
> + * Allocates space for a page table entry, using struct alloc_pgt_data
> + * above. Besides the local callers, this is used as the allocation
> + * callback in mapping_info below.
> + */
> +void *alloc_pgt_page(void *context)
> +{
> +	struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
> +	unsigned char *entry;
> +
> +	/* Validate there is space available for a new page. */
> +	if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
> +		debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
> +		debug_putaddr(pages->pgt_buf_offset);
> +		debug_putaddr(pages->pgt_buf_size);
> +		return NULL;
> +	}
> +
> +	entry = pages->pgt_buf + pages->pgt_buf_offset;
> +	pages->pgt_buf_offset += PAGE_SIZE;
> +
> +	return entry;
> +}
> +
> +/* Locates and clears a region for update or create page table. */
> +void initialize_pgtable_alloc(void)
> +{
> +	/* If running as an SEV guest, the encryption mask is required. */
> +	set_sev_encryption_mask();
> +
> +	/* Exclude the encryption mask from __PHYSICAL_MASK */
> +	physical_mask &= ~sme_me_mask;
> +
> +	/* Init mapping_info with run-time function/buffer pointers. */
> +	mapping_info.alloc_pgt_page = alloc_pgt_page;
> +	mapping_info.context = &pgt_data;
> +	mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
> +	mapping_info.kernpg_flag = _KERNPG_TABLE;
> +
> +	/*
> +	 * It should be impossible for this not to already be true,
> +	 * but since calling this a second time would rewind the other
> +	 * counters, let's just make sure this is reset too.
> +	 */
> +	pgt_data.pgt_buf_offset = 0;
> +
> +	/*
> +	 * If we came here via startup_32(), cr3 will be _pgtable already
> +	 * and we must append to the existing area instead of entirely
> +	 * overwriting it.
> +	 *
> +	 * With 5-level paging, we use '_pgtable' to allocate the p4d page
> +	 * table, the top-level page table is allocated separately.
> +	 *
> +	 * p4d_offset(early_boot_top_pgt, 0) would cover both the 4- and 5-level
> +	 * cases. On 4-level paging it's equal to 'early_boot_top_pgt'.
> +	 */
> +
> +	early_boot_top_pgt = read_cr3_pa();
> +	early_boot_top_pgt = (unsigned long)p4d_offset(
> +			(pgd_t *)early_boot_top_pgt, 0);
> +	if ((p4d_t *)early_boot_top_pgt == (p4d_t *)_pgtable) {
> +		debug_putstr("booted via startup_32()\n");
> +		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
> +		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
> +		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> +	} else {
> +		debug_putstr("booted via startup_64()\n");
> +		pgt_data.pgt_buf = _pgtable;
> +		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
> +		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> +	}
> +}
> +
> +/*
> + * Helper for mapping extra memory region in very early stage
> + * before extract and execute the actual kernel
> + */
> +int add_identity_map_pgd(unsigned long pstart, unsigned long pend,
> +			 unsigned long pgd)
> +{
> +	kernel_ident_mapping_init(&mapping_info, (pgd_t *)pgd, pstart, pend);
> +}
> diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
> index 680c320363db..fb37eb98b65d 100644
> --- a/arch/x86/include/asm/boot.h
> +++ b/arch/x86/include/asm/boot.h
> @@ -33,6 +33,8 @@
>  #ifdef CONFIG_X86_64
>  # define BOOT_STACK_SIZE	0x4000
>  
> +/* Reserve one page for possible extra mapping requirement */
> +# define BOOT_EXTRA_PGT_SIZE	(1*4096)
>  # define BOOT_INIT_PGT_SIZE	(6*4096)
>  # ifdef CONFIG_RANDOMIZE_BASE
>  /*
> @@ -43,12 +45,12 @@
>   * Total is 19 pages.
>   */
>  #  ifdef CONFIG_X86_VERBOSE_BOOTUP
> -#   define BOOT_PGT_SIZE	(19*4096)
> +#   define BOOT_PGT_SIZE	((19 * 4096) + BOOT_EXTRA_PGT_SIZE)
>  #  else /* !CONFIG_X86_VERBOSE_BOOTUP */
> -#   define BOOT_PGT_SIZE	(17*4096)
> +#   define BOOT_PGT_SIZE	((17 * 4096) + BOOT_EXTRA_PGT_SIZE)
>  #  endif
>  # else /* !CONFIG_RANDOMIZE_BASE */
> -#  define BOOT_PGT_SIZE		BOOT_INIT_PGT_SIZE
> +#  define BOOT_PGT_SIZE		(BOOT_INIT_PGT_SIZE + BOOT_EXTRA_PGT_SIZE)
>  # endif
>  
>  #else /* !CONFIG_X86_64 */
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it
  2019-04-19  8:58                                         ` Baoquan He
@ 2019-04-19  9:39                                           ` Kairui Song
  0 siblings, 0 replies; 48+ messages in thread
From: Kairui Song @ 2019-04-19  9:39 UTC (permalink / raw)
  To: Baoquan He
  Cc: Linux Kernel Mailing List, Borislav Petkov, Junichi Nomura,
	Dave Young, Chao Fan, x86, kexec

On Fri, Apr 19, 2019 at 4:58 PM Baoquan He <bhe@redhat.com> wrote:
>
> On 04/19/19 at 04:34pm, Kairui Song wrote:
> >  /* Locates and clears a region for a new top level page table. */
> >  void initialize_identity_maps(void)
> >  {
> > -     /* If running as an SEV guest, the encryption mask is required. */
> > -     set_sev_encryption_mask();
> > -
> > -     /* Exclude the encryption mask from __PHYSICAL_MASK */
> > -     physical_mask &= ~sme_me_mask;
> > -
> > -     /* Init mapping_info with run-time function/buffer pointers. */
> > -     mapping_info.alloc_pgt_page = alloc_pgt_page;
> > -     mapping_info.context = &pgt_data;
> > -     mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
> > -     mapping_info.kernpg_flag = _KERNPG_TABLE;
> > -
> > -     /*
> > -      * It should be impossible for this not to already be true,
> > -      * but since calling this a second time would rewind the other
> > -      * counters, let's just make sure this is reset too.
> > -      */
> > -     pgt_data.pgt_buf_offset = 0;
> > -
> > -     /*
> > -      * If we came here via startup_32(), cr3 will be _pgtable already
> > -      * and we must append to the existing area instead of entirely
> > -      * overwriting it.
> > -      *
> > -      * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
> > -      * the top-level page table is allocated separately.
> > -      *
> > -      * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
> > -      * cases. On 4-level paging it's equal to 'top_level_pgt'.
> > -      */
> > -     top_level_pgt = read_cr3_pa();
> > -     if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
> > -             debug_putstr("booted via startup_32()\n");
> > -             pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
> > -             pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
> > -             memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> > -     } else {
> > -             debug_putstr("booted via startup_64()\n");
> > -             pgt_data.pgt_buf = _pgtable;
> > -             pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
> > -             memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> > +     top_level_pgt = early_boot_top_pgt;
> > +     if ((p4d_t *)top_level_pgt != (p4d_t *)_pgtable)
> >               top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
>
> Kairui, will you make a patchset to include these changes separately
> later on? I don't get the purposes of code changes. E.g here, I
> don't know why you introduce a new variable early_boot_top_pgt, and
> allocate the page table, even though they have been done in the old
> initialize_identity_maps().
>
> Thanks
> Baoquan
>

OK, right, it's not a good idea to mix things up together. I'll
resend the patch, and will send the cleanup separately. Without the
cleanup it may bring in some extra burden with certain kernel configs,
but that should be OK for the fix.

-- 
Best Regards,
Kairui Song

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [tip:x86/boot] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels
  2019-04-08 23:10 [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
  2019-04-10 17:14 ` Borislav Petkov
@ 2019-06-06 19:22 ` tip-bot for Junichi Nomura
  1 sibling, 0 replies; 48+ messages in thread
From: tip-bot for Junichi Nomura @ 2019-06-06 19:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: fanc.fnst, dirk.vandermerwe, mingo, bp, dyoung, j-nomura, tglx,
	linux-kernel, hpa

Commit-ID:  0a23ebc66a46786769dd68bfdaa3102345819b9c
Gitweb:     https://git.kernel.org/tip/0a23ebc66a46786769dd68bfdaa3102345819b9c
Author:     Junichi Nomura <j-nomura@ce.jp.nec.com>
AuthorDate: Thu, 11 Apr 2019 15:49:32 +0200
Committer:  Borislav Petkov <bp@suse.de>
CommitDate: Thu, 6 Jun 2019 20:28:37 +0200

x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels

Commit

  3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")

broke kexec boot on EFI systems. efi_get_rsdp_addr() in the early
parsing code tries to search RSDP from the EFI tables but that will
crash because the table address is virtual when the kernel was booted by
kexec (set_virtual_address_map() has run in the first kernel and cannot
be run again in the second kernel).

In the case of kexec, the physical address of EFI tables is provided via
efi_setup_data in boot_params, which is set up by kexec(1).

Factor out the table parsing code and use different pointers depending
on whether the kernel is booted by kexec or not.

 [ bp: Massage. ]

Fixes: 3a63f70bf4c3a ("x86/boot: Early parse RSDP and save it in boot_params")
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Cc: Chao Fan <fanc.fnst@cn.fujitsu.com>
Cc: Dave Young <dyoung@redhat.com>
Link: https://lkml.kernel.org/r/20190408231011.GA5402@jeru.linux.bs1.fc.nec.co.jp
---
 arch/x86/boot/compressed/acpi.c | 143 ++++++++++++++++++++++++++++++----------
 1 file changed, 107 insertions(+), 36 deletions(-)

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ad84239e595e..15255f388a85 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void)
 	return addr;
 }
 
-/* Search EFI system tables for RSDP. */
-static acpi_physical_address efi_get_rsdp_addr(void)
+/*
+ * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
+ * ACPI_TABLE_GUID are found, take the former, which has more features.
+ */
+static acpi_physical_address
+__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
+		    bool efi_64)
 {
 	acpi_physical_address rsdp_addr = 0;
 
 #ifdef CONFIG_EFI
-	unsigned long systab, systab_tables, config_tables;
+	int i;
+
+	/* Get EFI tables from systab. */
+	for (i = 0; i < nr_tables; i++) {
+		acpi_physical_address table;
+		efi_guid_t guid;
+
+		if (efi_64) {
+			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+
+			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+				return 0;
+			}
+		} else {
+			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
+
+			guid  = tbl->guid;
+			table = tbl->table;
+		}
+
+		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+			rsdp_addr = table;
+		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+			return table;
+	}
+#endif
+	return rsdp_addr;
+}
+
+/* EFI/kexec support is 64-bit only. */
+#ifdef CONFIG_X86_64
+static struct efi_setup_data *get_kexec_setup_data_addr(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+
+	pa_data = boot_params->hdr.setup_data;
+	while (pa_data) {
+		data = (struct setup_data *)pa_data;
+		if (data->type == SETUP_EFI)
+			return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+
+		pa_data = data->next;
+	}
+	return NULL;
+}
+
+static acpi_physical_address kexec_get_rsdp_addr(void)
+{
+	efi_system_table_64_t *systab;
+	struct efi_setup_data *esd;
+	struct efi_info *ei;
+	char *sig;
+
+	esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
+	if (!esd)
+		return 0;
+
+	if (!esd->tables) {
+		debug_putstr("Wrong kexec SETUP_EFI data.\n");
+		return 0;
+	}
+
+	ei = &boot_params->efi_info;
+	sig = (char *)&ei->efi_loader_signature;
+	if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+		debug_putstr("Wrong kexec EFI loader signature.\n");
+		return 0;
+	}
+
+	/* Get systab from boot params. */
+	systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
+	if (!systab)
+		error("EFI system table not found in kexec boot_params.");
+
+	return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+}
+#else
+static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
+#endif /* CONFIG_X86_64 */
+
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+#ifdef CONFIG_EFI
+	unsigned long systab, config_tables;
 	unsigned int nr_tables;
 	struct efi_info *ei;
 	bool efi_64;
-	int size, i;
 	char *sig;
 
 	ei = &boot_params->efi_info;
@@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void)
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_64_t);
 	} else {
 		efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
 
 		config_tables	= stbl->tables;
 		nr_tables	= stbl->nr_tables;
-		size		= sizeof(efi_config_table_32_t);
 	}
 
 	if (!config_tables)
 		error("EFI config tables not found.");
 
-	/* Get EFI tables from systab. */
-	for (i = 0; i < nr_tables; i++) {
-		acpi_physical_address table;
-		efi_guid_t guid;
-
-		config_tables += size;
-
-		if (efi_64) {
-			efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-
-			if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-				debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-				return 0;
-			}
-		} else {
-			efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
-
-			guid  = tbl->guid;
-			table = tbl->table;
-		}
-
-		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-			rsdp_addr = table;
-		else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-			return table;
-	}
+	return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+#else
+	return 0;
 #endif
-	return rsdp_addr;
 }
 
 static u8 compute_checksum(u8 *buffer, u32 length)
@@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void)
 	if (!pa)
 		pa = boot_params->acpi_rsdp_addr;
 
+	/*
+	 * Try to get EFI data from setup_data. This can happen when we're a
+	 * kexec'ed kernel and kexec(1) has passed all the required EFI info to
+	 * us.
+	 */
+	if (!pa)
+		pa = kexec_get_rsdp_addr();
+
 	if (!pa)
 		pa = efi_get_rsdp_addr();
 

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2019-06-06 19:26 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-08 23:10 [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
2019-04-10 17:14 ` Borislav Petkov
2019-04-10 23:34   ` Junichi Nomura
2019-04-11  8:09     ` Borislav Petkov
2019-04-11  8:16       ` Junichi Nomura
2019-04-11  8:37         ` Borislav Petkov
2019-04-11  9:13           ` Junichi Nomura
2019-04-11  9:21             ` Boris Petkov
2019-04-11  9:32               ` Junichi Nomura
2019-04-11  9:40                 ` Boris Petkov
2019-04-11 12:58                   ` Borislav Petkov
2019-04-12  2:54                     ` Junichi Nomura
2019-04-12  8:49                       ` Borislav Petkov
2019-04-12 13:35                         ` Borislav Petkov
2019-04-15  7:01                           ` Junichi Nomura
2019-04-15  9:07                             ` Borislav Petkov
2019-04-15 10:25                               ` Borislav Petkov
2019-04-15 23:00                                 ` Junichi Nomura
2019-04-15 23:14                                   ` Junichi Nomura
2019-04-16  9:45                                     ` Borislav Petkov
2019-04-16 23:09                                       ` kexec crash on OVMF i386 + x86_64 kernel (Re: [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel) Junichi Nomura
2019-04-17  5:14                                         ` Dave Young
2019-04-17 17:57                                           ` Prakhya, Sai Praneeth
2019-04-16  9:40                                   ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Borislav Petkov
2019-04-16  9:52                                     ` [PATCH] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels Borislav Petkov
2019-04-16 10:02                                       ` Ingo Molnar
2019-04-16 10:31                                         ` Borislav Petkov
2019-04-16 11:41                                       ` Dave Young
2019-04-16 13:22                                         ` Borislav Petkov
2019-04-17  1:38                                           ` Dave Young
2019-04-17  4:57                                             ` Dave Young
2019-04-17  6:00                                               ` Kairui Song
2019-04-17  7:08                                                 ` Dave Young
2019-04-17  8:22                                             ` Borislav Petkov
2019-04-18  1:24                                               ` Dave Young
2019-04-19  8:34                                       ` [RFC PATCH] kexec, x86/boot: map systab region in identity mapping before accessing it Kairui Song
2019-04-19  8:58                                         ` Baoquan He
2019-04-19  9:39                                           ` Kairui Song
2019-04-16 22:44                                     ` [PATCH v4] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernel Junichi Nomura
2019-04-17  7:02                                       ` Dave Young
2019-04-17  8:54                                         ` Borislav Petkov
2019-04-17  9:02                                           ` Borislav Petkov
2019-04-17 10:31                                           ` Chao Fan
2019-04-11  8:42         ` Baoquan He
2019-04-11  9:14           ` Junichi Nomura
2019-04-12  0:23             ` Baoquan He
2019-04-15  7:46               ` Dave Young
2019-06-06 19:22 ` [tip:x86/boot] x86/boot: Use efi_setup_data for searching RSDP on kexec-ed kernels tip-bot for Junichi Nomura

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.