All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] ACPI: fix acpi table use after free
@ 2021-03-03 20:09 George Kennedy
  2021-03-04 12:14 ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: George Kennedy @ 2021-03-03 20:09 UTC (permalink / raw)
  To: robert.moore, erik.kaneda, rafael.j.wysocki, lenb, linux-acpi,
	devel, linux-kernel, rppt, konrad.wilk, dan.carpenter,
	dhaval.giani
  Cc: george.kennedy

Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
in __free_pages_core()") the following use after free occurs
intermittently when acpi tables are accessed.

BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
Read of size 4 at addr ffff8880be453004 by task swapper/0/1
CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
Call Trace:
 dump_stack+0xf6/0x158
 print_address_description.constprop.9+0x41/0x60
 kasan_report.cold.14+0x7b/0xd4
 __asan_report_load_n_noabort+0xf/0x20
 ibft_init+0x134/0xc49
 do_one_initcall+0xc4/0x3e0
 kernel_init_freeable+0x5af/0x66b
 kernel_init+0x16/0x1d0
 ret_from_fork+0x22/0x30

ACPI tables mapped via kmap() do not have their mapped pages
reserved and the pages can be "stolen" by the buddy allocator.
Use memblock_reserve() to reserve all the ACPI table pages.

Signed-off-by: George Kennedy <george.kennedy@oracle.com>
---
 arch/x86/kernel/setup.c        | 3 +--
 drivers/acpi/acpica/tbinstal.c | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d883176..97deea3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1046,6 +1046,7 @@ void __init setup_arch(char **cmdline_p)
 	cleanup_highmap();
 
 	memblock_set_current_limit(ISA_END_ADDRESS);
+	acpi_boot_table_init();
 	e820__memblock_setup();
 
 	/*
@@ -1139,8 +1140,6 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * Parse the ACPI tables for possible boot-time SMP configuration.
 	 */
-	acpi_boot_table_init();
-
 	early_acpi_boot_init();
 
 	initmem_init();
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 8d1e5b5..4e32b22 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -8,6 +8,7 @@
  *****************************************************************************/
 
 #include <acpi/acpi.h>
+#include <linux/memblock.h>
 #include "accommon.h"
 #include "actables.h"
 
@@ -58,6 +59,9 @@
 				      new_table_desc->flags,
 				      new_table_desc->pointer);
 
+	memblock_reserve(new_table_desc->address,
+			 PAGE_ALIGN(new_table_desc->pointer->length));
+
 	acpi_tb_print_table_header(new_table_desc->address,
 				   new_table_desc->pointer);
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-03 20:09 [PATCH 1/1] ACPI: fix acpi table use after free George Kennedy
@ 2021-03-04 12:14 ` Rafael J. Wysocki
  2021-03-04 23:14   ` George Kennedy
  0 siblings, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-04 12:14 UTC (permalink / raw)
  To: George Kennedy
  Cc: Robert Moore, Erik Kaneda, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani

On Thu, Mar 4, 2021 at 2:22 AM George Kennedy <george.kennedy@oracle.com> wrote:
>
> Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
> in __free_pages_core()") the following use after free occurs
> intermittently when acpi tables are accessed.
>
> BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
> Read of size 4 at addr ffff8880be453004 by task swapper/0/1
> CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
> Call Trace:
>  dump_stack+0xf6/0x158
>  print_address_description.constprop.9+0x41/0x60
>  kasan_report.cold.14+0x7b/0xd4
>  __asan_report_load_n_noabort+0xf/0x20
>  ibft_init+0x134/0xc49
>  do_one_initcall+0xc4/0x3e0
>  kernel_init_freeable+0x5af/0x66b
>  kernel_init+0x16/0x1d0
>  ret_from_fork+0x22/0x30
>
> ACPI tables mapped via kmap() do not have their mapped pages
> reserved and the pages can be "stolen" by the buddy allocator.

What do you mean by this?

> Use memblock_reserve() to reserve all the ACPI table pages.

How is this going to help?

> Signed-off-by: George Kennedy <george.kennedy@oracle.com>
> ---
>  arch/x86/kernel/setup.c        | 3 +--
>  drivers/acpi/acpica/tbinstal.c | 4 ++++
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index d883176..97deea3 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -1046,6 +1046,7 @@ void __init setup_arch(char **cmdline_p)
>         cleanup_highmap();
>
>         memblock_set_current_limit(ISA_END_ADDRESS);
> +       acpi_boot_table_init();

This cannot be moved before the acpi_table_upgrade() invocation AFAICS.

Why exactly do you want to move it?

>         e820__memblock_setup();
>
>         /*
> @@ -1139,8 +1140,6 @@ void __init setup_arch(char **cmdline_p)
>         /*
>          * Parse the ACPI tables for possible boot-time SMP configuration.
>          */
> -       acpi_boot_table_init();
> -
>         early_acpi_boot_init();
>
>         initmem_init();
> diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
> index 8d1e5b5..4e32b22 100644
> --- a/drivers/acpi/acpica/tbinstal.c
> +++ b/drivers/acpi/acpica/tbinstal.c
> @@ -8,6 +8,7 @@
>   *****************************************************************************/
>
>  #include <acpi/acpi.h>
> +#include <linux/memblock.h>
>  #include "accommon.h"
>  #include "actables.h"
>
> @@ -58,6 +59,9 @@
>                                       new_table_desc->flags,
>                                       new_table_desc->pointer);
>
> +       memblock_reserve(new_table_desc->address,
> +                        PAGE_ALIGN(new_table_desc->pointer->length));
> +

Why do you want to do this here in the first place?

Things like that cannot be done in the ACPICA code in general.

>         acpi_tb_print_table_header(new_table_desc->address,
>                                    new_table_desc->pointer);
>
> --

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-04 12:14 ` Rafael J. Wysocki
@ 2021-03-04 23:14   ` George Kennedy
  2021-03-05 13:30     ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: George Kennedy @ 2021-03-04 23:14 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Robert Moore, Erik Kaneda, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani

Hello Rafael,

On 3/4/2021 7:14 AM, Rafael J. Wysocki wrote:
> On Thu, Mar 4, 2021 at 2:22 AM George Kennedy <george.kennedy@oracle.com> wrote:
>> Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
>> in __free_pages_core()") the following use after free occurs
>> intermittently when acpi tables are accessed.
>>
>> BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
>> Read of size 4 at addr ffff8880be453004 by task swapper/0/1
>> CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
>> Call Trace:
>>   dump_stack+0xf6/0x158
>>   print_address_description.constprop.9+0x41/0x60
>>   kasan_report.cold.14+0x7b/0xd4
>>   __asan_report_load_n_noabort+0xf/0x20
>>   ibft_init+0x134/0xc49
>>   do_one_initcall+0xc4/0x3e0
>>   kernel_init_freeable+0x5af/0x66b
>>   kernel_init+0x16/0x1d0
>>   ret_from_fork+0x22/0x30
>>
>> ACPI tables mapped via kmap() do not have their mapped pages
>> reserved and the pages can be "stolen" by the buddy allocator.
> What do you mean by this?
The ibft table, for example, is mapped in via acpi_map() and kmap(). The 
page for the ibft table is not reserved, so it can end up on the freelist.
>
>> Use memblock_reserve() to reserve all the ACPI table pages.
> How is this going to help?
If the ibft table page is not reserved, it will end up on the freelist 
and potentially be allocated before ibft_init() is called.

I believe this is the call that causes the ibft table page (in this case 
pfn=0xbe453) to end up on the freelist:

memmap_init_range: size=bd49b, nid=0, zone=1, start_pfn=1000, 
zone_end_pfn=100000

[    0.477319]  memmap_init_range+0x33b/0x4e2
[    0.479053]  memmap_init_zone+0x1e0/0x243
[    0.485276]  free_area_init_node+0xa4e/0xac5
[    0.498242]  free_area_init+0xf4a/0x107a
[    0.509958]  zone_sizes_init+0xd9/0x111
[    0.511731]  paging_init+0x4a/0x4c
[    0.512417]  setup_arch+0x14f8/0x1758
[    0.519193]  start_kernel+0x6c/0x46f
[    0.519921]  x86_64_start_reservations+0x37/0x39
[    0.520847]  x86_64_start_kernel+0x7b/0x7e
[    0.521666]  secondary_startup_64_no_verify+0xb0/0xbb

>
>> Signed-off-by: George Kennedy <george.kennedy@oracle.com>
>> ---
>>   arch/x86/kernel/setup.c        | 3 +--
>>   drivers/acpi/acpica/tbinstal.c | 4 ++++
>>   2 files changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index d883176..97deea3 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -1046,6 +1046,7 @@ void __init setup_arch(char **cmdline_p)
>>          cleanup_highmap();
>>
>>          memblock_set_current_limit(ISA_END_ADDRESS);
>> +       acpi_boot_table_init();
> This cannot be moved before the acpi_table_upgrade() invocation AFAICS.
>
> Why exactly do you want to move it?

Want to make sure there are slots for memblock_reserve() to be able to 
reserve the page.
>
>>          e820__memblock_setup();
>>
>>          /*
>> @@ -1139,8 +1140,6 @@ void __init setup_arch(char **cmdline_p)
>>          /*
>>           * Parse the ACPI tables for possible boot-time SMP configuration.
>>           */
>> -       acpi_boot_table_init();
>> -
>>          early_acpi_boot_init();
>>
>>          initmem_init();
>> diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
>> index 8d1e5b5..4e32b22 100644
>> --- a/drivers/acpi/acpica/tbinstal.c
>> +++ b/drivers/acpi/acpica/tbinstal.c
>> @@ -8,6 +8,7 @@
>>    *****************************************************************************/
>>
>>   #include <acpi/acpi.h>
>> +#include <linux/memblock.h>
>>   #include "accommon.h"
>>   #include "actables.h"
>>
>> @@ -58,6 +59,9 @@
>>                                        new_table_desc->flags,
>>                                        new_table_desc->pointer);
>>
>> +       memblock_reserve(new_table_desc->address,
>> +                        PAGE_ALIGN(new_table_desc->pointer->length));
>> +
> Why do you want to do this here in the first place?

If there is a better place to do it, I can move the memblock_reserve() 
there. The memblock_reserve() cannot be done from the ibft code - it's 
too late - the ibft table page has already ended up on the freelist by 
the time ibft_init() is called.

>
> Things like that cannot be done in the ACPICA code in general.

Can you recommend a better place to do the memblock_reserve() from?

Thank you,
George

>
>>          acpi_tb_print_table_header(new_table_desc->address,
>>                                     new_table_desc->pointer);
>>
>> --


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-04 23:14   ` George Kennedy
@ 2021-03-05 13:30     ` Rafael J. Wysocki
  2021-03-05 13:40       ` David Hildenbrand
  2021-03-07  7:46       ` [PATCH 1/1] ACPI: fix acpi table use after free Mike Rapoport
  0 siblings, 2 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-05 13:30 UTC (permalink / raw)
  To: George Kennedy, David Hildenbrand
  Cc: Rafael J. Wysocki, Robert Moore, Erik Kaneda, Rafael Wysocki,
	Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko

On Fri, Mar 5, 2021 at 12:14 AM George Kennedy
<george.kennedy@oracle.com> wrote:
>
> Hello Rafael,
>
> On 3/4/2021 7:14 AM, Rafael J. Wysocki wrote:
> > On Thu, Mar 4, 2021 at 2:22 AM George Kennedy <george.kennedy@oracle.com> wrote:
> >> Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
> >> in __free_pages_core()") the following use after free occurs
> >> intermittently when acpi tables are accessed.
> >>
> >> BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
> >> Read of size 4 at addr ffff8880be453004 by task swapper/0/1
> >> CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
> >> Call Trace:
> >>   dump_stack+0xf6/0x158
> >>   print_address_description.constprop.9+0x41/0x60
> >>   kasan_report.cold.14+0x7b/0xd4
> >>   __asan_report_load_n_noabort+0xf/0x20
> >>   ibft_init+0x134/0xc49
> >>   do_one_initcall+0xc4/0x3e0
> >>   kernel_init_freeable+0x5af/0x66b
> >>   kernel_init+0x16/0x1d0
> >>   ret_from_fork+0x22/0x30
> >>
> >> ACPI tables mapped via kmap() do not have their mapped pages
> >> reserved and the pages can be "stolen" by the buddy allocator.
> >>
> > What do you mean by this?
>>
> The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> page for the ibft table is not reserved, so it can end up on the freelist.

You appear to be saying that it is not sufficient to kmap() a page in
order to use it safely.  It is also necessary to reserve it upfront,
for example with the help of memblock_reserve().  Is that correct?  If
so, is there an alternative way to reserve a page frame?

> >
> >> Use memblock_reserve() to reserve all the ACPI table pages.
> > How is this going to help?
> If the ibft table page is not reserved, it will end up on the freelist
> and potentially be allocated before ibft_init() is called.
>
> I believe this is the call that causes the ibft table page (in this case
> pfn=0xbe453) to end up on the freelist:
>
> memmap_init_range: size=bd49b, nid=0, zone=1, start_pfn=1000,
> zone_end_pfn=100000

David, is commit 7fef431be9c9 related to this and if so, then how?

> [    0.477319]  memmap_init_range+0x33b/0x4e2
> [    0.479053]  memmap_init_zone+0x1e0/0x243
> [    0.485276]  free_area_init_node+0xa4e/0xac5
> [    0.498242]  free_area_init+0xf4a/0x107a
> [    0.509958]  zone_sizes_init+0xd9/0x111
> [    0.511731]  paging_init+0x4a/0x4c
> [    0.512417]  setup_arch+0x14f8/0x1758
> [    0.519193]  start_kernel+0x6c/0x46f
> [    0.519921]  x86_64_start_reservations+0x37/0x39
> [    0.520847]  x86_64_start_kernel+0x7b/0x7e
> [    0.521666]  secondary_startup_64_no_verify+0xb0/0xbb
>
> >
> >> Signed-off-by: George Kennedy <george.kennedy@oracle.com>
> >> ---
> >>   arch/x86/kernel/setup.c        | 3 +--
> >>   drivers/acpi/acpica/tbinstal.c | 4 ++++
> >>   2 files changed, 5 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> >> index d883176..97deea3 100644
> >> --- a/arch/x86/kernel/setup.c
> >> +++ b/arch/x86/kernel/setup.c
> >> @@ -1046,6 +1046,7 @@ void __init setup_arch(char **cmdline_p)
> >>          cleanup_highmap();
> >>
> >>          memblock_set_current_limit(ISA_END_ADDRESS);
> >> +       acpi_boot_table_init();
> > This cannot be moved before the acpi_table_upgrade() invocation AFAICS.
> >
> > Why exactly do you want to move it?
>
> Want to make sure there are slots for memblock_reserve() to be able to
> reserve the page.

Well, that may not require reordering the initialization this way.

> >>          e820__memblock_setup();
> >>
> >>          /*
> >> @@ -1139,8 +1140,6 @@ void __init setup_arch(char **cmdline_p)
> >>          /*
> >>           * Parse the ACPI tables for possible boot-time SMP configuration.
> >>           */
> >> -       acpi_boot_table_init();
> >> -
> >>          early_acpi_boot_init();
> >>
> >>          initmem_init();
> >> diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
> >> index 8d1e5b5..4e32b22 100644
> >> --- a/drivers/acpi/acpica/tbinstal.c
> >> +++ b/drivers/acpi/acpica/tbinstal.c
> >> @@ -8,6 +8,7 @@
> >>    *****************************************************************************/
> >>
> >>   #include <acpi/acpi.h>
> >> +#include <linux/memblock.h>
> >>   #include "accommon.h"
> >>   #include "actables.h"
> >>
> >> @@ -58,6 +59,9 @@
> >>                                        new_table_desc->flags,
> >>                                        new_table_desc->pointer);
> >>
> >> +       memblock_reserve(new_table_desc->address,
> >> +                        PAGE_ALIGN(new_table_desc->pointer->length));
> >> +
> > Why do you want to do this here in the first place?
>
> If there is a better place to do it, I can move the memblock_reserve()
> there. The memblock_reserve() cannot be done from the ibft code - it's
> too late - the ibft table page has already ended up on the freelist by
> the time ibft_init() is called.

I see.

> >
> > Things like that cannot be done in the ACPICA code in general.
>
> Can you recommend a better place to do the memblock_reserve() from?

Maybe.  I need to understand the problem better, though.

Thanks!

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-05 13:30     ` Rafael J. Wysocki
@ 2021-03-05 13:40       ` David Hildenbrand
  2021-03-05 15:24         ` George Kennedy
  2021-03-10 18:39         ` Rafael J. Wysocki
  2021-03-07  7:46       ` [PATCH 1/1] ACPI: fix acpi table use after free Mike Rapoport
  1 sibling, 2 replies; 33+ messages in thread
From: David Hildenbrand @ 2021-03-05 13:40 UTC (permalink / raw)
  To: Rafael J. Wysocki, George Kennedy
  Cc: Robert Moore, Erik Kaneda, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko

>> The ibft table, for example, is mapped in via acpi_map() and kmap(). The
>> page for the ibft table is not reserved, so it can end up on the freelist.
> 
> You appear to be saying that it is not sufficient to kmap() a page in
> order to use it safely.  It is also necessary to reserve it upfront,
> for example with the help of memblock_reserve().  Is that correct?  If
> so, is there an alternative way to reserve a page frame?

If the memory is indicated by the BIOS/firmware as valid memory 
(!reserved) but contains actual tables that have to remain untouched 
what happens is:

1) Memblock thinks the memory should be given to the buddy, because it
    is valid memory and was not reserved by anyone (i.e., the bios, early
    allocations).

2) Memblock will expose the pages to the buddy, adding them to the free
    page list.

3) Anybody can allocate them, e.g., via alloc_pages().

The root issue is that pages that should not get exposed to the buddy as 
free pages get exposed to the buddy as free pages. We have to teach 
memblock that these pages are not actually to be used, but instead, area 
reserved.

> 
>>>
>>>> Use memblock_reserve() to reserve all the ACPI table pages.
>>> How is this going to help?
>> If the ibft table page is not reserved, it will end up on the freelist
>> and potentially be allocated before ibft_init() is called.
>>
>> I believe this is the call that causes the ibft table page (in this case
>> pfn=0xbe453) to end up on the freelist:
>>
>> memmap_init_range: size=bd49b, nid=0, zone=1, start_pfn=1000,
>> zone_end_pfn=100000
> 
> David, is commit 7fef431be9c9 related to this and if so, then how?
> 

Memory gets allocated and used in a different order, which seems to have 
exposed (yet another) latent BUG. The same could be reproduced via zone 
shuffling with a little luck.

-- 
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-05 13:40       ` David Hildenbrand
@ 2021-03-05 15:24         ` George Kennedy
  2021-03-10 18:39         ` Rafael J. Wysocki
  1 sibling, 0 replies; 33+ messages in thread
From: George Kennedy @ 2021-03-05 15:24 UTC (permalink / raw)
  To: David Hildenbrand, Rafael J. Wysocki
  Cc: Robert Moore, Erik Kaneda, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko



On 3/5/2021 8:40 AM, David Hildenbrand wrote:
>>> The ibft table, for example, is mapped in via acpi_map() and kmap(). 
>>> The
>>> page for the ibft table is not reserved, so it can end up on the 
>>> freelist.
>>
>> You appear to be saying that it is not sufficient to kmap() a page in
>> order to use it safely.  It is also necessary to reserve it upfront,
>> for example with the help of memblock_reserve().  Is that correct?  If
>> so, is there an alternative way to reserve a page frame?
>
> If the memory is indicated by the BIOS/firmware as valid memory 
> (!reserved) but contains actual tables that have to remain untouched 
> what happens is:
>
> 1) Memblock thinks the memory should be given to the buddy, because it
>    is valid memory and was not reserved by anyone (i.e., the bios, early
>    allocations).
>
> 2) Memblock will expose the pages to the buddy, adding them to the free
>    page list.
>
> 3) Anybody can allocate them, e.g., via alloc_pages().
>
> The root issue is that pages that should not get exposed to the buddy 
> as free pages get exposed to the buddy as free pages. We have to teach 
> memblock that these pages are not actually to be used, but instead, 
> area reserved.
>
>>
>>>>
>>>>> Use memblock_reserve() to reserve all the ACPI table pages.
>>>> How is this going to help?
>>> If the ibft table page is not reserved, it will end up on the freelist
>>> and potentially be allocated before ibft_init() is called.
>>>
>>> I believe this is the call that causes the ibft table page (in this 
>>> case
>>> pfn=0xbe453) to end up on the freelist:
>>>
>>> memmap_init_range: size=bd49b, nid=0, zone=1, start_pfn=1000,
>>> zone_end_pfn=100000
>>
>> David, is commit 7fef431be9c9 related to this and if so, then how?
>>
>
> Memory gets allocated and used in a different order, which seems to 
> have exposed (yet another) latent BUG. The same could be reproduced 
> via zone shuffling with a little luck.

Thank you David for the detailed problem description,
George


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-05 13:30     ` Rafael J. Wysocki
  2021-03-05 13:40       ` David Hildenbrand
@ 2021-03-07  7:46       ` Mike Rapoport
  2021-03-09 17:54         ` Mike Rapoport
  1 sibling, 1 reply; 33+ messages in thread
From: Mike Rapoport @ 2021-03-07  7:46 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: George Kennedy, David Hildenbrand, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

Hello Rafael,

On Fri, Mar 05, 2021 at 02:30:07PM +0100, Rafael J. Wysocki wrote:
> On Fri, Mar 5, 2021 at 12:14 AM George Kennedy <george.kennedy@oracle.com> wrote:
>
> > The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> > page for the ibft table is not reserved, so it can end up on the freelist.
> 
> You appear to be saying that it is not sufficient to kmap() a page in
> order to use it safely.  It is also necessary to reserve it upfront,
> for example with the help of memblock_reserve().  Is that correct?  If
> so, is there an alternative way to reserve a page frame?

Like David said in the other reply, if a BIOS does not mark the memory that
contains an ACPI table as used (e.g. reserved or ACPI data), we need to
make sure the kernel knows that such memory is in use and an early call to
memblock_reserve() is exactly what we need here.
George had this issue with iBFT, but in general this could be any table
that a buggy BIOS forgot to mark as ACPI data.
 
> > >> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > >> index d883176..97deea3 100644
> > >> --- a/arch/x86/kernel/setup.c
> > >> +++ b/arch/x86/kernel/setup.c
> > >> @@ -1046,6 +1046,7 @@ void __init setup_arch(char **cmdline_p)
> > >>          cleanup_highmap();
> > >>
> > >>          memblock_set_current_limit(ISA_END_ADDRESS);
> > >> +       acpi_boot_table_init();
> > > This cannot be moved before the acpi_table_upgrade() invocation AFAICS.
> > >
> > > Why exactly do you want to move it?
> >
> > Want to make sure there are slots for memblock_reserve() to be able to
> > reserve the page.
> 
> Well, that may not require reordering the initialization this way.

The memory that is used by the firmware should be reserved before memblock
allocations are allowed so that ACPI tables won't get trampled by some
random allocation.

On x86 this essentially means that the early reservations need to be
complete before the call to e820__memblock_setup().

We probably need more precise refactoring of ACPI init than simply moving
acpi_boot_table_init() earlier. 
 
> > >>          e820__memblock_setup();
> > >>

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-07  7:46       ` [PATCH 1/1] ACPI: fix acpi table use after free Mike Rapoport
@ 2021-03-09 17:54         ` Mike Rapoport
  2021-03-09 18:29           ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: Mike Rapoport @ 2021-03-09 17:54 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: George Kennedy, David Hildenbrand, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Sun, Mar 07, 2021 at 09:46:22AM +0200, Mike Rapoport wrote:
> Hello Rafael,
> 
> On Fri, Mar 05, 2021 at 02:30:07PM +0100, Rafael J. Wysocki wrote:
> > On Fri, Mar 5, 2021 at 12:14 AM George Kennedy <george.kennedy@oracle.com> wrote:
> >
> > > The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> > > page for the ibft table is not reserved, so it can end up on the freelist.
> > 
> > You appear to be saying that it is not sufficient to kmap() a page in
> > order to use it safely.  It is also necessary to reserve it upfront,
> > for example with the help of memblock_reserve().  Is that correct?  If
> > so, is there an alternative way to reserve a page frame?
> 
> Like David said in the other reply, if a BIOS does not mark the memory that
> contains an ACPI table as used (e.g. reserved or ACPI data), we need to
> make sure the kernel knows that such memory is in use and an early call to
> memblock_reserve() is exactly what we need here.
> George had this issue with iBFT, but in general this could be any table
> that a buggy BIOS forgot to mark as ACPI data.
  
BTW, I wonder is there a fundamental reason to use ioremap() to access ACPI
tables at all? 
In the end, they reside in RAM and, apparently, they live at the same DIMM
as neighboring "normal memory" so why cannot we just map them normally as
read-only not executable?


> > > >> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > > >> index d883176..97deea3 100644
> > > >> --- a/arch/x86/kernel/setup.c
> > > >> +++ b/arch/x86/kernel/setup.c
> > > >> @@ -1046,6 +1046,7 @@ void __init setup_arch(char **cmdline_p)
> > > >>          cleanup_highmap();
> > > >>
> > > >>          memblock_set_current_limit(ISA_END_ADDRESS);
> > > >> +       acpi_boot_table_init();
> > > > This cannot be moved before the acpi_table_upgrade() invocation AFAICS.
> > > >
> > > > Why exactly do you want to move it?
> > >
> > > Want to make sure there are slots for memblock_reserve() to be able to
> > > reserve the page.
> > 
> > Well, that may not require reordering the initialization this way.
> 
> The memory that is used by the firmware should be reserved before memblock
> allocations are allowed so that ACPI tables won't get trampled by some
> random allocation.
> 
> On x86 this essentially means that the early reservations need to be
> complete before the call to e820__memblock_setup().
> 
> We probably need more precise refactoring of ACPI init than simply moving
> acpi_boot_table_init() earlier. 
>  
> > > >>          e820__memblock_setup();
> > > >>
> 
> -- 
> Sincerely yours,
> Mike.

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-09 17:54         ` Mike Rapoport
@ 2021-03-09 18:29           ` Rafael J. Wysocki
  2021-03-09 20:16             ` Mike Rapoport
  0 siblings, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-09 18:29 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Rafael J. Wysocki, George Kennedy, David Hildenbrand,
	Robert Moore, Erik Kaneda, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Tue, Mar 9, 2021 at 6:54 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> On Sun, Mar 07, 2021 at 09:46:22AM +0200, Mike Rapoport wrote:
> > Hello Rafael,
> >
> > On Fri, Mar 05, 2021 at 02:30:07PM +0100, Rafael J. Wysocki wrote:
> > > On Fri, Mar 5, 2021 at 12:14 AM George Kennedy <george.kennedy@oracle.com> wrote:
> > >
> > > > The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> > > > page for the ibft table is not reserved, so it can end up on the freelist.
> > >
> > > You appear to be saying that it is not sufficient to kmap() a page in
> > > order to use it safely.  It is also necessary to reserve it upfront,
> > > for example with the help of memblock_reserve().  Is that correct?  If
> > > so, is there an alternative way to reserve a page frame?
> >
> > Like David said in the other reply, if a BIOS does not mark the memory that
> > contains an ACPI table as used (e.g. reserved or ACPI data), we need to
> > make sure the kernel knows that such memory is in use and an early call to
> > memblock_reserve() is exactly what we need here.
> > George had this issue with iBFT, but in general this could be any table
> > that a buggy BIOS forgot to mark as ACPI data.
>
> BTW, I wonder is there a fundamental reason to use ioremap() to access ACPI
> tables at all?
> In the end, they reside in RAM and, apparently, they live at the same DIMM
> as neighboring "normal memory" so why cannot we just map them normally as
> read-only not executable?

This may be NVS memory (depending on the configuration of the system)
which isn't "normal" RAM AFAICS.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-09 18:29           ` Rafael J. Wysocki
@ 2021-03-09 20:16             ` Mike Rapoport
  0 siblings, 0 replies; 33+ messages in thread
From: Mike Rapoport @ 2021-03-09 20:16 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: George Kennedy, David Hildenbrand, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Tue, Mar 09, 2021 at 07:29:51PM +0100, Rafael J. Wysocki wrote:
> On Tue, Mar 9, 2021 at 6:54 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >
> > On Sun, Mar 07, 2021 at 09:46:22AM +0200, Mike Rapoport wrote:
> > > Hello Rafael,
> > >
> > > On Fri, Mar 05, 2021 at 02:30:07PM +0100, Rafael J. Wysocki wrote:
> > > > On Fri, Mar 5, 2021 at 12:14 AM George Kennedy <george.kennedy@oracle.com> wrote:
> > > >
> > > > > The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> > > > > page for the ibft table is not reserved, so it can end up on the freelist.
> > > >
> > > > You appear to be saying that it is not sufficient to kmap() a page in
> > > > order to use it safely.  It is also necessary to reserve it upfront,
> > > > for example with the help of memblock_reserve().  Is that correct?  If
> > > > so, is there an alternative way to reserve a page frame?
> > >
> > > Like David said in the other reply, if a BIOS does not mark the memory that
> > > contains an ACPI table as used (e.g. reserved or ACPI data), we need to
> > > make sure the kernel knows that such memory is in use and an early call to
> > > memblock_reserve() is exactly what we need here.
> > > George had this issue with iBFT, but in general this could be any table
> > > that a buggy BIOS forgot to mark as ACPI data.
> >
> > BTW, I wonder is there a fundamental reason to use ioremap() to access ACPI
> > tables at all?
> > In the end, they reside in RAM and, apparently, they live at the same DIMM
> > as neighboring "normal memory" so why cannot we just map them normally as
> > read-only not executable?
> 
> This may be NVS memory (depending on the configuration of the system)
> which isn't "normal" RAM AFAICS.

Hmm, according to the description of "ACPI NVS" in ACPI 6.3

	ACPI NVS Memory. This range of addresses is in use or reserved by
			 the system and must not be used by the operating
			 system. This range is required to be saved and
 			 restored across an NVS sleep.

it behaves more like "normal" RAM rather than actual non-volatile storage.

There are other places in ACPI text that imply that "ACPI NVS" is actually
RAM, it's just reserved by the firmware.

And judging by the example below both "ACPI data" and "ACPI NVS" live in
the very same DIMM as "usable" RAM.

[    0.000000] BIOS-e820: [mem 0x0000000029931000-0x0000000029932fff] usable
[    0.000000] BIOS-e820: [mem 0x0000000029933000-0x000000002993afff] ACPI data
[    0.000000] BIOS-e820: [mem 0x000000002993b000-0x000000002993bfff] ACPI NVS
[    0.000000] BIOS-e820: [mem 0x000000002993c000-0x0000000029940fff] ACPI data
[    0.000000] BIOS-e820: [mem 0x0000000029941000-0x0000000029944fff] usable

Unfortunately, both UEFI and ACPI standards are very vague about the
meaning of "ACPI NVS" so there may be systems that use real non-volatile
storage for it...

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-05 13:40       ` David Hildenbrand
  2021-03-05 15:24         ` George Kennedy
@ 2021-03-10 18:39         ` Rafael J. Wysocki
  2021-03-10 18:54           ` Rafael J. Wysocki
  1 sibling, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-10 18:39 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Rafael J. Wysocki, George Kennedy, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko

On Fri, Mar 5, 2021 at 2:40 PM David Hildenbrand <david@redhat.com> wrote:
>
> >> The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> >> page for the ibft table is not reserved, so it can end up on the freelist.
> >
> > You appear to be saying that it is not sufficient to kmap() a page in
> > order to use it safely.  It is also necessary to reserve it upfront,
> > for example with the help of memblock_reserve().  Is that correct?  If
> > so, is there an alternative way to reserve a page frame?
>
> If the memory is indicated by the BIOS/firmware as valid memory
> (!reserved) but contains actual tables that have to remain untouched
> what happens is:
>
> 1) Memblock thinks the memory should be given to the buddy, because it
>     is valid memory and was not reserved by anyone (i.e., the bios, early
>     allocations).
>
> 2) Memblock will expose the pages to the buddy, adding them to the free
>     page list.
>
> 3) Anybody can allocate them, e.g., via alloc_pages().
>
> The root issue is that pages that should not get exposed to the buddy as
> free pages get exposed to the buddy as free pages. We have to teach
> memblock that these pages are not actually to be used, but instead, area
> reserved.
>
> >
> >>>
> >>>> Use memblock_reserve() to reserve all the ACPI table pages.
> >>> How is this going to help?
> >> If the ibft table page is not reserved, it will end up on the freelist
> >> and potentially be allocated before ibft_init() is called.
> >>
> >> I believe this is the call that causes the ibft table page (in this case
> >> pfn=0xbe453) to end up on the freelist:
> >>
> >> memmap_init_range: size=bd49b, nid=0, zone=1, start_pfn=1000,
> >> zone_end_pfn=100000
> >
> > David, is commit 7fef431be9c9 related to this and if so, then how?
> >
>
> Memory gets allocated and used in a different order, which seems to have
> exposed (yet another) latent BUG.

Well, you can call it that, or you can say that things worked under
certain assumptions regarding the memory allocation order which are
not met any more.

> The same could be reproduced via zone shuffling with a little luck.

But nobody does that in practice.

This would be relatively straightforward to address if ACPICA was not
involved in it, but unfortunately that's not the case.

Changing this part of ACPICA is risky, because such changes may affect
other OSes using it, so that requires some serious consideration.
Alternatively, the previous memory allocation order in Linux could be
restored.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-10 18:39         ` Rafael J. Wysocki
@ 2021-03-10 18:54           ` Rafael J. Wysocki
  2021-03-10 19:10             ` David Hildenbrand
  0 siblings, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-10 18:54 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Rafael J. Wysocki, George Kennedy, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko

On Wed, Mar 10, 2021 at 7:39 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Fri, Mar 5, 2021 at 2:40 PM David Hildenbrand <david@redhat.com> wrote:
> >
> > >> The ibft table, for example, is mapped in via acpi_map() and kmap(). The
> > >> page for the ibft table is not reserved, so it can end up on the freelist.
> > >
> > > You appear to be saying that it is not sufficient to kmap() a page in
> > > order to use it safely.  It is also necessary to reserve it upfront,
> > > for example with the help of memblock_reserve().  Is that correct?  If
> > > so, is there an alternative way to reserve a page frame?
> >
> > If the memory is indicated by the BIOS/firmware as valid memory
> > (!reserved) but contains actual tables that have to remain untouched
> > what happens is:
> >
> > 1) Memblock thinks the memory should be given to the buddy, because it
> >     is valid memory and was not reserved by anyone (i.e., the bios, early
> >     allocations).
> >
> > 2) Memblock will expose the pages to the buddy, adding them to the free
> >     page list.
> >
> > 3) Anybody can allocate them, e.g., via alloc_pages().
> >
> > The root issue is that pages that should not get exposed to the buddy as
> > free pages get exposed to the buddy as free pages. We have to teach
> > memblock that these pages are not actually to be used, but instead, area
> > reserved.
> >
> > >
> > >>>
> > >>>> Use memblock_reserve() to reserve all the ACPI table pages.
> > >>> How is this going to help?
> > >> If the ibft table page is not reserved, it will end up on the freelist
> > >> and potentially be allocated before ibft_init() is called.
> > >>
> > >> I believe this is the call that causes the ibft table page (in this case
> > >> pfn=0xbe453) to end up on the freelist:
> > >>
> > >> memmap_init_range: size=bd49b, nid=0, zone=1, start_pfn=1000,
> > >> zone_end_pfn=100000
> > >
> > > David, is commit 7fef431be9c9 related to this and if so, then how?
> > >
> >
> > Memory gets allocated and used in a different order, which seems to have
> > exposed (yet another) latent BUG.
>
> Well, you can call it that, or you can say that things worked under
> certain assumptions regarding the memory allocation order which are
> not met any more.
>
> > The same could be reproduced via zone shuffling with a little luck.
>
> But nobody does that in practice.
>
> This would be relatively straightforward to address if ACPICA was not
> involved in it, but unfortunately that's not the case.
>
> Changing this part of ACPICA is risky, because such changes may affect
> other OSes using it, so that requires some serious consideration.
> Alternatively, the previous memory allocation order in Linux could be
> restored.

Of course, long-term this needs to be addressed in the ACPI
initialization code, because it clearly is not robust enough, but in
the meantime there's practical breakage observable in the field, so
what can be done about that?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-10 18:54           ` Rafael J. Wysocki
@ 2021-03-10 19:10             ` David Hildenbrand
  2021-03-10 19:38               ` Mike Rapoport
  0 siblings, 1 reply; 33+ messages in thread
From: David Hildenbrand @ 2021-03-10 19:10 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: George Kennedy, Robert Moore, Erik Kaneda, Rafael Wysocki,
	Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Mike Rapoport, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko


>>> Memory gets allocated and used in a different order, which seems to have
>>> exposed (yet another) latent BUG.
>>
>> Well, you can call it that, or you can say that things worked under
>> certain assumptions regarding the memory allocation order which are
>> not met any more.
>>
>>> The same could be reproduced via zone shuffling with a little luck.
>>
>> But nobody does that in practice.
>>

Dan will most certainly object. And I don't know what makes you speak in 
absolute words here.

>> This would be relatively straightforward to address if ACPICA was not
>> involved in it, but unfortunately that's not the case.
>>
>> Changing this part of ACPICA is risky, because such changes may affect
>> other OSes using it, so that requires some serious consideration.
>> Alternatively, the previous memory allocation order in Linux could be
>> restored.
> 
> Of course, long-term this needs to be addressed in the ACPI
> initialization code, because it clearly is not robust enough, but in
> the meantime there's practical breakage observable in the field, so
> what can be done about that?

*joke* enable zone shuffling.

No seriously, fix the latent BUG. What again is problematic about 
excluding these pages from the page allcoator, for example, via 
memblock_reserve()?

@Mike?

-- 
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-10 19:10             ` David Hildenbrand
@ 2021-03-10 19:38               ` Mike Rapoport
  2021-03-10 19:47                 ` David Hildenbrand
  0 siblings, 1 reply; 33+ messages in thread
From: Mike Rapoport @ 2021-03-10 19:38 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Rafael J. Wysocki, George Kennedy, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Wed, Mar 10, 2021 at 08:10:42PM +0100, David Hildenbrand wrote:
> 
> > > > Memory gets allocated and used in a different order, which seems to have
> > > > exposed (yet another) latent BUG.
> > > 
> > > Well, you can call it that, or you can say that things worked under
> > > certain assumptions regarding the memory allocation order which are
> > > not met any more.

Regardless of the assumptions in the page allocator we had a page used by
the firmware on a free list, which is a bug.

> > > > The same could be reproduced via zone shuffling with a little luck.
> > > 
> > > But nobody does that in practice.
> > > 
> 
> Dan will most certainly object. And I don't know what makes you speak in
> absolute words here.
> 
> > > This would be relatively straightforward to address if ACPICA was not
> > > involved in it, but unfortunately that's not the case.
> > > 
> > > Changing this part of ACPICA is risky, because such changes may affect
> > > other OSes using it, so that requires some serious consideration.
> > > Alternatively, the previous memory allocation order in Linux could be
> > > restored.
> > 
> > Of course, long-term this needs to be addressed in the ACPI
> > initialization code, because it clearly is not robust enough, but in
> > the meantime there's practical breakage observable in the field, so
> > what can be done about that?
> 
> *joke* enable zone shuffling.
> 
> No seriously, fix the latent BUG. What again is problematic about excluding
> these pages from the page allcoator, for example, via memblock_reserve()?
> 
> @Mike?

There is some care that should be taken to make sure we get the order
right, but I don't see a fundamental issue here.

If I understand correctly, Rafael's concern is about changing the parts of
ACPICA that should be OS agnostic, so I think we just need another place to
call memblock_reserve() rather than acpi_tb_install_table_with_override().

Since the reservation should be done early in x86::setup_arch() (and
probably in arm64::setup_arch()) we might just have a function that parses
table headers and reserves them, similarly to how we parse the tables
during KASLR setup.

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-10 19:38               ` Mike Rapoport
@ 2021-03-10 19:47                 ` David Hildenbrand
  2021-03-11 15:36                   ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: David Hildenbrand @ 2021-03-10 19:47 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Rafael J. Wysocki, George Kennedy, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

>>>>> The same could be reproduced via zone shuffling with a little luck.
>>>>
>>>> But nobody does that in practice.
>>>>
>>
>> Dan will most certainly object. And I don't know what makes you speak in
>> absolute words here.
>>
>>>> This would be relatively straightforward to address if ACPICA was not
>>>> involved in it, but unfortunately that's not the case.
>>>>
>>>> Changing this part of ACPICA is risky, because such changes may affect
>>>> other OSes using it, so that requires some serious consideration.
>>>> Alternatively, the previous memory allocation order in Linux could be
>>>> restored.
>>>
>>> Of course, long-term this needs to be addressed in the ACPI
>>> initialization code, because it clearly is not robust enough, but in
>>> the meantime there's practical breakage observable in the field, so
>>> what can be done about that?
>>
>> *joke* enable zone shuffling.
>>
>> No seriously, fix the latent BUG. What again is problematic about excluding
>> these pages from the page allcoator, for example, via memblock_reserve()?
>>
>> @Mike?
> 
> There is some care that should be taken to make sure we get the order
> right, but I don't see a fundamental issue here.
> 
> If I understand correctly, Rafael's concern is about changing the parts of
> ACPICA that should be OS agnostic, so I think we just need another place to
> call memblock_reserve() rather than acpi_tb_install_table_with_override().
> 
> Since the reservation should be done early in x86::setup_arch() (and
> probably in arm64::setup_arch()) we might just have a function that parses
> table headers and reserves them, similarly to how we parse the tables
> during KASLR setup.
> 

FWIW, something like below would hide our latent BUG again properly (lol).
But I guess I don't have to express how ugly and wrong that is. Not to mention
what happens if memblock decides to allocate that memory area earlier
for some other user (including CMA, ...).

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e4b29ee2b1e..ec71b7c63dbe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1566,6 +1566,21 @@ void __free_pages_core(struct page *page, unsigned int order)
  
         atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
  
+       /*
+        * BUG ALERT: x86-64 ACPI code has latent BUGs where ACPI tables
+        * that must not get allocated/modified will get exposed to the buddy
+        * as free pages; anybody can allocate and use them once in the free
+        * lists.
+        *
+        * Instead of fixing the BUG, revert the change to the
+        * freeing/allocation order during boot that revealed it and cross
+        * fingers that everything will be fine.
+        */
+       if (system_state < SYSTEM_RUNNING) {
+               __free_pages_ok(page, order, FPI_NONE);
+               return;
+       }
+
         /*
          * Bypass PCP and place fresh pages right to the tail, primarily
          * relevant for memory onlining.


-- 
Thanks,

David / dhildenb


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-10 19:47                 ` David Hildenbrand
@ 2021-03-11 15:36                   ` Rafael J. Wysocki
  2021-03-14 18:59                     ` Mike Rapoport
  0 siblings, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-11 15:36 UTC (permalink / raw)
  To: David Hildenbrand, Mike Rapoport
  Cc: Rafael J. Wysocki, George Kennedy, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
>
> >>>>> The same could be reproduced via zone shuffling with a little luck.
> >>>>
> >>>> But nobody does that in practice.
> >>>>
> >>
> >> Dan will most certainly object. And I don't know what makes you speak in
> >> absolute words here.
> >>
> >>>> This would be relatively straightforward to address if ACPICA was not
> >>>> involved in it, but unfortunately that's not the case.
> >>>>
> >>>> Changing this part of ACPICA is risky, because such changes may affect
> >>>> other OSes using it, so that requires some serious consideration.
> >>>> Alternatively, the previous memory allocation order in Linux could be
> >>>> restored.
> >>>
> >>> Of course, long-term this needs to be addressed in the ACPI
> >>> initialization code, because it clearly is not robust enough, but in
> >>> the meantime there's practical breakage observable in the field, so
> >>> what can be done about that?
> >>
> >> *joke* enable zone shuffling.
> >>
> >> No seriously, fix the latent BUG. What again is problematic about excluding
> >> these pages from the page allcoator, for example, via memblock_reserve()?
> >>
> >> @Mike?
> >
> > There is some care that should be taken to make sure we get the order
> > right, but I don't see a fundamental issue here.

Me neither.

> > If I understand correctly, Rafael's concern is about changing the parts of
> > ACPICA that should be OS agnostic, so I think we just need another place to
> > call memblock_reserve() rather than acpi_tb_install_table_with_override().

Something like this.

There is also the problem that memblock_reserve() needs to be called
for all of the tables early enough, which will require some reordering
of the early init code.

> > Since the reservation should be done early in x86::setup_arch() (and
> > probably in arm64::setup_arch()) we might just have a function that parses
> > table headers and reserves them, similarly to how we parse the tables
> > during KASLR setup.

Right.

>
> FWIW, something like below would hide our latent BUG again properly (lol).
> But I guess I don't have to express how ugly and wrong that is. Not to mention
> what happens if memblock decides to allocate that memory area earlier
> for some other user (including CMA, ...).

Fair enough.

> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 3e4b29ee2b1e..ec71b7c63dbe 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1566,6 +1566,21 @@ void __free_pages_core(struct page *page, unsigned int order)
>
>          atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>
> +       /*
> +        * BUG ALERT: x86-64 ACPI code has latent BUGs where ACPI tables
> +        * that must not get allocated/modified will get exposed to the buddy
> +        * as free pages; anybody can allocate and use them once in the free
> +        * lists.
> +        *
> +        * Instead of fixing the BUG, revert the change to the
> +        * freeing/allocation order during boot that revealed it and cross
> +        * fingers that everything will be fine.
> +        */
> +       if (system_state < SYSTEM_RUNNING) {
> +               __free_pages_ok(page, order, FPI_NONE);
> +               return;
> +       }
> +
>          /*
>           * Bypass PCP and place fresh pages right to the tail, primarily
>           * relevant for memory onlining.
>
>
> --

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-11 15:36                   ` Rafael J. Wysocki
@ 2021-03-14 18:59                     ` Mike Rapoport
  2021-03-15 16:19                       ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: Mike Rapoport @ 2021-03-14 18:59 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: David Hildenbrand, George Kennedy, Robert Moore, Erik Kaneda,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > >
> > > There is some care that should be taken to make sure we get the order
> > > right, but I don't see a fundamental issue here.
> 
> Me neither.
> 
> > > If I understand correctly, Rafael's concern is about changing the parts of
> > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> 
> Something like this.
> 
> There is also the problem that memblock_reserve() needs to be called
> for all of the tables early enough, which will require some reordering
> of the early init code.
> 
> > > Since the reservation should be done early in x86::setup_arch() (and
> > > probably in arm64::setup_arch()) we might just have a function that parses
> > > table headers and reserves them, similarly to how we parse the tables
> > > during KASLR setup.
> 
> Right.

I've looked at it a bit more and we do something like the patch below that
nearly duplicates acpi_tb_parse_root_table() which is not very nice.
Besides, reserving ACPI tables early and then calling acpi_table_init()
(and acpi_tb_parse_root_table() again would mean doing the dance with
early_memremap() twice for no good reason.

I believe the most effective way to deal with this would be to have a
function that does parsing, reservation and installs the tables supplied by
the firmware which can be called really early and then another function
that overrides tables if needed a some later point.

--------------------------------------------------------------

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d883176ef2ce..48bcb1c355ad 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -910,6 +910,8 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+	acpi_reserve_tables();
+
 	if (efi_enabled(EFI_BOOT))
 		efi_memblock_x86_reserve_range();
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index e2d0046799a2..6cb5bcf3fb49 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -133,6 +133,9 @@ void
 acpi_tb_install_table_with_override(struct acpi_table_desc *new_table_desc,
 				    u8 override, u32 *table_index);
 
+acpi_physical_address
+acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size);
+
 acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address);
 
 acpi_status
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 4b9b329a5a92..2ad3c08915d4 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -14,10 +14,6 @@
 #define _COMPONENT          ACPI_TABLES
 ACPI_MODULE_NAME("tbutils")
 
-/* Local prototypes */
-static acpi_physical_address
-acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size);
-
 #if (!ACPI_REDUCED_HARDWARE)
 /*******************************************************************************
  *
@@ -162,7 +158,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index)
  *
  ******************************************************************************/
 
-static acpi_physical_address
+acpi_physical_address
 acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size)
 {
 	u64 address64;
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index e48690a006a4..e4b721bada04 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -23,6 +23,9 @@
 #include <linux/security.h>
 #include "internal.h"
 
+#include "acpica/aclocal.h"
+#include "acpica/actables.h"
+
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
 #include CONFIG_ACPI_CUSTOM_DSDT_FILE
 #endif
@@ -809,6 +812,107 @@ int __init acpi_table_init(void)
 	return 0;
 }
 
+void __init acpi_reserve_tables(void)
+{
+	u32 i, table_count, table_entry_size, length;
+	acpi_physical_address rsdp_address, address;
+	struct acpi_table_header *table, *hdr;
+	struct acpi_table_rsdp *rsdp;
+	u8 *table_entry;
+
+	rsdp_address = acpi_os_get_root_pointer();
+	if (!rsdp_address) {
+		pr_debug("%s: no rsdp_address\n", __func__);
+		return;
+	}
+
+	/* Map the entire RSDP and extract the address of the RSDT or XSDT */
+	rsdp = acpi_os_map_memory(rsdp_address, sizeof(struct acpi_table_rsdp));
+	if (!rsdp) {
+		pr_debug("%s: can't map rsdp\n", __func__);
+		return;
+	}
+
+	memblock_reserve(rsdp_address, sizeof(struct acpi_table_rsdp));
+
+	/* Use XSDT if present and not overridden. Otherwise, use RSDT */
+	if ((rsdp->revision > 1) &&
+	    rsdp->xsdt_physical_address && !acpi_gbl_do_not_use_xsdt) {
+		address = (acpi_physical_address)rsdp->xsdt_physical_address;
+		table_entry_size = ACPI_XSDT_ENTRY_SIZE;
+	} else {
+		address = (acpi_physical_address)rsdp->rsdt_physical_address;
+		table_entry_size = ACPI_RSDT_ENTRY_SIZE;
+	}
+
+	/*
+	 * It is not possible to map more than one entry in some environments,
+	 * so unmap the RSDP here before mapping other tables
+	 */
+	acpi_os_unmap_memory(rsdp, sizeof(struct acpi_table_rsdp));
+
+	/* Map the RSDT/XSDT table header to get the full table length */
+
+	table = acpi_os_map_memory(address, sizeof(struct acpi_table_header));
+	if (!table) {
+		pr_debug("%s: can't map [RX]SDT header\n", __func__);
+		return;
+	}
+
+	/*
+	 * Validate length of the table, and map entire table.
+	 * Minimum length table must contain at least one entry.
+	 */
+	length = table->length;
+	acpi_os_unmap_memory(table, sizeof(struct acpi_table_header));
+
+	if (length < (sizeof(struct acpi_table_header) + table_entry_size)) {
+		pr_debug("Invalid table length 0x%X in RSDT/XSDT", length);
+		return;
+	}
+
+	memblock_reserve(address, length);
+
+	table = acpi_os_map_memory(address, length);
+	if (!table) {
+		pr_debug("%s: can't map [RX]SDT table\n", __func__);
+		return;
+	}
+
+	/* Get the number of entries and pointer to first entry */
+	table_count = (u32)((table->length - sizeof(struct acpi_table_header)) /
+			    table_entry_size);
+	table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header));
+
+	/* reserve tables pointed from the RSDT/XSDT */
+	for (i = 0; i < table_count; i++, table_entry += table_entry_size) {
+
+		/* Get the table physical address (32-bit for RSDT, 64-bit for XSDT) */
+
+		address =
+		    acpi_tb_get_root_table_entry(table_entry, table_entry_size);
+
+		/* Skip NULL entries in RSDT/XSDT */
+
+		if (!address)
+			continue;
+
+		hdr = acpi_os_map_memory(address, sizeof(struct acpi_table_header));
+		if (!hdr) {
+			pr_debug("%s: can't map %d header\n", __func__, i);
+			continue;
+		}
+
+		memblock_reserve(address, hdr->length);
+
+		/* FIXME: parse FADT and reserve embedded there tables */
+
+		acpi_os_unmap_memory(hdr, sizeof(struct acpi_table_header));
+	}
+
+	acpi_os_unmap_memory(table, length);
+}
+
 static int __init acpi_parse_apic_instance(char *str)
 {
 	if (!str)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 9f432411e988..d8688e4b6726 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -226,6 +226,8 @@ void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
+void acpi_reserve_tables(void);
+
 int acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,

-- 
Sincerely yours,
Mike.

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-14 18:59                     ` Mike Rapoport
@ 2021-03-15 16:19                       ` Rafael J. Wysocki
  2021-03-15 18:05                         ` Rafael J. Wysocki
  2021-03-17 20:14                         ` Rafael J. Wysocki
  0 siblings, 2 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-15 16:19 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Rafael J. Wysocki, David Hildenbrand, George Kennedy,
	Robert Moore, Erik Kaneda, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > >
> > > > There is some care that should be taken to make sure we get the order
> > > > right, but I don't see a fundamental issue here.
> >
> > Me neither.
> >
> > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> >
> > Something like this.
> >
> > There is also the problem that memblock_reserve() needs to be called
> > for all of the tables early enough, which will require some reordering
> > of the early init code.
> >
> > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > table headers and reserves them, similarly to how we parse the tables
> > > > during KASLR setup.
> >
> > Right.
>
> I've looked at it a bit more and we do something like the patch below that
> nearly duplicates acpi_tb_parse_root_table() which is not very nice.

It looks to me that the code need not be duplicated (see below).

> Besides, reserving ACPI tables early and then calling acpi_table_init()
> (and acpi_tb_parse_root_table() again would mean doing the dance with
> early_memremap() twice for no good reason.

That'd be simply inefficient which is kind of acceptable to me to start with.

And I changing the ACPICA code can be avoided at least initially, it
by itself would be a good enough reason.

> I believe the most effective way to deal with this would be to have a
> function that does parsing, reservation and installs the tables supplied by
> the firmware which can be called really early and then another function
> that overrides tables if needed a some later point.

I agree that this should be the direction to go into.

However, it looks to me that something like the following could be
done to start with:

(a) Make __acpi_map_table() call memblock_reserve() in addition to
early_memremap().

My assumption here is that the memblock_reserve() will simply be
ignored if it is called too late.

(b) Introduce acpi_reserve_tables() as something like

void __init acpi_table_reserve(void)
{
        acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
}

Because initial_tables is passed to acpi_initialize_tables() above and
allow_resize is 0, the array used by it will simply get overwritten
when acpi_table_init() gets called.

(c) Make setup_arch() call acpi_table_reserve() like in the original
patch from George.

Would that work?

If so, I'll cut a patch along these lines.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-15 16:19                       ` Rafael J. Wysocki
@ 2021-03-15 18:05                         ` Rafael J. Wysocki
  2021-03-17 20:14                         ` Rafael J. Wysocki
  1 sibling, 0 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-15 18:05 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Mike Rapoport, David Hildenbrand, George Kennedy, Robert Moore,
	Erik Kaneda, Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Mon, Mar 15, 2021 at 5:19 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >
> > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > >
> > > > > There is some care that should be taken to make sure we get the order
> > > > > right, but I don't see a fundamental issue here.
> > >
> > > Me neither.
> > >
> > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > >
> > > Something like this.
> > >
> > > There is also the problem that memblock_reserve() needs to be called
> > > for all of the tables early enough, which will require some reordering
> > > of the early init code.
> > >
> > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > during KASLR setup.
> > >
> > > Right.
> >
> > I've looked at it a bit more and we do something like the patch below that
> > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
>
> It looks to me that the code need not be duplicated (see below).
>
> > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > early_memremap() twice for no good reason.
>
> That'd be simply inefficient which is kind of acceptable to me to start with.
>
> And I changing the ACPICA code can be avoided at least initially, it
> by itself would be a good enough reason.
>
> > I believe the most effective way to deal with this would be to have a
> > function that does parsing, reservation and installs the tables supplied by
> > the firmware which can be called really early and then another function
> > that overrides tables if needed a some later point.
>
> I agree that this should be the direction to go into.
>
> However, it looks to me that something like the following could be
> done to start with:
>
> (a) Make __acpi_map_table() call memblock_reserve() in addition to
> early_memremap().
>
> My assumption here is that the memblock_reserve() will simply be
> ignored if it is called too late.
>
> (b) Introduce acpi_reserve_tables() as something like
>
> void __init acpi_table_reserve(void)
> {
>         acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
> }
>
> Because initial_tables is passed to acpi_initialize_tables() above and
> allow_resize is 0, the array used by it will simply get overwritten
> when acpi_table_init() gets called.
>
> (c) Make setup_arch() call acpi_table_reserve() like in the original
> patch from George.
>
> Would that work?

Well, that doesn't work, so more digging ...

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-15 16:19                       ` Rafael J. Wysocki
  2021-03-15 18:05                         ` Rafael J. Wysocki
@ 2021-03-17 20:14                         ` Rafael J. Wysocki
  2021-03-17 22:28                           ` George Kennedy
  2021-03-18  7:25                           ` Mike Rapoport
  1 sibling, 2 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-17 20:14 UTC (permalink / raw)
  To: Mike Rapoport, Erik Kaneda
  Cc: Rafael J. Wysocki, David Hildenbrand, George Kennedy,
	Robert Moore, Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >
> > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > >
> > > > > There is some care that should be taken to make sure we get the order
> > > > > right, but I don't see a fundamental issue here.
> > >
> > > Me neither.
> > >
> > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > >
> > > Something like this.
> > >
> > > There is also the problem that memblock_reserve() needs to be called
> > > for all of the tables early enough, which will require some reordering
> > > of the early init code.
> > >
> > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > during KASLR setup.
> > >
> > > Right.
> >
> > I've looked at it a bit more and we do something like the patch below that
> > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> 
> It looks to me that the code need not be duplicated (see below).
> 
> > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > early_memremap() twice for no good reason.
> 
> That'd be simply inefficient which is kind of acceptable to me to start with.
> 
> And I changing the ACPICA code can be avoided at least initially, it
> by itself would be a good enough reason.
> 
> > I believe the most effective way to deal with this would be to have a
> > function that does parsing, reservation and installs the tables supplied by
> > the firmware which can be called really early and then another function
> > that overrides tables if needed a some later point.
> 
> I agree that this should be the direction to go into.

So maybe something like the patch below?

I'm not sure if acpi_boot_table_prepare() gets called early enough, though.

Also this still may not play well with initrd-based table overrides. Erik, do
you have any insights here?

And ia64 needs to be updated too.

---
 arch/x86/kernel/acpi/boot.c |   12 +++++++++---
 arch/x86/kernel/setup.c     |    3 +++
 drivers/acpi/tables.c       |   24 +++++++++++++++++++++---
 include/linux/acpi.h        |    9 +++++++--
 4 files changed, 40 insertions(+), 8 deletions(-)

Index: linux-pm/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-pm.orig/arch/x86/kernel/acpi/boot.c
+++ linux-pm/arch/x86/kernel/acpi/boot.c
@@ -1541,7 +1541,7 @@ static const struct dmi_system_id acpi_d
  *	...
  */
 
-void __init acpi_boot_table_init(void)
+void __init acpi_boot_table_prepare(void)
 {
 	dmi_check_system(acpi_dmi_table);
 
@@ -1554,10 +1554,16 @@ void __init acpi_boot_table_init(void)
 	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
-	if (acpi_table_init()) {
+	if (acpi_table_prepare())
 		disable_acpi();
+}
+
+void __init acpi_boot_table_init(void)
+{
+	if (acpi_disabled)
 		return;
-	}
+
+	acpi_table_init();
 
 	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
 
Index: linux-pm/arch/x86/kernel/setup.c
===================================================================
--- linux-pm.orig/arch/x86/kernel/setup.c
+++ linux-pm/arch/x86/kernel/setup.c
@@ -1070,6 +1070,9 @@ void __init setup_arch(char **cmdline_p)
 	/* preallocate 4k for mptable mpc */
 	e820__memblock_alloc_reserved_mpc_new();
 
+	/* Look for ACPI tables and reserve memory occupied by them. */
+	acpi_boot_table_prepare();
+
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 	setup_bios_corruption_check();
 #endif
Index: linux-pm/include/linux/acpi.h
===================================================================
--- linux-pm.orig/include/linux/acpi.h
+++ linux-pm/include/linux/acpi.h
@@ -222,11 +222,13 @@ void __iomem *__acpi_map_table(unsigned
 void __acpi_unmap_table(void __iomem *map, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
+void acpi_boot_table_prepare (void);
 void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
-int acpi_table_init (void);
+int acpi_table_prepare (void);
+void acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,
 			      int entry_id,
@@ -814,9 +816,12 @@ static inline int acpi_boot_init(void)
 	return 0;
 }
 
+static inline void acpi_boot_table_prepare(void)
+{
+}
+
 static inline void acpi_boot_table_init(void)
 {
-	return;
 }
 
 static inline int acpi_mps_check(void)
Index: linux-pm/drivers/acpi/tables.c
===================================================================
--- linux-pm.orig/drivers/acpi/tables.c
+++ linux-pm/drivers/acpi/tables.c
@@ -788,9 +788,10 @@ acpi_status acpi_os_table_override(struc
  * result: sdt_entry[] is initialized
  */
 
-int __init acpi_table_init(void)
+int __init acpi_table_prepare(void)
 {
 	acpi_status status;
+	int i;
 
 	if (acpi_verify_table_checksum) {
 		pr_info("Early table checksum verification enabled\n");
@@ -803,12 +804,29 @@ int __init acpi_table_init(void)
 	status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
 	if (ACPI_FAILURE(status))
 		return -EINVAL;
-	acpi_table_initrd_scan();
 
-	check_multiple_madt();
+	for (i = 0; i < ACPI_MAX_TABLES; i++) {
+		struct acpi_table_desc *table_desc = &initial_tables[i];
+
+		if (!table_desc->address || !table_desc->length)
+			break;
+
+		pr_info("Reserving %4s table memory at [0x%llx - 0x%llx]\n",
+			table_desc->signature.ascii, table_desc->address,
+			table_desc->address + table_desc->length - 1);
+
+		memblock_reserve(table_desc->address, table_desc->length);
+	}
+
 	return 0;
 }
 
+void __init acpi_table_init(void)
+{
+	acpi_table_initrd_scan();
+	check_multiple_madt();
+}
+
 static int __init acpi_parse_apic_instance(char *str)
 {
 	if (!str)




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-17 20:14                         ` Rafael J. Wysocki
@ 2021-03-17 22:28                           ` George Kennedy
  2021-03-18 15:42                             ` Rafael J. Wysocki
  2021-03-18  7:25                           ` Mike Rapoport
  1 sibling, 1 reply; 33+ messages in thread
From: George Kennedy @ 2021-03-17 22:28 UTC (permalink / raw)
  To: Rafael J. Wysocki, Mike Rapoport, Erik Kaneda
  Cc: Rafael J. Wysocki, David Hildenbrand, Robert Moore,
	Rafael Wysocki, Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko



On 3/17/2021 4:14 PM, Rafael J. Wysocki wrote:
> On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
>> On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
>>> On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
>>>> On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
>>>>>> There is some care that should be taken to make sure we get the order
>>>>>> right, but I don't see a fundamental issue here.
>>>> Me neither.
>>>>
>>>>>> If I understand correctly, Rafael's concern is about changing the parts of
>>>>>> ACPICA that should be OS agnostic, so I think we just need another place to
>>>>>> call memblock_reserve() rather than acpi_tb_install_table_with_override().
>>>> Something like this.
>>>>
>>>> There is also the problem that memblock_reserve() needs to be called
>>>> for all of the tables early enough, which will require some reordering
>>>> of the early init code.
>>>>
>>>>>> Since the reservation should be done early in x86::setup_arch() (and
>>>>>> probably in arm64::setup_arch()) we might just have a function that parses
>>>>>> table headers and reserves them, similarly to how we parse the tables
>>>>>> during KASLR setup.
>>>> Right.
>>> I've looked at it a bit more and we do something like the patch below that
>>> nearly duplicates acpi_tb_parse_root_table() which is not very nice.
>> It looks to me that the code need not be duplicated (see below).
>>
>>> Besides, reserving ACPI tables early and then calling acpi_table_init()
>>> (and acpi_tb_parse_root_table() again would mean doing the dance with
>>> early_memremap() twice for no good reason.
>> That'd be simply inefficient which is kind of acceptable to me to start with.
>>
>> And I changing the ACPICA code can be avoided at least initially, it
>> by itself would be a good enough reason.
>>
>>> I believe the most effective way to deal with this would be to have a
>>> function that does parsing, reservation and installs the tables supplied by
>>> the firmware which can be called really early and then another function
>>> that overrides tables if needed a some later point.
>> I agree that this should be the direction to go into.
> So maybe something like the patch below?

Do you want me to try it out in the failing setup?

George
>
> I'm not sure if acpi_boot_table_prepare() gets called early enough, though.
>
> Also this still may not play well with initrd-based table overrides. Erik, do
> you have any insights here?
>
> And ia64 needs to be updated too.
>
> ---
>   arch/x86/kernel/acpi/boot.c |   12 +++++++++---
>   arch/x86/kernel/setup.c     |    3 +++
>   drivers/acpi/tables.c       |   24 +++++++++++++++++++++---
>   include/linux/acpi.h        |    9 +++++++--
>   4 files changed, 40 insertions(+), 8 deletions(-)
>
> Index: linux-pm/arch/x86/kernel/acpi/boot.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
> +++ linux-pm/arch/x86/kernel/acpi/boot.c
> @@ -1541,7 +1541,7 @@ static const struct dmi_system_id acpi_d
>    *	...
>    */
>   
> -void __init acpi_boot_table_init(void)
> +void __init acpi_boot_table_prepare(void)
>   {
>   	dmi_check_system(acpi_dmi_table);
>   
> @@ -1554,10 +1554,16 @@ void __init acpi_boot_table_init(void)
>   	/*
>   	 * Initialize the ACPI boot-time table parser.
>   	 */
> -	if (acpi_table_init()) {
> +	if (acpi_table_prepare())
>   		disable_acpi();
> +}
> +
> +void __init acpi_boot_table_init(void)
> +{
> +	if (acpi_disabled)
>   		return;
> -	}
> +
> +	acpi_table_init();
>   
>   	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
>   
> Index: linux-pm/arch/x86/kernel/setup.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/setup.c
> +++ linux-pm/arch/x86/kernel/setup.c
> @@ -1070,6 +1070,9 @@ void __init setup_arch(char **cmdline_p)
>   	/* preallocate 4k for mptable mpc */
>   	e820__memblock_alloc_reserved_mpc_new();
>   
> +	/* Look for ACPI tables and reserve memory occupied by them. */
> +	acpi_boot_table_prepare();
> +
>   #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
>   	setup_bios_corruption_check();
>   #endif
> Index: linux-pm/include/linux/acpi.h
> ===================================================================
> --- linux-pm.orig/include/linux/acpi.h
> +++ linux-pm/include/linux/acpi.h
> @@ -222,11 +222,13 @@ void __iomem *__acpi_map_table(unsigned
>   void __acpi_unmap_table(void __iomem *map, unsigned long size);
>   int early_acpi_boot_init(void);
>   int acpi_boot_init (void);
> +void acpi_boot_table_prepare (void);
>   void acpi_boot_table_init (void);
>   int acpi_mps_check (void);
>   int acpi_numa_init (void);
>   
> -int acpi_table_init (void);
> +int acpi_table_prepare (void);
> +void acpi_table_init (void);
>   int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
>   int __init acpi_table_parse_entries(char *id, unsigned long table_size,
>   			      int entry_id,
> @@ -814,9 +816,12 @@ static inline int acpi_boot_init(void)
>   	return 0;
>   }
>   
> +static inline void acpi_boot_table_prepare(void)
> +{
> +}
> +
>   static inline void acpi_boot_table_init(void)
>   {
> -	return;
>   }
>   
>   static inline int acpi_mps_check(void)
> Index: linux-pm/drivers/acpi/tables.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/tables.c
> +++ linux-pm/drivers/acpi/tables.c
> @@ -788,9 +788,10 @@ acpi_status acpi_os_table_override(struc
>    * result: sdt_entry[] is initialized
>    */
>   
> -int __init acpi_table_init(void)
> +int __init acpi_table_prepare(void)
>   {
>   	acpi_status status;
> +	int i;
>   
>   	if (acpi_verify_table_checksum) {
>   		pr_info("Early table checksum verification enabled\n");
> @@ -803,12 +804,29 @@ int __init acpi_table_init(void)
>   	status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
>   	if (ACPI_FAILURE(status))
>   		return -EINVAL;
> -	acpi_table_initrd_scan();
>   
> -	check_multiple_madt();
> +	for (i = 0; i < ACPI_MAX_TABLES; i++) {
> +		struct acpi_table_desc *table_desc = &initial_tables[i];
> +
> +		if (!table_desc->address || !table_desc->length)
> +			break;
> +
> +		pr_info("Reserving %4s table memory at [0x%llx - 0x%llx]\n",
> +			table_desc->signature.ascii, table_desc->address,
> +			table_desc->address + table_desc->length - 1);
> +
> +		memblock_reserve(table_desc->address, table_desc->length);
> +	}
> +
>   	return 0;
>   }
>   
> +void __init acpi_table_init(void)
> +{
> +	acpi_table_initrd_scan();
> +	check_multiple_madt();
> +}
> +
>   static int __init acpi_parse_apic_instance(char *str)
>   {
>   	if (!str)
>
>
>


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-17 20:14                         ` Rafael J. Wysocki
  2021-03-17 22:28                           ` George Kennedy
@ 2021-03-18  7:25                           ` Mike Rapoport
  2021-03-18 10:50                             ` Rafael J. Wysocki
  1 sibling, 1 reply; 33+ messages in thread
From: Mike Rapoport @ 2021-03-18  7:25 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Erik Kaneda, Rafael J. Wysocki, David Hildenbrand,
	George Kennedy, Robert Moore, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Wed, Mar 17, 2021 at 09:14:37PM +0100, Rafael J. Wysocki wrote:
> On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> > On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > >
> > > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > > >
> > > > > > There is some care that should be taken to make sure we get the order
> > > > > > right, but I don't see a fundamental issue here.
> > > >
> > > > Me neither.
> > > >
> > > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > > >
> > > > Something like this.
> > > >
> > > > There is also the problem that memblock_reserve() needs to be called
> > > > for all of the tables early enough, which will require some reordering
> > > > of the early init code.
> > > >
> > > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > > during KASLR setup.
> > > >
> > > > Right.
> > >
> > > I've looked at it a bit more and we do something like the patch below that
> > > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> > 
> > It looks to me that the code need not be duplicated (see below).
> > 
> > > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > > early_memremap() twice for no good reason.
> > 
> > That'd be simply inefficient which is kind of acceptable to me to start with.
> > 
> > And I changing the ACPICA code can be avoided at least initially, it
> > by itself would be a good enough reason.
> > 
> > > I believe the most effective way to deal with this would be to have a
> > > function that does parsing, reservation and installs the tables supplied by
> > > the firmware which can be called really early and then another function
> > > that overrides tables if needed a some later point.
> > 
> > I agree that this should be the direction to go into.
> 
> So maybe something like the patch below?
> 
> I'm not sure if acpi_boot_table_prepare() gets called early enough, though.

To be 100% safe it should be called before e820__memblock_setup(). It is
possible to call memblock_reserve() at any time, even before the actual
memory is detected as long as all reservations fit into the static array
that currently has 128 entries on x86.

As e820__memblock_setup() essentially enables memblock allocations,
theoretically the memory occupied by ACPI tables can be allocated even in
x86::setup_arch() unless it is reserved before e820__memblock_setup().

> Also this still may not play well with initrd-based table overrides. Erik, do
> you have any insights here?
> 
> And ia64 needs to be updated too.

I think arm64 as well.

> ---
>  arch/x86/kernel/acpi/boot.c |   12 +++++++++---
>  arch/x86/kernel/setup.c     |    3 +++
>  drivers/acpi/tables.c       |   24 +++++++++++++++++++++---
>  include/linux/acpi.h        |    9 +++++++--
>  4 files changed, 40 insertions(+), 8 deletions(-)
> 
> Index: linux-pm/arch/x86/kernel/acpi/boot.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
> +++ linux-pm/arch/x86/kernel/acpi/boot.c
> @@ -1541,7 +1541,7 @@ static const struct dmi_system_id acpi_d
>   *	...
>   */
>  
> -void __init acpi_boot_table_init(void)
> +void __init acpi_boot_table_prepare(void)
>  {
>  	dmi_check_system(acpi_dmi_table);
>  
> @@ -1554,10 +1554,16 @@ void __init acpi_boot_table_init(void)
>  	/*
>  	 * Initialize the ACPI boot-time table parser.
>  	 */
> -	if (acpi_table_init()) {
> +	if (acpi_table_prepare())
>  		disable_acpi();
> +}
> +
> +void __init acpi_boot_table_init(void)
> +{
> +	if (acpi_disabled)
>  		return;
> -	}
> +
> +	acpi_table_init();
>  
>  	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
>  
> Index: linux-pm/arch/x86/kernel/setup.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/setup.c
> +++ linux-pm/arch/x86/kernel/setup.c
> @@ -1070,6 +1070,9 @@ void __init setup_arch(char **cmdline_p)
>  	/* preallocate 4k for mptable mpc */
>  	e820__memblock_alloc_reserved_mpc_new();
>  
> +	/* Look for ACPI tables and reserve memory occupied by them. */
> +	acpi_boot_table_prepare();
> +
>  #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
>  	setup_bios_corruption_check();
>  #endif
> Index: linux-pm/include/linux/acpi.h
> ===================================================================
> --- linux-pm.orig/include/linux/acpi.h
> +++ linux-pm/include/linux/acpi.h
> @@ -222,11 +222,13 @@ void __iomem *__acpi_map_table(unsigned
>  void __acpi_unmap_table(void __iomem *map, unsigned long size);
>  int early_acpi_boot_init(void);
>  int acpi_boot_init (void);
> +void acpi_boot_table_prepare (void);
>  void acpi_boot_table_init (void);

Not related to this patch, but it feels to me like there are too many
acpi_boot_something() :)

>  int acpi_mps_check (void);
>  int acpi_numa_init (void);
>  
> -int acpi_table_init (void);
> +int acpi_table_prepare (void);
> +void acpi_table_init (void);
>  int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
>  int __init acpi_table_parse_entries(char *id, unsigned long table_size,
>  			      int entry_id,
> @@ -814,9 +816,12 @@ static inline int acpi_boot_init(void)
>  	return 0;
>  }
>  
> +static inline void acpi_boot_table_prepare(void)
> +{
> +}
> +
>  static inline void acpi_boot_table_init(void)
>  {
> -	return;
>  }
>  
>  static inline int acpi_mps_check(void)
> Index: linux-pm/drivers/acpi/tables.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/tables.c
> +++ linux-pm/drivers/acpi/tables.c
> @@ -788,9 +788,10 @@ acpi_status acpi_os_table_override(struc
>   * result: sdt_entry[] is initialized
>   */
>  
> -int __init acpi_table_init(void)
> +int __init acpi_table_prepare(void)
>  {
>  	acpi_status status;
> +	int i;
>  
>  	if (acpi_verify_table_checksum) {
>  		pr_info("Early table checksum verification enabled\n");
> @@ -803,12 +804,29 @@ int __init acpi_table_init(void)
>  	status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
>  	if (ACPI_FAILURE(status))
>  		return -EINVAL;
> -	acpi_table_initrd_scan();
>  
> -	check_multiple_madt();
> +	for (i = 0; i < ACPI_MAX_TABLES; i++) {
> +		struct acpi_table_desc *table_desc = &initial_tables[i];
> +
> +		if (!table_desc->address || !table_desc->length)
> +			break;
> +
> +		pr_info("Reserving %4s table memory at [0x%llx - 0x%llx]\n",
> +			table_desc->signature.ascii, table_desc->address,
> +			table_desc->address + table_desc->length - 1);
> +
> +		memblock_reserve(table_desc->address, table_desc->length);
> +	}
> +
>  	return 0;
>  }
>  
> +void __init acpi_table_init(void)
> +{
> +	acpi_table_initrd_scan();
> +	check_multiple_madt();
> +}
> +
>  static int __init acpi_parse_apic_instance(char *str)
>  {
>  	if (!str)
> 
> 
> 

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-18  7:25                           ` Mike Rapoport
@ 2021-03-18 10:50                             ` Rafael J. Wysocki
  2021-03-18 15:22                               ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-18 10:50 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Rafael J. Wysocki, Erik Kaneda, Rafael J. Wysocki,
	David Hildenbrand, George Kennedy, Robert Moore, Rafael Wysocki,
	Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Thu, Mar 18, 2021 at 8:25 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> On Wed, Mar 17, 2021 at 09:14:37PM +0100, Rafael J. Wysocki wrote:
> > On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> > > On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > > >
> > > > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > > > >
> > > > > > > There is some care that should be taken to make sure we get the order
> > > > > > > right, but I don't see a fundamental issue here.
> > > > >
> > > > > Me neither.
> > > > >
> > > > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > > > >
> > > > > Something like this.
> > > > >
> > > > > There is also the problem that memblock_reserve() needs to be called
> > > > > for all of the tables early enough, which will require some reordering
> > > > > of the early init code.
> > > > >
> > > > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > > > during KASLR setup.
> > > > >
> > > > > Right.
> > > >
> > > > I've looked at it a bit more and we do something like the patch below that
> > > > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> > >
> > > It looks to me that the code need not be duplicated (see below).
> > >
> > > > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > > > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > > > early_memremap() twice for no good reason.
> > >
> > > That'd be simply inefficient which is kind of acceptable to me to start with.
> > >
> > > And I changing the ACPICA code can be avoided at least initially, it
> > > by itself would be a good enough reason.
> > >
> > > > I believe the most effective way to deal with this would be to have a
> > > > function that does parsing, reservation and installs the tables supplied by
> > > > the firmware which can be called really early and then another function
> > > > that overrides tables if needed a some later point.
> > >
> > > I agree that this should be the direction to go into.
> >
> > So maybe something like the patch below?
> >
> > I'm not sure if acpi_boot_table_prepare() gets called early enough, though.
>
> To be 100% safe it should be called before e820__memblock_setup().

OK

> It is possible to call memblock_reserve() at any time, even before the actual
> memory is detected as long as all reservations fit into the static array
> that currently has 128 entries on x86.
>
> As e820__memblock_setup() essentially enables memblock allocations,
> theoretically the memory occupied by ACPI tables can be allocated even in
> x86::setup_arch() unless it is reserved before e820__memblock_setup().
>
> > Also this still may not play well with initrd-based table overrides. Erik, do
> > you have any insights here?
> >
> > And ia64 needs to be updated too.
>
> I think arm64 as well.

Right.

> > ---
> >  arch/x86/kernel/acpi/boot.c |   12 +++++++++---
> >  arch/x86/kernel/setup.c     |    3 +++
> >  drivers/acpi/tables.c       |   24 +++++++++++++++++++++---
> >  include/linux/acpi.h        |    9 +++++++--
> >  4 files changed, 40 insertions(+), 8 deletions(-)
> >
> > Index: linux-pm/arch/x86/kernel/acpi/boot.c
> > ===================================================================
> > --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
> > +++ linux-pm/arch/x86/kernel/acpi/boot.c
> > @@ -1541,7 +1541,7 @@ static const struct dmi_system_id acpi_d
> >   *   ...
> >   */
> >
> > -void __init acpi_boot_table_init(void)
> > +void __init acpi_boot_table_prepare(void)
> >  {
> >       dmi_check_system(acpi_dmi_table);
> >
> > @@ -1554,10 +1554,16 @@ void __init acpi_boot_table_init(void)
> >       /*
> >        * Initialize the ACPI boot-time table parser.
> >        */
> > -     if (acpi_table_init()) {
> > +     if (acpi_table_prepare())
> >               disable_acpi();
> > +}
> > +
> > +void __init acpi_boot_table_init(void)
> > +{
> > +     if (acpi_disabled)
> >               return;
> > -     }
> > +
> > +     acpi_table_init();
> >
> >       acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
> >
> > Index: linux-pm/arch/x86/kernel/setup.c
> > ===================================================================
> > --- linux-pm.orig/arch/x86/kernel/setup.c
> > +++ linux-pm/arch/x86/kernel/setup.c
> > @@ -1070,6 +1070,9 @@ void __init setup_arch(char **cmdline_p)
> >       /* preallocate 4k for mptable mpc */
> >       e820__memblock_alloc_reserved_mpc_new();
> >
> > +     /* Look for ACPI tables and reserve memory occupied by them. */
> > +     acpi_boot_table_prepare();
> > +
> >  #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
> >       setup_bios_corruption_check();
> >  #endif
> > Index: linux-pm/include/linux/acpi.h
> > ===================================================================
> > --- linux-pm.orig/include/linux/acpi.h
> > +++ linux-pm/include/linux/acpi.h
> > @@ -222,11 +222,13 @@ void __iomem *__acpi_map_table(unsigned
> >  void __acpi_unmap_table(void __iomem *map, unsigned long size);
> >  int early_acpi_boot_init(void);
> >  int acpi_boot_init (void);
> > +void acpi_boot_table_prepare (void);
> >  void acpi_boot_table_init (void);
>
> Not related to this patch, but it feels to me like there are too many
> acpi_boot_something() :)

Well, there was one initially, but it has been split for a few times
due to ordering issues similar to the one at hand.

It could be cleaned up I suppose, though.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-18 10:50                             ` Rafael J. Wysocki
@ 2021-03-18 15:22                               ` Rafael J. Wysocki
  2021-03-20  8:25                                 ` Mike Rapoport
  0 siblings, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-18 15:22 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Rafael J. Wysocki, Erik Kaneda, David Hildenbrand,
	George Kennedy, Robert Moore, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Thu, Mar 18, 2021 at 11:50 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Thu, Mar 18, 2021 at 8:25 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >
> > On Wed, Mar 17, 2021 at 09:14:37PM +0100, Rafael J. Wysocki wrote:
> > > On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> > > > On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > > > >
> > > > > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > > > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > > > > >
> > > > > > > > There is some care that should be taken to make sure we get the order
> > > > > > > > right, but I don't see a fundamental issue here.
> > > > > >
> > > > > > Me neither.
> > > > > >
> > > > > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > > > > >
> > > > > > Something like this.
> > > > > >
> > > > > > There is also the problem that memblock_reserve() needs to be called
> > > > > > for all of the tables early enough, which will require some reordering
> > > > > > of the early init code.
> > > > > >
> > > > > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > > > > during KASLR setup.
> > > > > >
> > > > > > Right.
> > > > >
> > > > > I've looked at it a bit more and we do something like the patch below that
> > > > > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> > > >
> > > > It looks to me that the code need not be duplicated (see below).
> > > >
> > > > > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > > > > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > > > > early_memremap() twice for no good reason.
> > > >
> > > > That'd be simply inefficient which is kind of acceptable to me to start with.
> > > >
> > > > And I changing the ACPICA code can be avoided at least initially, it
> > > > by itself would be a good enough reason.
> > > >
> > > > > I believe the most effective way to deal with this would be to have a
> > > > > function that does parsing, reservation and installs the tables supplied by
> > > > > the firmware which can be called really early and then another function
> > > > > that overrides tables if needed a some later point.
> > > >
> > > > I agree that this should be the direction to go into.
> > >
> > > So maybe something like the patch below?
> > >
> > > I'm not sure if acpi_boot_table_prepare() gets called early enough, though.
> >
> > To be 100% safe it should be called before e820__memblock_setup().
>
> OK

Well, that said, reserve_bios_regions() doesn't seem to have concerns
like this and I'm not sure why ACPI tables should be reserved before
this runs.  That applies to efi_reserve_boot_services() too.

I can put the new call before e820__memblock_alloc_reserved_mpc_new(),
but I'm not sure why to put it before efi_reserve_boot_services(),
say?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-17 22:28                           ` George Kennedy
@ 2021-03-18 15:42                             ` Rafael J. Wysocki
  0 siblings, 0 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-18 15:42 UTC (permalink / raw)
  To: George Kennedy
  Cc: Rafael J. Wysocki, Mike Rapoport, Erik Kaneda, Rafael J. Wysocki,
	David Hildenbrand, Robert Moore, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Wed, Mar 17, 2021 at 11:28 PM George Kennedy
<george.kennedy@oracle.com> wrote:
>
>
>
> On 3/17/2021 4:14 PM, Rafael J. Wysocki wrote:
> > On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> >> On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >>> On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> >>>> On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> >>>>>> There is some care that should be taken to make sure we get the order
> >>>>>> right, but I don't see a fundamental issue here.
> >>>> Me neither.
> >>>>
> >>>>>> If I understand correctly, Rafael's concern is about changing the parts of
> >>>>>> ACPICA that should be OS agnostic, so I think we just need another place to
> >>>>>> call memblock_reserve() rather than acpi_tb_install_table_with_override().
> >>>> Something like this.
> >>>>
> >>>> There is also the problem that memblock_reserve() needs to be called
> >>>> for all of the tables early enough, which will require some reordering
> >>>> of the early init code.
> >>>>
> >>>>>> Since the reservation should be done early in x86::setup_arch() (and
> >>>>>> probably in arm64::setup_arch()) we might just have a function that parses
> >>>>>> table headers and reserves them, similarly to how we parse the tables
> >>>>>> during KASLR setup.
> >>>> Right.
> >>> I've looked at it a bit more and we do something like the patch below that
> >>> nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> >> It looks to me that the code need not be duplicated (see below).
> >>
> >>> Besides, reserving ACPI tables early and then calling acpi_table_init()
> >>> (and acpi_tb_parse_root_table() again would mean doing the dance with
> >>> early_memremap() twice for no good reason.
> >> That'd be simply inefficient which is kind of acceptable to me to start with.
> >>
> >> And I changing the ACPICA code can be avoided at least initially, it
> >> by itself would be a good enough reason.
> >>
> >>> I believe the most effective way to deal with this would be to have a
> >>> function that does parsing, reservation and installs the tables supplied by
> >>> the firmware which can be called really early and then another function
> >>> that overrides tables if needed a some later point.
> >> I agree that this should be the direction to go into.
> > So maybe something like the patch below?
>
> Do you want me to try it out in the failing setup?

Yes, please!

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-18 15:22                               ` Rafael J. Wysocki
@ 2021-03-20  8:25                                 ` Mike Rapoport
  2021-03-22 16:57                                   ` Rafael J. Wysocki
  2021-03-23 19:26                                   ` [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables Rafael J. Wysocki
  0 siblings, 2 replies; 33+ messages in thread
From: Mike Rapoport @ 2021-03-20  8:25 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Rafael J. Wysocki, Erik Kaneda, David Hildenbrand,
	George Kennedy, Robert Moore, Rafael Wysocki, Len Brown,
	ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Thu, Mar 18, 2021 at 04:22:37PM +0100, Rafael J. Wysocki wrote:
> On Thu, Mar 18, 2021 at 11:50 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >
> > On Thu, Mar 18, 2021 at 8:25 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > >
> > > On Wed, Mar 17, 2021 at 09:14:37PM +0100, Rafael J. Wysocki wrote:
> > > > On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> > > > > On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > > > > >
> > > > > > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > > > > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > There is some care that should be taken to make sure we get the order
> > > > > > > > > right, but I don't see a fundamental issue here.
> > > > > > >
> > > > > > > Me neither.
> > > > > > >
> > > > > > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > > > > > >
> > > > > > > Something like this.
> > > > > > >
> > > > > > > There is also the problem that memblock_reserve() needs to be called
> > > > > > > for all of the tables early enough, which will require some reordering
> > > > > > > of the early init code.
> > > > > > >
> > > > > > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > > > > > during KASLR setup.
> > > > > > >
> > > > > > > Right.
> > > > > >
> > > > > > I've looked at it a bit more and we do something like the patch below that
> > > > > > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> > > > >
> > > > > It looks to me that the code need not be duplicated (see below).
> > > > >
> > > > > > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > > > > > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > > > > > early_memremap() twice for no good reason.
> > > > >
> > > > > That'd be simply inefficient which is kind of acceptable to me to start with.
> > > > >
> > > > > And I changing the ACPICA code can be avoided at least initially, it
> > > > > by itself would be a good enough reason.
> > > > >
> > > > > > I believe the most effective way to deal with this would be to have a
> > > > > > function that does parsing, reservation and installs the tables supplied by
> > > > > > the firmware which can be called really early and then another function
> > > > > > that overrides tables if needed a some later point.
> > > > >
> > > > > I agree that this should be the direction to go into.
> > > >
> > > > So maybe something like the patch below?
> > > >
> > > > I'm not sure if acpi_boot_table_prepare() gets called early enough, though.
> > >
> > > To be 100% safe it should be called before e820__memblock_setup().
> >
> > OK
> 
> Well, that said, reserve_bios_regions() doesn't seem to have concerns
> like this and I'm not sure why ACPI tables should be reserved before
> this runs.  That applies to efi_reserve_boot_services() too.
> 
> I can put the new call before e820__memblock_alloc_reserved_mpc_new(),
> but I'm not sure why to put it before efi_reserve_boot_services(),
> say?

The general idea is to reserve all the memory used by the firmware before
memblock allocations are possible, i.e. before e820__memblock_setup().
Currently this is not the case, but it does not make it more correct.

Theoretically, it is possible that reserve_bios_regions() will cause a
memory allocation and the allocated memory will be exactly at the area
where ACPI tables reside.

In practice I believe this is very unlikely, but who knows.

Another advantage of having ACPI tables handy by the time we do the memory
detection is that we will be able to SRAT earlier and simplify NUMA
initialization.

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/1] ACPI: fix acpi table use after free
  2021-03-20  8:25                                 ` Mike Rapoport
@ 2021-03-22 16:57                                   ` Rafael J. Wysocki
  2021-03-23 19:26                                   ` [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables Rafael J. Wysocki
  1 sibling, 0 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-22 16:57 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Rafael J. Wysocki, Rafael J. Wysocki, Erik Kaneda,
	David Hildenbrand, George Kennedy, Robert Moore, Rafael Wysocki,
	Len Brown, ACPI Devel Maling List,
	open list:ACPI COMPONENT ARCHITECTURE (ACPICA),
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko

On Sat, Mar 20, 2021 at 9:25 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> On Thu, Mar 18, 2021 at 04:22:37PM +0100, Rafael J. Wysocki wrote:
> > On Thu, Mar 18, 2021 at 11:50 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
> > >
> > > On Thu, Mar 18, 2021 at 8:25 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > > >
> > > > On Wed, Mar 17, 2021 at 09:14:37PM +0100, Rafael J. Wysocki wrote:
> > > > > On Monday, March 15, 2021 5:19:29 PM CET Rafael J. Wysocki wrote:
> > > > > > On Sun, Mar 14, 2021 at 8:00 PM Mike Rapoport <rppt@linux.ibm.com> wrote:
> > > > > > >
> > > > > > > On Thu, Mar 11, 2021 at 04:36:31PM +0100, Rafael J. Wysocki wrote:
> > > > > > > > On Wed, Mar 10, 2021 at 8:47 PM David Hildenbrand <david@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > There is some care that should be taken to make sure we get the order
> > > > > > > > > > right, but I don't see a fundamental issue here.
> > > > > > > >
> > > > > > > > Me neither.
> > > > > > > >
> > > > > > > > > > If I understand correctly, Rafael's concern is about changing the parts of
> > > > > > > > > > ACPICA that should be OS agnostic, so I think we just need another place to
> > > > > > > > > > call memblock_reserve() rather than acpi_tb_install_table_with_override().
> > > > > > > >
> > > > > > > > Something like this.
> > > > > > > >
> > > > > > > > There is also the problem that memblock_reserve() needs to be called
> > > > > > > > for all of the tables early enough, which will require some reordering
> > > > > > > > of the early init code.
> > > > > > > >
> > > > > > > > > > Since the reservation should be done early in x86::setup_arch() (and
> > > > > > > > > > probably in arm64::setup_arch()) we might just have a function that parses
> > > > > > > > > > table headers and reserves them, similarly to how we parse the tables
> > > > > > > > > > during KASLR setup.
> > > > > > > >
> > > > > > > > Right.
> > > > > > >
> > > > > > > I've looked at it a bit more and we do something like the patch below that
> > > > > > > nearly duplicates acpi_tb_parse_root_table() which is not very nice.
> > > > > >
> > > > > > It looks to me that the code need not be duplicated (see below).
> > > > > >
> > > > > > > Besides, reserving ACPI tables early and then calling acpi_table_init()
> > > > > > > (and acpi_tb_parse_root_table() again would mean doing the dance with
> > > > > > > early_memremap() twice for no good reason.
> > > > > >
> > > > > > That'd be simply inefficient which is kind of acceptable to me to start with.
> > > > > >
> > > > > > And I changing the ACPICA code can be avoided at least initially, it
> > > > > > by itself would be a good enough reason.
> > > > > >
> > > > > > > I believe the most effective way to deal with this would be to have a
> > > > > > > function that does parsing, reservation and installs the tables supplied by
> > > > > > > the firmware which can be called really early and then another function
> > > > > > > that overrides tables if needed a some later point.
> > > > > >
> > > > > > I agree that this should be the direction to go into.
> > > > >
> > > > > So maybe something like the patch below?
> > > > >
> > > > > I'm not sure if acpi_boot_table_prepare() gets called early enough, though.
> > > >
> > > > To be 100% safe it should be called before e820__memblock_setup().
> > >
> > > OK
> >
> > Well, that said, reserve_bios_regions() doesn't seem to have concerns
> > like this and I'm not sure why ACPI tables should be reserved before
> > this runs.  That applies to efi_reserve_boot_services() too.
> >
> > I can put the new call before e820__memblock_alloc_reserved_mpc_new(),
> > but I'm not sure why to put it before efi_reserve_boot_services(),
> > say?
>
> The general idea is to reserve all the memory used by the firmware before
> memblock allocations are possible, i.e. before e820__memblock_setup().
> Currently this is not the case, but it does not make it more correct.

I see.

> Theoretically, it is possible that reserve_bios_regions() will cause a
> memory allocation and the allocated memory will be exactly at the area
> where ACPI tables reside.
>
> In practice I believe this is very unlikely, but who knows.
>
> Another advantage of having ACPI tables handy by the time we do the memory
> detection is that we will be able to SRAT earlier and simplify NUMA
> initialization.

OK, fair enough.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables
  2021-03-20  8:25                                 ` Mike Rapoport
  2021-03-22 16:57                                   ` Rafael J. Wysocki
@ 2021-03-23 19:26                                   ` Rafael J. Wysocki
  2021-03-24  8:24                                     ` Mike Rapoport
  1 sibling, 1 reply; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-23 19:26 UTC (permalink / raw)
  To: ACPI Devel Maling List
  Cc: Mike Rapoport, Rafael J. Wysocki, Rafael J. Wysocki, Erik Kaneda,
	David Hildenbrand, George Kennedy, Robert Moore, Rafael Wysocki,
	Len Brown, Linux Kernel Mailing List, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko,
	x86 Maintainers

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The following problem has been reported by George Kennedy:

 Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
 in __free_pages_core()") the following use after free occurs
 intermittently when ACPI tables are accessed.

 BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
 Read of size 4 at addr ffff8880be453004 by task swapper/0/1
 CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
 Call Trace:
  dump_stack+0xf6/0x158
  print_address_description.constprop.9+0x41/0x60
  kasan_report.cold.14+0x7b/0xd4
  __asan_report_load_n_noabort+0xf/0x20
  ibft_init+0x134/0xc49
  do_one_initcall+0xc4/0x3e0
  kernel_init_freeable+0x5af/0x66b
  kernel_init+0x16/0x1d0
  ret_from_fork+0x22/0x30

 ACPI tables mapped via kmap() do not have their mapped pages
 reserved and the pages can be "stolen" by the buddy allocator.

Apparently, on the affected system, the ACPI table in question is
not located in "reserved" memory, like ACPI NVS or ACPI Data, that
will not be used by the buddy allocator, so the memory occupied by
that table has to be explicitly reserved to prevent the buddy
allocator from using it.

In order to address this problem, rearrange the initialization of the
ACPI tables on x86 to locate the initial tables earlier and reserve
the memory occupied by them.

The other architectures using ACPI should not be affected by this
change.

Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/
Reported-by: George Kennedy <george.kennedy@oracle.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/boot.c |   25 ++++++++++++-------------
 arch/x86/kernel/setup.c     |    8 +++-----
 drivers/acpi/tables.c       |   42 +++++++++++++++++++++++++++++++++++++++---
 include/linux/acpi.h        |    9 ++++++++-
 4 files changed, 62 insertions(+), 22 deletions(-)

Index: linux-pm/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-pm.orig/arch/x86/kernel/acpi/boot.c
+++ linux-pm/arch/x86/kernel/acpi/boot.c
@@ -1554,10 +1554,18 @@ void __init acpi_boot_table_init(void)
 	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
-	if (acpi_table_init()) {
+	if (acpi_locate_initial_tables())
 		disable_acpi();
-		return;
-	}
+	else
+		acpi_reserve_initial_tables();
+}
+
+int __init early_acpi_boot_init(void)
+{
+	if (acpi_disabled)
+		return 1;
+
+	acpi_table_init_complete();
 
 	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
 
@@ -1570,18 +1578,9 @@ void __init acpi_boot_table_init(void)
 		} else {
 			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
 			disable_acpi();
-			return;
+			return 1;
 		}
 	}
-}
-
-int __init early_acpi_boot_init(void)
-{
-	/*
-	 * If acpi_disabled, bail out
-	 */
-	if (acpi_disabled)
-		return 1;
 
 	/*
 	 * Process the Multiple APIC Description Table (MADT), if present
Index: linux-pm/arch/x86/kernel/setup.c
===================================================================
--- linux-pm.orig/arch/x86/kernel/setup.c
+++ linux-pm/arch/x86/kernel/setup.c
@@ -1045,6 +1045,9 @@ void __init setup_arch(char **cmdline_p)
 
 	cleanup_highmap();
 
+	/* Look for ACPI tables and reserve memory occupied by them. */
+	acpi_boot_table_init();
+
 	memblock_set_current_limit(ISA_END_ADDRESS);
 	e820__memblock_setup();
 
@@ -1136,11 +1139,6 @@ void __init setup_arch(char **cmdline_p)
 
 	early_platform_quirks();
 
-	/*
-	 * Parse the ACPI tables for possible boot-time SMP configuration.
-	 */
-	acpi_boot_table_init();
-
 	early_acpi_boot_init();
 
 	initmem_init();
Index: linux-pm/include/linux/acpi.h
===================================================================
--- linux-pm.orig/include/linux/acpi.h
+++ linux-pm/include/linux/acpi.h
@@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned
 void __acpi_unmap_table(void __iomem *map, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
+void acpi_boot_table_prepare (void);
 void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
+int acpi_locate_initial_tables (void);
+void acpi_reserve_initial_tables (void);
+void acpi_table_init_complete (void);
 int acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,
@@ -814,9 +818,12 @@ static inline int acpi_boot_init(void)
 	return 0;
 }
 
+static inline void acpi_boot_table_prepare(void)
+{
+}
+
 static inline void acpi_boot_table_init(void)
 {
-	return;
 }
 
 static inline int acpi_mps_check(void)
Index: linux-pm/drivers/acpi/tables.c
===================================================================
--- linux-pm.orig/drivers/acpi/tables.c
+++ linux-pm/drivers/acpi/tables.c
@@ -780,7 +780,7 @@ acpi_status acpi_os_table_override(struc
 }
 
 /*
- * acpi_table_init()
+ * acpi_locate_initial_tables()
  *
  * find RSDP, find and checksum SDT/XSDT.
  * checksum all tables, print SDT/XSDT
@@ -788,7 +788,7 @@ acpi_status acpi_os_table_override(struc
  * result: sdt_entry[] is initialized
  */
 
-int __init acpi_table_init(void)
+int __init acpi_locate_initial_tables(void)
 {
 	acpi_status status;
 
@@ -803,9 +803,45 @@ int __init acpi_table_init(void)
 	status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
 	if (ACPI_FAILURE(status))
 		return -EINVAL;
-	acpi_table_initrd_scan();
 
+	return 0;
+}
+
+void __init acpi_reserve_initial_tables(void)
+{
+	int i;
+
+	for (i = 0; i < ACPI_MAX_TABLES; i++) {
+		struct acpi_table_desc *table_desc = &initial_tables[i];
+		u64 start = table_desc->address;
+		u64 size = table_desc->length;
+
+		if (!start || !size)
+			break;
+
+		pr_info("Reserving %4s table memory at [mem 0x%llx-0x%llx]\n",
+			table_desc->signature.ascii, start, start + size - 1);
+
+		memblock_reserve(start, size);
+	}
+}
+
+void __init acpi_table_init_complete(void)
+{
+	acpi_table_initrd_scan();
 	check_multiple_madt();
+}
+
+int __init acpi_table_init(void)
+{
+	int ret;
+
+	ret = acpi_locate_initial_tables();
+	if (ret)
+		return ret;
+
+	acpi_table_init_complete();
+
 	return 0;
 }
 




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables
  2021-03-23 19:26                                   ` [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables Rafael J. Wysocki
@ 2021-03-24  8:24                                     ` Mike Rapoport
  2021-03-24 13:27                                       ` Rafael J. Wysocki
  0 siblings, 1 reply; 33+ messages in thread
From: Mike Rapoport @ 2021-03-24  8:24 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: ACPI Devel Maling List, Rafael J. Wysocki, Erik Kaneda,
	David Hildenbrand, George Kennedy, Robert Moore, Rafael Wysocki,
	Len Brown, Linux Kernel Mailing List, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko,
	x86 Maintainers

On Tue, Mar 23, 2021 at 08:26:52PM +0100, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> The following problem has been reported by George Kennedy:
> 
>  Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
>  in __free_pages_core()") the following use after free occurs
>  intermittently when ACPI tables are accessed.
> 
>  BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
>  Read of size 4 at addr ffff8880be453004 by task swapper/0/1
>  CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
>  Call Trace:
>   dump_stack+0xf6/0x158
>   print_address_description.constprop.9+0x41/0x60
>   kasan_report.cold.14+0x7b/0xd4
>   __asan_report_load_n_noabort+0xf/0x20
>   ibft_init+0x134/0xc49
>   do_one_initcall+0xc4/0x3e0
>   kernel_init_freeable+0x5af/0x66b
>   kernel_init+0x16/0x1d0
>   ret_from_fork+0x22/0x30
> 
>  ACPI tables mapped via kmap() do not have their mapped pages
>  reserved and the pages can be "stolen" by the buddy allocator.
> 
> Apparently, on the affected system, the ACPI table in question is
> not located in "reserved" memory, like ACPI NVS or ACPI Data, that
> will not be used by the buddy allocator, so the memory occupied by
> that table has to be explicitly reserved to prevent the buddy
> allocator from using it.
> 
> In order to address this problem, rearrange the initialization of the
> ACPI tables on x86 to locate the initial tables earlier and reserve
> the memory occupied by them.
> 
> The other architectures using ACPI should not be affected by this
> change.
> 
> Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/
> Reported-by: George Kennedy <george.kennedy@oracle.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

FWIW:
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>

> ---
>  arch/x86/kernel/acpi/boot.c |   25 ++++++++++++-------------
>  arch/x86/kernel/setup.c     |    8 +++-----
>  drivers/acpi/tables.c       |   42 +++++++++++++++++++++++++++++++++++++++---
>  include/linux/acpi.h        |    9 ++++++++-
>  4 files changed, 62 insertions(+), 22 deletions(-)
> 
> Index: linux-pm/arch/x86/kernel/acpi/boot.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
> +++ linux-pm/arch/x86/kernel/acpi/boot.c
> @@ -1554,10 +1554,18 @@ void __init acpi_boot_table_init(void)
>  	/*
>  	 * Initialize the ACPI boot-time table parser.
>  	 */
> -	if (acpi_table_init()) {
> +	if (acpi_locate_initial_tables())
>  		disable_acpi();
> -		return;
> -	}
> +	else
> +		acpi_reserve_initial_tables();
> +}
> +
> +int __init early_acpi_boot_init(void)
> +{
> +	if (acpi_disabled)
> +		return 1;
> +
> +	acpi_table_init_complete();
>  
>  	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
>  
> @@ -1570,18 +1578,9 @@ void __init acpi_boot_table_init(void)
>  		} else {
>  			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
>  			disable_acpi();
> -			return;
> +			return 1;
>  		}
>  	}
> -}
> -
> -int __init early_acpi_boot_init(void)
> -{
> -	/*
> -	 * If acpi_disabled, bail out
> -	 */
> -	if (acpi_disabled)
> -		return 1;
>  
>  	/*
>  	 * Process the Multiple APIC Description Table (MADT), if present
> Index: linux-pm/arch/x86/kernel/setup.c
> ===================================================================
> --- linux-pm.orig/arch/x86/kernel/setup.c
> +++ linux-pm/arch/x86/kernel/setup.c
> @@ -1045,6 +1045,9 @@ void __init setup_arch(char **cmdline_p)
>  
>  	cleanup_highmap();
>  
> +	/* Look for ACPI tables and reserve memory occupied by them. */
> +	acpi_boot_table_init();
> +
>  	memblock_set_current_limit(ISA_END_ADDRESS);
>  	e820__memblock_setup();
>  
> @@ -1136,11 +1139,6 @@ void __init setup_arch(char **cmdline_p)
>  
>  	early_platform_quirks();
>  
> -	/*
> -	 * Parse the ACPI tables for possible boot-time SMP configuration.
> -	 */
> -	acpi_boot_table_init();
> -
>  	early_acpi_boot_init();
>  
>  	initmem_init();
> Index: linux-pm/include/linux/acpi.h
> ===================================================================
> --- linux-pm.orig/include/linux/acpi.h
> +++ linux-pm/include/linux/acpi.h
> @@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned
>  void __acpi_unmap_table(void __iomem *map, unsigned long size);
>  int early_acpi_boot_init(void);
>  int acpi_boot_init (void);
> +void acpi_boot_table_prepare (void);
>  void acpi_boot_table_init (void);
>  int acpi_mps_check (void);
>  int acpi_numa_init (void);
>  
> +int acpi_locate_initial_tables (void);
> +void acpi_reserve_initial_tables (void);
> +void acpi_table_init_complete (void);
>  int acpi_table_init (void);
>  int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
>  int __init acpi_table_parse_entries(char *id, unsigned long table_size,
> @@ -814,9 +818,12 @@ static inline int acpi_boot_init(void)
>  	return 0;
>  }
>  
> +static inline void acpi_boot_table_prepare(void)
> +{
> +}
> +
>  static inline void acpi_boot_table_init(void)
>  {
> -	return;
>  }
>  
>  static inline int acpi_mps_check(void)
> Index: linux-pm/drivers/acpi/tables.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/tables.c
> +++ linux-pm/drivers/acpi/tables.c
> @@ -780,7 +780,7 @@ acpi_status acpi_os_table_override(struc
>  }
>  
>  /*
> - * acpi_table_init()
> + * acpi_locate_initial_tables()
>   *
>   * find RSDP, find and checksum SDT/XSDT.
>   * checksum all tables, print SDT/XSDT
> @@ -788,7 +788,7 @@ acpi_status acpi_os_table_override(struc
>   * result: sdt_entry[] is initialized
>   */
>  
> -int __init acpi_table_init(void)
> +int __init acpi_locate_initial_tables(void)
>  {
>  	acpi_status status;
>  
> @@ -803,9 +803,45 @@ int __init acpi_table_init(void)
>  	status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
>  	if (ACPI_FAILURE(status))
>  		return -EINVAL;
> -	acpi_table_initrd_scan();
>  
> +	return 0;
> +}
> +
> +void __init acpi_reserve_initial_tables(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < ACPI_MAX_TABLES; i++) {
> +		struct acpi_table_desc *table_desc = &initial_tables[i];
> +		u64 start = table_desc->address;
> +		u64 size = table_desc->length;
> +
> +		if (!start || !size)
> +			break;
> +
> +		pr_info("Reserving %4s table memory at [mem 0x%llx-0x%llx]\n",
> +			table_desc->signature.ascii, start, start + size - 1);
> +
> +		memblock_reserve(start, size);
> +	}
> +}
> +
> +void __init acpi_table_init_complete(void)
> +{
> +	acpi_table_initrd_scan();
>  	check_multiple_madt();
> +}
> +
> +int __init acpi_table_init(void)
> +{
> +	int ret;
> +
> +	ret = acpi_locate_initial_tables();
> +	if (ret)
> +		return ret;
> +
> +	acpi_table_init_complete();
> +
>  	return 0;
>  }
>  
> 
> 
> 

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables
  2021-03-24  8:24                                     ` Mike Rapoport
@ 2021-03-24 13:27                                       ` Rafael J. Wysocki
  2021-03-24 13:49                                         ` George Kennedy
  2021-03-24 15:42                                         ` George Kennedy
  0 siblings, 2 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-24 13:27 UTC (permalink / raw)
  To: Mike Rapoport, George Kennedy
  Cc: Rafael J. Wysocki, ACPI Devel Maling List, Rafael J. Wysocki,
	Erik Kaneda, David Hildenbrand, Robert Moore, Rafael Wysocki,
	Len Brown, Linux Kernel Mailing List, Konrad Rzeszutek Wilk,
	Dan Carpenter, Dhaval Giani, Andrew Morton, Vlastimil Babka,
	Oscar Salvador, Wei Yang, Pankaj Gupta, Michal Hocko,
	x86 Maintainers

On Wed, Mar 24, 2021 at 9:24 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
>
> On Tue, Mar 23, 2021 at 08:26:52PM +0100, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > The following problem has been reported by George Kennedy:
> >
> >  Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
> >  in __free_pages_core()") the following use after free occurs
> >  intermittently when ACPI tables are accessed.
> >
> >  BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
> >  Read of size 4 at addr ffff8880be453004 by task swapper/0/1
> >  CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
> >  Call Trace:
> >   dump_stack+0xf6/0x158
> >   print_address_description.constprop.9+0x41/0x60
> >   kasan_report.cold.14+0x7b/0xd4
> >   __asan_report_load_n_noabort+0xf/0x20
> >   ibft_init+0x134/0xc49
> >   do_one_initcall+0xc4/0x3e0
> >   kernel_init_freeable+0x5af/0x66b
> >   kernel_init+0x16/0x1d0
> >   ret_from_fork+0x22/0x30
> >
> >  ACPI tables mapped via kmap() do not have their mapped pages
> >  reserved and the pages can be "stolen" by the buddy allocator.
> >
> > Apparently, on the affected system, the ACPI table in question is
> > not located in "reserved" memory, like ACPI NVS or ACPI Data, that
> > will not be used by the buddy allocator, so the memory occupied by
> > that table has to be explicitly reserved to prevent the buddy
> > allocator from using it.
> >
> > In order to address this problem, rearrange the initialization of the
> > ACPI tables on x86 to locate the initial tables earlier and reserve
> > the memory occupied by them.
> >
> > The other architectures using ACPI should not be affected by this
> > change.
> >
> > Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/
> > Reported-by: George Kennedy <george.kennedy@oracle.com>
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> FWIW:
> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>

Thank you!

George, can you please try this patch on the affected system?

> > ---
> >  arch/x86/kernel/acpi/boot.c |   25 ++++++++++++-------------
> >  arch/x86/kernel/setup.c     |    8 +++-----
> >  drivers/acpi/tables.c       |   42 +++++++++++++++++++++++++++++++++++++++---
> >  include/linux/acpi.h        |    9 ++++++++-
> >  4 files changed, 62 insertions(+), 22 deletions(-)
> >
> > Index: linux-pm/arch/x86/kernel/acpi/boot.c
> > ===================================================================
> > --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
> > +++ linux-pm/arch/x86/kernel/acpi/boot.c
> > @@ -1554,10 +1554,18 @@ void __init acpi_boot_table_init(void)
> >       /*
> >        * Initialize the ACPI boot-time table parser.
> >        */
> > -     if (acpi_table_init()) {
> > +     if (acpi_locate_initial_tables())
> >               disable_acpi();
> > -             return;
> > -     }
> > +     else
> > +             acpi_reserve_initial_tables();
> > +}
> > +
> > +int __init early_acpi_boot_init(void)
> > +{
> > +     if (acpi_disabled)
> > +             return 1;
> > +
> > +     acpi_table_init_complete();
> >
> >       acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
> >
> > @@ -1570,18 +1578,9 @@ void __init acpi_boot_table_init(void)
> >               } else {
> >                       printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
> >                       disable_acpi();
> > -                     return;
> > +                     return 1;
> >               }
> >       }
> > -}
> > -
> > -int __init early_acpi_boot_init(void)
> > -{
> > -     /*
> > -      * If acpi_disabled, bail out
> > -      */
> > -     if (acpi_disabled)
> > -             return 1;
> >
> >       /*
> >        * Process the Multiple APIC Description Table (MADT), if present
> > Index: linux-pm/arch/x86/kernel/setup.c
> > ===================================================================
> > --- linux-pm.orig/arch/x86/kernel/setup.c
> > +++ linux-pm/arch/x86/kernel/setup.c
> > @@ -1045,6 +1045,9 @@ void __init setup_arch(char **cmdline_p)
> >
> >       cleanup_highmap();
> >
> > +     /* Look for ACPI tables and reserve memory occupied by them. */
> > +     acpi_boot_table_init();
> > +
> >       memblock_set_current_limit(ISA_END_ADDRESS);
> >       e820__memblock_setup();
> >
> > @@ -1136,11 +1139,6 @@ void __init setup_arch(char **cmdline_p)
> >
> >       early_platform_quirks();
> >
> > -     /*
> > -      * Parse the ACPI tables for possible boot-time SMP configuration.
> > -      */
> > -     acpi_boot_table_init();
> > -
> >       early_acpi_boot_init();
> >
> >       initmem_init();
> > Index: linux-pm/include/linux/acpi.h
> > ===================================================================
> > --- linux-pm.orig/include/linux/acpi.h
> > +++ linux-pm/include/linux/acpi.h
> > @@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned
> >  void __acpi_unmap_table(void __iomem *map, unsigned long size);
> >  int early_acpi_boot_init(void);
> >  int acpi_boot_init (void);
> > +void acpi_boot_table_prepare (void);
> >  void acpi_boot_table_init (void);
> >  int acpi_mps_check (void);
> >  int acpi_numa_init (void);
> >
> > +int acpi_locate_initial_tables (void);
> > +void acpi_reserve_initial_tables (void);
> > +void acpi_table_init_complete (void);
> >  int acpi_table_init (void);
> >  int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
> >  int __init acpi_table_parse_entries(char *id, unsigned long table_size,
> > @@ -814,9 +818,12 @@ static inline int acpi_boot_init(void)
> >       return 0;
> >  }
> >
> > +static inline void acpi_boot_table_prepare(void)
> > +{
> > +}
> > +
> >  static inline void acpi_boot_table_init(void)
> >  {
> > -     return;
> >  }
> >
> >  static inline int acpi_mps_check(void)
> > Index: linux-pm/drivers/acpi/tables.c
> > ===================================================================
> > --- linux-pm.orig/drivers/acpi/tables.c
> > +++ linux-pm/drivers/acpi/tables.c
> > @@ -780,7 +780,7 @@ acpi_status acpi_os_table_override(struc
> >  }
> >
> >  /*
> > - * acpi_table_init()
> > + * acpi_locate_initial_tables()
> >   *
> >   * find RSDP, find and checksum SDT/XSDT.
> >   * checksum all tables, print SDT/XSDT
> > @@ -788,7 +788,7 @@ acpi_status acpi_os_table_override(struc
> >   * result: sdt_entry[] is initialized
> >   */
> >
> > -int __init acpi_table_init(void)
> > +int __init acpi_locate_initial_tables(void)
> >  {
> >       acpi_status status;
> >
> > @@ -803,9 +803,45 @@ int __init acpi_table_init(void)
> >       status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
> >       if (ACPI_FAILURE(status))
> >               return -EINVAL;
> > -     acpi_table_initrd_scan();
> >
> > +     return 0;
> > +}
> > +
> > +void __init acpi_reserve_initial_tables(void)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < ACPI_MAX_TABLES; i++) {
> > +             struct acpi_table_desc *table_desc = &initial_tables[i];
> > +             u64 start = table_desc->address;
> > +             u64 size = table_desc->length;
> > +
> > +             if (!start || !size)
> > +                     break;
> > +
> > +             pr_info("Reserving %4s table memory at [mem 0x%llx-0x%llx]\n",
> > +                     table_desc->signature.ascii, start, start + size - 1);
> > +
> > +             memblock_reserve(start, size);
> > +     }
> > +}
> > +
> > +void __init acpi_table_init_complete(void)
> > +{
> > +     acpi_table_initrd_scan();
> >       check_multiple_madt();
> > +}
> > +
> > +int __init acpi_table_init(void)
> > +{
> > +     int ret;
> > +
> > +     ret = acpi_locate_initial_tables();
> > +     if (ret)
> > +             return ret;
> > +
> > +     acpi_table_init_complete();
> > +
> >       return 0;
> >  }
> >
> >
> >
> >
>
> --
> Sincerely yours,
> Mike.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables
  2021-03-24 13:27                                       ` Rafael J. Wysocki
@ 2021-03-24 13:49                                         ` George Kennedy
  2021-03-24 15:42                                         ` George Kennedy
  1 sibling, 0 replies; 33+ messages in thread
From: George Kennedy @ 2021-03-24 13:49 UTC (permalink / raw)
  To: Rafael J. Wysocki, Mike Rapoport
  Cc: Rafael J. Wysocki, ACPI Devel Maling List, Erik Kaneda,
	David Hildenbrand, Robert Moore, Rafael Wysocki, Len Brown,
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko, x86 Maintainers



On 3/24/2021 9:27 AM, Rafael J. Wysocki wrote:
> On Wed, Mar 24, 2021 at 9:24 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
>> On Tue, Mar 23, 2021 at 08:26:52PM +0100, Rafael J. Wysocki wrote:
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> The following problem has been reported by George Kennedy:
>>>
>>>   Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
>>>   in __free_pages_core()") the following use after free occurs
>>>   intermittently when ACPI tables are accessed.
>>>
>>>   BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
>>>   Read of size 4 at addr ffff8880be453004 by task swapper/0/1
>>>   CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
>>>   Call Trace:
>>>    dump_stack+0xf6/0x158
>>>    print_address_description.constprop.9+0x41/0x60
>>>    kasan_report.cold.14+0x7b/0xd4
>>>    __asan_report_load_n_noabort+0xf/0x20
>>>    ibft_init+0x134/0xc49
>>>    do_one_initcall+0xc4/0x3e0
>>>    kernel_init_freeable+0x5af/0x66b
>>>    kernel_init+0x16/0x1d0
>>>    ret_from_fork+0x22/0x30
>>>
>>>   ACPI tables mapped via kmap() do not have their mapped pages
>>>   reserved and the pages can be "stolen" by the buddy allocator.
>>>
>>> Apparently, on the affected system, the ACPI table in question is
>>> not located in "reserved" memory, like ACPI NVS or ACPI Data, that
>>> will not be used by the buddy allocator, so the memory occupied by
>>> that table has to be explicitly reserved to prevent the buddy
>>> allocator from using it.
>>>
>>> In order to address this problem, rearrange the initialization of the
>>> ACPI tables on x86 to locate the initial tables earlier and reserve
>>> the memory occupied by them.
>>>
>>> The other architectures using ACPI should not be affected by this
>>> change.
>>>
>>> Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/
>>> Reported-by: George Kennedy <george.kennedy@oracle.com>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>> FWIW:
>> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
> Thank you!
>
> George, can you please try this patch on the affected system?

Will do.

George
>
>>> ---
>>>   arch/x86/kernel/acpi/boot.c |   25 ++++++++++++-------------
>>>   arch/x86/kernel/setup.c     |    8 +++-----
>>>   drivers/acpi/tables.c       |   42 +++++++++++++++++++++++++++++++++++++++---
>>>   include/linux/acpi.h        |    9 ++++++++-
>>>   4 files changed, 62 insertions(+), 22 deletions(-)
>>>
>>> Index: linux-pm/arch/x86/kernel/acpi/boot.c
>>> ===================================================================
>>> --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
>>> +++ linux-pm/arch/x86/kernel/acpi/boot.c
>>> @@ -1554,10 +1554,18 @@ void __init acpi_boot_table_init(void)
>>>        /*
>>>         * Initialize the ACPI boot-time table parser.
>>>         */
>>> -     if (acpi_table_init()) {
>>> +     if (acpi_locate_initial_tables())
>>>                disable_acpi();
>>> -             return;
>>> -     }
>>> +     else
>>> +             acpi_reserve_initial_tables();
>>> +}
>>> +
>>> +int __init early_acpi_boot_init(void)
>>> +{
>>> +     if (acpi_disabled)
>>> +             return 1;
>>> +
>>> +     acpi_table_init_complete();
>>>
>>>        acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
>>>
>>> @@ -1570,18 +1578,9 @@ void __init acpi_boot_table_init(void)
>>>                } else {
>>>                        printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
>>>                        disable_acpi();
>>> -                     return;
>>> +                     return 1;
>>>                }
>>>        }
>>> -}
>>> -
>>> -int __init early_acpi_boot_init(void)
>>> -{
>>> -     /*
>>> -      * If acpi_disabled, bail out
>>> -      */
>>> -     if (acpi_disabled)
>>> -             return 1;
>>>
>>>        /*
>>>         * Process the Multiple APIC Description Table (MADT), if present
>>> Index: linux-pm/arch/x86/kernel/setup.c
>>> ===================================================================
>>> --- linux-pm.orig/arch/x86/kernel/setup.c
>>> +++ linux-pm/arch/x86/kernel/setup.c
>>> @@ -1045,6 +1045,9 @@ void __init setup_arch(char **cmdline_p)
>>>
>>>        cleanup_highmap();
>>>
>>> +     /* Look for ACPI tables and reserve memory occupied by them. */
>>> +     acpi_boot_table_init();
>>> +
>>>        memblock_set_current_limit(ISA_END_ADDRESS);
>>>        e820__memblock_setup();
>>>
>>> @@ -1136,11 +1139,6 @@ void __init setup_arch(char **cmdline_p)
>>>
>>>        early_platform_quirks();
>>>
>>> -     /*
>>> -      * Parse the ACPI tables for possible boot-time SMP configuration.
>>> -      */
>>> -     acpi_boot_table_init();
>>> -
>>>        early_acpi_boot_init();
>>>
>>>        initmem_init();
>>> Index: linux-pm/include/linux/acpi.h
>>> ===================================================================
>>> --- linux-pm.orig/include/linux/acpi.h
>>> +++ linux-pm/include/linux/acpi.h
>>> @@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned
>>>   void __acpi_unmap_table(void __iomem *map, unsigned long size);
>>>   int early_acpi_boot_init(void);
>>>   int acpi_boot_init (void);
>>> +void acpi_boot_table_prepare (void);
>>>   void acpi_boot_table_init (void);
>>>   int acpi_mps_check (void);
>>>   int acpi_numa_init (void);
>>>
>>> +int acpi_locate_initial_tables (void);
>>> +void acpi_reserve_initial_tables (void);
>>> +void acpi_table_init_complete (void);
>>>   int acpi_table_init (void);
>>>   int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
>>>   int __init acpi_table_parse_entries(char *id, unsigned long table_size,
>>> @@ -814,9 +818,12 @@ static inline int acpi_boot_init(void)
>>>        return 0;
>>>   }
>>>
>>> +static inline void acpi_boot_table_prepare(void)
>>> +{
>>> +}
>>> +
>>>   static inline void acpi_boot_table_init(void)
>>>   {
>>> -     return;
>>>   }
>>>
>>>   static inline int acpi_mps_check(void)
>>> Index: linux-pm/drivers/acpi/tables.c
>>> ===================================================================
>>> --- linux-pm.orig/drivers/acpi/tables.c
>>> +++ linux-pm/drivers/acpi/tables.c
>>> @@ -780,7 +780,7 @@ acpi_status acpi_os_table_override(struc
>>>   }
>>>
>>>   /*
>>> - * acpi_table_init()
>>> + * acpi_locate_initial_tables()
>>>    *
>>>    * find RSDP, find and checksum SDT/XSDT.
>>>    * checksum all tables, print SDT/XSDT
>>> @@ -788,7 +788,7 @@ acpi_status acpi_os_table_override(struc
>>>    * result: sdt_entry[] is initialized
>>>    */
>>>
>>> -int __init acpi_table_init(void)
>>> +int __init acpi_locate_initial_tables(void)
>>>   {
>>>        acpi_status status;
>>>
>>> @@ -803,9 +803,45 @@ int __init acpi_table_init(void)
>>>        status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
>>>        if (ACPI_FAILURE(status))
>>>                return -EINVAL;
>>> -     acpi_table_initrd_scan();
>>>
>>> +     return 0;
>>> +}
>>> +
>>> +void __init acpi_reserve_initial_tables(void)
>>> +{
>>> +     int i;
>>> +
>>> +     for (i = 0; i < ACPI_MAX_TABLES; i++) {
>>> +             struct acpi_table_desc *table_desc = &initial_tables[i];
>>> +             u64 start = table_desc->address;
>>> +             u64 size = table_desc->length;
>>> +
>>> +             if (!start || !size)
>>> +                     break;
>>> +
>>> +             pr_info("Reserving %4s table memory at [mem 0x%llx-0x%llx]\n",
>>> +                     table_desc->signature.ascii, start, start + size - 1);
>>> +
>>> +             memblock_reserve(start, size);
>>> +     }
>>> +}
>>> +
>>> +void __init acpi_table_init_complete(void)
>>> +{
>>> +     acpi_table_initrd_scan();
>>>        check_multiple_madt();
>>> +}
>>> +
>>> +int __init acpi_table_init(void)
>>> +{
>>> +     int ret;
>>> +
>>> +     ret = acpi_locate_initial_tables();
>>> +     if (ret)
>>> +             return ret;
>>> +
>>> +     acpi_table_init_complete();
>>> +
>>>        return 0;
>>>   }
>>>
>>>
>>>
>>>
>> --
>> Sincerely yours,
>> Mike.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables
  2021-03-24 13:27                                       ` Rafael J. Wysocki
  2021-03-24 13:49                                         ` George Kennedy
@ 2021-03-24 15:42                                         ` George Kennedy
  2021-03-24 15:44                                           ` Rafael J. Wysocki
  1 sibling, 1 reply; 33+ messages in thread
From: George Kennedy @ 2021-03-24 15:42 UTC (permalink / raw)
  To: Rafael J. Wysocki, Mike Rapoport
  Cc: Rafael J. Wysocki, ACPI Devel Maling List, Erik Kaneda,
	David Hildenbrand, Robert Moore, Rafael Wysocki, Len Brown,
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko, x86 Maintainers



On 3/24/2021 9:27 AM, Rafael J. Wysocki wrote:
> On Wed, Mar 24, 2021 at 9:24 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
>> On Tue, Mar 23, 2021 at 08:26:52PM +0100, Rafael J. Wysocki wrote:
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> The following problem has been reported by George Kennedy:
>>>
>>>   Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
>>>   in __free_pages_core()") the following use after free occurs
>>>   intermittently when ACPI tables are accessed.
>>>
>>>   BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
>>>   Read of size 4 at addr ffff8880be453004 by task swapper/0/1
>>>   CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
>>>   Call Trace:
>>>    dump_stack+0xf6/0x158
>>>    print_address_description.constprop.9+0x41/0x60
>>>    kasan_report.cold.14+0x7b/0xd4
>>>    __asan_report_load_n_noabort+0xf/0x20
>>>    ibft_init+0x134/0xc49
>>>    do_one_initcall+0xc4/0x3e0
>>>    kernel_init_freeable+0x5af/0x66b
>>>    kernel_init+0x16/0x1d0
>>>    ret_from_fork+0x22/0x30
>>>
>>>   ACPI tables mapped via kmap() do not have their mapped pages
>>>   reserved and the pages can be "stolen" by the buddy allocator.
>>>
>>> Apparently, on the affected system, the ACPI table in question is
>>> not located in "reserved" memory, like ACPI NVS or ACPI Data, that
>>> will not be used by the buddy allocator, so the memory occupied by
>>> that table has to be explicitly reserved to prevent the buddy
>>> allocator from using it.
>>>
>>> In order to address this problem, rearrange the initialization of the
>>> ACPI tables on x86 to locate the initial tables earlier and reserve
>>> the memory occupied by them.
>>>
>>> The other architectures using ACPI should not be affected by this
>>> change.
>>>
>>> Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/
>>> Reported-by: George Kennedy <george.kennedy@oracle.com>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>> FWIW:
>> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
> Thank you!
>
> George, can you please try this patch on the affected system?

Rafael,

10 for 10 successful reboots with your patch.

First, verified the failure is still there with latest 5.12.0-rc4.

George
P.S. Thanks Mike, Rafael & David

>
>>> ---
>>>   arch/x86/kernel/acpi/boot.c |   25 ++++++++++++-------------
>>>   arch/x86/kernel/setup.c     |    8 +++-----
>>>   drivers/acpi/tables.c       |   42 +++++++++++++++++++++++++++++++++++++++---
>>>   include/linux/acpi.h        |    9 ++++++++-
>>>   4 files changed, 62 insertions(+), 22 deletions(-)
>>>
>>> Index: linux-pm/arch/x86/kernel/acpi/boot.c
>>> ===================================================================
>>> --- linux-pm.orig/arch/x86/kernel/acpi/boot.c
>>> +++ linux-pm/arch/x86/kernel/acpi/boot.c
>>> @@ -1554,10 +1554,18 @@ void __init acpi_boot_table_init(void)
>>>        /*
>>>         * Initialize the ACPI boot-time table parser.
>>>         */
>>> -     if (acpi_table_init()) {
>>> +     if (acpi_locate_initial_tables())
>>>                disable_acpi();
>>> -             return;
>>> -     }
>>> +     else
>>> +             acpi_reserve_initial_tables();
>>> +}
>>> +
>>> +int __init early_acpi_boot_init(void)
>>> +{
>>> +     if (acpi_disabled)
>>> +             return 1;
>>> +
>>> +     acpi_table_init_complete();
>>>
>>>        acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
>>>
>>> @@ -1570,18 +1578,9 @@ void __init acpi_boot_table_init(void)
>>>                } else {
>>>                        printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
>>>                        disable_acpi();
>>> -                     return;
>>> +                     return 1;
>>>                }
>>>        }
>>> -}
>>> -
>>> -int __init early_acpi_boot_init(void)
>>> -{
>>> -     /*
>>> -      * If acpi_disabled, bail out
>>> -      */
>>> -     if (acpi_disabled)
>>> -             return 1;
>>>
>>>        /*
>>>         * Process the Multiple APIC Description Table (MADT), if present
>>> Index: linux-pm/arch/x86/kernel/setup.c
>>> ===================================================================
>>> --- linux-pm.orig/arch/x86/kernel/setup.c
>>> +++ linux-pm/arch/x86/kernel/setup.c
>>> @@ -1045,6 +1045,9 @@ void __init setup_arch(char **cmdline_p)
>>>
>>>        cleanup_highmap();
>>>
>>> +     /* Look for ACPI tables and reserve memory occupied by them. */
>>> +     acpi_boot_table_init();
>>> +
>>>        memblock_set_current_limit(ISA_END_ADDRESS);
>>>        e820__memblock_setup();
>>>
>>> @@ -1136,11 +1139,6 @@ void __init setup_arch(char **cmdline_p)
>>>
>>>        early_platform_quirks();
>>>
>>> -     /*
>>> -      * Parse the ACPI tables for possible boot-time SMP configuration.
>>> -      */
>>> -     acpi_boot_table_init();
>>> -
>>>        early_acpi_boot_init();
>>>
>>>        initmem_init();
>>> Index: linux-pm/include/linux/acpi.h
>>> ===================================================================
>>> --- linux-pm.orig/include/linux/acpi.h
>>> +++ linux-pm/include/linux/acpi.h
>>> @@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned
>>>   void __acpi_unmap_table(void __iomem *map, unsigned long size);
>>>   int early_acpi_boot_init(void);
>>>   int acpi_boot_init (void);
>>> +void acpi_boot_table_prepare (void);
>>>   void acpi_boot_table_init (void);
>>>   int acpi_mps_check (void);
>>>   int acpi_numa_init (void);
>>>
>>> +int acpi_locate_initial_tables (void);
>>> +void acpi_reserve_initial_tables (void);
>>> +void acpi_table_init_complete (void);
>>>   int acpi_table_init (void);
>>>   int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
>>>   int __init acpi_table_parse_entries(char *id, unsigned long table_size,
>>> @@ -814,9 +818,12 @@ static inline int acpi_boot_init(void)
>>>        return 0;
>>>   }
>>>
>>> +static inline void acpi_boot_table_prepare(void)
>>> +{
>>> +}
>>> +
>>>   static inline void acpi_boot_table_init(void)
>>>   {
>>> -     return;
>>>   }
>>>
>>>   static inline int acpi_mps_check(void)
>>> Index: linux-pm/drivers/acpi/tables.c
>>> ===================================================================
>>> --- linux-pm.orig/drivers/acpi/tables.c
>>> +++ linux-pm/drivers/acpi/tables.c
>>> @@ -780,7 +780,7 @@ acpi_status acpi_os_table_override(struc
>>>   }
>>>
>>>   /*
>>> - * acpi_table_init()
>>> + * acpi_locate_initial_tables()
>>>    *
>>>    * find RSDP, find and checksum SDT/XSDT.
>>>    * checksum all tables, print SDT/XSDT
>>> @@ -788,7 +788,7 @@ acpi_status acpi_os_table_override(struc
>>>    * result: sdt_entry[] is initialized
>>>    */
>>>
>>> -int __init acpi_table_init(void)
>>> +int __init acpi_locate_initial_tables(void)
>>>   {
>>>        acpi_status status;
>>>
>>> @@ -803,9 +803,45 @@ int __init acpi_table_init(void)
>>>        status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
>>>        if (ACPI_FAILURE(status))
>>>                return -EINVAL;
>>> -     acpi_table_initrd_scan();
>>>
>>> +     return 0;
>>> +}
>>> +
>>> +void __init acpi_reserve_initial_tables(void)
>>> +{
>>> +     int i;
>>> +
>>> +     for (i = 0; i < ACPI_MAX_TABLES; i++) {
>>> +             struct acpi_table_desc *table_desc = &initial_tables[i];
>>> +             u64 start = table_desc->address;
>>> +             u64 size = table_desc->length;
>>> +
>>> +             if (!start || !size)
>>> +                     break;
>>> +
>>> +             pr_info("Reserving %4s table memory at [mem 0x%llx-0x%llx]\n",
>>> +                     table_desc->signature.ascii, start, start + size - 1);
>>> +
>>> +             memblock_reserve(start, size);
>>> +     }
>>> +}
>>> +
>>> +void __init acpi_table_init_complete(void)
>>> +{
>>> +     acpi_table_initrd_scan();
>>>        check_multiple_madt();
>>> +}
>>> +
>>> +int __init acpi_table_init(void)
>>> +{
>>> +     int ret;
>>> +
>>> +     ret = acpi_locate_initial_tables();
>>> +     if (ret)
>>> +             return ret;
>>> +
>>> +     acpi_table_init_complete();
>>> +
>>>        return 0;
>>>   }
>>>
>>>
>>>
>>>
>> --
>> Sincerely yours,
>> Mike.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables
  2021-03-24 15:42                                         ` George Kennedy
@ 2021-03-24 15:44                                           ` Rafael J. Wysocki
  0 siblings, 0 replies; 33+ messages in thread
From: Rafael J. Wysocki @ 2021-03-24 15:44 UTC (permalink / raw)
  To: George Kennedy
  Cc: Rafael J. Wysocki, Mike Rapoport, Rafael J. Wysocki,
	ACPI Devel Maling List, Erik Kaneda, David Hildenbrand,
	Robert Moore, Rafael Wysocki, Len Brown,
	Linux Kernel Mailing List, Konrad Rzeszutek Wilk, Dan Carpenter,
	Dhaval Giani, Andrew Morton, Vlastimil Babka, Oscar Salvador,
	Wei Yang, Pankaj Gupta, Michal Hocko, x86 Maintainers

On Wed, Mar 24, 2021 at 4:42 PM George Kennedy
<george.kennedy@oracle.com> wrote:
>
>
>
> On 3/24/2021 9:27 AM, Rafael J. Wysocki wrote:
> > On Wed, Mar 24, 2021 at 9:24 AM Mike Rapoport <rppt@linux.ibm.com> wrote:
> >> On Tue, Mar 23, 2021 at 08:26:52PM +0100, Rafael J. Wysocki wrote:
> >>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>
> >>> The following problem has been reported by George Kennedy:
> >>>
> >>>   Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail
> >>>   in __free_pages_core()") the following use after free occurs
> >>>   intermittently when ACPI tables are accessed.
> >>>
> >>>   BUG: KASAN: use-after-free in ibft_init+0x134/0xc49
> >>>   Read of size 4 at addr ffff8880be453004 by task swapper/0/1
> >>>   CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1
> >>>   Call Trace:
> >>>    dump_stack+0xf6/0x158
> >>>    print_address_description.constprop.9+0x41/0x60
> >>>    kasan_report.cold.14+0x7b/0xd4
> >>>    __asan_report_load_n_noabort+0xf/0x20
> >>>    ibft_init+0x134/0xc49
> >>>    do_one_initcall+0xc4/0x3e0
> >>>    kernel_init_freeable+0x5af/0x66b
> >>>    kernel_init+0x16/0x1d0
> >>>    ret_from_fork+0x22/0x30
> >>>
> >>>   ACPI tables mapped via kmap() do not have their mapped pages
> >>>   reserved and the pages can be "stolen" by the buddy allocator.
> >>>
> >>> Apparently, on the affected system, the ACPI table in question is
> >>> not located in "reserved" memory, like ACPI NVS or ACPI Data, that
> >>> will not be used by the buddy allocator, so the memory occupied by
> >>> that table has to be explicitly reserved to prevent the buddy
> >>> allocator from using it.
> >>>
> >>> In order to address this problem, rearrange the initialization of the
> >>> ACPI tables on x86 to locate the initial tables earlier and reserve
> >>> the memory occupied by them.
> >>>
> >>> The other architectures using ACPI should not be affected by this
> >>> change.
> >>>
> >>> Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/
> >>> Reported-by: George Kennedy <george.kennedy@oracle.com>
> >>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >> FWIW:
> >> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
> > Thank you!
> >
> > George, can you please try this patch on the affected system?
>
> Rafael,
>
> 10 for 10 successful reboots with your patch.
>
> First, verified the failure is still there with latest 5.12.0-rc4.

Thank you!

I'll add a Tested-by from you to it, then.

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2021-03-24 15:46 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-03 20:09 [PATCH 1/1] ACPI: fix acpi table use after free George Kennedy
2021-03-04 12:14 ` Rafael J. Wysocki
2021-03-04 23:14   ` George Kennedy
2021-03-05 13:30     ` Rafael J. Wysocki
2021-03-05 13:40       ` David Hildenbrand
2021-03-05 15:24         ` George Kennedy
2021-03-10 18:39         ` Rafael J. Wysocki
2021-03-10 18:54           ` Rafael J. Wysocki
2021-03-10 19:10             ` David Hildenbrand
2021-03-10 19:38               ` Mike Rapoport
2021-03-10 19:47                 ` David Hildenbrand
2021-03-11 15:36                   ` Rafael J. Wysocki
2021-03-14 18:59                     ` Mike Rapoport
2021-03-15 16:19                       ` Rafael J. Wysocki
2021-03-15 18:05                         ` Rafael J. Wysocki
2021-03-17 20:14                         ` Rafael J. Wysocki
2021-03-17 22:28                           ` George Kennedy
2021-03-18 15:42                             ` Rafael J. Wysocki
2021-03-18  7:25                           ` Mike Rapoport
2021-03-18 10:50                             ` Rafael J. Wysocki
2021-03-18 15:22                               ` Rafael J. Wysocki
2021-03-20  8:25                                 ` Mike Rapoport
2021-03-22 16:57                                   ` Rafael J. Wysocki
2021-03-23 19:26                                   ` [PATCH] ACPI: tables: x86: Reserve memory occupied by ACPI tables Rafael J. Wysocki
2021-03-24  8:24                                     ` Mike Rapoport
2021-03-24 13:27                                       ` Rafael J. Wysocki
2021-03-24 13:49                                         ` George Kennedy
2021-03-24 15:42                                         ` George Kennedy
2021-03-24 15:44                                           ` Rafael J. Wysocki
2021-03-07  7:46       ` [PATCH 1/1] ACPI: fix acpi table use after free Mike Rapoport
2021-03-09 17:54         ` Mike Rapoport
2021-03-09 18:29           ` Rafael J. Wysocki
2021-03-09 20:16             ` Mike Rapoport

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.