linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: George Kennedy <george.kennedy@oracle.com>
To: Mike Rapoport <rppt@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>,
	Andrey Konovalov <andreyknvl@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Vincenzo Frascino <vincenzo.frascino@arm.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Konrad Rzeszutek Wilk <konrad@darnok.org>,
	Will Deacon <will.deacon@arm.com>,
	Andrey Ryabinin <aryabinin@virtuozzo.com>,
	Alexander Potapenko <glider@google.com>,
	Marco Elver <elver@google.com>,
	Peter Collingbourne <pcc@google.com>,
	Evgenii Stepanov <eugenis@google.com>,
	Branislav Rankov <Branislav.Rankov@arm.com>,
	Kevin Brodsky <kevin.brodsky@arm.com>,
	Christoph Hellwig <hch@infradead.org>,
	kasan-dev <kasan-dev@googlegroups.com>,
	Linux ARM <linux-arm-kernel@lists.infradead.org>,
	Linux Memory Management List <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Dhaval Giani <dhaval.giani@oracle.com>
Subject: Re: [PATCH] mm, kasan: don't poison boot memory
Date: Thu, 25 Feb 2021 11:06:12 -0500	[thread overview]
Message-ID: <a43179e8-b063-569c-f27d-dda84133322e@oracle.com> (raw)
In-Reply-To: <bb444ddb-d60d-114f-c2fe-64e5fb34102d@oracle.com>



On 2/25/2021 10:22 AM, George Kennedy wrote:
>
>
> On 2/25/2021 9:57 AM, Mike Rapoport wrote:
>> On Thu, Feb 25, 2021 at 07:38:19AM -0500, George Kennedy wrote:
>>> On 2/25/2021 3:53 AM, Mike Rapoport wrote:
>>>> Hi George,
>>>>
>>>>> On 2/24/2021 5:37 AM, Mike Rapoport wrote:
>>>>>> On Tue, Feb 23, 2021 at 04:46:28PM -0500, George Kennedy wrote:
>>>>>>> Mike,
>>>>>>>
>>>>>>> Still no luck.
>>>>>>>
>>>>>>> [   30.193723] iscsi: registered transport (iser)
>>>>>>> [   30.195970] iBFT detected.
>>>>>>> [   30.196571] BUG: unable to handle page fault for address: 
>>>>>>> ffffffffff240004
>>>>>> Hmm, we cannot set ibft_addr to early pointer to the ACPI table.
>>>>>> Let's try something more disruptive and move the reservation back to
>>>>>> iscsi_ibft_find.c.
>>>>>>
>>>>>> diff --git a/arch/x86/kernel/acpi/boot.c 
>>>>>> b/arch/x86/kernel/acpi/boot.c
>>>>>> index 7bdc0239a943..c118dd54a747 100644
>>>>>> --- a/arch/x86/kernel/acpi/boot.c
>>>>>> +++ b/arch/x86/kernel/acpi/boot.c
>>>>>> @@ -1551,6 +1551,7 @@ void __init acpi_boot_table_init(void)
>>>>>>         if (acpi_disabled)
>>>>>>             return;
>>>>>> +#if 0
>>>>>>         /*
>>>>>>          * Initialize the ACPI boot-time table parser.
>>>>>>          */
>>>>>> @@ -1558,6 +1559,7 @@ void __init acpi_boot_table_init(void)
>>>>>>             disable_acpi();
>>>>>>             return;
>>>>>>         }
>>>>>> +#endif
>>>>>>         acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
>>>>>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>>>>>> index d883176ef2ce..c615ce96c9a2 100644
>>>>>> --- a/arch/x86/kernel/setup.c
>>>>>> +++ b/arch/x86/kernel/setup.c
>>>>>> @@ -570,16 +570,6 @@ void __init reserve_standard_io_resources(void)
>>>>>>     }
>>>>>> -static __init void reserve_ibft_region(void)
>>>>>> -{
>>>>>> -    unsigned long addr, size = 0;
>>>>>> -
>>>>>> -    addr = find_ibft_region(&size);
>>>>>> -
>>>>>> -    if (size)
>>>>>> -        memblock_reserve(addr, size);
>>>>>> -}
>>>>>> -
>>>>>>     static bool __init snb_gfx_workaround_needed(void)
>>>>>>     {
>>>>>>     #ifdef CONFIG_PCI
>>>>>> @@ -1032,6 +1022,12 @@ void __init setup_arch(char **cmdline_p)
>>>>>>          */
>>>>>>         find_smp_config();
>>>>>> +    /*
>>>>>> +     * Initialize the ACPI boot-time table parser.
>>>>>> +     */
>>>>>> +    if (acpi_table_init())
>>>>>> +        disable_acpi();
>>>>>> +
>>>>>>         reserve_ibft_region();
>>>>>>         early_alloc_pgt_buf();
>>>>>> diff --git a/drivers/firmware/iscsi_ibft_find.c 
>>>>>> b/drivers/firmware/iscsi_ibft_find.c
>>>>>> index 64bb94523281..01be513843d6 100644
>>>>>> --- a/drivers/firmware/iscsi_ibft_find.c
>>>>>> +++ b/drivers/firmware/iscsi_ibft_find.c
>>>>>> @@ -47,7 +47,25 @@ static const struct {
>>>>>>     #define VGA_MEM 0xA0000 /* VGA buffer */
>>>>>>     #define VGA_SIZE 0x20000 /* 128kB */
>>>>>> -static int __init find_ibft_in_mem(void)
>>>>>> +static void __init *acpi_find_ibft_region(void)
>>>>>> +{
>>>>>> +    int i;
>>>>>> +    struct acpi_table_header *table = NULL;
>>>>>> +    acpi_status status;
>>>>>> +
>>>>>> +    if (acpi_disabled)
>>>>>> +        return NULL;
>>>>>> +
>>>>>> +    for (i = 0; i < ARRAY_SIZE(ibft_signs) && !ibft_addr; i++) {
>>>>>> +        status = acpi_get_table(ibft_signs[i].sign, 0, &table);
>>>>>> +        if (ACPI_SUCCESS(status))
>>>>>> +            return table;
>>>>>> +    }
>>>>>> +
>>>>>> +    return NULL;
>>>>>> +}
>>>>>> +
>>>>>> +static void __init *find_ibft_in_mem(void)
>>>>>>     {
>>>>>>         unsigned long pos;
>>>>>>         unsigned int len = 0;
>>>>>> @@ -70,35 +88,44 @@ static int __init find_ibft_in_mem(void)
>>>>>>                     /* if the length of the table extends past 1M,
>>>>>>                      * the table cannot be valid. */
>>>>>>                     if (pos + len <= (IBFT_END-1)) {
>>>>>> -                    ibft_addr = (struct acpi_table_ibft *)virt;
>>>>>>                         pr_info("iBFT found at 0x%lx.\n", pos);
>>>>>> -                    goto done;
>>>>>> +                    return virt;
>>>>>>                     }
>>>>>>                 }
>>>>>>             }
>>>>>>         }
>>>>>> -done:
>>>>>> -    return len;
>>>>>> +
>>>>>> +    return NULL;
>>>>>>     }
>>>>>> +
>>>>>> +static void __init *find_ibft(void)
>>>>>> +{
>>>>>> +    /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
>>>>>> +     * only use ACPI for this */
>>>>>> +    if (!efi_enabled(EFI_BOOT))
>>>>>> +        return find_ibft_in_mem();
>>>>>> +    else
>>>>>> +        return acpi_find_ibft_region();
>>>>>> +}
>>>>>> +
>>>>>>     /*
>>>>>>      * Routine used to find the iSCSI Boot Format Table. The logical
>>>>>>      * kernel address is set in the ibft_addr global variable.
>>>>>>      */
>>>>>> -unsigned long __init find_ibft_region(unsigned long *sizep)
>>>>>> +void __init reserve_ibft_region(void)
>>>>>>     {
>>>>>> -    ibft_addr = NULL;
>>>>>> +    struct acpi_table_ibft *table;
>>>>>> +    unsigned long size;
>>>>>> -    /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
>>>>>> -     * only use ACPI for this */
>>>>>> +    table = find_ibft();
>>>>>> +    if (!table)
>>>>>> +        return;
>>>>>> -    if (!efi_enabled(EFI_BOOT))
>>>>>> -        find_ibft_in_mem();
>>>>>> -
>>>>>> -    if (ibft_addr) {
>>>>>> -        *sizep = PAGE_ALIGN(ibft_addr->header.length);
>>>>>> -        return (u64)virt_to_phys(ibft_addr);
>>>>>> -    }
>>>>>> +    size = PAGE_ALIGN(table->header.length);
>>>>>> +    memblock_reserve(virt_to_phys(table), size);
>>>>>> -    *sizep = 0;
>>>>>> -    return 0;
>>>>>> +    if (efi_enabled(EFI_BOOT))
>>>>>> +        acpi_put_table(&table->header);
>>>>>> +    else
>>>>>> +        ibft_addr = table;
>>>>>>     }
>>>>>> diff --git a/include/linux/iscsi_ibft.h b/include/linux/iscsi_ibft.h
>>>>>> index b7b45ca82bea..da813c891990 100644
>>>>>> --- a/include/linux/iscsi_ibft.h
>>>>>> +++ b/include/linux/iscsi_ibft.h
>>>>>> @@ -26,13 +26,9 @@ extern struct acpi_table_ibft *ibft_addr;
>>>>>>      * mapped address is set in the ibft_addr variable.
>>>>>>      */
>>>>>>     #ifdef CONFIG_ISCSI_IBFT_FIND
>>>>>> -unsigned long find_ibft_region(unsigned long *sizep);
>>>>>> +void reserve_ibft_region(void);
>>>>>>     #else
>>>>>> -static inline unsigned long find_ibft_region(unsigned long *sizep)
>>>>>> -{
>>>>>> -    *sizep = 0;
>>>>>> -    return 0;
>>>>>> -}
>>>>>> +static inline void reserve_ibft_region(void) {}
>>>>>>     #endif
>>>>>>     #endif /* ISCSI_IBFT_H */
>>>>> Still no luck Mike,
>>>>>
>>>>> We're back to the original problem where the only thing that 
>>>>> worked was to
>>>>> run "SetPageReserved(page)" before calling "kmap(page)". The page 
>>>>> is being
>>>>> "freed" before ibft_init() is called as a result of the recent 
>>>>> buddy page
>>>>> freeing changes.
>>>> I keep missing some little details each time :(
>>> No worries. Thanks for all your help. Does this patch go on top of your
>>> previous patch or is it standalone?
>> This is standalone.
>>> George
>>>> Ok, let's try from the different angle.
>>>>
>>>> diff --git a/drivers/acpi/acpica/tbutils.c 
>>>> b/drivers/acpi/acpica/tbutils.c
>>>> index 4b9b329a5a92..ec43e1447336 100644
>>>> --- a/drivers/acpi/acpica/tbutils.c
>>>> +++ b/drivers/acpi/acpica/tbutils.c
>>>> @@ -7,6 +7,8 @@
>>>>     *
>>>> *****************************************************************************/
>>>> +#include <linux/memblock.h>
>>>> +
>>>>    #include <acpi/acpi.h>
>>>>    #include "accommon.h"
>>>>    #include "actables.h"
>>>> @@ -339,6 +341,21 @@ acpi_tb_parse_root_table(acpi_physical_address 
>>>> rsdp_address)
>>>>                acpi_tb_parse_fadt();
>>>>            }
>>>> +        if (ACPI_SUCCESS(status) &&
>>>> + ACPI_COMPARE_NAMESEG(&acpi_gbl_root_table_list.
>>>> +                     tables[table_index].signature,
>>>> +                     ACPI_SIG_IBFT)) {
>>>> +            struct acpi_table_header *ibft;
>>>> +            struct acpi_table_desc *desc;
>>>> +
>>>> +            desc = &acpi_gbl_root_table_list.tables[table_index];
>>>> +            status = acpi_tb_get_table(desc, &ibft);
>>>> +            if (ACPI_SUCCESS(status)) {
>>>> +                memblock_reserve(address, ibft->length);
>>>> +                acpi_tb_put_table(desc);
>>>> +
>>>> +        }
>>>> +
>>>>    next_table:
>>>>            table_entry += table_entry_size;
>>>>
>>>>
> Applied just your latest patch, but same failure.
>
> I thought there was an earlier comment (which I can't find now) that 
> stated that memblock_reserve() wouldn't reserve the page, which is 
> what's needed here.
Mike,

Here was David's explanation of what he thinks is going on (or should be 
going on) from a few days ago:

QUOTE...
I assume that acpi_map()/acpi_unmap() map some firmware blob that is 
provided via firmware/bios/... to us.

should_use_kmap() tells us whether
a) we have a "struct page" and should kmap() that one
b) we don't have a "struct page" and should ioremap.

As it is a blob, the firmware should always reserve that memory region 
via memblock (e.g., memblock_reserve()), such that we either
1) don't create a memmap ("struct page") at all (-> case b) )
2) if we have to create e memmap, we mark the page PG_reserved and
*never* expose it to the buddy (-> case a) )


Are you telling me that in this case we might have a memmap for the HW 
blob that is *not* PG_reserved? In that case it most probably got 
exposed to the buddy where it can happily get allocated/freed.

The latent BUG would be that that blob gets exposed to the system like 
ordinary RAM, and not reserved via memblock early during boot. Assuming 
that blob has a low physical address, with my patch it will get 
allocated/used a lot earlier - which would mean we trigger this latent 
BUG now more easily.
...END_QUOTE

Your most recent patch has added the memblock_reserve(), but it's still 
missing the PG_reserved setting.

Thanks,
George

>
> [   30.308229] iBFT detected..
> [   30.308796] 
> ==================================================================
> [   30.308890] BUG: KASAN: use-after-free in ibft_init+0x134/0xc33
> [   30.308890] Read of size 4 at addr ffff8880be453004 by task 
> swapper/0/1
> [   30.308890]
> [   30.308890] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
> 5.11.0-f9593a0 #12
> [   30.308890] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
> BIOS 0.0.0 02/06/2015
> [   30.308890] Call Trace:
> [   30.308890]  dump_stack+0xdb/0x120
> [   30.308890]  ? ibft_init+0x134/0xc33
> [   30.308890]  print_address_description.constprop.7+0x41/0x60
> [   30.308890]  ? ibft_init+0x134/0xc33
> [   30.308890]  ? ibft_init+0x134/0xc33
> [   30.308890]  kasan_report.cold.10+0x78/0xd1
> [   30.308890]  ? ibft_init+0x134/0xc33
> [   30.308890]  __asan_report_load_n_noabort+0xf/0x20
> [   30.308890]  ibft_init+0x134/0xc33
> [   30.308890]  ? write_comp_data+0x2f/0x90
> [   30.308890]  ? ibft_check_initiator_for+0x159/0x159
> [   30.308890]  ? write_comp_data+0x2f/0x90
> [   30.308890]  ? ibft_check_initiator_for+0x159/0x159
> [   30.308890]  do_one_initcall+0xc4/0x3e0
> [   30.308890]  ? perf_trace_initcall_level+0x3e0/0x3e0
> [   30.308890]  ? unpoison_range+0x14/0x40
> [   30.308890]  ? ____kasan_kmalloc.constprop.5+0x8f/0xc0
> [   30.308890]  ? kernel_init_freeable+0x420/0x652
> [   30.308890]  ? __kasan_kmalloc+0x9/0x10
> [   30.308890]  ? __sanitizer_cov_trace_pc+0x21/0x50
> [   30.308890]  kernel_init_freeable+0x596/0x652
> [   30.308890]  ? console_on_rootfs+0x7d/0x7d
> [   30.308890]  ? __sanitizer_cov_trace_pc+0x21/0x50
> [   30.308890]  ? rest_init+0xf0/0xf0
> [   30.308890]  kernel_init+0x16/0x1d0
> [   30.308890]  ? rest_init+0xf0/0xf0
> [   30.308890]  ret_from_fork+0x22/0x30
> [   30.308890]
> [   30.308890] The buggy address belongs to the page:
> [   30.308890] page:0000000001b7b17c refcount:0 mapcount:0 
> mapping:0000000000000000 index:0x1 pfn:0xbe453
> [   30.308890] flags: 0xfffffc0000000()
> [   30.308890] raw: 000fffffc0000000 ffffea0002ef9788 ffffea0002f91488 
> 0000000000000000
> [   30.308890] raw: 0000000000000001 0000000000000000 00000000ffffffff 
> 0000000000000000
> [   30.308890] page dumped because: kasan: bad access detected
> [   30.308890] page_owner tracks the page as freed
> [   30.308890] page last allocated via order 0, migratetype Movable, 
> gfp_mask 0x100dca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), pid 204, ts 
> 28121288605
> [   30.308890]  prep_new_page+0xfb/0x140
> [   30.308890]  get_page_from_freelist+0x3503/0x5730
> [   30.308890]  __alloc_pages_nodemask+0x2d8/0x650
> [   30.308890]  alloc_pages_vma+0xe2/0x560
> [   30.308890]  __handle_mm_fault+0x930/0x26c0
> [   30.308890]  handle_mm_fault+0x1f9/0x810
> [   30.308890]  do_user_addr_fault+0x6f7/0xca0
> [   30.308890]  exc_page_fault+0xaf/0x1a0
> [   30.308890]  asm_exc_page_fault+0x1e/0x30
> [   30.308890] page last free stack trace:
> [   30.308890]  free_pcp_prepare+0x122/0x290
> [   30.308890]  free_unref_page_list+0xe6/0x490
> [   30.308890]  release_pages+0x2ed/0x1270
> [   30.308890]  free_pages_and_swap_cache+0x245/0x2e0
> [   30.308890]  tlb_flush_mmu+0x11e/0x680
> [   30.308890]  tlb_finish_mmu+0xa6/0x3e0
> [   30.308890]  exit_mmap+0x2b3/0x540
> [   30.308890]  mmput+0x11d/0x450
> [   30.308890]  do_exit+0xaa6/0x2d40
> [   30.308890]  do_group_exit+0x128/0x340
> [   30.308890]  __x64_sys_exit_group+0x43/0x50
> [   30.308890]  do_syscall_64+0x37/0x50
> [   30.308890]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [   30.308890]
> [   30.308890] Memory state around the buggy address:
> [   30.308890]  ffff8880be452f00: ff ff ff ff ff ff ff ff ff ff ff ff 
> ff ff ff ff
> [   30.308890]  ffff8880be452f80: ff ff ff ff ff ff ff ff ff ff ff ff 
> ff ff ff ff
> [   30.308890] >ffff8880be453000: ff ff ff ff ff ff ff ff ff ff ff ff 
> ff ff ff ff
> [   30.308890]                    ^
> [   30.308890]  ffff8880be453080: ff ff ff ff ff ff ff ff ff ff ff ff 
> ff ff ff ff
> [   30.308890]  ffff8880be453100: ff ff ff ff ff ff ff ff ff ff ff ff 
> ff ff ff ff
> [   30.308890] 
> ==================================================================
>
> George
>



  reply	other threads:[~2021-02-25 18:47 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-17 20:56 [PATCH] mm, kasan: don't poison boot memory Andrey Konovalov
2021-02-18  8:55 ` David Hildenbrand
2021-02-18 19:40   ` Andrey Konovalov
2021-02-18 19:46     ` David Hildenbrand
2021-02-18 20:26       ` Andrey Konovalov
2021-02-19  0:06   ` George Kennedy
2021-02-19  0:09     ` Andrey Konovalov
2021-02-19 16:45       ` George Kennedy
2021-02-19 23:04         ` George Kennedy
2021-02-22  9:52           ` David Hildenbrand
2021-02-22 15:13             ` George Kennedy
2021-02-22 16:13               ` David Hildenbrand
2021-02-22 16:39                 ` David Hildenbrand
2021-02-22 17:40                   ` Konrad Rzeszutek Wilk
2021-02-22 18:45                     ` Mike Rapoport
2021-02-22 18:42                 ` George Kennedy
2021-02-22 21:55                   ` Mike Rapoport
     [not found]                     ` <9773282a-2854-25a4-9faa-9da5dd34e371@oracle.com>
2021-02-23 10:33                       ` Mike Rapoport
     [not found]                         ` <3ef9892f-d657-207f-d4cf-111f98dcb55c@oracle.com>
2021-02-23 15:47                           ` Mike Rapoport
2021-02-23 18:05                             ` George Kennedy
2021-02-23 20:09                               ` Mike Rapoport
2021-02-23 21:16                                 ` George Kennedy
2021-02-23 21:32                                   ` Mike Rapoport
2021-02-23 21:46                                     ` George Kennedy
2021-02-24 10:37                                       ` Mike Rapoport
2021-02-24 14:22                                         ` George Kennedy
2021-02-25  8:53                                           ` Mike Rapoport
2021-02-25 12:38                                             ` George Kennedy
2021-02-25 14:57                                               ` Mike Rapoport
2021-02-25 15:22                                                 ` George Kennedy
2021-02-25 16:06                                                   ` George Kennedy [this message]
2021-02-25 16:07                                                   ` Mike Rapoport
2021-02-25 16:31                                                     ` George Kennedy
2021-02-25 17:23                                                       ` David Hildenbrand
2021-02-25 17:41                                                         ` Mike Rapoport
2021-02-25 17:50                                                       ` Mike Rapoport
2021-02-25 17:33                                                     ` George Kennedy
2021-02-26  1:19                                                       ` George Kennedy
2021-02-26 11:17                                                         ` Mike Rapoport
2021-02-26 16:16                                                           ` George Kennedy
2021-02-28 18:08                                                             ` Mike Rapoport
2021-03-01 14:29                                                               ` George Kennedy
2021-03-02  1:20                                                                 ` George Kennedy
2021-03-02  9:57                                                                   ` Mike Rapoport
2021-02-23 21:26                                 ` George Kennedy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a43179e8-b063-569c-f27d-dda84133322e@oracle.com \
    --to=george.kennedy@oracle.com \
    --cc=Branislav.Rankov@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andreyknvl@google.com \
    --cc=aryabinin@virtuozzo.com \
    --cc=catalin.marinas@arm.com \
    --cc=david@redhat.com \
    --cc=dhaval.giani@oracle.com \
    --cc=dvyukov@google.com \
    --cc=elver@google.com \
    --cc=eugenis@google.com \
    --cc=glider@google.com \
    --cc=hch@infradead.org \
    --cc=kasan-dev@googlegroups.com \
    --cc=kevin.brodsky@arm.com \
    --cc=konrad@darnok.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=pcc@google.com \
    --cc=rppt@linux.ibm.com \
    --cc=vincenzo.frascino@arm.com \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).