All of lore.kernel.org
 help / color / mirror / Atom feed
From: mark.rutland@arm.com (Mark Rutland)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCHv3 4/7] arm64: Move some head.text functions to executable section
Date: Thu, 21 Aug 2014 11:34:55 +0100	[thread overview]
Message-ID: <20140821103455.GJ21734@leverpostej> (raw)
In-Reply-To: <1408584039-12735-5-git-send-email-lauraa@codeaurora.org>

Hi Laura,

On Thu, Aug 21, 2014 at 02:20:36AM +0100, Laura Abbott wrote:
> The code in the head.text section of the kernel exists in the
> same section as the swapper_pg_dir which means it needs the
> same page table permissions. The swapper_pg_dir needs to be
> writeable but shouldn't be executable.

I think we can drop the above. As far as I can tell, as of commit
bd00cd5f8c8c (arm64: place initial page tables above the kernel), it's no
longer relevant.

> The head.text section
> is intended to be run at early bootup before any of the regular
> kernel mappings have been setup so there is no issue at bootup.
> The suspend/resume/hotplug code path requires some of these
> head.S functions to run however which means they need to be
> executable. We can't easily move all of the head.text to
> an executable section, so split it into two parts: that which
> is used only at early head.S bootup and that which is used
> after bootup. There is a small bit of code duplication because
> of some relocation issues related to accessing code more than
> 1MB away.

From a cursory glance it looks like the only things we need write access
to in .head.text are __cpu_boot_mode and __switch_data. Can't we instead
place those in .data and make .head.text executable?

We currently find them with adr, which should be easy to replace with
adrp + add to get around relocation issues.

Thanks,
Mark.

> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
>  arch/arm64/kernel/head.S        | 424 +++++++++++++++++++++-------------------
>  arch/arm64/kernel/vmlinux.lds.S |   1 +
>  2 files changed, 228 insertions(+), 197 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 61bc210..dbdb378 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -238,7 +238,7 @@ ENTRY(stext)
>         mov     x0, x22
>         bl      lookup_processor_type
>         mov     x23, x0                         // x23=current cpu_table
> -       cbz     x23, __error_p                  // invalid processor (x23=0)?
> +       cbz     x23, __h_error_p                // invalid processor (x23=0)?
>         bl      __vet_fdt
>         bl      __create_page_tables            // x25=TTBR0, x26=TTBR1
>         /*
> @@ -250,12 +250,236 @@ ENTRY(stext)
>          */
>         ldr     x27, __switch_data              // address to jump to after
>                                                 // MMU has been enabled
> -       adr     lr, __enable_mmu                // return (PIC) address
> +       adr     lr, __h_enable_mmu              // return (PIC) address
>         ldr     x12, [x23, #CPU_INFO_SETUP]
>         add     x12, x12, x28                   // __virt_to_phys
>         br      x12                             // initialise processor
>  ENDPROC(stext)
> 
> +__h_error_p:
> +ENDPROC(__h_error_p)
> +
> +__h_error:
> +1:
> +       nop
> +       b       1b
> +ENDPROC(__h_error)
> +
> +__h_enable_mmu:
> +       ldr     x5, =vectors
> +       msr     vbar_el1, x5
> +       msr     ttbr0_el1, x25                  // load TTBR0
> +       msr     ttbr1_el1, x26                  // load TTBR1
> +       isb
> +       b       __h_turn_mmu_on
> +ENDPROC(__h_enable_mmu)
> +
> +       .align  4
> +__h_turn_mmu_on:
> +       msr     sctlr_el1, x0
> +       isb
> +       br      x27
> +ENDPROC(__h_turn_mmu_on)
> +
> +/*
> + * Determine validity of the x21 FDT pointer.
> + * The dtb must be 8-byte aligned and live in the first 512M of memory.
> + */
> +__vet_fdt:
> +       tst     x21, #0x7
> +       b.ne    1f
> +       cmp     x21, x24
> +       b.lt    1f
> +       mov     x0, #(1 << 29)
> +       add     x0, x0, x24
> +       cmp     x21, x0
> +       b.ge    1f
> +       ret
> +1:
> +       mov     x21, #0
> +       ret
> +ENDPROC(__vet_fdt)
> +/*
> + * Macro to create a table entry to the next page.
> + *
> + *     tbl:    page table address
> + *     virt:   virtual address
> + *     shift:  #imm page table shift
> + *     ptrs:   #imm pointers per table page
> + *
> + * Preserves:  virt
> + * Corrupts:   tmp1, tmp2
> + * Returns:    tbl -> next level table page address
> + */
> +       .macro  create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
> +       lsr     \tmp1, \virt, #\shift
> +       and     \tmp1, \tmp1, #\ptrs - 1        // table index
> +       add     \tmp2, \tbl, #PAGE_SIZE
> +       orr     \tmp2, \tmp2, #PMD_TYPE_TABLE   // address of next table and entry type
> +       str     \tmp2, [\tbl, \tmp1, lsl #3]
> +       add     \tbl, \tbl, #PAGE_SIZE          // next level table page
> +       .endm
> +
> +/*
> + * Macro to populate the PGD (and possibily PUD) for the corresponding
> + * block entry in the next level (tbl) for the given virtual address.
> + *
> + * Preserves:  tbl, next, virt
> + * Corrupts:   tmp1, tmp2
> + */
> +       .macro  create_pgd_entry, tbl, virt, tmp1, tmp2
> +       create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
> +#if SWAPPER_PGTABLE_LEVELS == 3
> +       create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
> +#endif
> +       .endm
> +
> +/*
> + * Macro to populate block entries in the page table for the start..end
> + * virtual range (inclusive).
> + *
> + * Preserves:  tbl, flags
> + * Corrupts:   phys, start, end, pstate
> + */
> +       .macro  create_block_map, tbl, flags, phys, start, end
> +       lsr     \phys, \phys, #BLOCK_SHIFT
> +       lsr     \start, \start, #BLOCK_SHIFT
> +       and     \start, \start, #PTRS_PER_PTE - 1       // table index
> +       orr     \phys, \flags, \phys, lsl #BLOCK_SHIFT  // table entry
> +       lsr     \end, \end, #BLOCK_SHIFT
> +       and     \end, \end, #PTRS_PER_PTE - 1           // table end index
> +9999:  str     \phys, [\tbl, \start, lsl #3]           // store the entry
> +       add     \start, \start, #1                      // next entry
> +       add     \phys, \phys, #BLOCK_SIZE               // next block
> +       cmp     \start, \end
> +       b.ls    9999b
> +       .endm
> +
> +/*
> + * Setup the initial page tables. We only setup the barest amount which is
> + * required to get the kernel running. The following sections are required:
> + *   - identity mapping to enable the MMU (low address, TTBR0)
> + *   - first few MB of the kernel linear mapping to jump to once the MMU has
> + *     been enabled, including the FDT blob (TTBR1)
> + *   - pgd entry for fixed mappings (TTBR1)
> + */
> +__create_page_tables:
> +       pgtbl   x25, x26, x28                   // idmap_pg_dir and swapper_pg_dir addresses
> +       mov     x27, lr
> +
> +       /*
> +        * Invalidate the idmap and swapper page tables to avoid potential
> +        * dirty cache lines being evicted.
> +        */
> +       mov     x0, x25
> +       add     x1, x26, #SWAPPER_DIR_SIZE
> +       bl      __inval_cache_range
> +
> +       /*
> +        * Clear the idmap and swapper page tables.
> +        */
> +       mov     x0, x25
> +       add     x6, x26, #SWAPPER_DIR_SIZE
> +1:     stp     xzr, xzr, [x0], #16
> +       stp     xzr, xzr, [x0], #16
> +       stp     xzr, xzr, [x0], #16
> +       stp     xzr, xzr, [x0], #16
> +       cmp     x0, x6
> +       b.lo    1b
> +
> +       ldr     x7, =MM_MMUFLAGS
> +
> +       /*
> +        * Create the identity mapping.
> +        */
> +       mov     x0, x25                         // idmap_pg_dir
> +       ldr     x3, =KERNEL_START
> +       add     x3, x3, x28                     // __pa(KERNEL_START)
> +       create_pgd_entry x0, x3, x5, x6
> +       ldr     x6, =KERNEL_END
> +       mov     x5, x3                          // __pa(KERNEL_START)
> +       add     x6, x6, x28                     // __pa(KERNEL_END)
> +       create_block_map x0, x7, x3, x5, x6
> +
> +       /*
> +        * Map the kernel image (starting with PHYS_OFFSET).
> +        */
> +       mov     x0, x26                         // swapper_pg_dir
> +       mov     x5, #PAGE_OFFSET
> +       create_pgd_entry x0, x5, x3, x6
> +       ldr     x6, =KERNEL_END
> +       mov     x3, x24                         // phys offset
> +       create_block_map x0, x7, x3, x5, x6
> +
> +       /*
> +        * Map the FDT blob (maximum 2MB; must be within 512MB of
> +        * PHYS_OFFSET).
> +        */
> +       mov     x3, x21                         // FDT phys address
> +       and     x3, x3, #~((1 << 21) - 1)       // 2MB aligned
> +       mov     x6, #PAGE_OFFSET
> +       sub     x5, x3, x24                     // subtract PHYS_OFFSET
> +       tst     x5, #~((1 << 29) - 1)           // within 512MB?
> +       csel    x21, xzr, x21, ne               // zero the FDT pointer
> +       b.ne    1f
> +       add     x5, x5, x6                      // __va(FDT blob)
> +       add     x6, x5, #1 << 21                // 2MB for the FDT blob
> +       sub     x6, x6, #1                      // inclusive range
> +       create_block_map x0, x7, x3, x5, x6
> +1:
> +       /*
> +        * Since the page tables have been populated with non-cacheable
> +        * accesses (MMU disabled), invalidate the idmap and swapper page
> +        * tables again to remove any speculatively loaded cache lines.
> +        */
> +       mov     x0, x25
> +       add     x1, x26, #SWAPPER_DIR_SIZE
> +       bl      __inval_cache_range
> +
> +       mov     lr, x27
> +       ret
> +ENDPROC(__create_page_tables)
> +       .ltorg
> +
> +       .align  3
> +       .type   __switch_data, %object
> +__switch_data:
> +       .quad   __mmap_switched
> +       .quad   __bss_start                     // x6
> +       .quad   __bss_stop                      // x7
> +       .quad   processor_id                    // x4
> +       .quad   __fdt_pointer                   // x5
> +       .quad   memstart_addr                   // x6
> +       .quad   init_thread_union + THREAD_START_SP // sp
> +
> +/*
> + * The following fragment of code is executed with the MMU on in MMU mode, and
> + * uses absolute addresses; this is not position independent.
> + */
> +__mmap_switched:
> +       adr     x3, __switch_data + 8
> +
> +       ldp     x6, x7, [x3], #16
> +1:     cmp     x6, x7
> +       b.hs    2f
> +       str     xzr, [x6], #8                   // Clear BSS
> +       b       1b
> +2:
> +       ldp     x4, x5, [x3], #16
> +       ldr     x6, [x3], #8
> +       ldr     x16, [x3]
> +       mov     sp, x16
> +       str     x22, [x4]                       // Save processor ID
> +       str     x21, [x5]                       // Save FDT pointer
> +       str     x24, [x6]                       // Save PHYS_OFFSET
> +       mov     x29, #0
> +       b       start_kernel
> +ENDPROC(__mmap_switched)
> +
> +/*
> + * end 'true' head section, begin head section that can be read only
> + */
> +       .section ".latehead.text","ax"
>  /*
>   * If we're fortunate enough to boot at EL2, ensure that the world is
>   * sane before dropping to EL1.
> @@ -497,183 +721,6 @@ ENDPROC(__calc_phys_offset)
>         .quad   PAGE_OFFSET
> 
>  /*
> - * Macro to create a table entry to the next page.
> - *
> - *     tbl:    page table address
> - *     virt:   virtual address
> - *     shift:  #imm page table shift
> - *     ptrs:   #imm pointers per table page
> - *
> - * Preserves:  virt
> - * Corrupts:   tmp1, tmp2
> - * Returns:    tbl -> next level table page address
> - */
> -       .macro  create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
> -       lsr     \tmp1, \virt, #\shift
> -       and     \tmp1, \tmp1, #\ptrs - 1        // table index
> -       add     \tmp2, \tbl, #PAGE_SIZE
> -       orr     \tmp2, \tmp2, #PMD_TYPE_TABLE   // address of next table and entry type
> -       str     \tmp2, [\tbl, \tmp1, lsl #3]
> -       add     \tbl, \tbl, #PAGE_SIZE          // next level table page
> -       .endm
> -
> -/*
> - * Macro to populate the PGD (and possibily PUD) for the corresponding
> - * block entry in the next level (tbl) for the given virtual address.
> - *
> - * Preserves:  tbl, next, virt
> - * Corrupts:   tmp1, tmp2
> - */
> -       .macro  create_pgd_entry, tbl, virt, tmp1, tmp2
> -       create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
> -#if SWAPPER_PGTABLE_LEVELS == 3
> -       create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
> -#endif
> -       .endm
> -
> -/*
> - * Macro to populate block entries in the page table for the start..end
> - * virtual range (inclusive).
> - *
> - * Preserves:  tbl, flags
> - * Corrupts:   phys, start, end, pstate
> - */
> -       .macro  create_block_map, tbl, flags, phys, start, end
> -       lsr     \phys, \phys, #BLOCK_SHIFT
> -       lsr     \start, \start, #BLOCK_SHIFT
> -       and     \start, \start, #PTRS_PER_PTE - 1       // table index
> -       orr     \phys, \flags, \phys, lsl #BLOCK_SHIFT  // table entry
> -       lsr     \end, \end, #BLOCK_SHIFT
> -       and     \end, \end, #PTRS_PER_PTE - 1           // table end index
> -9999:  str     \phys, [\tbl, \start, lsl #3]           // store the entry
> -       add     \start, \start, #1                      // next entry
> -       add     \phys, \phys, #BLOCK_SIZE               // next block
> -       cmp     \start, \end
> -       b.ls    9999b
> -       .endm
> -
> -/*
> - * Setup the initial page tables. We only setup the barest amount which is
> - * required to get the kernel running. The following sections are required:
> - *   - identity mapping to enable the MMU (low address, TTBR0)
> - *   - first few MB of the kernel linear mapping to jump to once the MMU has
> - *     been enabled, including the FDT blob (TTBR1)
> - *   - pgd entry for fixed mappings (TTBR1)
> - */
> -__create_page_tables:
> -       pgtbl   x25, x26, x28                   // idmap_pg_dir and swapper_pg_dir addresses
> -       mov     x27, lr
> -
> -       /*
> -        * Invalidate the idmap and swapper page tables to avoid potential
> -        * dirty cache lines being evicted.
> -        */
> -       mov     x0, x25
> -       add     x1, x26, #SWAPPER_DIR_SIZE
> -       bl      __inval_cache_range
> -
> -       /*
> -        * Clear the idmap and swapper page tables.
> -        */
> -       mov     x0, x25
> -       add     x6, x26, #SWAPPER_DIR_SIZE
> -1:     stp     xzr, xzr, [x0], #16
> -       stp     xzr, xzr, [x0], #16
> -       stp     xzr, xzr, [x0], #16
> -       stp     xzr, xzr, [x0], #16
> -       cmp     x0, x6
> -       b.lo    1b
> -
> -       ldr     x7, =MM_MMUFLAGS
> -
> -       /*
> -        * Create the identity mapping.
> -        */
> -       mov     x0, x25                         // idmap_pg_dir
> -       ldr     x3, =KERNEL_START
> -       add     x3, x3, x28                     // __pa(KERNEL_START)
> -       create_pgd_entry x0, x3, x5, x6
> -       ldr     x6, =KERNEL_END
> -       mov     x5, x3                          // __pa(KERNEL_START)
> -       add     x6, x6, x28                     // __pa(KERNEL_END)
> -       create_block_map x0, x7, x3, x5, x6
> -
> -       /*
> -        * Map the kernel image (starting with PHYS_OFFSET).
> -        */
> -       mov     x0, x26                         // swapper_pg_dir
> -       mov     x5, #PAGE_OFFSET
> -       create_pgd_entry x0, x5, x3, x6
> -       ldr     x6, =KERNEL_END
> -       mov     x3, x24                         // phys offset
> -       create_block_map x0, x7, x3, x5, x6
> -
> -       /*
> -        * Map the FDT blob (maximum 2MB; must be within 512MB of
> -        * PHYS_OFFSET).
> -        */
> -       mov     x3, x21                         // FDT phys address
> -       and     x3, x3, #~((1 << 21) - 1)       // 2MB aligned
> -       mov     x6, #PAGE_OFFSET
> -       sub     x5, x3, x24                     // subtract PHYS_OFFSET
> -       tst     x5, #~((1 << 29) - 1)           // within 512MB?
> -       csel    x21, xzr, x21, ne               // zero the FDT pointer
> -       b.ne    1f
> -       add     x5, x5, x6                      // __va(FDT blob)
> -       add     x6, x5, #1 << 21                // 2MB for the FDT blob
> -       sub     x6, x6, #1                      // inclusive range
> -       create_block_map x0, x7, x3, x5, x6
> -1:
> -       /*
> -        * Since the page tables have been populated with non-cacheable
> -        * accesses (MMU disabled), invalidate the idmap and swapper page
> -        * tables again to remove any speculatively loaded cache lines.
> -        */
> -       mov     x0, x25
> -       add     x1, x26, #SWAPPER_DIR_SIZE
> -       bl      __inval_cache_range
> -
> -       mov     lr, x27
> -       ret
> -ENDPROC(__create_page_tables)
> -       .ltorg
> -
> -       .align  3
> -       .type   __switch_data, %object
> -__switch_data:
> -       .quad   __mmap_switched
> -       .quad   __bss_start                     // x6
> -       .quad   __bss_stop                      // x7
> -       .quad   processor_id                    // x4
> -       .quad   __fdt_pointer                   // x5
> -       .quad   memstart_addr                   // x6
> -       .quad   init_thread_union + THREAD_START_SP // sp
> -
> -/*
> - * The following fragment of code is executed with the MMU on in MMU mode, and
> - * uses absolute addresses; this is not position independent.
> - */
> -__mmap_switched:
> -       adr     x3, __switch_data + 8
> -
> -       ldp     x6, x7, [x3], #16
> -1:     cmp     x6, x7
> -       b.hs    2f
> -       str     xzr, [x6], #8                   // Clear BSS
> -       b       1b
> -2:
> -       ldp     x4, x5, [x3], #16
> -       ldr     x6, [x3], #8
> -       ldr     x16, [x3]
> -       mov     sp, x16
> -       str     x22, [x4]                       // Save processor ID
> -       str     x21, [x5]                       // Save FDT pointer
> -       str     x24, [x6]                       // Save PHYS_OFFSET
> -       mov     x29, #0
> -       b       start_kernel
> -ENDPROC(__mmap_switched)
> -
> -/*
>   * Exception handling. Something went wrong and we can't proceed. We ought to
>   * tell the user, but since we don't have any guarantee that we're even
>   * running on the right architecture, we do virtually nothing.
> @@ -721,21 +768,4 @@ __lookup_processor_type_data:
>         .quad   cpu_table
>         .size   __lookup_processor_type_data, . - __lookup_processor_type_data
> 
> -/*
> - * Determine validity of the x21 FDT pointer.
> - * The dtb must be 8-byte aligned and live in the first 512M of memory.
> - */
> -__vet_fdt:
> -       tst     x21, #0x7
> -       b.ne    1f
> -       cmp     x21, x24
> -       b.lt    1f
> -       mov     x0, #(1 << 29)
> -       add     x0, x0, x24
> -       cmp     x21, x0
> -       b.ge    1f
> -       ret
> -1:
> -       mov     x21, #0
> -       ret
> -ENDPROC(__vet_fdt)
> +
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 97f0c04..2b674c5 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -56,6 +56,7 @@ SECTIONS
>         }
>         .text : {                       /* Real text segment            */
>                 _stext = .;             /* Text and read-only data      */
> +                       *(.latehead.text)
>                         __exception_text_start = .;
>                         *(.exception.text)
>                         __exception_text_end = .;
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

  reply	other threads:[~2014-08-21 10:34 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-21  1:20 [PATCHv3 0/7] Better page protections for arm64 Laura Abbott
2014-08-21  1:20 ` [PATCHv3 1/7] arm64: Treat handle_arch_irq as a function pointer Laura Abbott
     [not found]   ` <CAGXu5jLur_gdXs2X5BCmxB6L5HwgyP12jkrufK7bpS0Cxhp_+Q@mail.gmail.com>
2014-08-25 18:23     ` Laura Abbott
2014-08-28 17:02   ` Catalin Marinas
2014-08-21  1:20 ` [PATCHv3 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
2014-08-21  9:30   ` Mark Rutland
2014-08-21  1:20 ` [PATCHv3 3/7] arm64: Move cpu_resume into the text section Laura Abbott
2014-08-25 20:34   ` Stephen Boyd
2014-08-26  0:43     ` Laura Abbott
2014-08-26  1:08       ` Stephen Boyd
2014-08-21  1:20 ` [PATCHv3 4/7] arm64: Move some head.text functions to executable section Laura Abbott
2014-08-21 10:34   ` Mark Rutland [this message]
2014-08-21 21:42     ` Laura Abbott
2014-08-22  9:48       ` Mark Rutland
2014-08-26  0:32         ` Laura Abbott
2014-08-26 17:45           ` Mark Rutland
2014-08-21  1:20 ` [PATCHv3 5/7] arm64: Factor out fixmap initialiation from ioremap Laura Abbott
2014-08-23  5:45   ` Kees Cook
2014-08-25 18:34     ` Laura Abbott
2014-08-21  1:20 ` [PATCHv3 6/7] arm64: use fixmap for text patching when text is RO Laura Abbott
2014-08-23  5:51   ` Kees Cook
2014-08-25 18:38     ` Laura Abbott
2014-08-26 18:36       ` Mark Rutland
2014-08-21  1:20 ` [PATCHv3 7/7] arm64: add better page protections to arm64 Laura Abbott
2014-08-23  5:59   ` Kees Cook
2014-08-25 19:04     ` Laura Abbott

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140821103455.GJ21734@leverpostej \
    --to=mark.rutland@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.