From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753265AbbDAArO (ORCPT ); Tue, 31 Mar 2015 20:47:14 -0400 Received: from mga02.intel.com ([134.134.136.20]:32442 "EHLO mga02.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753067AbbDAAqc (ORCPT ); Tue, 31 Mar 2015 20:46:32 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.11,503,1422950400"; d="scan'208";a="549243050" Subject: [PATCH 15/16] x86, mpx: support 32-bit binaries on 64-bit kernel To: linux-kernel@vger.kernel.org Cc: x86@kernel.org, tglx@linutronix.de, Dave Hansen , dave.hansen@linux.intel.com From: Dave Hansen Date: Tue, 31 Mar 2015 17:46:43 -0700 References: <20150401004623.894DF37A@viggo.jf.intel.com> In-Reply-To: <20150401004623.894DF37A@viggo.jf.intel.com> Message-Id: <20150401004643.37CDE9F4@viggo.jf.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Dave Hansen Right now, the kernel can only switch between 64-bit and 32-bit binaries at compile time. This patch adds support for 32-bit binaries on 64-bit kernels when we support ia32 emulation. We essentially choose which set of table sizes to use when doing arithmetic for the bounds table calculations. This also uses a different approach for calculating the table indexes than before. I think the new one makes it much more clear what is going on, and allows us to share more code between the 32 and 64-bit cases. 
Based-on-patch-by: Qiaowei Ren Signed-off-by: Dave Hansen --- b/arch/x86/include/asm/mpx.h | 68 +++++++++---------- b/arch/x86/mm/mpx.c | 150 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 163 insertions(+), 55 deletions(-) diff -puN arch/x86/include/asm/mpx.h~0002-x86-mpx-support-32bit-binaries-on-64bit-kernel arch/x86/include/asm/mpx.h --- a/arch/x86/include/asm/mpx.h~0002-x86-mpx-support-32bit-binaries-on-64bit-kernel 2015-03-31 16:42:01.155501615 -0700 +++ b/arch/x86/include/asm/mpx.h 2015-03-31 16:42:01.160501840 -0700 @@ -13,49 +13,49 @@ #define MPX_BNDCFG_ENABLE_FLAG 0x1 #define MPX_BD_ENTRY_VALID_FLAG 0x1 -#ifdef CONFIG_X86_64 - -/* upper 28 bits [47:20] of the virtual address in 64-bit used to - * index into bounds directory (BD). +/* + * The upper 28 bits [47:20] of the virtual address in 64-bit + * are used to index into bounds directory (BD). + * + * The directory is 2G (2^31) in size, and with 8-byte entries + * it has 2^28 entries. */ -#define MPX_BD_ENTRY_OFFSET 28 -#define MPX_BD_ENTRY_SHIFT 3 -/* bits [19:3] of the virtual address in 64-bit used to index into - * bounds table (BT). +#define MPX_BD_SIZE_BYTES_64 (1UL<<31) +/* An entry is a long, so 8 bytes and a shift of 3 */ +#define MPX_BD_ENTRY_BYTES_64 8 +#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) + +/* + * The 32-bit directory is 4MB (2^22) in size, and with 4-byte + * entries it has 2^20 entries. 
*/ -#define MPX_BT_ENTRY_OFFSET 17 -#define MPX_BT_ENTRY_SHIFT 5 -#define MPX_IGN_BITS 3 -#define MPX_BD_ENTRY_TAIL 3 - -#else - -#define MPX_BD_ENTRY_OFFSET 20 -#define MPX_BD_ENTRY_SHIFT 2 -#define MPX_BT_ENTRY_OFFSET 10 -#define MPX_BT_ENTRY_SHIFT 4 -#define MPX_IGN_BITS 2 -#define MPX_BD_ENTRY_TAIL 2 - -#endif - -#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) -#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) +#define MPX_BD_SIZE_BYTES_32 (1UL<<22) +/* An entry is a long, so 4 bytes and a shift of 2 */ +#define MPX_BD_ENTRY_BYTES_32 4 +#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) + +/* + * A 64-bit table is 4MB total in size, and an entry is + * 4 64-bit pointers in size. + */ +#define MPX_BT_SIZE_BYTES_64 (1UL<<22) +#define MPX_BT_ENTRY_BYTES_64 32 +#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) + +/* + * A 32-bit table is 16kB total in size, and an entry is + * 4 32-bit pointers in size. 
+ */ +#define MPX_BT_SIZE_BYTES_32 (1UL<<14) +#define MPX_BT_ENTRY_BYTES_32 16 +#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) #define MPX_BNDSTA_TAIL 2 #define MPX_BNDCFG_TAIL 12 #define MPX_BNDSTA_ADDR_MASK (~((1UL<>(MPX_BT_ENTRY_OFFSET+ \ - MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) -#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ - MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) - #ifdef CONFIG_X86_INTEL_MPX siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, struct task_struct *tsk); diff -puN arch/x86/mm/mpx.c~0002-x86-mpx-support-32bit-binaries-on-64bit-kernel arch/x86/mm/mpx.c --- a/arch/x86/mm/mpx.c~0002-x86-mpx-support-32bit-binaries-on-64bit-kernel 2015-03-31 16:42:01.157501704 -0700 +++ b/arch/x86/mm/mpx.c 2015-03-31 16:42:01.161501885 -0700 @@ -36,6 +36,22 @@ static int is_mpx_vma(struct vm_area_str return (vma->vm_ops == &mpx_vma_ops); } +static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BD_SIZE_BYTES_64; + else + return MPX_BD_SIZE_BYTES_32; +} + +static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_SIZE_BYTES_64; + else + return MPX_BT_SIZE_BYTES_32; +} + /* * This is really a simplified "vm_mmap". it only handles MPX * bounds tables (the bounds directory is user-allocated). @@ -52,7 +68,7 @@ static unsigned long mpx_mmap(unsigned l struct vm_area_struct *vma; /* Only bounds table can be allocated here */ - if (len != MPX_BT_SIZE_BYTES) + if (len != mpx_bt_size_bytes(mm)) return -EINVAL; down_write(&mm->mmap_sem); @@ -451,13 +467,12 @@ static int mpx_cmpxchg_bd_entry(struct m } /* - * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each - * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, + * With 32-bit mode, a bounds directory is 4MB, and the size of each + * bounds table is 16KB. 
With 64-bit mode, a bounds directory is 2GB, * and the size of each bounds table is 4MB. */ -static int allocate_bt(long __user *bd_entry) +static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) { - struct mm_struct *mm = current->mm; unsigned long expected_old_val = 0; unsigned long actual_old_val = 0; unsigned long bt_addr; @@ -468,7 +483,7 @@ static int allocate_bt(long __user *bd_e * Carve the virtual space out of userspace for the new * bounds table: */ - bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); + bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); if (IS_ERR((void *)bt_addr)) return PTR_ERR((void *)bt_addr); /* @@ -519,7 +534,7 @@ static int allocate_bt(long __user *bd_e trace_mpx_new_bounds_table(bt_addr); return 0; out_unmap: - vm_munmap(bt_addr, MPX_BT_SIZE_BYTES); + vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); return ret; } @@ -538,6 +553,7 @@ static int do_mpx_bt_fault(struct task_s { unsigned long bd_entry, bd_base; struct bndcsr *bndcsr; + struct mm_struct *mm = current->mm; bndcsr = tsk_get_xsave_field(tsk, XSTATE_BNDCSR); if (!bndcsr) @@ -556,10 +572,10 @@ static int do_mpx_bt_fault(struct task_s * the directory is. */ if ((bd_entry < bd_base) || - (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) + (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) return -EINVAL; - return allocate_bt((long __user *)bd_entry); + return allocate_bt(mm, (long __user *)bd_entry); } int mpx_handle_bd_fault(struct task_struct *tsk) @@ -791,7 +807,95 @@ static int unmap_single_bt(struct mm_str * avoid recursion, do_munmap() will check whether it comes * from one bounds table through VM_MPX flag. */ - return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); +} + +/* + * Take a virtual address and turn it into the offset in bytes + * inside of the bounds table where the bounds table entry + * controlling 'addr' can be found. 
+ */ +static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, + unsigned long addr) +{ + unsigned long bt_entry_size_bytes; + unsigned long bt_table_nr_entries; + unsigned long offset = addr; + + if (is_64bit_mm(mm)) { + /* Bottom 3 bits are ignored on 64-bit */ + offset >>= 3; + bt_entry_size_bytes = MPX_BT_ENTRY_BYTES_64; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; + } else { + /* Bottom 2 bits are ignored on 32-bit */ + offset >>= 2; + bt_entry_size_bytes = MPX_BT_ENTRY_BYTES_32; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; + } + /* + * We know the size of the table in to which we are + * indexing, and we have eliminated all the low bits + * which are ignored for indexing. + * + * Mask out all the high bits which we do not need + * to index in to the table. + */ + offset &= (bt_table_nr_entries-1); + /* + * We now have an entry offset in terms of *entries* in + * the table. We need to scale it back up to bytes. + */ + offset *= bt_entry_size_bytes; + return offset; +} + +static noinline unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, + unsigned long addr) +{ + /* + * Total size of the process's virtual address space. + * Use a u64 because 4GB (for 32-bit) won't fit in a long. + */ + u64 vaddr_space_size; + /* + * How much virtual address space does a single bounds + * directory entry cover? + */ + unsigned long bd_entry_virt_space; + + /* + * There are several ways to derive the bd offsets. We + * use the following approach here: + * 1. We know the size of the virtual address space + * 2. We know the number of entries in a bounds directory + * 3. We know that each entry covers a fixed amount of + * virtual address space. + * So, we can just divide the virtual address by the + * virtual space covered by one entry to determine which + * entry "controls" the given virtual address. 
+ */ + if (is_64bit_mm(mm)) { + vaddr_space_size = 1ULL << __VIRTUAL_MASK_SHIFT; + bd_entry_virt_space = vaddr_space_size / MPX_BD_NR_ENTRIES_64; + /* + * __VIRTUAL_MASK takes the 64-bit addressing hole + * in to account. This is a noop on 32-bit. + */ + addr &= __VIRTUAL_MASK; + return addr / bd_entry_virt_space; + } else { + vaddr_space_size = (1ULL << 32); + bd_entry_virt_space = vaddr_space_size / MPX_BD_NR_ENTRIES_32; + return addr / bd_entry_virt_space; + } + /* + * The two return calls above are exact copies. If we + * pull out a single copy and put it in here, gcc won't + * realize that we're doing a power-of-2 divide and use + * shifts. It uses a real divide. If we put them up + * there, it manages to figure it out (gcc 4.8.3). + */ } /* @@ -805,6 +909,7 @@ static int unmap_shared_bt(struct mm_str unsigned long end, bool prev_shared, bool next_shared) { unsigned long bt_addr; + unsigned long start_off, end_off; int ret; ret = get_bt_addr(mm, bd_entry, &bt_addr); @@ -816,17 +921,20 @@ static int unmap_shared_bt(struct mm_str if (ret) return ret; + start_off = mpx_get_bt_entry_offset_bytes(mm, start); + end_off = mpx_get_bt_entry_offset_bytes(mm, end); + if (prev_shared && next_shared) ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); + bt_addr+start_off, + bt_addr+end_off); else if (prev_shared) ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_BT_SIZE_BYTES); + bt_addr + start_off, + bt_addr + mpx_bt_size_bytes(mm)); else if (next_shared) ret = zap_bt_entries(mm, bt_addr, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); + bt_addr+end_off); else ret = unmap_single_bt(mm, bd_entry, bt_addr); @@ -847,8 +955,8 @@ static int unmap_edge_bts(struct mm_stru struct vm_area_struct *prev, *next; bool prev_shared = false, next_shared = false; - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); + 
bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); /* * Check whether bde_start and bde_end are shared with adjacent @@ -860,10 +968,10 @@ static int unmap_edge_bts(struct mm_stru * in to 'next'. */ next = find_vma_prev(mm, start, &prev); - if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) + if (prev && (mm->bd_addr + mpx_get_bd_entry_offset(mm, prev->vm_end-1)) == bde_start) prev_shared = true; - if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) + if (next && (mm->bd_addr + mpx_get_bd_entry_offset(mm, next->vm_start)) == bde_end) next_shared = true; @@ -929,8 +1037,8 @@ static int mpx_unmap_tables(struct mm_st * 1. fully covered * 2. not at the edges of the mapping, even if full aligned */ - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); + bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { ret = get_bt_addr(mm, bd_entry, &bt_addr); switch (ret) { _