linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: "H. Peter Anvin" <hpa@linux.intel.com>, boris.ostrovsky@oracle.com
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>,
	Alexander van Heukelum <heukelum@fastmail.fm>,
	Andy Lutomirski <amluto@gmail.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Borislav Petkov <bp@alien8.de>,
	Arjan van de Ven <arjan.van.de.ven@intel.com>,
	Brian Gerst <brgerst@gmail.com>,
	Alexandre Julliard <julliard@winehq.com>,
	Andi Kleen <andi@firstfloor.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH] x86-64: espfix for 64-bit mode *PROTOTYPE*
Date: Fri, 25 Apr 2014 17:02:41 -0400	[thread overview]
Message-ID: <20140425210241.GB30532@phenom.dumpdata.com> (raw)
In-Reply-To: <535714A1.9080502@linux.intel.com>

On Tue, Apr 22, 2014 at 06:17:21PM -0700, H. Peter Anvin wrote:
> Another spin of the prototype.  This one avoids the espfix for anything
> but #GP, and avoids save/restore/saving registers... one can wonder,
> though, how much that actually matters in practice.
> 
> It still does redundant SWAPGS on the slow path.  I'm not sure I
> personally care enough to optimize that, as it means some fairly
> significant restructuring of some of the code paths.  Some of that
> restructuring might actually be beneficial, but still...

Sorry about being late to the party.


 .. snip..
> diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
> new file mode 100644
> index 000000000000..05567d706f92
> --- /dev/null
> +++ b/arch/x86/kernel/espfix_64.c
> @@ -0,0 +1,136 @@
> +/* ----------------------------------------------------------------------- *
> + *
> + *   Copyright 2014 Intel Corporation; author: H. Peter Anvin
> + *
> + *   This file is part of the Linux kernel, and is made available under
> + *   the terms of the GNU General Public License version 2 or (at your
> + *   option) any later version; incorporated herein by reference.
> + *
> + * ----------------------------------------------------------------------- */
> +
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/percpu.h>
> +#include <linux/gfp.h>
> +#include <asm/pgtable.h>
> +
> +#define ESPFIX_STACK_SIZE	64UL
> +#define ESPFIX_STACKS_PER_PAGE	(PAGE_SIZE/ESPFIX_STACK_SIZE)
> +
> +#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE << (PGDIR_SHIFT-PAGE_SHIFT-16))
> +#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
> +# error "Need more than one PGD for the ESPFIX hack"
> +#endif
> +
> +#define ESPFIX_BASE_ADDR	(-2UL << PGDIR_SHIFT)
> +
> +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
> +
> +/* This contains the *bottom* address of the espfix stack */
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
> +
> +/* Initialization mutex - should this be a spinlock? */
> +static DEFINE_MUTEX(espfix_init_mutex);
> +
> +/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */
> +#define ESPFIX_MAX_PAGES  DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
> +#define ESPFIX_MAP_SIZE   DIV_ROUND_UP(ESPFIX_MAX_PAGES, BITS_PER_LONG)
> +static unsigned long espfix_page_alloc_map[ESPFIX_MAP_SIZE];
> +
> +static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
> +	__aligned(PAGE_SIZE);
> +
> +/*
> + * This returns the bottom address of the espfix stack for a specific CPU.
> + * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case
> + * we have to account for some amount of padding at the end of each page.
> + */
> +static inline unsigned long espfix_base_addr(unsigned int cpu)
> +{
> +	unsigned long page, addr;
> +
> +	page = (cpu / ESPFIX_STACKS_PER_PAGE) << PAGE_SHIFT;
> +	addr = page + (cpu % ESPFIX_STACKS_PER_PAGE) * ESPFIX_STACK_SIZE;
> +	addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
> +	addr += ESPFIX_BASE_ADDR;
> +	return addr;
> +}
> +
> +#define PTE_STRIDE        (65536/PAGE_SIZE)
> +#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE)
> +#define ESPFIX_PMD_CLONES PTRS_PER_PMD
> +#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES))
> +
> +void init_espfix_this_cpu(void)
> +{
> +	unsigned int cpu, page;
> +	unsigned long addr;
> +	pgd_t pgd, *pgd_p;
> +	pud_t pud, *pud_p;
> +	pmd_t pmd, *pmd_p;
> +	pte_t pte, *pte_p;
> +	int n;
> +	void *stack_page;
> +	pteval_t ptemask;
> +
> +	/* We only have to do this once... */
> +	if (likely(this_cpu_read(espfix_stack)))
> +		return;		/* Already initialized */
> +
> +	cpu = smp_processor_id();
> +	addr = espfix_base_addr(cpu);
> +	page = cpu/ESPFIX_STACKS_PER_PAGE;
> +
> +	/* Did another CPU already set this up? */
> +	if (likely(test_bit(page, espfix_page_alloc_map)))
> +		goto done;
> +
> +	mutex_lock(&espfix_init_mutex);
> +
> +	/* Did we race on the lock? */
> +	if (unlikely(test_bit(page, espfix_page_alloc_map)))
> +		goto unlock_done;
> +
> +	ptemask = __supported_pte_mask;
> +
> +	pgd_p = &init_level4_pgt[pgd_index(addr)];
> +	pgd = *pgd_p;
> +	if (!pgd_present(pgd)) {
> +		/* This can only happen on the BSP */
> +		pgd = __pgd(__pa_symbol(espfix_pud_page) |

Any particular reason you are using __pgd

> +			    (_KERNPG_TABLE & ptemask));
> +		set_pgd(pgd_p, pgd);
> +	}
> +
> +	pud_p = &espfix_pud_page[pud_index(addr)];
> +	pud = *pud_p;
> +	if (!pud_present(pud)) {
> +		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
> +		pud = __pud(__pa(pmd_p) | (_KERNPG_TABLE & ptemask));

_pud
> +		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
> +			set_pud(&pud_p[n], pud);
> +	}
> +
> +	pmd_p = pmd_offset(&pud, addr);
> +	pmd = *pmd_p;
> +	if (!pmd_present(pmd)) {
> +		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
> +		pmd = __pmd(__pa(pte_p) | (_KERNPG_TABLE & ptemask));

and _pmd?
> +		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
> +			set_pmd(&pmd_p[n], pmd);
> +	}
> +
> +	pte_p = pte_offset_kernel(&pmd, addr);
> +	stack_page = (void *)__get_free_page(GFP_KERNEL);
> +	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL & ptemask));

and __pte instead of the 'pmd', 'pud', 'pmd' and 'pte' macros?

> +	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
> +		set_pte(&pte_p[n*PTE_STRIDE], pte);
> +
> +	/* Job is done for this CPU and any CPU which shares this page */
> +	set_bit(page, espfix_page_alloc_map);
> +
> +unlock_done:
> +	mutex_unlock(&espfix_init_mutex);
> +done:
> +	this_cpu_write(espfix_stack, addr);
> +}

  parent reply	other threads:[~2014-04-25 21:03 UTC|newest]

Thread overview: 136+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-11 17:36 [tip:x86/urgent] x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels tip-bot for H. Peter Anvin
2014-04-11 18:12 ` Andy Lutomirski
2014-04-11 18:20   ` H. Peter Anvin
2014-04-11 18:27 ` Brian Gerst
2014-04-11 18:29   ` H. Peter Anvin
2014-04-11 18:35     ` Brian Gerst
2014-04-11 21:16     ` Andy Lutomirski
2014-04-11 21:24       ` H. Peter Anvin
2014-04-11 21:53         ` Andy Lutomirski
2014-04-11 21:59           ` H. Peter Anvin
2014-04-11 22:15             ` Andy Lutomirski
2014-04-11 22:18               ` H. Peter Anvin
2014-04-13  4:20           ` H. Peter Anvin
2014-04-12 23:26         ` Alexander van Heukelum
2014-04-12 23:31           ` H. Peter Anvin
2014-04-12 23:49             ` Alexander van Heukelum
2014-04-13  0:03               ` H. Peter Anvin
2014-04-13  1:25                 ` Andy Lutomirski
2014-04-13  1:29                   ` Andy Lutomirski
2014-04-13  3:00                     ` H. Peter Anvin
2014-04-11 21:34       ` Linus Torvalds
2014-04-11 18:41   ` Linus Torvalds
2014-04-11 18:45     ` Brian Gerst
2014-04-11 18:50       ` Linus Torvalds
2014-04-12  4:44         ` Brian Gerst
2014-04-12 17:18           ` H. Peter Anvin
2014-04-12 19:35             ` Borislav Petkov
2014-04-12 19:44               ` H. Peter Anvin
2014-04-12 20:11                 ` Borislav Petkov
2014-04-12 20:34                   ` Brian Gerst
2014-04-12 20:59                     ` Borislav Petkov
2014-04-12 21:13                       ` Brian Gerst
2014-04-12 21:40                         ` Borislav Petkov
2014-04-14  7:21                           ` Ingo Molnar
2014-04-14  9:44                             ` Borislav Petkov
2014-04-14  9:47                               ` Ingo Molnar
2014-04-12 21:53                 ` Linus Torvalds
2014-04-12 22:25                   ` H. Peter Anvin
2014-04-13  2:56                     ` Andi Kleen
2014-04-13  3:02                       ` H. Peter Anvin
2014-04-13  3:13                       ` Linus Torvalds
2014-04-12 20:29             ` Brian Gerst
2014-04-14  7:48         ` Alexandre Julliard
2014-05-07  9:18           ` Sven Joachim
2014-05-07 10:18             ` Borislav Petkov
2014-05-07 16:57             ` Linus Torvalds
2014-05-07 17:09               ` H. Peter Anvin
2014-05-07 17:50                 ` Alexandre Julliard
2014-05-08  6:43                 ` Sven Joachim
2014-05-08 13:50                   ` H. Peter Anvin
2014-05-08 20:13                     ` H. Peter Anvin
2014-05-08 20:40                     ` H. Peter Anvin
2014-05-12 13:16               ` Josh Boyer
2014-05-12 16:52                 ` H. Peter Anvin
2014-05-14 23:43               ` [tip:x86/urgent] x86-64, modify_ldt: Make support for 16-bit segments a runtime option tip-bot for Linus Torvalds
2014-04-11 18:46     ` [tip:x86/urgent] x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels H. Peter Anvin
2014-04-14  7:27       ` Ingo Molnar
2014-04-14 15:45         ` H. Peter Anvin
2014-04-13  2:54     ` Andi Kleen
2014-04-21 22:47 ` [PATCH] x86-64: espfix for 64-bit mode *PROTOTYPE* H. Peter Anvin
2014-04-21 23:19   ` Andrew Lutomirski
2014-04-21 23:29     ` H. Peter Anvin
2014-04-22  0:37       ` Andrew Lutomirski
2014-04-22  0:53         ` H. Peter Anvin
2014-04-22  1:06           ` Andrew Lutomirski
2014-04-22  1:14             ` H. Peter Anvin
2014-04-22  1:28               ` Andrew Lutomirski
2014-04-22  1:47                 ` H. Peter Anvin
2014-04-22  1:53                   ` Andrew Lutomirski
2014-04-22 11:23                     ` Borislav Petkov
2014-04-22 14:46                       ` Borislav Petkov
2014-04-22 16:03                         ` Andrew Lutomirski
2014-04-22 16:10                           ` H. Peter Anvin
2014-04-22 16:33                             ` Andrew Lutomirski
2014-04-22 16:43                               ` Linus Torvalds
2014-04-22 17:00                                 ` Andrew Lutomirski
2014-04-22 17:04                                   ` Linus Torvalds
2014-04-22 17:11                                     ` Andrew Lutomirski
2014-04-22 17:15                                       ` H. Peter Anvin
2014-04-23  9:54                                         ` One Thousand Gnomes
2014-04-23 15:53                                           ` H. Peter Anvin
2014-04-23 17:08                                             ` Andrew Lutomirski
2014-04-23 17:16                                               ` H. Peter Anvin
2014-04-23 17:25                                                 ` Andrew Lutomirski
2014-04-23 17:28                                                   ` H. Peter Anvin
2014-04-23 17:45                                                     ` Andrew Lutomirski
2014-04-22 17:19                                       ` Linus Torvalds
2014-04-22 17:29                                         ` H. Peter Anvin
2014-04-22 17:46                                           ` Andrew Lutomirski
2014-04-22 17:59                                             ` H. Peter Anvin
2014-04-22 18:03                                             ` Brian Gerst
2014-04-22 18:06                                               ` H. Peter Anvin
2014-04-22 18:17                                                 ` Brian Gerst
2014-04-22 18:51                                                   ` H. Peter Anvin
2014-04-22 19:55                                                     ` Brian Gerst
2014-04-22 20:17                                                       ` H. Peter Anvin
2014-04-22 23:08                                                         ` Brian Gerst
2014-04-22 23:39                                                     ` Andi Kleen
2014-04-22 23:40                                                       ` H. Peter Anvin
2014-04-22 17:11                                     ` H. Peter Anvin
2014-04-22 17:26                                       ` Borislav Petkov
2014-04-22 17:29                                         ` Andrew Lutomirski
2014-04-22 19:27                                           ` Borislav Petkov
2014-04-23  6:24                                     ` H. Peter Anvin
2014-04-23  8:57                                       ` Alexandre Julliard
2014-04-22 17:09                                   ` H. Peter Anvin
2014-04-22 17:20                                     ` Andrew Lutomirski
2014-04-22 17:24                                       ` H. Peter Anvin
2014-04-22 11:25   ` Borislav Petkov
2014-04-23  1:17   ` H. Peter Anvin
2014-04-23  1:23     ` Andrew Lutomirski
2014-04-23  1:42       ` H. Peter Anvin
2014-04-23 14:24         ` Boris Ostrovsky
2014-04-23 16:56           ` H. Peter Anvin
2014-04-28 13:04             ` Konrad Rzeszutek Wilk
2014-04-25 21:02     ` Konrad Rzeszutek Wilk [this message]
2014-04-25 21:16       ` H. Peter Anvin
2014-04-24  4:13   ` comex
2014-04-24  4:53     ` Andrew Lutomirski
2014-04-24 22:24       ` H. Peter Anvin
2014-04-24 22:31         ` Andrew Lutomirski
2014-04-24 22:37           ` H. Peter Anvin
2014-04-24 22:43             ` Andrew Lutomirski
2014-04-28 23:05       ` H. Peter Anvin
2014-04-28 23:08         ` H. Peter Anvin
2014-04-29  0:02           ` Andrew Lutomirski
2014-04-29  0:15             ` H. Peter Anvin
2014-04-29  0:20             ` Andrew Lutomirski
2014-04-29  2:38               ` H. Peter Anvin
2014-04-29  2:44                 ` H. Peter Anvin
2014-04-29  3:45                 ` H. Peter Anvin
2014-04-29  3:47                   ` H. Peter Anvin
2014-04-29  4:36                   ` H. Peter Anvin
2014-04-29  7:14                     ` H. Peter Anvin
2014-04-25 12:02   ` Pavel Machek
2014-04-25 21:20     ` H. Peter Anvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140425210241.GB30532@phenom.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=amluto@gmail.com \
    --cc=andi@firstfloor.org \
    --cc=arjan.van.de.ven@intel.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=heukelum@fastmail.fm \
    --cc=hpa@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=julliard@winehq.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).