linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
@ 2012-09-06 13:15 Matt Fleming
  2012-09-06 14:34 ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Matt Fleming @ 2012-09-06 13:15 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: linux-kernel, linux-efi, Matt Fleming, JérômeCarretero,
	Jan Beulich, Matthew Garrett, Vasco Dias

From: Matt Fleming <matt.fleming@intel.com>

Some firmware still needs a 1:1 (virt->phys) mapping even after we've
called SetVirtualAddressMap(). So install the mapping alongside our
existing kernel mapping whenever we make EFI calls in virtual mode.

This bug was discovered on ASUS machines where the firmware
implementation of GetTime() accesses the RTC device via physical
addresses, even though that's bogus per the UEFI spec since we've
informed the firmware via SetVirtualAddressMap() that the boottime
memory map is no longer valid.

This bug seems to be present in a lot of consumer devices, so there's
not a lot we can do about this spec violation apart from workaround
it.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: JérômeCarretero <cJ-ko@zougloub.eu>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Vasco Dias <rafa.vasco@gmail.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/include/asm/efi.h     |   28 ++++++--
 arch/x86/include/asm/pgalloc.h |    2 +
 arch/x86/mm/pgtable.c          |    2 -
 arch/x86/platform/efi/efi.c    |  132 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/platform/efi/efi_64.c |   45 ++++++++++++++
 5 files changed, 200 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index c9dcc18..2ba6f86 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -69,23 +69,37 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
+extern pgd_t *efi_call_virt_prelog(void);
+extern void efi_call_virt_epilog(pgd_t *);
+
+#define efi_callx(x, func, ...)					\
+	({							\
+		efi_status_t __status;				\
+		pgd_t *__pgd;					\
+								\
+		__pgd = efi_call_virt_prelog();			\
+		__status = efi_call##x(func, __VA_ARGS__);	\
+		efi_call_virt_epilog(__pgd);			\
+		__status;					\
+	})
+
 #define efi_call_virt0(f)				\
-	efi_call0((void *)(efi.systab->runtime->f))
+	efi_callx(0, (void *)(efi.systab->runtime->f))
 #define efi_call_virt1(f, a1)					\
-	efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
+	efi_callx(1, (void *)(efi.systab->runtime->f), (u64)(a1))
 #define efi_call_virt2(f, a1, a2)					\
-	efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+	efi_callx(2, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)					\
-	efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_callx(3, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_callx(4, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_callx(5, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_callx(6, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index b4389a4..ade0804 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -5,6 +5,8 @@
 #include <linux/mm.h>		/* for struct page */
 #include <linux/pagemap.h>
 
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+
 static inline int  __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8573b83..e999bb5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -5,8 +5,6 @@
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
-
 #ifdef CONFIG_HIGHPTE
 #define PGALLOC_USER_GFP __GFP_HIGHMEM
 #else
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 2dc29f5..d17243f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -47,6 +47,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/x86_init.h>
+#include <asm/pgalloc.h>
 
 #define EFI_DEBUG	1
 
@@ -741,6 +742,114 @@ static void __init runtime_code_page_mkexec(void)
 	}
 }
 
+#ifdef CONFIG_X86_64
+pgd_t *efi_one_to_one_pgd;
+int efi_one_to_one_index = 0;
+
+struct efi_pgtable {
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	u64 addr;
+};
+
+static int efi_pgd_entry(pgd_t *virt_pgd, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk)
+{
+	struct efi_pgtable *ep = (struct efi_pgtable *)walk->private;
+
+	ep->pgd = efi_one_to_one_pgd + pgd_index(ep->addr);
+	if (!pgd_present(*ep->pgd)) {
+		if (!pud_alloc(walk->mm, ep->pgd, ep->addr))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int efi_pte_entry(pte_t *virt_pte, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk)
+{
+	struct efi_pgtable *ep = (struct efi_pgtable *)walk->private;
+	pte_t *phys_pte;
+
+	phys_pte = pte_offset_kernel(ep->pmd, ep->addr);
+	set_pte(phys_pte, *virt_pte);
+	ep->addr += PAGE_SIZE;
+
+	return 0;
+}
+
+static int efi_pmd_entry(pmd_t *virt_pmd, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk)
+{
+	struct efi_pgtable *ep = (struct efi_pgtable *)walk->private;
+
+	ep->pmd = pmd_offset(ep->pud, ep->addr);
+	if (pmd_large(*virt_pmd)) {
+		set_pmd(ep->pmd, *virt_pmd);
+		ep->addr += (1 << PG_LEVEL_2M);
+
+		/* Skip the pte */
+		walk->pte_entry = NULL;
+		return 0;
+	}
+
+	walk->pte_entry = efi_pte_entry;
+
+	if (!pmd_present(*ep->pmd)) {
+		if (!pte_alloc_kernel(ep->pmd, ep->addr))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int efi_pud_entry(pud_t *virt_pud, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk)
+{
+	struct efi_pgtable *ep = (struct efi_pgtable *)walk->private;
+
+	ep->pud = pud_offset(ep->pgd, ep->addr);
+	if (pud_large(*virt_pud)) {
+		set_pud(ep->pud, *virt_pud);
+		ep->addr += (1 << PG_LEVEL_1G);
+
+		/* Skip the pmd/pte */
+		walk->pmd_entry = NULL;
+		walk->pte_entry = NULL;
+		return 0;
+	}
+
+	walk->pmd_entry = efi_pmd_entry;
+	walk->pte_entry = efi_pte_entry;
+
+	if (!pud_present(*ep->pud)) {
+		if (!pmd_alloc(walk->mm, ep->pud, ep->addr))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int efi_insert_one_to_one_mapping(u64 phys_addr, u64 virt_addr,
+					 unsigned long size)
+{
+	struct efi_pgtable ep;
+	struct mm_walk walk = {
+		.pgd_entry = efi_pgd_entry,
+		.pud_entry = efi_pud_entry,
+		.pmd_entry = efi_pmd_entry,
+		.pte_entry = efi_pte_entry,
+		.mm = &init_mm,
+		.private = (void *)&ep,
+	};
+
+	ep.addr = phys_addr;
+	return walk_page_range(virt_addr, virt_addr + size, &walk);
+}
+#endif /* CONFIG_X86_64 */
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
@@ -795,6 +904,11 @@ void __init efi_enter_virtual_mode(void)
 		prev_md = md;
 	}
 
+
+#ifdef CONFIG_X86_64
+	efi_one_to_one_pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+#endif
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
@@ -839,6 +953,24 @@ void __init efi_enter_virtual_mode(void)
 		memcpy(new_memmap + (count * memmap.desc_size), md,
 		       memmap.desc_size);
 		count++;
+
+#ifdef CONFIG_X86_64
+		/*
+		 * Some firmware, notably that from ASUS, still
+		 * attempts to access the physical address space after
+		 * we've called SetVirtualAddressMap().
+		 *
+		 * Maintain a 1:1 mapping virt->phys which only exists
+		 * for the benefit of this broken firmware, the kernel
+		 * MUST NOT access addresses via the 1:1 mapping
+		 * directly, doing so has been known to cause issues
+		 * on Apple firmware.
+		 */
+		if (efi_insert_one_to_one_mapping(md->phys_addr,
+						  md->virt_addr, size))
+			pr_alert("Unable to map address 1:1 0x%llx\n",
+				 md->phys_addr);
+#endif /* CONFIG_X86_64 */
 	}
 
 	BUG_ON(!efi.systab);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54..b11d40f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -37,6 +37,10 @@
 #include <asm/efi.h>
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+extern pgd_t *efi_one_to_one_pgd;
+extern int efi_one_to_one_index;
 
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
@@ -58,6 +62,47 @@ static void __init early_code_mapping_set_exec(int executable)
 	}
 }
 
+pgd_t *efi_call_virt_prelog(void)
+{
+	pgd_t *save;
+	int i;
+
+	save = kmalloc(sizeof(pgd_t) * (efi_one_to_one_index + 1), GFP_KERNEL);
+	if (!save) {
+		pr_alert("Unable to save pgd entries\n");
+		return NULL;
+	}
+
+	for (i = 0; i <= efi_one_to_one_index; i++) {
+		pgd_t *pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+
+		pgd += i;
+		save[i] = *pgd;
+		set_pgd(pgd, efi_one_to_one_pgd[i]);
+	}
+
+	__flush_tlb_all();
+	return save;
+}
+
+void efi_call_virt_epilog(pgd_t *save)
+{
+	int i;
+
+	if (!save)
+		return;
+
+	for (i = 0; i <= efi_one_to_one_index; i++) {
+		pgd_t *pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+
+		pgd += i;
+		set_pgd(pgd, save[i]);
+	}
+
+	kfree(save);
+	__flush_tlb_all();
+}
+
 void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-06 13:15 [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls Matt Fleming
@ 2012-09-06 14:34 ` Jan Beulich
  2012-09-06 15:47   ` Matt Fleming
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2012-09-06 14:34 UTC (permalink / raw)
  To: Matt Fleming
  Cc: Vasco Dias, Matt Fleming, Matthew Garrett, linux-efi,
	linux-kernel, cJ-ko, H. Peter Anvin

>>> On 06.09.12 at 15:15, Matt Fleming <matt@console-pimps.org> wrote:
> From: Matt Fleming <matt.fleming@intel.com>
> 
> Some firmware still needs a 1:1 (virt->phys) mapping even after we've
> called SetVirtualAddressMap(). So install the mapping alongside our
> existing kernel mapping whenever we make EFI calls in virtual mode.
> 
> This bug was discovered on ASUS machines where the firmware
> implementation of GetTime() accesses the RTC device via physical
> addresses, even though that's bogus per the UEFI spec since we've
> informed the firmware via SetVirtualAddressMap() that the boottime
> memory map is no longer valid.

> @@ -741,6 +742,114 @@ static void __init runtime_code_page_mkexec(void)
>  	}
>  }
>  
> +#ifdef CONFIG_X86_64
> +pgd_t *efi_one_to_one_pgd;
> +int efi_one_to_one_index = 0;

I wasn't able to spot where this variable ever gets set to a
non-zero value.

> @@ -58,6 +62,47 @@ static void __init early_code_mapping_set_exec(int 
> executable)
>  	}
>  }
>  
> +pgd_t *efi_call_virt_prelog(void)
> +{
> +	pgd_t *save;
> +	int i;
> +
> +	save = kmalloc(sizeof(pgd_t) * (efi_one_to_one_index + 1), GFP_KERNEL);

GFP_ATOMIC perhaps, given that many runtime calls happen
inside spin locks?

> +	if (!save) {
> +		pr_alert("Unable to save pgd entries\n");
> +		return NULL;
> +	}
> +
> +	for (i = 0; i <= efi_one_to_one_index; i++) {
> +		pgd_t *pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);

I'd suggest moving this out of the loop (also below), slightly
adjusting the rest of the loop body.

> +
> +		pgd += i;
> +		save[i] = *pgd;
> +		set_pgd(pgd, efi_one_to_one_pgd[i]);
> +	}

Did you, as an alternative, consider switching to a different
CR3 instead of copying back and forth?

> +
> +	__flush_tlb_all();

Is it certain you will _never_ hit a global mapping (in which case I
believe the above would be insufficient)?

Jan

> +	return save;
> +}
> +
> +void efi_call_virt_epilog(pgd_t *save)
> +{
> +	int i;
> +
> +	if (!save)
> +		return;
> +
> +	for (i = 0; i <= efi_one_to_one_index; i++) {
> +		pgd_t *pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
> +
> +		pgd += i;
> +		set_pgd(pgd, save[i]);
> +	}
> +
> +	kfree(save);
> +	__flush_tlb_all();
> +}
> +
>  void __init efi_call_phys_prelog(void)
>  {
>  	unsigned long vaddress;
> -- 
> 1.7.4.4




^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-06 14:34 ` Jan Beulich
@ 2012-09-06 15:47   ` Matt Fleming
  2012-09-06 17:07     ` H. Peter Anvin
                       ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Matt Fleming @ 2012-09-06 15:47 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Vasco Dias, Matthew Garrett, linux-efi, linux-kernel, cJ-ko,
	H. Peter Anvin

On Thu, 2012-09-06 at 15:34 +0100, Jan Beulich wrote:
> >>> On 06.09.12 at 15:15, Matt Fleming <matt@console-pimps.org> wrote:
> > From: Matt Fleming <matt.fleming@intel.com>
> > 
> > Some firmware still needs a 1:1 (virt->phys) mapping even after we've
> > called SetVirtualAddressMap(). So install the mapping alongside our
> > existing kernel mapping whenever we make EFI calls in virtual mode.
> > 
> > This bug was discovered on ASUS machines where the firmware
> > implementation of GetTime() accesses the RTC device via physical
> > addresses, even though that's bogus per the UEFI spec since we've
> > informed the firmware via SetVirtualAddressMap() that the boottime
> > memory map is no longer valid.
> 
> > @@ -741,6 +742,114 @@ static void __init runtime_code_page_mkexec(void)
> >  	}
> >  }
> >  
> > +#ifdef CONFIG_X86_64
> > +pgd_t *efi_one_to_one_pgd;
> > +int efi_one_to_one_index = 0;
> 
> I wasn't able to spot where this variable ever gets set to a
> non-zero value.

Oops. Yeah, you're right. It seems I lost that code at some point in
development. I'll re-add in a v2 of this patch.

> > @@ -58,6 +62,47 @@ static void __init early_code_mapping_set_exec(int 
> > executable)
> >  	}
> >  }
> >  
> > +pgd_t *efi_call_virt_prelog(void)
> > +{
> > +	pgd_t *save;
> > +	int i;
> > +
> > +	save = kmalloc(sizeof(pgd_t) * (efi_one_to_one_index + 1), GFP_KERNEL);
> 
> GFP_ATOMIC perhaps, given that many runtime calls happen
> inside spin locks?

Yeah, good idea.

> > +	if (!save) {
> > +		pr_alert("Unable to save pgd entries\n");
> > +		return NULL;
> > +	}
> > +
> > +	for (i = 0; i <= efi_one_to_one_index; i++) {
> > +		pgd_t *pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
> 
> I'd suggest moving this out of the loop (also below), slightly
> adjusting the rest of the loop body.

Sure.

> > +
> > +		pgd += i;
> > +		save[i] = *pgd;
> > +		set_pgd(pgd, efi_one_to_one_pgd[i]);
> > +	}
> 
> Did you, as an alternative, consider switching to a different
> CR3 instead of copying back and forth?

I did consider it, but I couldn't convince myself whether or not the EFI
pagetable would need to be manually kept in sync with any other
pagetables. But now I look at the code a bit harder, it seems that
should be taken care of automatically. In fact, the tboot code seems to
do something similar. I'll try that approach.

> > +
> > +	__flush_tlb_all();
> 
> Is it certain you will _never_ hit a global mapping (in which case I
> believe the above would be insufficient)?

Are you saying that this should be a flush_tlb_all() if we have global
mappings? Or that if we are guaranteed to never have global mappings we
can opitmise this by simply doing a __flush_tlb()?

Thanks Jan!


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-06 15:47   ` Matt Fleming
@ 2012-09-06 17:07     ` H. Peter Anvin
  2012-09-07  7:40       ` Jan Beulich
  2012-09-07  7:20     ` Jan Beulich
  2012-09-07  7:43     ` Jan Beulich
  2 siblings, 1 reply; 9+ messages in thread
From: H. Peter Anvin @ 2012-09-06 17:07 UTC (permalink / raw)
  To: Matt Fleming
  Cc: Jan Beulich, Vasco Dias, Matthew Garrett, linux-efi, linux-kernel, cJ-ko

Anyone care to refresh my memory why we can't use the "global" 
1:1+kernel map, possibly with some improvements (which would benefit the 
users of those maps too)?  With that I mean initial_page_map on i386 and 
trampoline_pgd on x86-64...

	-hpa
-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-06 15:47   ` Matt Fleming
  2012-09-06 17:07     ` H. Peter Anvin
@ 2012-09-07  7:20     ` Jan Beulich
  2012-09-07  7:43     ` Jan Beulich
  2 siblings, 0 replies; 9+ messages in thread
From: Jan Beulich @ 2012-09-07  7:20 UTC (permalink / raw)
  To: Matt Fleming
  Cc: Vasco Dias, Matthew Garrett, linux-efi, linux-kernel, cJ-ko,
	H. Peter Anvin

>>> On 06.09.12 at 17:47, Matt Fleming <matt.fleming@intel.com> wrote:
> On Thu, 2012-09-06 at 15:34 +0100, Jan Beulich wrote:
>> >>> On 06.09.12 at 15:15, Matt Fleming <matt@console-pimps.org> wrote:
>> > +	__flush_tlb_all();
>> 
>> Is it certain you will _never_ hit a global mapping (in which case I
>> believe the above would be insufficient)?
> 
> Are you saying that this should be a flush_tlb_all() if we have global
> mappings? Or that if we are guaranteed to never have global mappings we
> can opitmise this by simply doing a __flush_tlb()?

Hmm, looks like I shouldn't have worked from memory -
other than I remembered, __flush_tlb_all() does take care
of global pages. But indeed, if we can be sure that no global
mappings would ever exist in the replaced (low) part of the
address space, then __flush_tlb() would be sufficient. But I
would agree that for a first round (and possibly forever,
particularly as long as performance doesn't matter) it's safer
to use the more aggressive flush. So please disregard my
respective comment to that part of your patch.

Jan


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-06 17:07     ` H. Peter Anvin
@ 2012-09-07  7:40       ` Jan Beulich
  2012-09-07  8:02         ` Matt Fleming
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2012-09-07  7:40 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Vasco Dias, Matt Fleming, Matthew Garrett, linux-efi,
	linux-kernel, cJ-ko

>>> On 06.09.12 at 19:07, "H. Peter Anvin" <hpa@zytor.com> wrote:
> Anyone care to refresh my memory why we can't use the "global" 
> 1:1+kernel map, possibly with some improvements (which would benefit the 
> users of those maps too)?  With that I mean initial_page_map on i386 and 
> trampoline_pgd on x86-64...

I don't think you want anything inserted in the low part of those
(kernel-only) page tables, not even transiently.

Jan


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-06 15:47   ` Matt Fleming
  2012-09-06 17:07     ` H. Peter Anvin
  2012-09-07  7:20     ` Jan Beulich
@ 2012-09-07  7:43     ` Jan Beulich
  2012-09-07  8:06       ` Matt Fleming
  2 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2012-09-07  7:43 UTC (permalink / raw)
  To: Matt Fleming
  Cc: Vasco Dias, Matthew Garrett, linux-efi, linux-kernel, cJ-ko,
	H. Peter Anvin

>>> On 06.09.12 at 17:47, Matt Fleming <matt.fleming@intel.com> wrote:
> On Thu, 2012-09-06 at 15:34 +0100, Jan Beulich wrote:
>> >>> On 06.09.12 at 15:15, Matt Fleming <matt@console-pimps.org> wrote:
>> > +
>> > +		pgd += i;
>> > +		save[i] = *pgd;
>> > +		set_pgd(pgd, efi_one_to_one_pgd[i]);
>> > +	}
>> 
>> Did you, as an alternative, consider switching to a different
>> CR3 instead of copying back and forth?
> 
> I did consider it, but I couldn't convince myself whether or not the EFI
> pagetable would need to be manually kept in sync with any other
> pagetables. But now I look at the code a bit harder, it seems that
> should be taken care of automatically. In fact, the tboot code seems to
> do something similar. I'll try that approach.

Actually, I think the copying approach is even broken - what if
multiple threads currently on the same address space want to
invoke a runtime call simultaneously? The first one to get here
would save the right values, but the second one wouldn't.

Jan


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-07  7:40       ` Jan Beulich
@ 2012-09-07  8:02         ` Matt Fleming
  0 siblings, 0 replies; 9+ messages in thread
From: Matt Fleming @ 2012-09-07  8:02 UTC (permalink / raw)
  To: Jan Beulich
  Cc: H. Peter Anvin, Vasco Dias, Matthew Garrett, linux-efi,
	linux-kernel, cJ-ko

On Fri, 2012-09-07 at 08:40 +0100, Jan Beulich wrote:
> >>> On 06.09.12 at 19:07, "H. Peter Anvin" <hpa@zytor.com> wrote:
> > Anyone care to refresh my memory why we can't use the "global" 
> > 1:1+kernel map, possibly with some improvements (which would benefit the 
> > users of those maps too)?  With that I mean initial_page_map on i386 and 
> > trampoline_pgd on x86-64...
> 
> I don't think you want anything inserted in the low part of those
> (kernel-only) page tables, not even transiently.

There are already things inserted into the low part of those page
tables. In fact, the main purpose is to provide a 1:1 mapping for
realmode code. See machine_real_restart().

I think there's definitely merit to the approach Peter outlined above.
There's a variety of places in arch/x86 where a 1:1 mapping is needed.
It would be good to have a single pagetable that provides this.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls
  2012-09-07  7:43     ` Jan Beulich
@ 2012-09-07  8:06       ` Matt Fleming
  0 siblings, 0 replies; 9+ messages in thread
From: Matt Fleming @ 2012-09-07  8:06 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Vasco Dias, Matthew Garrett, linux-efi, linux-kernel, cJ-ko,
	H. Peter Anvin

On Fri, 2012-09-07 at 08:43 +0100, Jan Beulich wrote:
> >>> On 06.09.12 at 17:47, Matt Fleming <matt.fleming@intel.com> wrote:
> > On Thu, 2012-09-06 at 15:34 +0100, Jan Beulich wrote:
> >> >>> On 06.09.12 at 15:15, Matt Fleming <matt@console-pimps.org> wrote:
> >> > +
> >> > +		pgd += i;
> >> > +		save[i] = *pgd;
> >> > +		set_pgd(pgd, efi_one_to_one_pgd[i]);
> >> > +	}
> >> 
> >> Did you, as an alternative, consider switching to a different
> >> CR3 instead of copying back and forth?
> > 
> > I did consider it, but I couldn't convince myself whether or not the EFI
> > pagetable would need to be manually kept in sync with any other
> > pagetables. But now I look at the code a bit harder, it seems that
> > should be taken care of automatically. In fact, the tboot code seems to
> > do something similar. I'll try that approach.
> 
> Actually, I think the copying approach is even broken - what if
> multiple threads currently on the same address space want to
> invoke a runtime call simultaneously? The first one to get here
> would save the right values, but the second one wouldn't.

Yeah, unfortunately I only realised that after I'd sent the patch.
Switching pagetables around efi calls is a much better approach.


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2012-09-07  8:06 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-06 13:15 [PATCH] x86, efi: 1:1 pagetable mapping for virtual EFI calls Matt Fleming
2012-09-06 14:34 ` Jan Beulich
2012-09-06 15:47   ` Matt Fleming
2012-09-06 17:07     ` H. Peter Anvin
2012-09-07  7:40       ` Jan Beulich
2012-09-07  8:02         ` Matt Fleming
2012-09-07  7:20     ` Jan Beulich
2012-09-07  7:43     ` Jan Beulich
2012-09-07  8:06       ` Matt Fleming

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).