All of lore.kernel.org
 help / color / mirror / Atom feed
From: Elias El Yandouzi <eliasely@amazon.com>
To: <xen-devel@lists.xenproject.org>
Cc: julien@xen.org, pdurrant@amazon.com, dwmw@amazon.com,
	"Hongyan Xia" <hongyxia@amazon.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>, "Wei Liu" <wl@xen.org>,
	"Julien Grall" <jgrall@amazon.com>,
	"Elias El Yandouzi" <eliasely@amazon.com>
Subject: [PATCH v2] x86: Map/unmap pages in restore_all_guests
Date: Tue, 16 Jan 2024 18:50:36 +0000	[thread overview]
Message-ID: <20240116185056.15000-8-eliasely@amazon.com> (raw)
In-Reply-To: <20240116185056.15000-1-eliasely@amazon.com>

From: Hongyan Xia <hongyxia@amazon.com>

Before, restore_all_guest assumed the PV cr3 could be accessed via the
direct map. This is no longer true.

Note that we do not map and unmap root_pgt for now since it is still a
xenheap page.

Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
Signed-off-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Elias El Yandouzi <eliasely@amazon.com>

----

    Changes in V2:
        * Rework the shadow perdomain mapping solution in the follow-up patches

    Changes since Hongyan's version:
        * Remove the final dot in the commit title

diff --git a/xen/arch/x86/include/asm/config.h b/xen/arch/x86/include/asm/config.h
index bbced338be..7cf1f33dc0 100644
--- a/xen/arch/x86/include/asm/config.h
+++ b/xen/arch/x86/include/asm/config.h
@@ -202,7 +202,7 @@ extern unsigned char boot_edid_info[128];
 /* Slot 260: per-domain mappings (including map cache). */
 #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
 #define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-#define PERDOMAIN_SLOTS         3
+#define PERDOMAIN_SLOTS         4
 #define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
                                  (PERDOMAIN_SLOT_MBYTES << 20))
 /* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */
@@ -316,6 +316,16 @@ extern unsigned long xen_phys_start;
 #define ARG_XLAT_START(v)        \
     (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
 
+/* root_pt shadow mapping area: the fourth per-domain-mapping sub-area. */
+#define SHADOW_ROOT_PT_VIRT_START   PERDOMAIN_VIRT_SLOT(3)
+#define SHADOW_ROOT_PT_ENTRIES      MAX_VIRT_CPUS
+#define SHADOW_ROOT_PT_VIRT_END     (SHADOW_ROOT_PT_VIRT_START +    \
+                                     (SHADOW_ROOT_PT_ENTRIES * PAGE_SIZE))
+
+/* The address of a particular VCPU's ROOT_PT */
+#define SHADOW_ROOT_PT_VCPU_VIRT_START(v) \
+    (SHADOW_ROOT_PT_VIRT_START + ((v)->vcpu_id * PAGE_SIZE))
+
 #define ELFSIZE 64
 
 #define ARCH_CRASH_SAVE_VMCOREINFO
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 622d22bef2..4d97c68028 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -273,6 +273,7 @@ struct time_scale {
 struct pv_domain
 {
     l1_pgentry_t **gdt_ldt_l1tab;
+    l1_pgentry_t **shadow_root_pt_l1tab;
 
     atomic_t nr_l4_pages;
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index b56e0d8065..a72c32d87c 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -505,6 +505,13 @@ void share_xen_page_with_guest(struct page_info *page, struct domain *d,
     spin_unlock(&d->page_alloc_lock);
 }
 
+#define shadow_root_pt_idx(v) \
+    ((v)->vcpu_id >> PAGETABLE_ORDER)
+
+#define pv_shadow_root_pt_pte(v) \
+    ((v)->domain->arch.pv.shadow_root_pt_l1tab[shadow_root_pt_idx(v)] + \
+     ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1)))
+
 void make_cr3(struct vcpu *v, mfn_t mfn)
 {
     struct domain *d = v->domain;
@@ -524,6 +531,13 @@ void write_ptbase(struct vcpu *v)
 
     if ( is_pv_vcpu(v) && v->domain->arch.pv.xpti )
     {
+        mfn_t guest_root_pt = _mfn(v->arch.cr3 >> PAGE_SHIFT);
+        l1_pgentry_t *pte = pv_shadow_root_pt_pte(v);
+
+        ASSERT(v == current);
+
+        l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RW));
+
         cpu_info->root_pgt_changed = true;
         cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
         if ( new_cr4 & X86_CR4_PCIDE )
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 2a445bb17b..fef9ae2352 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -288,6 +288,19 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
                               1U << GDT_LDT_VCPU_SHIFT);
 }
 
+static int pv_create_shadow_root_pt_l1tab(struct vcpu *v)
+{
+    return create_perdomain_mapping(v->domain, SHADOW_ROOT_PT_VCPU_VIRT_START(v),
+                                    1, v->domain->arch.pv.shadow_root_pt_l1tab,
+                                    NULL);
+}
+
+static void pv_destroy_shadow_root_pt_l1tab(struct vcpu *v)
+
+{
+    destroy_perdomain_mapping(v->domain, SHADOW_ROOT_PT_VCPU_VIRT_START(v), 1);
+}
+
 void pv_vcpu_destroy(struct vcpu *v)
 {
     if ( is_pv_32bit_vcpu(v) )
@@ -297,6 +310,7 @@ void pv_vcpu_destroy(struct vcpu *v)
     }
 
     pv_destroy_gdt_ldt_l1tab(v);
+    pv_destroy_shadow_root_pt_l1tab(v);
     XFREE(v->arch.pv.trap_ctxt);
 }
 
@@ -311,6 +325,13 @@ int pv_vcpu_initialise(struct vcpu *v)
     if ( rc )
         return rc;
 
+    if ( v->domain->arch.pv.xpti )
+    {
+        rc = pv_create_shadow_root_pt_l1tab(v);
+        if ( rc )
+            goto done;
+    }
+
     BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
                  PAGE_SIZE);
     v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
@@ -346,10 +367,12 @@ void pv_domain_destroy(struct domain *d)
 
     destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
                               GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+    destroy_perdomain_mapping(d, SHADOW_ROOT_PT_VIRT_START, SHADOW_ROOT_PT_ENTRIES);
 
     XFREE(d->arch.pv.cpuidmasks);
 
     FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab);
+    FREE_XENHEAP_PAGE(d->arch.pv.shadow_root_pt_l1tab);
 }
 
 void noreturn cf_check continue_pv_domain(void);
@@ -371,6 +394,12 @@ int pv_domain_initialise(struct domain *d)
         goto fail;
     clear_page(d->arch.pv.gdt_ldt_l1tab);
 
+    d->arch.pv.shadow_root_pt_l1tab =
+        alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
+    if ( !d->arch.pv.shadow_root_pt_l1tab )
+        goto fail;
+    clear_page(d->arch.pv.shadow_root_pt_l1tab);
+
     if ( levelling_caps & ~LCAP_faulting &&
          (d->arch.pv.cpuidmasks = xmemdup(&cpuidmask_defaults)) == NULL )
         goto fail;
@@ -381,6 +410,11 @@ int pv_domain_initialise(struct domain *d)
     if ( rc )
         goto fail;
 
+    rc = create_perdomain_mapping(d, SHADOW_ROOT_PT_VIRT_START,
+                                  SHADOW_ROOT_PT_ENTRIES, NULL, NULL);
+    if ( rc )
+        goto fail;
+
     d->arch.ctxt_switch = &pv_csw;
 
     d->arch.pv.xpti = is_hardware_domain(d) ? opt_xpti_hwdom : opt_xpti_domu;
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 57b73a4e62..23f9cca1a2 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -51,6 +51,7 @@ void __dummy__(void)
     OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es);
     BLANK();
 
+    OFFSET(VCPU_id, struct vcpu, vcpu_id);
     OFFSET(VCPU_processor, struct vcpu, processor);
     OFFSET(VCPU_domain, struct vcpu, domain);
     OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index c25b14dde6..a216c5ca7a 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -165,7 +165,15 @@ restore_all_guest:
         and   %rsi, %rdi
         and   %r9, %rsi
         add   %rcx, %rdi
+
+        /*
+         * The address in the vCPU cr3 is always mapped in the shadow
+         * root_pt virt area.
+         */
+        imul $PAGE_SIZE, VCPU_id(%rbx), %esi
+        movabs $SHADOW_ROOT_PT_VIRT_START, %rcx
         add   %rcx, %rsi
+
         mov   $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
         mov   root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
         mov   %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
-- 
2.40.1



  parent reply	other threads:[~2024-01-16 18:52 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-16 18:50 [PATCH v2] Remove the directmap Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/vmap: Check the page has been mapped in vm_init_type() Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/setup: Move vm_init() before acpi calls Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/vmap: Introduce vmap_size() and use it Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] acpi: vmap pages in acpi_os_alloc_memory Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/numa: vmap the pages for memnodemap Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/srat: vmap the pages for acpi_slit Elias El Yandouzi
2024-01-16 18:50 ` Elias El Yandouzi [this message]
2024-01-16 18:50 ` [PATCH v2] x86/pv: Domheap pages should be mapped while relocating initrd Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/pv: Rewrite how building PV dom0 handles domheap mappings Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/pv: Map L4 page table for shim domain Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86: Lift mapcache variable to the arch level Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/mapcache: Initialise the mapcache for the idle domain Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86: Add a boot option to enable and disable the direct map Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/arm: fixmap: Rename the fixmap slots to follow the x86 convention Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/x86: Add support for the PMAP Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/x86: Add build assertion for fixmap entries Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/domain_page: Remove the fast paths when mfn is not in the directmap Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/page_alloc: Add a path for xenheap when there is no direct map Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/setup: Leave early boot slightly earlier Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/setup: vmap heap nodes when they are outside the direct map Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] x86/setup: Do not create valid mappings when directmap=no Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] Rename mfn_to_virt() calls Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] Rename maddr_to_virt() calls Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/arm32: mm: Rename 'first' to 'root' in init_secondary_pagetables() Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/arm64: mm: Use per-pCPU page-tables Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/arm64: Implement a mapcache for arm64 Elias El Yandouzi
2024-01-16 18:50 ` [PATCH v2] xen/arm64: Allow the admin to enable/disable the directmap Elias El Yandouzi
2024-01-16 19:22 ` [PATCH v2] Remove " Elias El Yandouzi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240116185056.15000-8-eliasely@amazon.com \
    --to=eliasely@amazon.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dwmw@amazon.com \
    --cc=hongyxia@amazon.com \
    --cc=jbeulich@suse.com \
    --cc=jgrall@amazon.com \
    --cc=julien@xen.org \
    --cc=pdurrant@amazon.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.