xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Ian Jackson <ian.jackson@eu.citrix.com>
To: xen-devel@lists.xensource.com
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
	Tiejun Chen <tiejun.chen@intel.com>,
	Wei Liu <wei.liu2@citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Jan Beulich <jbeulich@suse.com>
Subject: [PATCH 01/16] introduce XENMEM_reserved_device_memory_map
Date: Wed, 22 Jul 2015 16:44:04 +0100	[thread overview]
Message-ID: <1437579859-24485-2-git-send-email-ian.jackson@eu.citrix.com> (raw)
In-Reply-To: <1437579859-24485-1-git-send-email-ian.jackson@eu.citrix.com>

From: Jan Beulich <jbeulich@suse.com>

This is a prerequisite for punching holes into HVM and PVH guests' P2M
to allow passing through devices that are associated with (on VT-d)
RMRRs.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
v12a: Move interface structure union member to the end, while moving
     the whole public header block into a __XEN__ / __XEN_TOOLS__
     conditional block.
v12: Restore changes as much as possible to my original version, fixing
     a few issues that got introduced after handing it over. Unionize
     new public memop interface structure to allow for non-PCI to be
     supported later on. Check flags to have all currently undefined
     flags clear. Refine adjustments to xen/pci.h.
---
 xen/common/compat/memory.c           |   65 ++++++++++++++++++++++++++++++++++
 xen/common/memory.c                  |   62 ++++++++++++++++++++++++++++++++
 xen/drivers/passthrough/iommu.c      |   10 ++++++
 xen/drivers/passthrough/vtd/dmar.c   |   27 ++++++++++++++
 xen/drivers/passthrough/vtd/extern.h |    1 +
 xen/drivers/passthrough/vtd/iommu.c  |    1 +
 xen/include/public/memory.h          |   37 ++++++++++++++++++-
 xen/include/xen/iommu.h              |   10 ++++++
 xen/include/xen/pci.h                |    4 +++
 xen/include/xlat.lst                 |    3 +-
 10 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c
index b258138..002948b 100644
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -17,6 +17,42 @@ CHECK_TYPE(domid);
 CHECK_mem_access_op;
 CHECK_vmemrange;
 
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+    struct compat_reserved_device_memory_map map;
+    unsigned int used_entries;
+};
+
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+                                      u32 id, void *ctxt)
+{
+    struct get_reserved_device_memory *grdm = ctxt;
+    u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
+                         grdm->map.dev.pci.devfn);
+
+    if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
+        return 0;
+
+    if ( grdm->used_entries < grdm->map.nr_entries )
+    {
+        struct compat_reserved_device_memory rdm = {
+            .start_pfn = start, .nr_pages = nr
+        };
+
+        if ( rdm.start_pfn != start || rdm.nr_pages != nr )
+            return -ERANGE;
+
+        if ( __copy_to_compat_offset(grdm->map.buffer, grdm->used_entries,
+                                     &rdm, 1) )
+            return -EFAULT;
+    }
+
+    ++grdm->used_entries;
+
+    return 1;
+}
+#endif
+
 int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
 {
     int split, op = cmd & MEMOP_CMD_MASK;
@@ -303,6 +339,35 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
             break;
         }
 
+#ifdef HAS_PASSTHROUGH
+        case XENMEM_reserved_device_memory_map:
+        {
+            struct get_reserved_device_memory grdm;
+
+            if ( unlikely(start_extent) )
+                return -ENOSYS;
+
+            if ( copy_from_guest(&grdm.map, compat, 1) ||
+                 !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+                return -EFAULT;
+
+            if ( grdm.map.flags & ~XENMEM_RDM_ALL )
+                return -EINVAL;
+
+            grdm.used_entries = 0;
+            rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+                                                  &grdm);
+
+            if ( !rc && grdm.map.nr_entries < grdm.used_entries )
+                rc = -ENOBUFS;
+            grdm.map.nr_entries = grdm.used_entries;
+            if ( __copy_to_guest(compat, &grdm.map, 1) )
+                rc = -EFAULT;
+
+            return rc;
+        }
+#endif
+
         default:
             return compat_arch_memory_op(cmd, compat);
         }
diff --git a/xen/common/memory.c b/xen/common/memory.c
index e5d49d8..61bb94c 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -748,6 +748,39 @@ static int construct_memop_from_reservation(
     return 0;
 }
 
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+    struct xen_reserved_device_memory_map map;
+    unsigned int used_entries;
+};
+
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+                                      u32 id, void *ctxt)
+{
+    struct get_reserved_device_memory *grdm = ctxt;
+    u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
+                         grdm->map.dev.pci.devfn);
+
+    if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
+        return 0;
+
+    if ( grdm->used_entries < grdm->map.nr_entries )
+    {
+        struct xen_reserved_device_memory rdm = {
+            .start_pfn = start, .nr_pages = nr
+        };
+
+        if ( __copy_to_guest_offset(grdm->map.buffer, grdm->used_entries,
+                                    &rdm, 1) )
+            return -EFAULT;
+    }
+
+    ++grdm->used_entries;
+
+    return 1;
+}
+#endif
+
 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     struct domain *d;
@@ -1162,6 +1195,35 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+#ifdef HAS_PASSTHROUGH
+    case XENMEM_reserved_device_memory_map:
+    {
+        struct get_reserved_device_memory grdm;
+
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
+        if ( copy_from_guest(&grdm.map, arg, 1) ||
+             !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+            return -EFAULT;
+
+        if ( grdm.map.flags & ~XENMEM_RDM_ALL )
+            return -EINVAL;
+
+        grdm.used_entries = 0;
+        rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+                                              &grdm);
+
+        if ( !rc && grdm.map.nr_entries < grdm.used_entries )
+            rc = -ENOBUFS;
+        grdm.map.nr_entries = grdm.used_entries;
+        if ( __copy_to_guest(arg, &grdm.map, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+#endif
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 06cb38f..0b2ef52 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -375,6 +375,16 @@ void iommu_crash_shutdown(void)
     iommu_enabled = iommu_intremap = 0;
 }
 
+int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+
+    if ( !iommu_enabled || !ops->get_reserved_device_memory )
+        return 0;
+
+    return ops->get_reserved_device_memory(func, ctxt);
+}
+
 bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
 {
     const struct hvm_iommu *hd = domain_hvm_iommu(d);
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 2672688..56daac7 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -914,3 +914,30 @@ int platform_supports_x2apic(void)
     unsigned int mask = ACPI_DMAR_INTR_REMAP | ACPI_DMAR_X2APIC_OPT_OUT;
     return cpu_has_x2apic && ((dmar_flags & mask) == ACPI_DMAR_INTR_REMAP);
 }
+
+int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
+{
+    struct acpi_rmrr_unit *rmrr, *rmrr_cur = NULL;
+    unsigned int i;
+    u16 bdf;
+
+    for_each_rmrr_device ( rmrr, bdf, i )
+    {
+        int rc;
+
+        if ( rmrr == rmrr_cur )
+            continue;
+
+        rc = func(PFN_DOWN(rmrr->base_address),
+                  PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address),
+                  PCI_SBDF2(rmrr->segment, bdf), ctxt);
+
+        if ( unlikely(rc < 0) )
+            return rc;
+
+        if ( rc )
+            rmrr_cur = rmrr;
+    }
+
+    return 0;
+}
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index 5524dba..f9ee9b0 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -75,6 +75,7 @@ int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
                                u8 bus, u8 devfn, const struct pci_dev *);
 int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
                              u8 bus, u8 devfn);
+int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt);
 
 unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
 void io_apic_write_remap_rte(unsigned int apic,
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index a88b5a3..9849d0e 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2490,6 +2490,7 @@ const struct iommu_ops intel_iommu_ops = {
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .get_reserved_device_memory = intel_iommu_get_reserved_device_memory,
     .dump_p2m_table = vtd_dump_p2m_table,
 };
 
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 832559a..320de91 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -28,6 +28,7 @@
 #define __XEN_PUBLIC_MEMORY_H__
 
 #include "xen.h"
+#include "physdev.h"
 
 /*
  * Increase or decrease the specified domain's memory reservation. Returns the
@@ -522,6 +523,40 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
  * The zero value is appropiate.
  */
 
+/*
+ * With some legacy devices, certain guest-physical addresses cannot safely
+ * be used for other purposes, e.g. to map guest RAM.  This hypercall
+ * enumerates those regions so the toolstack can avoid using them.
+ */
+#define XENMEM_reserved_device_memory_map   27
+struct xen_reserved_device_memory {
+    xen_pfn_t start_pfn;
+    xen_ulong_t nr_pages;
+};
+typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);
+
+struct xen_reserved_device_memory_map {
+#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */
+    /* IN */
+    uint32_t flags;
+    /*
+     * IN/OUT
+     *
+     * Gets set to the required number of entries when too low,
+     * signaled by error code -ENOBUFS.
+     */
+    unsigned int nr_entries;
+    /* OUT */
+    XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer;
+    /* IN */
+    union {
+        struct physdev_pci_device pci;
+    } dev;
+};
+typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
 /*
@@ -573,7 +608,7 @@ struct xen_vnuma_topology_info {
 typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
 DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
 
-/* Next available subop number is 27 */
+/* Next available subop number is 28 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 1d00696..52ed3b7 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -125,6 +125,14 @@ int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
 
 struct page_info;
 
+/*
+ * A negative value returned from callbacks of this type will cause the
+ * function the callback was handed to terminate its iteration and return
+ * that error. Zero and positive values let the iteration continue (after a
+ * positive value VT-d skips the remaining devices of the same RMRR).
+ */
+typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
+
 struct iommu_ops {
     int (*init)(struct domain *d);
     void (*hwdom_init)(struct domain *d);
@@ -156,12 +164,14 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
     void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_suspend(void);
 void iommu_resume(void);
 void iommu_crash_shutdown(void);
+int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);
 
 void iommu_share_p2m_table(struct domain *d);
 
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 3908146..e85d46f 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -26,6 +26,7 @@
  *  7:3 = slot
  *  2:0 = function
  */
+#define PCI_SEG(sbdf) (((sbdf) >> 16) & 0xffff)
 #define PCI_BUS(bdf)    (((bdf) >> 8) & 0xff)
 #define PCI_SLOT(bdf)   (((bdf) >> 3) & 0x1f)
 #define PCI_FUNC(bdf)   ((bdf) & 0x07)
@@ -33,6 +34,9 @@
 #define PCI_DEVFN2(bdf) ((bdf) & 0xff)
 #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
+#define PCI_SBDF(s,b,d,f) ((((s) & 0xffff) << 16) | PCI_BDF(b,d,f))
+#define PCI_SBDF2(s,bdf) ((((s) & 0xffff) << 16) | ((bdf) & 0xffff))
+#define PCI_SBDF3(s,b,df) ((((s) & 0xffff) << 16) | PCI_BDF2(b, df))
 
 struct pci_dev_info {
     bool_t is_extfn;
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 6fb15bf..8cedee7 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -65,9 +65,10 @@
 !	memory_exchange			memory.h
 !	memory_map			memory.h
 !	memory_reservation		memory.h
-?	mem_access_op		memory.h
+?	mem_access_op			memory.h
 !	pod_target			memory.h
 !	remove_from_physmap		memory.h
+!	reserved_device_memory_map	memory.h
 ?	vmemrange			memory.h
 !	vnuma_topology_info		memory.h
 ?	physdev_eoi			physdev.h
-- 
1.7.10.4

  reply	other threads:[~2015-07-22 15:44 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-22 15:44 [PATCH v13 00/16] Fix RMRR (avoid RDM) Ian Jackson
2015-07-22 15:44 ` Ian Jackson [this message]
2015-07-22 15:44 ` [PATCH 02/16] xen/vtd: create RMRR mapping Ian Jackson
2015-07-22 15:44 ` [PATCH 03/16] xen/passthrough: extend hypercall to support rdm reservation policy Ian Jackson
2015-07-23 11:45   ` Ian Jackson
2015-07-23 11:54     ` Jan Beulich
2015-07-23 11:54     ` Ian Campbell
2015-07-23 12:15     ` Chen, Tiejun
2015-07-23 12:19       ` Ian Jackson
2015-07-23 12:27         ` Ian Campbell
2015-07-23 12:40           ` Chen, Tiejun
2015-07-23 12:43           ` Ian Jackson
2015-07-22 15:44 ` [PATCH 04/16] xen: enable XENMEM_memory_map in hvm Ian Jackson
2015-07-22 15:44 ` [PATCH 05/16] hvmloader: get guest memory map into memory_map[] Ian Jackson
2015-07-22 15:44 ` [PATCH 06/16] hvmloader/pci: try to avoid placing BARs in RMRRs Ian Jackson
2015-07-22 15:44 ` [PATCH 07/16] hvmloader/e820: construct guest e820 table Ian Jackson
2015-07-22 15:44 ` [PATCH 08/16] tools/libxc: Expose new hypercall xc_reserved_device_memory_map Ian Jackson
2015-07-22 15:51   ` Wei Liu
2015-07-22 15:44 ` [PATCH 09/16] tools: extend xc_assign_device() to support rdm reservation policy Ian Jackson
2015-07-22 15:44 ` [PATCH 10/16] tools: introduce some new parameters to set rdm policy Ian Jackson
2015-07-22 15:44 ` [PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM Ian Jackson
2015-07-22 15:53   ` Wei Liu
2015-07-23 11:05     ` Ian Jackson
2015-07-23  0:52   ` Chen, Tiejun
2015-07-23  7:35     ` Wei Liu
2015-07-23  7:51       ` Chen, Tiejun
2015-07-23 11:09     ` Ian Jackson
2015-07-22 15:44 ` [PATCH 12/16] tools: introduce a new parameter to set a predefined rdm boundary Ian Jackson
2015-07-22 15:44 ` [PATCH 13/16] libxl: construct e820 map with RDM information for HVM guest Ian Jackson
2015-07-22 15:44 ` [PATCH 14/16] xen/vtd: enable USB device assignment Ian Jackson
2015-07-22 15:44 ` [PATCH 15/16] xen/vtd: prevent from assign the device with shared rmrr Ian Jackson
2015-09-03 19:39   ` Tamas K Lengyel
2015-09-04  8:17     ` Jan Beulich
2015-09-04 21:52       ` Tamas K Lengyel
2015-09-06  2:16         ` Chen, Tiejun
2015-09-06  3:19           ` Tamas K Lengyel
2015-09-06  4:19             ` Chen, Tiejun
2015-09-06  4:21               ` Tamas K Lengyel
2015-09-06 21:27               ` Wei Liu
2015-09-07  9:45               ` Jan Beulich
2015-07-22 15:44 ` [PATCH 16/16] tools: parse to enable new rdm policy parameters Ian Jackson
2015-07-22 15:51 ` [PATCH v13 00/16] Fix RMRR (avoid RDM) Ian Jackson
2015-07-23  2:15 ` Chen, Tiejun
2015-07-23 11:10   ` Ian Jackson
2015-07-23 12:53     ` Ian Jackson
2015-07-23  7:36 ` Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1437579859-24485-2-git-send-email-ian.jackson@eu.citrix.com \
    --to=ian.jackson@eu.citrix.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=tiejun.chen@intel.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).