From: Ian Jackson <ian.jackson@eu.citrix.com>
To: xen-devel@lists.xensource.com
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
Tiejun Chen <tiejun.chen@intel.com>,
Wei Liu <wei.liu2@citrix.com>,
Ian Campbell <ian.campbell@citrix.com>,
Jan Beulich <jbeulich@suse.com>
Subject: [PATCH 01/16] introduce XENMEM_reserved_device_memory_map
Date: Wed, 22 Jul 2015 16:44:04 +0100 [thread overview]
Message-ID: <1437579859-24485-2-git-send-email-ian.jackson@eu.citrix.com> (raw)
In-Reply-To: <1437579859-24485-1-git-send-email-ian.jackson@eu.citrix.com>
From: Jan Beulich <jbeulich@suse.com>
This is a prerequisite for punching holes into HVM and PVH guests' P2M
to allow passing through devices that are associated with (on VT-d)
RMRRs.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
v12a: Move interface structure union member to the end, while moving
the whole public header block into a __XEN__ / __XEN_TOOLS__
conditional block.
v12: Restore changes as much as possible to my original version, fixing
a few issues that got introduced after handing it over. Unionize
new public memop interface structure to allow for non-PCI to be
supported later on. Check flags to have all currently undefined
flags clear. Refine adjustments to xen/pci.h.
---
xen/common/compat/memory.c | 65 ++++++++++++++++++++++++++++++++++
xen/common/memory.c | 62 ++++++++++++++++++++++++++++++++
xen/drivers/passthrough/iommu.c | 10 ++++++
xen/drivers/passthrough/vtd/dmar.c | 27 ++++++++++++++
xen/drivers/passthrough/vtd/extern.h | 1 +
xen/drivers/passthrough/vtd/iommu.c | 1 +
xen/include/public/memory.h | 37 ++++++++++++++++++-
xen/include/xen/iommu.h | 10 ++++++
xen/include/xen/pci.h | 4 +++
xen/include/xlat.lst | 3 +-
10 files changed, 218 insertions(+), 2 deletions(-)
diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c
index b258138..002948b 100644
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -17,6 +17,42 @@ CHECK_TYPE(domid);
CHECK_mem_access_op;
CHECK_vmemrange;
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+ struct compat_reserved_device_memory_map map; /* copy of the guest's request */
+ unsigned int used_entries; /* matching regions seen so far */
+};
+/* iommu_grdm_t callback storing one reserved region in the compat buffer. */
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+ u32 id, void *ctxt)
+{
+ struct get_reserved_device_memory *grdm = ctxt;
+ u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
+ grdm->map.dev.pci.devfn);
+
+ if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
+ return 0; /* region belongs to a device that was not asked for */
+
+ if ( grdm->used_entries < grdm->map.nr_entries )
+ { /* the guest buffer still has a free slot */
+ struct compat_reserved_device_memory rdm = {
+ .start_pfn = start, .nr_pages = nr
+ };
+
+ if ( rdm.start_pfn != start || rdm.nr_pages != nr )
+ return -ERANGE; /* value changed when stored in the compat fields */
+
+ if ( __copy_to_compat_offset(grdm->map.buffer, grdm->used_entries,
+ &rdm, 1) )
+ return -EFAULT;
+ }
+
+ ++grdm->used_entries; /* counted even if nothing was copied */
+
+ return 1; /* positive: region was accounted for */
+}
+#endif
+
int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
{
int split, op = cmd & MEMOP_CMD_MASK;
@@ -303,6 +339,35 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
break;
}
+#ifdef HAS_PASSTHROUGH
+ case XENMEM_reserved_device_memory_map:
+ {
+ struct get_reserved_device_memory grdm;
+
+ if ( unlikely(start_extent) )
+ return -ENOSYS; /* a non-zero start_extent is rejected */
+
+ if ( copy_from_guest(&grdm.map, compat, 1) ||
+ !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+ return -EFAULT;
+
+ if ( grdm.map.flags & ~XENMEM_RDM_ALL )
+ return -EINVAL; /* XENMEM_RDM_ALL is the only flag accepted */
+
+ grdm.used_entries = 0;
+ rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+ &grdm);
+
+ if ( !rc && grdm.map.nr_entries < grdm.used_entries )
+ rc = -ENOBUFS; /* more regions found than the buffer can hold */
+ grdm.map.nr_entries = grdm.used_entries; /* always report the count */
+ if ( __copy_to_guest(compat, &grdm.map, 1) )
+ rc = -EFAULT;
+
+ return rc;
+ }
+#endif
+
default:
return compat_arch_memory_op(cmd, compat);
}
diff --git a/xen/common/memory.c b/xen/common/memory.c
index e5d49d8..61bb94c 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -748,6 +748,39 @@ static int construct_memop_from_reservation(
return 0;
}
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+ struct xen_reserved_device_memory_map map; /* copy of the guest's request */
+ unsigned int used_entries; /* matching regions seen so far */
+};
+/* iommu_grdm_t callback storing one reserved region in the guest buffer. */
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+ u32 id, void *ctxt)
+{
+ struct get_reserved_device_memory *grdm = ctxt;
+ u32 sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
+ grdm->map.dev.pci.devfn);
+
+ if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
+ return 0; /* region belongs to a device that was not asked for */
+
+ if ( grdm->used_entries < grdm->map.nr_entries )
+ { /* the guest buffer still has a free slot */
+ struct xen_reserved_device_memory rdm = {
+ .start_pfn = start, .nr_pages = nr
+ };
+
+ if ( __copy_to_guest_offset(grdm->map.buffer, grdm->used_entries,
+ &rdm, 1) )
+ return -EFAULT;
+ }
+
+ ++grdm->used_entries; /* counted even if nothing was copied */
+
+ return 1; /* positive: region was accounted for */
+}
+#endif
+
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
struct domain *d;
@@ -1162,6 +1195,35 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
break;
}
+#ifdef HAS_PASSTHROUGH
+ case XENMEM_reserved_device_memory_map:
+ {
+ struct get_reserved_device_memory grdm;
+
+ if ( unlikely(start_extent) )
+ return -ENOSYS; /* a non-zero start_extent is rejected */
+
+ if ( copy_from_guest(&grdm.map, arg, 1) ||
+ !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+ return -EFAULT;
+
+ if ( grdm.map.flags & ~XENMEM_RDM_ALL )
+ return -EINVAL; /* XENMEM_RDM_ALL is the only flag accepted */
+
+ grdm.used_entries = 0;
+ rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+ &grdm);
+
+ if ( !rc && grdm.map.nr_entries < grdm.used_entries )
+ rc = -ENOBUFS; /* more regions found than the buffer can hold */
+ grdm.map.nr_entries = grdm.used_entries; /* always report the count */
+ if ( __copy_to_guest(arg, &grdm.map, 1) )
+ rc = -EFAULT;
+
+ break;
+ }
+#endif
+
default:
rc = arch_memory_op(cmd, arg);
break;
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 06cb38f..0b2ef52 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -375,6 +375,16 @@ void iommu_crash_shutdown(void)
iommu_enabled = iommu_intremap = 0;
}
+int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
+{
+ const struct iommu_ops *ops = iommu_get_ops();
+ /* Report nothing unless the IOMMU is enabled and its driver has the hook. */
+ if ( !iommu_enabled || !ops->get_reserved_device_memory )
+ return 0;
+ /* Hand func and ctxt to the driver's implementation. */
+ return ops->get_reserved_device_memory(func, ctxt);
+}
+
bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
{
const struct hvm_iommu *hd = domain_hvm_iommu(d);
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 2672688..56daac7 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -914,3 +914,30 @@ int platform_supports_x2apic(void)
unsigned int mask = ACPI_DMAR_INTR_REMAP | ACPI_DMAR_X2APIC_OPT_OUT;
return cpu_has_x2apic && ((dmar_flags & mask) == ACPI_DMAR_INTR_REMAP);
}
+/* Pass VT-d RMRRs to func; a negative result from func ends the walk. */
+int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
+{
+ struct acpi_rmrr_unit *rmrr, *rmrr_cur = NULL;
+ unsigned int i;
+ u16 bdf;
+
+ for_each_rmrr_device ( rmrr, bdf, i )
+ {
+ int rc;
+
+ if ( rmrr == rmrr_cur )
+ continue; /* func already accepted this RMRR */
+
+ rc = func(PFN_DOWN(rmrr->base_address),
+ PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address),
+ PCI_SBDF2(rmrr->segment, bdf), ctxt);
+
+ if ( unlikely(rc < 0) )
+ return rc; /* propagate the callback's error */
+
+ if ( rc )
+ rmrr_cur = rmrr; /* positive: skip it on the following iterations */
+ }
+
+ return 0;
+}
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index 5524dba..f9ee9b0 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -75,6 +75,7 @@ int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
u8 bus, u8 devfn, const struct pci_dev *);
int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
u8 bus, u8 devfn);
+int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt);
unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
void io_apic_write_remap_rte(unsigned int apic,
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index a88b5a3..9849d0e 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2490,6 +2490,7 @@ const struct iommu_ops intel_iommu_ops = {
.crash_shutdown = vtd_crash_shutdown,
.iotlb_flush = intel_iommu_iotlb_flush,
.iotlb_flush_all = intel_iommu_iotlb_flush_all,
+ .get_reserved_device_memory = intel_iommu_get_reserved_device_memory,
.dump_p2m_table = vtd_dump_p2m_table,
};
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 832559a..320de91 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -28,6 +28,7 @@
#define __XEN_PUBLIC_MEMORY_H__
#include "xen.h"
+#include "physdev.h"
/*
* Increase or decrease the specified domain's memory reservation. Returns the
@@ -522,6 +523,40 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
* The zero value is appropiate.
*/
+/*
+ * With some legacy devices, certain guest-physical addresses cannot safely
+ * be used for other purposes, e.g. to map guest RAM. This hypercall
+ * enumerates those regions so the toolstack can avoid using them.
+ */
+#define XENMEM_reserved_device_memory_map 27
+struct xen_reserved_device_memory {
+ xen_pfn_t start_pfn; /* first page frame of the region */
+ xen_ulong_t nr_pages; /* number of pages the region spans */
+};
+typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);
+
+struct xen_reserved_device_memory_map {
+#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */
+ /* IN: XENMEM_RDM_ALL or 0; any other bit yields -EINVAL. */
+ uint32_t flags;
+ /*
+ * IN/OUT
+ *
+ * Capacity of buffer on input, number of matching regions on output.
+ * When the capacity is too low the call fails with -ENOBUFS.
+ */
+ unsigned int nr_entries;
+ /* OUT: receives at most nr_entries (input value) regions. */
+ XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer;
+ /* IN: device to query; ignored when XENMEM_RDM_ALL is set. */
+ union {
+ struct physdev_pci_device pci;
+ } dev;
+};
+typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
+
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
/*
@@ -573,7 +608,7 @@ struct xen_vnuma_topology_info {
typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
-/* Next available subop number is 27 */
+/* Next available subop number is 28 */
#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 1d00696..52ed3b7 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -125,6 +125,14 @@ int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
struct page_info;
+/*
+ * Any non-zero value returned from callbacks of this type will cause the
+ * function the callback was handed to terminate its iteration. Assigning
+ * meaning of these non-zero values is left to the top level caller /
+ * callback pair.
+ */
+typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
+
struct iommu_ops {
int (*init)(struct domain *d);
void (*hwdom_init)(struct domain *d);
@@ -156,12 +164,14 @@ struct iommu_ops {
void (*crash_shutdown)(void);
void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
void (*iotlb_flush_all)(struct domain *d);
+ int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
void (*dump_p2m_table)(struct domain *d);
};
void iommu_suspend(void);
void iommu_resume(void);
void iommu_crash_shutdown(void);
+int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);
void iommu_share_p2m_table(struct domain *d);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 3908146..e85d46f 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -26,6 +26,7 @@
* 7:3 = slot
* 2:0 = function
*/
+#define PCI_SEG(sbdf) (((sbdf) >> 16) & 0xffff)
#define PCI_BUS(bdf) (((bdf) >> 8) & 0xff)
#define PCI_SLOT(bdf) (((bdf) >> 3) & 0x1f)
#define PCI_FUNC(bdf) ((bdf) & 0x07)
@@ -33,6 +34,9 @@
#define PCI_DEVFN2(bdf) ((bdf) & 0xff)
#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
#define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
+#define PCI_SBDF(s,b,d,f) ((((s) & 0xffff) << 16) | PCI_BDF(b,d,f))
+#define PCI_SBDF2(s,bdf) ((((s) & 0xffff) << 16) | ((bdf) & 0xffff))
+#define PCI_SBDF3(s,b,df) ((((s) & 0xffff) << 16) | PCI_BDF2(b, df))
struct pci_dev_info {
bool_t is_extfn;
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 6fb15bf..8cedee7 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -65,9 +65,10 @@
! memory_exchange memory.h
! memory_map memory.h
! memory_reservation memory.h
-? mem_access_op memory.h
+? mem_access_op memory.h
! pod_target memory.h
! remove_from_physmap memory.h
+! reserved_device_memory_map memory.h
? vmemrange memory.h
! vnuma_topology_info memory.h
? physdev_eoi physdev.h
--
1.7.10.4
next prev parent reply other threads:[~2015-07-22 15:44 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-22 15:44 [PATCH v13 00/16] Fix RMRR (avoid RDM) Ian Jackson
2015-07-22 15:44 ` Ian Jackson [this message]
2015-07-22 15:44 ` [PATCH 02/16] xen/vtd: create RMRR mapping Ian Jackson
2015-07-22 15:44 ` [PATCH 03/16] xen/passthrough: extend hypercall to support rdm reservation policy Ian Jackson
2015-07-23 11:45 ` Ian Jackson
2015-07-23 11:54 ` Jan Beulich
2015-07-23 11:54 ` Ian Campbell
2015-07-23 12:15 ` Chen, Tiejun
2015-07-23 12:19 ` Ian Jackson
2015-07-23 12:27 ` Ian Campbell
2015-07-23 12:40 ` Chen, Tiejun
2015-07-23 12:43 ` Ian Jackson
2015-07-22 15:44 ` [PATCH 04/16] xen: enable XENMEM_memory_map in hvm Ian Jackson
2015-07-22 15:44 ` [PATCH 05/16] hvmloader: get guest memory map into memory_map[] Ian Jackson
2015-07-22 15:44 ` [PATCH 06/16] hvmloader/pci: try to avoid placing BARs in RMRRs Ian Jackson
2015-07-22 15:44 ` [PATCH 07/16] hvmloader/e820: construct guest e820 table Ian Jackson
2015-07-22 15:44 ` [PATCH 08/16] tools/libxc: Expose new hypercall xc_reserved_device_memory_map Ian Jackson
2015-07-22 15:51 ` Wei Liu
2015-07-22 15:44 ` [PATCH 09/16] tools: extend xc_assign_device() to support rdm reservation policy Ian Jackson
2015-07-22 15:44 ` [PATCH 10/16] tools: introduce some new parameters to set rdm policy Ian Jackson
2015-07-22 15:44 ` [PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM Ian Jackson
2015-07-22 15:53 ` Wei Liu
2015-07-23 11:05 ` Ian Jackson
2015-07-23 0:52 ` Chen, Tiejun
2015-07-23 7:35 ` Wei Liu
2015-07-23 7:51 ` Chen, Tiejun
2015-07-23 11:09 ` Ian Jackson
2015-07-22 15:44 ` [PATCH 12/16] tools: introduce a new parameter to set a predefined rdm boundary Ian Jackson
2015-07-22 15:44 ` [PATCH 13/16] libxl: construct e820 map with RDM information for HVM guest Ian Jackson
2015-07-22 15:44 ` [PATCH 14/16] xen/vtd: enable USB device assignment Ian Jackson
2015-07-22 15:44 ` [PATCH 15/16] xen/vtd: prevent from assign the device with shared rmrr Ian Jackson
2015-09-03 19:39 ` Tamas K Lengyel
2015-09-04 8:17 ` Jan Beulich
2015-09-04 21:52 ` Tamas K Lengyel
2015-09-06 2:16 ` Chen, Tiejun
2015-09-06 3:19 ` Tamas K Lengyel
2015-09-06 4:19 ` Chen, Tiejun
2015-09-06 4:21 ` Tamas K Lengyel
2015-09-06 21:27 ` Wei Liu
2015-09-07 9:45 ` Jan Beulich
2015-07-22 15:44 ` [PATCH 16/16] tools: parse to enable new rdm policy parameters Ian Jackson
2015-07-22 15:51 ` [PATCH v13 00/16] Fix RMRR (avoid RDM) Ian Jackson
2015-07-23 2:15 ` Chen, Tiejun
2015-07-23 11:10 ` Ian Jackson
2015-07-23 12:53 ` Ian Jackson
2015-07-23 7:36 ` Wei Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1437579859-24485-2-git-send-email-ian.jackson@eu.citrix.com \
--to=ian.jackson@eu.citrix.com \
--cc=George.Dunlap@eu.citrix.com \
--cc=ian.campbell@citrix.com \
--cc=jbeulich@suse.com \
--cc=tiejun.chen@intel.com \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).