All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [0/13] RFC: Guest visible IOMMU
@ 2012-03-22  2:14 David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 01/13] Use DMADirection type for dma_bdrv_io David Gibson
                   ` (13 more replies)
  0 siblings, 14 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel

Here's another version of my series implementing the nevessary
framework for emulation of guest-visible IOMMUs.  I've incorporated
the feedback from the last posting.  I'm fairly confident that
preliminary patches 1-3 are ready to go in, the rest might still need
more consideration.

In particular, if anyone has thoughts on how to handle the case where
an IOMMU mapping is invalidated by the guest where a qemu-side device
currently has that piece of IOVA-space dma_memory_map()ed, I'd be
interested to hear it.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 01/13] Use DMADirection type for dma_bdrv_io
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 02/13] Better support for dma_addr_t variables David Gibson
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, David Gibson

Currently dma_bdrv_io() takes a 'to_dev' boolean parameter to
determine the direction of DMA it is emulating.  We already have a
DMADirection enum designed specifically to encode DMA directions.
This patch uses it for dma_bdrv_io() as well.  This involves removing
the DMADirection definition from the #ifdef it was inside, but since that
only existed to protect the definition of dma_addr_t from places where
config.h is not included, there wasn't any reason for it to be there in
the first place.

Cc: Kevin Wolf <kwolf@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 dma-helpers.c  |   20 ++++++++++++--------
 dma.h          |   12 ++++++------
 hw/ide/core.c  |    3 ++-
 hw/ide/macio.c |    3 ++-
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index c29ea6d..5f19a85 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -42,7 +42,7 @@ typedef struct {
     BlockDriverAIOCB *acb;
     QEMUSGList *sg;
     uint64_t sector_num;
-    bool to_dev;
+    DMADirection dir;
     bool in_cancel;
     int sg_cur_index;
     dma_addr_t sg_cur_byte;
@@ -76,7 +76,8 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
 
     for (i = 0; i < dbs->iov.niov; ++i) {
         cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
-                                  dbs->iov.iov[i].iov_len, !dbs->to_dev,
+                                  dbs->iov.iov[i].iov_len,
+                                  dbs->dir != DMA_DIRECTION_TO_DEVICE,
                                   dbs->iov.iov[i].iov_len);
     }
     qemu_iovec_reset(&dbs->iov);
@@ -123,7 +124,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
     while (dbs->sg_cur_index < dbs->sg->nsg) {
         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-        mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->to_dev);
+        mem = cpu_physical_memory_map(cur_addr, &cur_len,
+                                      dbs->dir != DMA_DIRECTION_TO_DEVICE);
         if (!mem)
             break;
         qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -170,11 +172,11 @@ static AIOPool dma_aio_pool = {
 BlockDriverAIOCB *dma_bdrv_io(
     BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
     DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
-    void *opaque, bool to_dev)
+    void *opaque, DMADirection dir)
 {
     DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
 
-    trace_dma_bdrv_io(dbs, bs, sector_num, to_dev);
+    trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
 
     dbs->acb = NULL;
     dbs->bs = bs;
@@ -182,7 +184,7 @@ BlockDriverAIOCB *dma_bdrv_io(
     dbs->sector_num = sector_num;
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
-    dbs->to_dev = to_dev;
+    dbs->dir = dir;
     dbs->io_func = io_func;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
@@ -195,14 +197,16 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, false);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
+                       DMA_DIRECTION_FROM_DEVICE);
 }
 
 BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
                                  QEMUSGList *sg, uint64_t sector,
                                  void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, true);
+    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
+                       DMA_DIRECTION_TO_DEVICE);
 }
 
 
diff --git a/dma.h b/dma.h
index 20e86d2..05ac325 100644
--- a/dma.h
+++ b/dma.h
@@ -17,6 +17,11 @@
 
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
+typedef enum {
+    DMA_DIRECTION_TO_DEVICE = 0,
+    DMA_DIRECTION_FROM_DEVICE = 1,
+} DMADirection;
+
 struct QEMUSGList {
     ScatterGatherEntry *sg;
     int nsg;
@@ -29,11 +34,6 @@ typedef target_phys_addr_t dma_addr_t;
 
 #define DMA_ADDR_FMT TARGET_FMT_plx
 
-typedef enum {
-    DMA_DIRECTION_TO_DEVICE = 0,
-    DMA_DIRECTION_FROM_DEVICE = 1,
-} DMADirection;
-
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
@@ -51,7 +51,7 @@ typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
 BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
                               QEMUSGList *sg, uint64_t sector_num,
                               DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
-                              void *opaque, bool to_dev);
+                              void *opaque, DMADirection dir);
 BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
                                 QEMUSGList *sg, uint64_t sector,
                                 BlockDriverCompletionFunc *cb, void *opaque);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 4d568ac..43da841 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -604,7 +604,8 @@ void ide_dma_cb(void *opaque, int ret)
         break;
     case IDE_DMA_TRIM:
         s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
-                                         ide_issue_trim, ide_dma_cb, s, true);
+                                         ide_issue_trim, ide_dma_cb, s,
+                                         DMA_DIRECTION_TO_DEVICE);
         break;
     }
     return;
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index a4df244..7b38d9e 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -149,7 +149,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
         break;
     case IDE_DMA_TRIM:
         m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
-                               ide_issue_trim, pmac_ide_transfer_cb, s, true);
+                               ide_issue_trim, pmac_ide_transfer_cb, s,
+                               DMA_DIRECTION_TO_DEVICE);
         break;
     }
     return;
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 02/13] Better support for dma_addr_t variables
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 01/13] Use DMADirection type for dma_bdrv_io David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions David Gibson
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: David Gibson

A while back, we introduced the dma_addr_t type, which is supposed to
be used for bus visible memory addresses.  At present, this is an
alias for target_phys_addr_t, but this will change when we eventually
add support for guest visible IOMMUs.

There are some instances of target_phys_addr_t in the code now which
should really be dma_addr_t, but can't be trivially converted due to
missing features which this patch corrects.

 * We add DMA_ADDR_BITS analagous to TARGET_PHYS_ADDR_BITS.  This is
   important where we need to make a compile-time (#if) based on the
   size of dma_addr_t.

 * We add a new helper macro to create device properties which take a
   dma_addr_t, currently an alias to DEFINE_PROP_TADDR().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 dma.h         |    1 +
 hw/qdev-dma.h |    4 ++++
 2 files changed, 5 insertions(+), 0 deletions(-)
 create mode 100644 hw/qdev-dma.h

diff --git a/dma.h b/dma.h
index 05ac325..463095c 100644
--- a/dma.h
+++ b/dma.h
@@ -32,6 +32,7 @@ struct QEMUSGList {
 #if defined(TARGET_PHYS_ADDR_BITS)
 typedef target_phys_addr_t dma_addr_t;
 
+#define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
 
 struct ScatterGatherEntry {
diff --git a/hw/qdev-dma.h b/hw/qdev-dma.h
new file mode 100644
index 0000000..e407771
--- /dev/null
+++ b/hw/qdev-dma.h
@@ -0,0 +1,4 @@
+#include "qdev-addr.h"
+
+#define DEFINE_PROP_DMAADDR(_n, _s, _f, _d)                               \
+    DEFINE_PROP_TADDR(_n, _s, _f, _d)
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 01/13] Use DMADirection type for dma_bdrv_io David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 02/13] Better support for dma_addr_t variables David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22 10:00   ` Gerd Hoffmann
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 04/13] Implement cpu_physical_memory_zero() David Gibson
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Gerd Hoffman, David Gibson

Shortly before 1.0, we added helper functions / wrappers for doing PCI DMA
from individual devices.  This makes what's going on clearer and means that
when we add IOMMU support somewhere in the future, only the general PCI
code will have to change, not every device that uses PCI DMA.

However, usb-xhci is not using these wrappers, despite being a PCI only
device.  This patch remedies the situation, using the pci dma functions
instead of direct calls to cpu_physical_memory_{read,write}().  Likewise
address parameters for DMA are changed to dma_addr_t instead of
target_phys_addr_t.

Cc: Gerd Hoffman <kraxel@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/usb/hcd-xhci.c |  177 ++++++++++++++++++++++++++---------------------------
 1 files changed, 86 insertions(+), 91 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 73b0c7f..f898d63 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -22,7 +22,7 @@
 #include "qemu-timer.h"
 #include "hw/usb.h"
 #include "hw/pci.h"
-#include "hw/qdev-addr.h"
+//#include "hw/qdev-addr.h"
 #include "hw/msi.h"
 
 //#define DEBUG_XHCI
@@ -140,7 +140,7 @@ typedef struct XHCITRB {
     uint64_t parameter;
     uint32_t status;
     uint32_t control;
-    target_phys_addr_t addr;
+    dma_addr_t addr;
     bool ccs;
 } XHCITRB;
 
@@ -291,8 +291,8 @@ typedef enum EPType {
 } EPType;
 
 typedef struct XHCIRing {
-    target_phys_addr_t base;
-    target_phys_addr_t dequeue;
+    dma_addr_t base;
+    dma_addr_t dequeue;
     bool ccs;
 } XHCIRing;
 
@@ -345,7 +345,7 @@ typedef struct XHCIEPContext {
     unsigned int next_bg;
     XHCITransfer bg_transfers[BG_XFERS];
     EPType type;
-    target_phys_addr_t pctx;
+    dma_addr_t pctx;
     unsigned int max_psize;
     bool has_bg;
     uint32_t state;
@@ -353,7 +353,7 @@ typedef struct XHCIEPContext {
 
 typedef struct XHCISlot {
     bool enabled;
-    target_phys_addr_t ctx;
+    dma_addr_t ctx;
     unsigned int port;
     unsigned int devaddr;
     XHCIEPContext * eps[31];
@@ -402,7 +402,7 @@ struct XHCIState {
     uint32_t erdp_low;
     uint32_t erdp_high;
 
-    target_phys_addr_t er_start;
+    dma_addr_t er_start;
     uint32_t er_size;
     bool er_pcs;
     unsigned int er_ep_idx;
@@ -479,18 +479,18 @@ static const char *trb_name(XHCITRB *trb)
 static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid,
                          unsigned int epid);
 
-static inline target_phys_addr_t xhci_addr64(uint32_t low, uint32_t high)
+static inline dma_addr_t xhci_addr64(uint32_t low, uint32_t high)
 {
-#if TARGET_PHYS_ADDR_BITS > 32
-    return low | ((target_phys_addr_t)high << 32);
+#if DMA_ADDR_BITS > 32
+    return low | ((dma_addr_t)high << 32);
 #else
     return low;
 #endif
 }
 
-static inline target_phys_addr_t xhci_mask64(uint64_t addr)
+static inline dma_addr_t xhci_mask64(uint64_t addr)
 {
-#if TARGET_PHYS_ADDR_BITS > 32
+#if DMA_ADDR_BITS > 32
     return addr;
 #else
     return addr & 0xffffffff;
@@ -532,7 +532,7 @@ static void xhci_die(XHCIState *xhci)
 static void xhci_write_event(XHCIState *xhci, XHCIEvent *event)
 {
     XHCITRB ev_trb;
-    target_phys_addr_t addr;
+    dma_addr_t addr;
 
     ev_trb.parameter = cpu_to_le64(event->ptr);
     ev_trb.status = cpu_to_le32(event->length | (event->ccode << 24));
@@ -548,7 +548,7 @@ static void xhci_write_event(XHCIState *xhci, XHCIEvent *event)
             trb_name(&ev_trb));
 
     addr = xhci->er_start + TRB_SIZE*xhci->er_ep_idx;
-    cpu_physical_memory_write(addr, (uint8_t *) &ev_trb, TRB_SIZE);
+    pci_dma_write(&xhci->pci_dev, addr, &ev_trb, TRB_SIZE);
 
     xhci->er_ep_idx++;
     if (xhci->er_ep_idx >= xhci->er_size) {
@@ -559,7 +559,7 @@ static void xhci_write_event(XHCIState *xhci, XHCIEvent *event)
 
 static void xhci_events_update(XHCIState *xhci)
 {
-    target_phys_addr_t erdp;
+    dma_addr_t erdp;
     unsigned int dp_idx;
     bool do_irq = 0;
 
@@ -570,8 +570,8 @@ static void xhci_events_update(XHCIState *xhci)
     erdp = xhci_addr64(xhci->erdp_low, xhci->erdp_high);
     if (erdp < xhci->er_start ||
         erdp >= (xhci->er_start + TRB_SIZE*xhci->er_size)) {
-        fprintf(stderr, "xhci: ERDP out of bounds: "TARGET_FMT_plx"\n", erdp);
-        fprintf(stderr, "xhci: ER at "TARGET_FMT_plx" len %d\n",
+        fprintf(stderr, "xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
+        fprintf(stderr, "xhci: ER at "DMA_ADDR_FMT" len %d\n",
                 xhci->er_start, xhci->er_size);
         xhci_die(xhci);
         return;
@@ -630,7 +630,7 @@ static void xhci_events_update(XHCIState *xhci)
 
 static void xhci_event(XHCIState *xhci, XHCIEvent *event)
 {
-    target_phys_addr_t erdp;
+    dma_addr_t erdp;
     unsigned int dp_idx;
 
     if (xhci->er_full) {
@@ -649,8 +649,8 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event)
     erdp = xhci_addr64(xhci->erdp_low, xhci->erdp_high);
     if (erdp < xhci->er_start ||
         erdp >= (xhci->er_start + TRB_SIZE*xhci->er_size)) {
-        fprintf(stderr, "xhci: ERDP out of bounds: "TARGET_FMT_plx"\n", erdp);
-        fprintf(stderr, "xhci: ER at "TARGET_FMT_plx" len %d\n",
+        fprintf(stderr, "xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
+        fprintf(stderr, "xhci: ER at "DMA_ADDR_FMT" len %d\n",
                 xhci->er_start, xhci->er_size);
         xhci_die(xhci);
         return;
@@ -686,7 +686,7 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event)
 }
 
 static void xhci_ring_init(XHCIState *xhci, XHCIRing *ring,
-                           target_phys_addr_t base)
+                           dma_addr_t base)
 {
     ring->base = base;
     ring->dequeue = base;
@@ -694,18 +694,18 @@ static void xhci_ring_init(XHCIState *xhci, XHCIRing *ring,
 }
 
 static TRBType xhci_ring_fetch(XHCIState *xhci, XHCIRing *ring, XHCITRB *trb,
-                               target_phys_addr_t *addr)
+                               dma_addr_t *addr)
 {
     while (1) {
         TRBType type;
-        cpu_physical_memory_read(ring->dequeue, (uint8_t *) trb, TRB_SIZE);
+        pci_dma_read(&xhci->pci_dev, ring->dequeue, trb, TRB_SIZE);
         trb->addr = ring->dequeue;
         trb->ccs = ring->ccs;
         le64_to_cpus(&trb->parameter);
         le32_to_cpus(&trb->status);
         le32_to_cpus(&trb->control);
 
-        DPRINTF("xhci: TRB fetched [" TARGET_FMT_plx "]: "
+        DPRINTF("xhci: TRB fetched [" DMA_ADDR_FMT "]: "
                 "%016" PRIx64 " %08x %08x %s\n",
                 ring->dequeue, trb->parameter, trb->status, trb->control,
                 trb_name(trb));
@@ -735,19 +735,19 @@ static int xhci_ring_chain_length(XHCIState *xhci, const XHCIRing *ring)
 {
     XHCITRB trb;
     int length = 0;
-    target_phys_addr_t dequeue = ring->dequeue;
+    dma_addr_t dequeue = ring->dequeue;
     bool ccs = ring->ccs;
     /* hack to bundle together the two/three TDs that make a setup transfer */
     bool control_td_set = 0;
 
     while (1) {
         TRBType type;
-        cpu_physical_memory_read(dequeue, (uint8_t *) &trb, TRB_SIZE);
+        pci_dma_read(&xhci->pci_dev, dequeue, &trb, TRB_SIZE);
         le64_to_cpus(&trb.parameter);
         le32_to_cpus(&trb.status);
         le32_to_cpus(&trb.control);
 
-        DPRINTF("xhci: TRB peeked [" TARGET_FMT_plx "]: "
+        DPRINTF("xhci: TRB peeked [" DMA_ADDR_FMT "]: "
                 "%016" PRIx64 " %08x %08x\n",
                 dequeue, trb.parameter, trb.status, trb.control);
 
@@ -790,8 +790,8 @@ static void xhci_er_reset(XHCIState *xhci)
         xhci_die(xhci);
         return;
     }
-    target_phys_addr_t erstba = xhci_addr64(xhci->erstba_low, xhci->erstba_high);
-    cpu_physical_memory_read(erstba, (uint8_t *) &seg, sizeof(seg));
+    dma_addr_t erstba = xhci_addr64(xhci->erstba_low, xhci->erstba_high);
+    pci_dma_read(&xhci->pci_dev, erstba, &seg, sizeof(seg));
     le32_to_cpus(&seg.addr_low);
     le32_to_cpus(&seg.addr_high);
     le32_to_cpus(&seg.size);
@@ -807,7 +807,7 @@ static void xhci_er_reset(XHCIState *xhci)
     xhci->er_pcs = 1;
     xhci->er_full = 0;
 
-    DPRINTF("xhci: event ring:" TARGET_FMT_plx " [%d]\n",
+    DPRINTF("xhci: event ring:" DMA_ADDR_FMT " [%d]\n",
             xhci->er_start, xhci->er_size);
 }
 
@@ -833,24 +833,24 @@ static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx,
         return;
     }
 
-    cpu_physical_memory_read(epctx->pctx, (uint8_t *) ctx, sizeof(ctx));
+    pci_dma_read(&xhci->pci_dev, epctx->pctx, ctx, sizeof(ctx));
     ctx[0] &= ~EP_STATE_MASK;
     ctx[0] |= state;
     ctx[2] = epctx->ring.dequeue | epctx->ring.ccs;
     ctx[3] = (epctx->ring.dequeue >> 16) >> 16;
-    DPRINTF("xhci: set epctx: " TARGET_FMT_plx " state=%d dequeue=%08x%08x\n",
+    DPRINTF("xhci: set epctx: " DMA_ADDR_FMT " state=%d dequeue=%08x%08x\n",
             epctx->pctx, state, ctx[3], ctx[2]);
-    cpu_physical_memory_write(epctx->pctx, (uint8_t *) ctx, sizeof(ctx));
+    pci_dma_write(&xhci->pci_dev, epctx->pctx, ctx, sizeof(ctx));
     epctx->state = state;
 }
 
 static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid,
-                               unsigned int epid, target_phys_addr_t pctx,
+                               unsigned int epid, dma_addr_t pctx,
                                uint32_t *ctx)
 {
     XHCISlot *slot;
     XHCIEPContext *epctx;
-    target_phys_addr_t dequeue;
+    dma_addr_t dequeue;
     int i;
 
     assert(slotid >= 1 && slotid <= MAXSLOTS);
@@ -1087,7 +1087,7 @@ static TRBCCode xhci_set_ep_dequeue(XHCIState *xhci, unsigned int slotid,
 {
     XHCISlot *slot;
     XHCIEPContext *epctx;
-    target_phys_addr_t dequeue;
+    dma_addr_t dequeue;
 
     assert(slotid >= 1 && slotid <= MAXSLOTS);
 
@@ -1142,7 +1142,7 @@ static int xhci_xfer_data(XHCITransfer *xfer, uint8_t *data,
 
     for (i = 0; i < xfer->trb_count; i++) {
         XHCITRB *trb = &xfer->trbs[i];
-        target_phys_addr_t addr;
+        dma_addr_t addr;
         unsigned int chunk = 0;
 
         switch (TRB_TYPE(*trb)) {
@@ -1173,11 +1173,11 @@ static int xhci_xfer_data(XHCITransfer *xfer, uint8_t *data,
                     memcpy(data, &idata, chunk);
                 } else {
                     DPRINTF("xhci_xfer_data: r/w(%d) %d bytes at "
-                            TARGET_FMT_plx "\n", in_xfer, chunk, addr);
+                            DMA_ADDR_FMT "\n", in_xfer, chunk, addr);
                     if (in_xfer) {
-                        cpu_physical_memory_write(addr, data, chunk);
+                        pci_dma_write(&xhci->pci_dev, addr, data, chunk);
                     } else {
-                        cpu_physical_memory_read(addr, data, chunk);
+                        pci_dma_read(&xhci->pci_dev, addr, data, chunk);
                     }
 #ifdef DEBUG_DATA
                     unsigned int count = chunk;
@@ -1240,7 +1240,7 @@ static void xhci_stall_ep(XHCITransfer *xfer)
     epctx->ring.ccs = xfer->trbs[0].ccs;
     xhci_set_ep_state(xhci, epctx, EP_HALTED);
     DPRINTF("xhci: stalled slot %d ep %d\n", xfer->slotid, xfer->epid);
-    DPRINTF("xhci: will continue at "TARGET_FMT_plx"\n", epctx->ring.dequeue);
+    DPRINTF("xhci: will continue at "DMA_ADDR_FMT"\n", epctx->ring.dequeue);
 }
 
 static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer,
@@ -1802,7 +1802,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
 {
     XHCISlot *slot;
     USBDevice *dev;
-    target_phys_addr_t ictx, octx, dcbaap;
+    dma_addr_t ictx, octx, dcbaap;
     uint64_t poctx;
     uint32_t ictl_ctx[2];
     uint32_t slot_ctx[4];
@@ -1815,15 +1815,14 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
     DPRINTF("xhci_address_slot(%d)\n", slotid);
 
     dcbaap = xhci_addr64(xhci->dcbaap_low, xhci->dcbaap_high);
-    cpu_physical_memory_read(dcbaap + 8*slotid,
-                             (uint8_t *) &poctx, sizeof(poctx));
+    pci_dma_read(&xhci->pci_dev, dcbaap + 8*slotid, &poctx, sizeof(poctx));
     ictx = xhci_mask64(pictx);
     octx = xhci_mask64(le64_to_cpu(poctx));
 
-    DPRINTF("xhci: input context at "TARGET_FMT_plx"\n", ictx);
-    DPRINTF("xhci: output context at "TARGET_FMT_plx"\n", octx);
+    DPRINTF("xhci: input context at "DMA_ADDR_FMT"\n", ictx);
+    DPRINTF("xhci: output context at "DMA_ADDR_FMT"\n", octx);
 
-    cpu_physical_memory_read(ictx, (uint8_t *) ictl_ctx, sizeof(ictl_ctx));
+    pci_dma_read(&xhci->pci_dev, ictx, ictl_ctx, sizeof(ictl_ctx));
 
     if (ictl_ctx[0] != 0x0 || ictl_ctx[1] != 0x3) {
         fprintf(stderr, "xhci: invalid input context control %08x %08x\n",
@@ -1831,8 +1830,8 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
         return CC_TRB_ERROR;
     }
 
-    cpu_physical_memory_read(ictx+32, (uint8_t *) slot_ctx, sizeof(slot_ctx));
-    cpu_physical_memory_read(ictx+64, (uint8_t *) ep0_ctx, sizeof(ep0_ctx));
+    pci_dma_read(&xhci->pci_dev, ictx+32, slot_ctx, sizeof(slot_ctx));
+    pci_dma_read(&xhci->pci_dev, ictx+64, ep0_ctx, sizeof(ep0_ctx));
 
     DPRINTF("xhci: input slot context: %08x %08x %08x %08x\n",
             slot_ctx[0], slot_ctx[1], slot_ctx[2], slot_ctx[3]);
@@ -1881,8 +1880,8 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
     DPRINTF("xhci: output ep0 context: %08x %08x %08x %08x %08x\n",
             ep0_ctx[0], ep0_ctx[1], ep0_ctx[2], ep0_ctx[3], ep0_ctx[4]);
 
-    cpu_physical_memory_write(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
-    cpu_physical_memory_write(octx+32, (uint8_t *) ep0_ctx, sizeof(ep0_ctx));
+    pci_dma_write(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
+    pci_dma_write(&xhci->pci_dev, octx+32, ep0_ctx, sizeof(ep0_ctx));
 
     return res;
 }
@@ -1891,7 +1890,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
 static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
                                   uint64_t pictx, bool dc)
 {
-    target_phys_addr_t ictx, octx;
+    dma_addr_t ictx, octx;
     uint32_t ictl_ctx[2];
     uint32_t slot_ctx[4];
     uint32_t islot_ctx[4];
@@ -1905,8 +1904,8 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
     ictx = xhci_mask64(pictx);
     octx = xhci->slots[slotid-1].ctx;
 
-    DPRINTF("xhci: input context at "TARGET_FMT_plx"\n", ictx);
-    DPRINTF("xhci: output context at "TARGET_FMT_plx"\n", octx);
+    DPRINTF("xhci: input context at "DMA_ADDR_FMT"\n", ictx);
+    DPRINTF("xhci: output context at "DMA_ADDR_FMT"\n", octx);
 
     if (dc) {
         for (i = 2; i <= 31; i++) {
@@ -1915,17 +1914,17 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
             }
         }
 
-        cpu_physical_memory_read(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+        pci_dma_read(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
         slot_ctx[3] &= ~(SLOT_STATE_MASK << SLOT_STATE_SHIFT);
         slot_ctx[3] |= SLOT_ADDRESSED << SLOT_STATE_SHIFT;
         DPRINTF("xhci: output slot context: %08x %08x %08x %08x\n",
                 slot_ctx[0], slot_ctx[1], slot_ctx[2], slot_ctx[3]);
-        cpu_physical_memory_write(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+        pci_dma_write(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
 
         return CC_SUCCESS;
     }
 
-    cpu_physical_memory_read(ictx, (uint8_t *) ictl_ctx, sizeof(ictl_ctx));
+    pci_dma_read(&xhci->pci_dev, ictx, ictl_ctx, sizeof(ictl_ctx));
 
     if ((ictl_ctx[0] & 0x3) != 0x0 || (ictl_ctx[1] & 0x3) != 0x1) {
         fprintf(stderr, "xhci: invalid input context control %08x %08x\n",
@@ -1933,8 +1932,8 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
         return CC_TRB_ERROR;
     }
 
-    cpu_physical_memory_read(ictx+32, (uint8_t *) islot_ctx, sizeof(islot_ctx));
-    cpu_physical_memory_read(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+    pci_dma_read(&xhci->pci_dev, ictx+32, islot_ctx, sizeof(islot_ctx));
+    pci_dma_read(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
 
     if (SLOT_STATE(slot_ctx[3]) < SLOT_ADDRESSED) {
         fprintf(stderr, "xhci: invalid slot state %08x\n", slot_ctx[3]);
@@ -1946,8 +1945,8 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
             xhci_disable_ep(xhci, slotid, i);
         }
         if (ictl_ctx[1] & (1<<i)) {
-            cpu_physical_memory_read(ictx+32+(32*i),
-                                     (uint8_t *) ep_ctx, sizeof(ep_ctx));
+            pci_dma_read(&xhci->pci_dev, ictx+32+(32*i), ep_ctx,
+                         sizeof(ep_ctx));
             DPRINTF("xhci: input ep%d.%d context: %08x %08x %08x %08x %08x\n",
                     i/2, i%2, ep_ctx[0], ep_ctx[1], ep_ctx[2],
                     ep_ctx[3], ep_ctx[4]);
@@ -1959,8 +1958,7 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
             DPRINTF("xhci: output ep%d.%d context: %08x %08x %08x %08x %08x\n",
                     i/2, i%2, ep_ctx[0], ep_ctx[1], ep_ctx[2],
                     ep_ctx[3], ep_ctx[4]);
-            cpu_physical_memory_write(octx+(32*i),
-                                      (uint8_t *) ep_ctx, sizeof(ep_ctx));
+            pci_dma_write(&xhci->pci_dev, octx+(32*i), ep_ctx, sizeof(ep_ctx));
         }
     }
 
@@ -1972,7 +1970,7 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
     DPRINTF("xhci: output slot context: %08x %08x %08x %08x\n",
             slot_ctx[0], slot_ctx[1], slot_ctx[2], slot_ctx[3]);
 
-    cpu_physical_memory_write(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+    pci_dma_write(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
 
     return CC_SUCCESS;
 }
@@ -1981,7 +1979,7 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
 static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
                                    uint64_t pictx)
 {
-    target_phys_addr_t ictx, octx;
+    dma_addr_t ictx, octx;
     uint32_t ictl_ctx[2];
     uint32_t iep0_ctx[5];
     uint32_t ep0_ctx[5];
@@ -1994,10 +1992,10 @@ static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
     ictx = xhci_mask64(pictx);
     octx = xhci->slots[slotid-1].ctx;
 
-    DPRINTF("xhci: input context at "TARGET_FMT_plx"\n", ictx);
-    DPRINTF("xhci: output context at "TARGET_FMT_plx"\n", octx);
+    DPRINTF("xhci: input context at "DMA_ADDR_FMT"\n", ictx);
+    DPRINTF("xhci: output context at "DMA_ADDR_FMT"\n", octx);
 
-    cpu_physical_memory_read(ictx, (uint8_t *) ictl_ctx, sizeof(ictl_ctx));
+    pci_dma_read(&xhci->pci_dev, ictx, ictl_ctx, sizeof(ictl_ctx));
 
     if (ictl_ctx[0] != 0x0 || ictl_ctx[1] & ~0x3) {
         fprintf(stderr, "xhci: invalid input context control %08x %08x\n",
@@ -2006,13 +2004,12 @@ static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
     }
 
     if (ictl_ctx[1] & 0x1) {
-        cpu_physical_memory_read(ictx+32,
-                                 (uint8_t *) islot_ctx, sizeof(islot_ctx));
+        pci_dma_read(&xhci->pci_dev, ictx+32, islot_ctx, sizeof(islot_ctx));
 
         DPRINTF("xhci: input slot context: %08x %08x %08x %08x\n",
                 islot_ctx[0], islot_ctx[1], islot_ctx[2], islot_ctx[3]);
 
-        cpu_physical_memory_read(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+        pci_dma_read(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
 
         slot_ctx[1] &= ~0xFFFF; /* max exit latency */
         slot_ctx[1] |= islot_ctx[1] & 0xFFFF;
@@ -2022,18 +2019,17 @@ static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
         DPRINTF("xhci: output slot context: %08x %08x %08x %08x\n",
                 slot_ctx[0], slot_ctx[1], slot_ctx[2], slot_ctx[3]);
 
-        cpu_physical_memory_write(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+        pci_dma_write(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
     }
 
     if (ictl_ctx[1] & 0x2) {
-        cpu_physical_memory_read(ictx+64,
-                                 (uint8_t *) iep0_ctx, sizeof(iep0_ctx));
+        pci_dma_read(&xhci->pci_dev, ictx+64, iep0_ctx, sizeof(iep0_ctx));
 
         DPRINTF("xhci: input ep0 context: %08x %08x %08x %08x %08x\n",
                 iep0_ctx[0], iep0_ctx[1], iep0_ctx[2],
                 iep0_ctx[3], iep0_ctx[4]);
 
-        cpu_physical_memory_read(octx+32, (uint8_t *) ep0_ctx, sizeof(ep0_ctx));
+        pci_dma_read(&xhci->pci_dev, octx+32, ep0_ctx, sizeof(ep0_ctx));
 
         ep0_ctx[1] &= ~0xFFFF0000; /* max packet size*/
         ep0_ctx[1] |= iep0_ctx[1] & 0xFFFF0000;
@@ -2041,8 +2037,7 @@ static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
         DPRINTF("xhci: output ep0 context: %08x %08x %08x %08x %08x\n",
                 ep0_ctx[0], ep0_ctx[1], ep0_ctx[2], ep0_ctx[3], ep0_ctx[4]);
 
-        cpu_physical_memory_write(octx+32,
-                                  (uint8_t *) ep0_ctx, sizeof(ep0_ctx));
+        pci_dma_write(&xhci->pci_dev, octx+32, ep0_ctx, sizeof(ep0_ctx));
     }
 
     return CC_SUCCESS;
@@ -2051,7 +2046,7 @@ static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
 static TRBCCode xhci_reset_slot(XHCIState *xhci, unsigned int slotid)
 {
     uint32_t slot_ctx[4];
-    target_phys_addr_t octx;
+    dma_addr_t octx;
     int i;
 
     assert(slotid >= 1 && slotid <= MAXSLOTS);
@@ -2059,7 +2054,7 @@ static TRBCCode xhci_reset_slot(XHCIState *xhci, unsigned int slotid)
 
     octx = xhci->slots[slotid-1].ctx;
 
-    DPRINTF("xhci: output context at "TARGET_FMT_plx"\n", octx);
+    DPRINTF("xhci: output context at "DMA_ADDR_FMT"\n", octx);
 
     for (i = 2; i <= 31; i++) {
         if (xhci->slots[slotid-1].eps[i-1]) {
@@ -2067,12 +2062,12 @@ static TRBCCode xhci_reset_slot(XHCIState *xhci, unsigned int slotid)
         }
     }
 
-    cpu_physical_memory_read(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+    pci_dma_read(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
     slot_ctx[3] &= ~(SLOT_STATE_MASK << SLOT_STATE_SHIFT);
     slot_ctx[3] |= SLOT_DEFAULT << SLOT_STATE_SHIFT;
     DPRINTF("xhci: output slot context: %08x %08x %08x %08x\n",
             slot_ctx[0], slot_ctx[1], slot_ctx[2], slot_ctx[3]);
-    cpu_physical_memory_write(octx, (uint8_t *) slot_ctx, sizeof(slot_ctx));
+    pci_dma_write(&xhci->pci_dev, octx, slot_ctx, sizeof(slot_ctx));
 
     return CC_SUCCESS;
 }
@@ -2095,19 +2090,19 @@ static unsigned int xhci_get_slot(XHCIState *xhci, XHCIEvent *event, XHCITRB *tr
 
 static TRBCCode xhci_get_port_bandwidth(XHCIState *xhci, uint64_t pctx)
 {
-    target_phys_addr_t ctx;
+    dma_addr_t ctx;
     uint8_t bw_ctx[MAXPORTS+1];
 
     DPRINTF("xhci_get_port_bandwidth()\n");
 
     ctx = xhci_mask64(pctx);
 
-    DPRINTF("xhci: bandwidth context at "TARGET_FMT_plx"\n", ctx);
+    DPRINTF("xhci: bandwidth context at "DMA_ADDR_FMT"\n", ctx);
 
     /* TODO: actually implement real values here */
     bw_ctx[0] = 0;
     memset(&bw_ctx[1], 80, MAXPORTS); /* 80% */
-    cpu_physical_memory_write(ctx, bw_ctx, sizeof(bw_ctx));
+    pci_dma_write(&xhci->pci_dev, ctx, bw_ctx, sizeof(bw_ctx));
 
     return CC_SUCCESS;
 }
@@ -2128,13 +2123,13 @@ static uint32_t xhci_nec_challenge(uint32_t hi, uint32_t lo)
     return ~val;
 }
 
-static void xhci_via_challenge(uint64_t addr)
+static void xhci_via_challenge(XHCIState *xhci, uint64_t addr)
 {
     uint32_t buf[8];
     uint32_t obuf[8];
-    target_phys_addr_t paddr = xhci_mask64(addr);
+    dma_addr_t paddr = xhci_mask64(addr);
 
-    cpu_physical_memory_read(paddr, (uint8_t *) &buf, 32);
+    pci_dma_read(&xhci->pci_dev, paddr, &buf, 32);
 
     memcpy(obuf, buf, sizeof(obuf));
 
@@ -2150,7 +2145,7 @@ static void xhci_via_challenge(uint64_t addr)
         obuf[7] = obuf[2] ^ obuf[3] ^ 0x65866593;
     }
 
-    cpu_physical_memory_write(paddr, (uint8_t *) &obuf, 32);
+    pci_dma_write(&xhci->pci_dev, paddr, &obuf, 32);
 }
 
 static void xhci_process_commands(XHCIState *xhci)
@@ -2158,7 +2153,7 @@ static void xhci_process_commands(XHCIState *xhci)
     XHCITRB trb;
     TRBType type;
     XHCIEvent event = {ER_COMMAND_COMPLETE, CC_SUCCESS};
-    target_phys_addr_t addr;
+    dma_addr_t addr;
     unsigned int i, slotid = 0;
 
     DPRINTF("xhci_process_commands()\n");
@@ -2247,7 +2242,7 @@ static void xhci_process_commands(XHCIState *xhci)
             event.ccode = xhci_get_port_bandwidth(xhci, trb.parameter);
             break;
         case CR_VENDOR_VIA_CHALLENGE_RESPONSE:
-            xhci_via_challenge(trb.parameter);
+            xhci_via_challenge(xhci, trb.parameter);
             break;
         case CR_VENDOR_NEC_FIRMWARE_REVISION:
             event.type = 48; /* NEC reply */
@@ -2537,7 +2532,7 @@ static void xhci_oper_write(XHCIState *xhci, uint32_t reg, uint32_t val)
             xhci_event(xhci, &event);
             DPRINTF("xhci: command ring stopped (CRCR=%08x)\n", xhci->crcr_low);
         } else {
-            target_phys_addr_t base = xhci_addr64(xhci->crcr_low & ~0x3f, val);
+            dma_addr_t base = xhci_addr64(xhci->crcr_low & ~0x3f, val);
             xhci_ring_init(xhci, &xhci->cmd_ring, base);
         }
         xhci->crcr_low &= ~(CRCR_CA | CRCR_CS);
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 04/13] Implement cpu_physical_memory_zero()
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (2 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 05/13] iommu: Add universal DMA helper functions David Gibson
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: David Gibson

This patch adds cpu_physical_memory_zero() function.  This is equivalent to
calling cpu_physical_memory_write() with a buffer full of zeroes, but
avoids actually allocating such a buffer along the way.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 cpu-common.h |    1 +
 exec.c       |   53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/cpu-common.h b/cpu-common.h
index dca5175..146429c 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -53,6 +53,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write);
+void cpu_physical_memory_zero(target_phys_addr_t addr, int len);
 static inline void cpu_physical_memory_read(target_phys_addr_t addr,
                                             void *buf, int len)
 {
diff --git a/exec.c b/exec.c
index 265e895..43f83be 100644
--- a/exec.c
+++ b/exec.c
@@ -3868,6 +3868,59 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
     }
 }
 
+void cpu_physical_memory_zero(target_phys_addr_t addr, int len)
+{
+    int l;
+    uint8_t *ptr;
+    target_phys_addr_t page;
+    MemoryRegionSection *section;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len)
+            l = len;
+        section = phys_page_find(page >> TARGET_PAGE_BITS);
+
+        if (!memory_region_is_ram(section->mr)) {
+            target_phys_addr_t addr1;
+            addr1 = section_addr(section, addr);
+            /* XXX: could force cpu_single_env to NULL to avoid
+               potential bugs */
+            if (l >= 4 && ((addr1 & 3) == 0)) {
+                /* 32 bit write access */
+                io_mem_write(section->mr, addr1, 0, 4);
+                l = 4;
+            } else if (l >= 2 && ((addr1 & 1) == 0)) {
+                /* 16 bit write access */
+                io_mem_write(section->mr, addr1, 0, 2);
+                l = 2;
+            } else {
+                /* 8 bit write access */
+                io_mem_write(section->mr, addr1, 0, 1);
+                l = 1;
+            }
+        } else if (!section->readonly) {
+            ram_addr_t addr1;
+            addr1 = memory_region_get_ram_addr(section->mr)
+                + section_addr(section, addr);
+            /* RAM case */
+            ptr = qemu_get_ram_ptr(addr1);
+            memset(ptr, 0, l);
+            if (!cpu_physical_memory_is_dirty(addr1)) {
+                /* invalidate code */
+                tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
+                /* set dirty bit */
+                cpu_physical_memory_set_dirty_flags(
+                    addr1, (0xff & ~CODE_DIRTY_FLAG));
+            }
+            qemu_put_ram_ptr(ptr);
+        }
+        len -= l;
+        addr += l;
+    }
+}
+
 /* used for ROM loading : can write in RAM and ROM */
 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                    const uint8_t *buf, int len)
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 05/13] iommu: Add universal DMA helper functions
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (3 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 04/13] Implement cpu_physical_memory_zero() David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 06/13] usb-ohci: Use " David Gibson
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Joerg Rodel, Richard Henderson, Michael S. Tsirkin,
	Eduard - Gabriel Munteanu, David Gibson

Not that long ago, every device implementation using DMA directly
accessed guest memory using cpu_physical_memory_*().  This meant that
adding support for a guest visible IOMMU would require changing every
one of these devices to go through IOMMU translation.

Shortly before qemu 1.0, I made a start on fixing this by providing
helper functions for PCI DMA.  These are currently just stubs which
call the direct access functions, but mean that an IOMMU can be
implemented in one place, rather than for every PCI device.

Clearly, this doesn't help for non PCI devices, which could also be
IOMMU translated on some platforms.  It is also problematic for the
devices which have both PCI and non-PCI version (e.g. OHCI, AHCI) - we
cannot use the the pci_dma_*() functions, because they assume the
presence of a PCIDevice, but we don't want to have to check between
pci_dma_*() and cpu_physical_memory_*() every time we do a DMA in the
device code.

This patch makes the first step on addressing both these problems, by
introducing new (stub) dma helper functions which can be used for any
DMA capable device.

These dma functions take a DMAContext *, a new (currently empty)
variable describing the DMA address space in which the operation is to
take place.  NULL indicates untranslated DMA directly into guest
physical address space.  The intention is that in future non-NULL
values will given information about any necessary IOMMU translation.

DMA using devices must obtain a DMAContext (or, potentially, contexts)
from their bus or platform.  For now this patch just converts the PCI
wrappers to be implemented in terms of the universal wrappers,
converting other drivers can take place over time.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Joerg Rodel <Joerg.Rodel@amd.com>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Richard Henderson <rth@twiddle.net>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 dma.h         |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/pci.h      |   22 +++++++-----
 qemu-common.h |    1 +
 3 files changed, 117 insertions(+), 9 deletions(-)

diff --git a/dma.h b/dma.h
index 463095c..8b6ef44 100644
--- a/dma.h
+++ b/dma.h
@@ -35,6 +35,109 @@ typedef target_phys_addr_t dma_addr_t;
 #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
 
+typedef void DMAInvalidateMapFunc(void *);
+
+/* Checks that the given range of addresses is valid for DMA.  This is
+ * useful for certain cases, but usually you should just use
+ * dma_memory_{read,write}() and check for errors */
+static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
+                                    dma_addr_t len, DMADirection dir)
+{
+    /* Stub version, with no iommu we assume all bus addresses are valid */
+    return true;
+}
+
+static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                                void *buf, dma_addr_t len, DMADirection dir)
+{
+    /* Stub version when we have no iommu support */
+    cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
+                           dir == DMA_DIRECTION_FROM_DEVICE);
+    return 0;
+}
+
+static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
+                                  void *buf, dma_addr_t len)
+{
+    return dma_memory_rw(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+}
+
+static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
+                                   const void *buf, dma_addr_t len)
+{
+    return dma_memory_rw(dma, addr, (void *)buf, len,
+                         DMA_DIRECTION_FROM_DEVICE);
+}
+
+static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
+                                  dma_addr_t len)
+{
+    /* Stub version when we have no iommu support */
+    cpu_physical_memory_zero(addr, len);
+    return 0;
+}
+
+static inline void *dma_memory_map(DMAContext *dma,
+                                   DMAInvalidateMapFunc *cb, void *opaque,
+                                   dma_addr_t addr, dma_addr_t *len,
+                                   DMADirection dir)
+{
+    target_phys_addr_t xlen = *len;
+    void *p;
+
+    p = cpu_physical_memory_map(addr, &xlen,
+                                dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = xlen;
+    return p;
+}
+
+static inline void dma_memory_unmap(DMAContext *dma,
+                                    void *buffer, dma_addr_t len,
+                                    DMADirection dir, dma_addr_t access_len)
+{
+    return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
+                                     dir == DMA_DIRECTION_FROM_DEVICE,
+                                     access_len);
+}
+
+#define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
+    static inline uint##_bits##_t ld##_lname##_##_end##_dma(DMAContext *dma, \
+                                                            dma_addr_t addr) \
+    {                                                                   \
+        uint##_bits##_t val;                                            \
+        dma_memory_read(dma, addr, &val, (_bits) / 8);                  \
+        return _end##_bits##_to_cpu(val);                               \
+    }                                                                   \
+    static inline void st##_sname##_##_end##_dma(DMAContext *dma,       \
+                                                 dma_addr_t addr,       \
+                                                 uint##_bits##_t val)   \
+    {                                                                   \
+        val = cpu_to_##_end##_bits(val);                                \
+        dma_memory_write(dma, addr, &val, (_bits) / 8);                 \
+    }
+
+static inline uint8_t ldub_dma(DMAContext *dma, dma_addr_t addr)
+{
+    uint8_t val;
+
+    dma_memory_read(dma, addr, &val, 1);
+    return val;
+}
+
+static inline void stb_dma(DMAContext *dma, dma_addr_t addr, uint8_t val)
+{
+    dma_memory_write(dma, addr, &val, 1);
+}
+
+DEFINE_LDST_DMA(uw, w, 16, le);
+DEFINE_LDST_DMA(l, l, 32, le);
+DEFINE_LDST_DMA(q, q, 64, le);
+DEFINE_LDST_DMA(uw, w, 16, be);
+DEFINE_LDST_DMA(l, l, 32, be);
+DEFINE_LDST_DMA(q, q, 64, be);
+
+#undef DEFINE_LDST_DMA
+
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
diff --git a/hw/pci.h b/hw/pci.h
index 4f19fdb..c021805 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -545,10 +545,16 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
 }
 
 /* DMA access functions */
+static inline DMAContext *pci_dma_context(PCIDevice *dev)
+{
+    /* Stub for when we have no PCI iommu support */
+    return NULL;
+}
+
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
                              void *buf, dma_addr_t len, DMADirection dir)
 {
-    cpu_physical_memory_rw(addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    dma_memory_rw(pci_dma_context(dev), addr, buf, len, dir);
     return 0;
 }
 
@@ -568,12 +574,12 @@ static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
     static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev,      \
                                                    dma_addr_t addr)     \
     {                                                                   \
-        return ld##_l##_phys(addr);                                     \
+        return ld##_l##_dma(pci_dma_context(dev), addr);                \
     }                                                                   \
     static inline void st##_s##_pci_dma(PCIDevice *dev,                 \
-                          dma_addr_t addr, uint##_bits##_t val)         \
+                                        dma_addr_t addr, uint##_bits##_t val) \
     {                                                                   \
-        st##_s##_phys(addr, val);                                       \
+        st##_s##_dma(pci_dma_context(dev), addr, val);                  \
     }
 
 PCI_DMA_DEFINE_LDST(ub, b, 8);
@@ -587,21 +593,19 @@ PCI_DMA_DEFINE_LDST(q_be, q_be, 64);
 #undef PCI_DMA_DEFINE_LDST
 
 static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
+                                DMAInvalidateMapFunc *cb, void *opaque,
                                 dma_addr_t *plen, DMADirection dir)
 {
-    target_phys_addr_t len = *plen;
     void *buf;
 
-    buf = cpu_physical_memory_map(addr, &len, dir == DMA_DIRECTION_FROM_DEVICE);
-    *plen = len;
+    buf = dma_memory_map(pci_dma_context(dev), cb, opaque, addr, plen, dir);
     return buf;
 }
 
 static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
                                  DMADirection dir, dma_addr_t access_len)
 {
-    cpu_physical_memory_unmap(buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
-                              access_len);
+    dma_memory_unmap(pci_dma_context(dev), buffer, len, dir, access_len);
 }
 
 static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
diff --git a/qemu-common.h b/qemu-common.h
index c9e96a8..3fecbe6 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -259,6 +259,7 @@ typedef struct SSIBus SSIBus;
 typedef struct EventNotifier EventNotifier;
 typedef struct VirtIODevice VirtIODevice;
 typedef struct QEMUSGList QEMUSGList;
+typedef struct DMAContext DMAContext;
 
 typedef uint64_t pcibus_t;
 
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 06/13] usb-ohci: Use universal DMA helper functions
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (4 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 05/13] iommu: Add universal DMA helper functions David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22 10:01   ` Gerd Hoffmann
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 07/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers David Gibson
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Michael S. Tsirkin, Gerd Hoffmann, David Gibson

The OHCI device emulation can provide both PCI and SysBus OHCI
implementations.  Because of this, it was not previously converted to
use the PCI DMA helper functions.

This patch converts it to use the new universal DMA helper functions.
In the PCI case, it obtains its DMAContext from pci_dma_context(), in
the SysBus case, it uses NULL - i.e. assumes for now that there will
be no IOMMU translation for a SysBus OHCI.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/usb/hcd-ohci.c |   93 +++++++++++++++++++++++++++++------------------------
 1 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 1a1cc88..844e7ed 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -31,7 +31,7 @@
 #include "hw/usb.h"
 #include "hw/pci.h"
 #include "hw/sysbus.h"
-#include "hw/qdev-addr.h"
+#include "hw/qdev-dma.h"
 
 //#define DEBUG_OHCI
 /* Dump packet contents.  */
@@ -62,6 +62,7 @@ typedef struct {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
+    DMAContext *dma;
     int num_ports;
     const char *name;
 
@@ -104,7 +105,7 @@ typedef struct {
     uint32_t htest;
 
     /* SM501 local memory offset */
-    target_phys_addr_t localmem_base;
+    dma_addr_t localmem_base;
 
     /* Active packets.  */
     uint32_t old_ctl;
@@ -482,14 +483,14 @@ static void ohci_reset(void *opaque)
 
 /* Get an array of dwords from main memory */
 static inline int get_dwords(OHCIState *ohci,
-                             uint32_t addr, uint32_t *buf, int num)
+                             dma_addr_t addr, uint32_t *buf, int num)
 {
     int i;
 
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_read(addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -498,7 +499,7 @@ static inline int get_dwords(OHCIState *ohci,
 
 /* Put an array of dwords in to main memory */
 static inline int put_dwords(OHCIState *ohci,
-                             uint32_t addr, uint32_t *buf, int num)
+                             dma_addr_t addr, uint32_t *buf, int num)
 {
     int i;
 
@@ -506,7 +507,7 @@ static inline int put_dwords(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        cpu_physical_memory_write(addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
@@ -514,14 +515,14 @@ static inline int put_dwords(OHCIState *ohci,
 
 /* Get an array of words from main memory */
 static inline int get_words(OHCIState *ohci,
-                            uint32_t addr, uint16_t *buf, int num)
+                            dma_addr_t addr, uint16_t *buf, int num)
 {
     int i;
 
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_read(addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
         *buf = le16_to_cpu(*buf);
     }
 
@@ -530,7 +531,7 @@ static inline int get_words(OHCIState *ohci,
 
 /* Put an array of words in to main memory */
 static inline int put_words(OHCIState *ohci,
-                            uint32_t addr, uint16_t *buf, int num)
+                            dma_addr_t addr, uint16_t *buf, int num)
 {
     int i;
 
@@ -538,40 +539,40 @@ static inline int put_words(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint16_t tmp = cpu_to_le16(*buf);
-        cpu_physical_memory_write(addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
 }
 
 static inline int ohci_read_ed(OHCIState *ohci,
-                               uint32_t addr, struct ohci_ed *ed)
+                               dma_addr_t addr, struct ohci_ed *ed)
 {
     return get_dwords(ohci, addr, (uint32_t *)ed, sizeof(*ed) >> 2);
 }
 
 static inline int ohci_read_td(OHCIState *ohci,
-                               uint32_t addr, struct ohci_td *td)
+                               dma_addr_t addr, struct ohci_td *td)
 {
     return get_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
 }
 
 static inline int ohci_read_iso_td(OHCIState *ohci,
-                                   uint32_t addr, struct ohci_iso_td *td)
+                                   dma_addr_t addr, struct ohci_iso_td *td)
 {
     return (get_dwords(ohci, addr, (uint32_t *)td, 4) &&
             get_words(ohci, addr + 16, td->offset, 8));
 }
 
 static inline int ohci_read_hcca(OHCIState *ohci,
-                                 uint32_t addr, struct ohci_hcca *hcca)
+                                 dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    cpu_physical_memory_read(addr + ohci->localmem_base, hcca, sizeof(*hcca));
+    dma_memory_read(ohci->dma, addr + ohci->localmem_base, hcca, sizeof(*hcca));
     return 1;
 }
 
 static inline int ohci_put_ed(OHCIState *ohci,
-                              uint32_t addr, struct ohci_ed *ed)
+                              dma_addr_t addr, struct ohci_ed *ed)
 {
     /* ed->tail is under control of the HCD.
      * Since just ed->head is changed by HC, just write back this
@@ -583,64 +584,63 @@ static inline int ohci_put_ed(OHCIState *ohci,
 }
 
 static inline int ohci_put_td(OHCIState *ohci,
-                              uint32_t addr, struct ohci_td *td)
+                              dma_addr_t addr, struct ohci_td *td)
 {
     return put_dwords(ohci, addr, (uint32_t *)td, sizeof(*td) >> 2);
 }
 
 static inline int ohci_put_iso_td(OHCIState *ohci,
-                                  uint32_t addr, struct ohci_iso_td *td)
+                                  dma_addr_t addr, struct ohci_iso_td *td)
 {
     return (put_dwords(ohci, addr, (uint32_t *)td, 4) &&
             put_words(ohci, addr + 16, td->offset, 8));
 }
 
 static inline int ohci_put_hcca(OHCIState *ohci,
-                                uint32_t addr, struct ohci_hcca *hcca)
+                                dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    cpu_physical_memory_write(addr + ohci->localmem_base + HCCA_WRITEBACK_OFFSET,
-                              (char *)hcca + HCCA_WRITEBACK_OFFSET,
-                              HCCA_WRITEBACK_SIZE);
+    dma_memory_write(ohci->dma,
+                     addr + ohci->localmem_base + HCCA_WRITEBACK_OFFSET,
+                     (char *)hcca + HCCA_WRITEBACK_OFFSET,
+                     HCCA_WRITEBACK_SIZE);
     return 1;
 }
 
 /* Read/Write the contents of a TD from/to main memory.  */
 static void ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
-                         uint8_t *buf, int len, int write)
+                         uint8_t *buf, int len, DMADirection dir)
 {
-    uint32_t ptr;
-    uint32_t n;
+    dma_addr_t ptr, n;
 
     ptr = td->cbp;
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = td->be & ~0xfffu;
     buf += n;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 /* Read/Write the contents of an ISO TD from/to main memory.  */
 static void ohci_copy_iso_td(OHCIState *ohci,
                              uint32_t start_addr, uint32_t end_addr,
-                             uint8_t *buf, int len, int write)
+                             uint8_t *buf, int len, DMADirection dir)
 {
-    uint32_t ptr;
-    uint32_t n;
+    dma_addr_t ptr, n;
 
     ptr = start_addr;
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = end_addr & ~0xfffu;
     buf += n;
-    cpu_physical_memory_rw(ptr + ohci->localmem_base, buf, len - n, write);
+    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 static void ohci_process_lists(OHCIState *ohci, int completion);
@@ -803,7 +803,8 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     }
 
     if (len && dir != OHCI_TD_DIR_IN) {
-        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, 0);
+        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len,
+                         DMA_DIRECTION_TO_DEVICE);
     }
 
     if (completion) {
@@ -827,7 +828,8 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     /* Writeback */
     if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) {
         /* IN transfer succeeded */
-        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret, 1);
+        ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret,
+                         DMA_DIRECTION_FROM_DEVICE);
         OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_CC,
                     OHCI_CC_NOERROR);
         OHCI_SET_BM(iso_td.offset[relative_frame_number], TD_PSW_SIZE, ret);
@@ -971,7 +973,8 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
                 pktlen = len;
             }
             if (!completion) {
-                ohci_copy_td(ohci, &td, ohci->usb_buf, pktlen, 0);
+                ohci_copy_td(ohci, &td, ohci->usb_buf, pktlen,
+                             DMA_DIRECTION_TO_DEVICE);
             }
         }
     }
@@ -1021,7 +1024,8 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
     }
     if (ret >= 0) {
         if (dir == OHCI_TD_DIR_IN) {
-            ohci_copy_td(ohci, &td, ohci->usb_buf, ret, 1);
+            ohci_copy_td(ohci, &td, ohci->usb_buf, ret,
+                         DMA_DIRECTION_FROM_DEVICE);
 #ifdef DEBUG_PACKET
             DPRINTF("  data:");
             for (i = 0; i < ret; i++)
@@ -1748,11 +1752,14 @@ static USBBusOps ohci_bus_ops = {
 };
 
 static int usb_ohci_init(OHCIState *ohci, DeviceState *dev,
-                         int num_ports, uint32_t localmem_base,
-                         char *masterbus, uint32_t firstport)
+                         int num_ports, dma_addr_t localmem_base,
+                         char *masterbus, uint32_t firstport,
+                         DMAContext *dma)
 {
     int i;
 
+    ohci->dma = dma;
+
     if (usb_frame_time == 0) {
 #ifdef OHCI_TIME_WARP
         usb_frame_time = get_ticks_per_sec();
@@ -1817,7 +1824,8 @@ static int usb_ohci_initfn_pci(struct PCIDevice *dev)
     ohci->pci_dev.config[PCI_INTERRUPT_PIN] = 0x01; /* interrupt pin A */
 
     if (usb_ohci_init(&ohci->state, &dev->qdev, ohci->num_ports, 0,
-                      ohci->masterbus, ohci->firstport) != 0) {
+                      ohci->masterbus, ohci->firstport,
+                      pci_dma_context(dev)) != 0) {
         return -1;
     }
     ohci->state.irq = ohci->pci_dev.irq[0];
@@ -1831,7 +1839,7 @@ typedef struct {
     SysBusDevice busdev;
     OHCIState ohci;
     uint32_t num_ports;
-    target_phys_addr_t dma_offset;
+    dma_addr_t dma_offset;
 } OHCISysBusState;
 
 static int ohci_init_pxa(SysBusDevice *dev)
@@ -1839,7 +1847,8 @@ static int ohci_init_pxa(SysBusDevice *dev)
     OHCISysBusState *s = FROM_SYSBUS(OHCISysBusState, dev);
 
     /* Cannot fail as we pass NULL for masterbus */
-    usb_ohci_init(&s->ohci, &dev->qdev, s->num_ports, s->dma_offset, NULL, 0);
+    usb_ohci_init(&s->ohci, &dev->qdev, s->num_ports, s->dma_offset, NULL, 0,
+                  NULL);
     sysbus_init_irq(dev, &s->ohci.irq);
     sysbus_init_mmio(dev, &s->ohci.mem);
 
@@ -1875,7 +1884,7 @@ static TypeInfo ohci_pci_info = {
 
 static Property ohci_sysbus_properties[] = {
     DEFINE_PROP_UINT32("num-ports", OHCISysBusState, num_ports, 3),
-    DEFINE_PROP_TADDR("dma-offset", OHCISysBusState, dma_offset, 3),
+    DEFINE_PROP_DMAADDR("dma-offset", OHCISysBusState, dma_offset, 3),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 07/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (5 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 06/13] usb-ohci: Use " David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 08/13] ide/ahci: Use universal DMA helper functions David Gibson
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Paolo Bonzini, Michael S. Tsirkin, David Gibson

dma-helpers.c contains a number of helper functions for doing
scatter/gather DMA, and various block device related DMA.  Currently,
these directly access guest memory using cpu_physical_memory_*(),
assuming no IOMMU translation.

This patch updates this code to use the new universal DMA helper
functions.  qemu_sglist_init() now takes a DMAContext * to describe
the DMA address space in which the scatter/gather will take place.

We minimally update the callers qemu_sglist_init() to pass NULL
(i.e. no translation, same as current behaviour).  Some of those
callers should pass something else in some cases to allow proper IOMMU
translation in future, but that will be fixed in later patches.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 dma-helpers.c  |   64 ++++++++++++++++++++++++++++++-------------------------
 dma.h          |    3 +-
 hw/ide/ahci.c  |    3 +-
 hw/ide/macio.c |    4 +-
 hw/pci.h       |    2 +-
 5 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 5f19a85..85a39b5 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -11,12 +11,13 @@
 #include "block_int.h"
 #include "trace.h"
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
     qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
     qsg->nsg = 0;
     qsg->nalloc = alloc_hint;
     qsg->size = 0;
+    qsg->dma = dma;
 }
 
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
@@ -75,10 +76,9 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
     int i;
 
     for (i = 0; i < dbs->iov.niov; ++i) {
-        cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
-                                  dbs->iov.iov[i].iov_len,
-                                  dbs->dir != DMA_DIRECTION_TO_DEVICE,
-                                  dbs->iov.iov[i].iov_len);
+        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
+                         dbs->iov.iov[i].iov_len, dbs->dir,
+                         dbs->iov.iov[i].iov_len);
     }
     qemu_iovec_reset(&dbs->iov);
 }
@@ -104,10 +104,32 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
     }
 }
 
+static void dma_aio_cancel(BlockDriverAIOCB *acb)
+{
+    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
+
+    trace_dma_aio_cancel(dbs);
+
+    if (dbs->acb) {
+        BlockDriverAIOCB *acb = dbs->acb;
+        dbs->acb = NULL;
+        dbs->in_cancel = true;
+        bdrv_aio_cancel(acb);
+        dbs->in_cancel = false;
+    }
+    dbs->common.cb = NULL;
+    dma_complete(dbs, 0);
+}
+
+static void dma_bdrv_invalidate(void *opaque)
+{
+    dma_aio_cancel(&((DMAAIOCB *)opaque)->common);
+}
+
 static void dma_bdrv_cb(void *opaque, int ret)
 {
     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
-    target_phys_addr_t cur_addr, cur_len;
+    dma_addr_t cur_addr, cur_len;
     void *mem;
 
     trace_dma_bdrv_cb(dbs, ret);
@@ -124,8 +146,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
     while (dbs->sg_cur_index < dbs->sg->nsg) {
         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-        mem = cpu_physical_memory_map(cur_addr, &cur_len,
-                                      dbs->dir != DMA_DIRECTION_TO_DEVICE);
+        mem = dma_memory_map(dbs->sg->dma, dma_bdrv_invalidate, dbs,
+                             cur_addr, &cur_len, dbs->dir);
         if (!mem)
             break;
         qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -147,23 +169,6 @@ static void dma_bdrv_cb(void *opaque, int ret)
     assert(dbs->acb);
 }
 
-static void dma_aio_cancel(BlockDriverAIOCB *acb)
-{
-    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
-
-    trace_dma_aio_cancel(dbs);
-
-    if (dbs->acb) {
-        BlockDriverAIOCB *acb = dbs->acb;
-        dbs->acb = NULL;
-        dbs->in_cancel = true;
-        bdrv_aio_cancel(acb);
-        dbs->in_cancel = false;
-    }
-    dbs->common.cb = NULL;
-    dma_complete(dbs, 0);
-}
-
 static AIOPool dma_aio_pool = {
     .aiocb_size         = sizeof(DMAAIOCB),
     .cancel             = dma_aio_cancel,
@@ -210,7 +215,8 @@ BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
 }
 
 
-static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg, bool to_dev)
+static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
+                           DMADirection dir)
 {
     uint64_t resid;
     int sg_cur_index;
@@ -221,7 +227,7 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg, bool to_de
     while (len > 0) {
         ScatterGatherEntry entry = sg->sg[sg_cur_index++];
         int32_t xfer = MIN(len, entry.len);
-        cpu_physical_memory_rw(entry.base, ptr, xfer, !to_dev);
+        dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
         ptr += xfer;
         len -= xfer;
         resid -= xfer;
@@ -232,12 +238,12 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg, bool to_de
 
 uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg)
 {
-    return dma_buf_rw(ptr, len, sg, 0);
+    return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE);
 }
 
 uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
 {
-    return dma_buf_rw(ptr, len, sg, 1);
+    return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);
 }
 
 void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
diff --git a/dma.h b/dma.h
index 8b6ef44..a66e3d7 100644
--- a/dma.h
+++ b/dma.h
@@ -27,6 +27,7 @@ struct QEMUSGList {
     int nsg;
     int nalloc;
     size_t size;
+    DMAContext *dma;
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
@@ -143,7 +144,7 @@ struct ScatterGatherEntry {
     dma_addr_t len;
 };
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma);
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index a883a92..96d8f62 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -667,7 +667,8 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     if (sglist_alloc_hint > 0) {
         AHCI_SG *tbl = (AHCI_SG *)prdt;
 
-        qemu_sglist_init(sglist, sglist_alloc_hint);
+        /* FIXME: pass the correct DMAContext */
+        qemu_sglist_init(sglist, sglist_alloc_hint, NULL);
         for (i = 0; i < sglist_alloc_hint; i++) {
             /* flags_size is zero-based */
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 7b38d9e..848cb31 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -76,7 +76,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
 
     s->io_buffer_size = io->len;
 
-    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1);
+    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1, NULL);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
@@ -133,7 +133,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
     s->io_buffer_index = 0;
     s->io_buffer_size = io->len;
 
-    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1);
+    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1, NULL);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
diff --git a/hw/pci.h b/hw/pci.h
index c021805..41dcd05 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -611,7 +611,7 @@ static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
 static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
                                        int alloc_hint)
 {
-    qemu_sglist_init(qsg, alloc_hint);
+    qemu_sglist_init(qsg, alloc_hint, pci_dma_context(dev));
 }
 
 extern const VMStateDescription vmstate_pci_device;
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 08/13] ide/ahci: Use universal DMA helper functions
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (6 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 07/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 09/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers David Gibson
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Michael S. Tsirkin, David Gibson

The AHCI device can provide both PCI and SysBus AHCI device
emulations.  For this reason, it wasn't previously converted to use
the pci_dma_*() helper functions.  Now that we have universal DMA
helper functions, this converts AHCI to use them.

The DMAContext is obtained from pci_dma_context() in the PCI case and
set to NULL in the SysBus case (i.e. we assume for now that a SysBus
AHCI has no IOMMU translation).

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ide/ahci.c |    8 ++++----
 hw/ide/ahci.h |    3 ++-
 hw/ide/ich.c  |    2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 96d8f62..1fc50fd 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -667,8 +667,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     if (sglist_alloc_hint > 0) {
         AHCI_SG *tbl = (AHCI_SG *)prdt;
 
-        /* FIXME: pass the correct DMAContext */
-        qemu_sglist_init(sglist, sglist_alloc_hint, NULL);
+        qemu_sglist_init(sglist, sglist_alloc_hint, ad->hba->dma);
         for (i = 0; i < sglist_alloc_hint; i++) {
             /* flags_size is zero-based */
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
@@ -1115,11 +1114,12 @@ static const IDEDMAOps ahci_dma_ops = {
     .reset = ahci_dma_reset,
 };
 
-void ahci_init(AHCIState *s, DeviceState *qdev, int ports)
+void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports)
 {
     qemu_irq *irqs;
     int i;
 
+    s->dma = dma;
     s->ports = ports;
     s->dev = g_malloc0(sizeof(AHCIDevice) * ports);
     ahci_reg_init(s);
@@ -1182,7 +1182,7 @@ static const VMStateDescription vmstate_sysbus_ahci = {
 static int sysbus_ahci_init(SysBusDevice *dev)
 {
     SysbusAHCIState *s = FROM_SYSBUS(SysbusAHCIState, dev);
-    ahci_init(&s->ahci, &dev->qdev, s->num_ports);
+    ahci_init(&s->ahci, &dev->qdev, NULL, s->num_ports);
 
     sysbus_init_mmio(dev, &s->ahci.mem);
     sysbus_init_irq(dev, &s->ahci.irq);
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index b223d2c..af8c6ef 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -299,6 +299,7 @@ typedef struct AHCIState {
     uint32_t idp_index;     /* Current IDP index */
     int ports;
     qemu_irq irq;
+    DMAContext *dma;
 } AHCIState;
 
 typedef struct AHCIPCIState {
@@ -329,7 +330,7 @@ typedef struct NCQFrame {
     uint8_t reserved10;
 } QEMU_PACKED NCQFrame;
 
-void ahci_init(AHCIState *s, DeviceState *qdev, int ports);
+void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports);
 void ahci_uninit(AHCIState *s);
 
 void ahci_reset(void *opaque);
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 560ae37..5354e13 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -91,7 +91,7 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
     uint8_t *sata_cap;
     d = DO_UPCAST(struct AHCIPCIState, card, dev);
 
-    ahci_init(&d->ahci, &dev->qdev, 6);
+    ahci_init(&d->ahci, &dev->qdev, pci_dma_context(dev), 6);
 
     pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);
 
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 09/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (7 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 08/13] ide/ahci: Use universal DMA helper functions David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure David Gibson
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: David Gibson

The USB UHCI and EHCI drivers were converted some time ago to use the
pci_dma_*() helper functions.  However, this conversion was not complete
because in some places both these drivers do DMA via the usb_packet_map()
function in usb-libhw.c.  That function directly used
cpu_physical_memory_map().

Now that the sglist code uses DMA wrappers properly, we can convert the
functions in usb-libhw.c, thus conpleting the conversion of UHCI and EHCI
to use the DMA wrappers.

Note that usb_packet_map() invokes dma_memory_map() with a NULL invalidate
callback function.  When IOMMU support is added, this will mean that
usb_packet_map() and the corresponding usb_packet_unmap() must be called in
close proximity without dropping the qemu device lock - otherwise the guest
might invalidate IOMMU mappings while they are still in use by the device
code.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/usb.h          |    2 +-
 hw/usb/hcd-ehci.c |    4 ++--
 hw/usb/hcd-uhci.c |    2 +-
 hw/usb/libhw.c    |   22 ++++++++++++----------
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/hw/usb.h b/hw/usb.h
index e95085f..3204c3b 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -340,7 +340,7 @@ void usb_packet_check_state(USBPacket *p, USBPacketState expected);
 void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep);
 void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len);
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl);
-void usb_packet_unmap(USBPacket *p);
+void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl);
 void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes);
 void usb_packet_skip(USBPacket *p, size_t bytes);
 void usb_packet_cleanup(USBPacket *p);
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 60f9f5b..fe1aa77 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1337,7 +1337,7 @@ static void ehci_execute_complete(EHCIQueue *q)
         set_field(&q->qh.token, q->tbytes, QTD_TOKEN_TBYTES);
     }
     ehci_finish_transfer(q, q->usb_status);
-    usb_packet_unmap(&q->packet);
+    usb_packet_unmap(&q->packet, &q->sgl);
 
     q->qh.token ^= QTD_TOKEN_DTOGGLE;
     q->qh.token &= ~QTD_TOKEN_ACTIVE;
@@ -1458,7 +1458,7 @@ static int ehci_process_itd(EHCIState *ehci,
                 usb_packet_map(&ehci->ipacket, &ehci->isgl);
                 ret = usb_handle_packet(dev, &ehci->ipacket);
                 assert(ret != USB_RET_ASYNC);
-                usb_packet_unmap(&ehci->ipacket);
+                usb_packet_unmap(&ehci->ipacket, &ehci->isgl);
             } else {
                 DPRINTF("ISOCH: attempt to addess non-iso endpoint\n");
                 ret = USB_RET_NAK;
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index e55dad9..ea1eb22 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -866,7 +866,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td, uint32_t *in
 
 done:
     len = uhci_complete_td(s, td, async, int_mask);
-    usb_packet_unmap(&async->packet);
+    usb_packet_unmap(&async->packet, &async->sgl);
     uhci_async_free(async);
     return len;
 }
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index 2462351..3636525 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -26,15 +26,16 @@
 
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 {
-    int is_write = (p->pid == USB_TOKEN_IN);
-    target_phys_addr_t len;
+    DMADirection dir = (p->pid == USB_TOKEN_IN) ?
+        DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
+    dma_addr_t len;
     void *mem;
     int i;
 
     for (i = 0; i < sgl->nsg; i++) {
         len = sgl->sg[i].len;
-        mem = cpu_physical_memory_map(sgl->sg[i].base, &len,
-                                      is_write);
+        mem = dma_memory_map(sgl->dma, NULL, NULL,
+                             sgl->sg[i].base, &len, dir);
         if (!mem) {
             goto err;
         }
@@ -46,18 +47,19 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
     return 0;
 
 err:
-    usb_packet_unmap(p);
+    usb_packet_unmap(p, sgl);
     return -1;
 }
 
-void usb_packet_unmap(USBPacket *p)
+void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl)
 {
-    int is_write = (p->pid == USB_TOKEN_IN);
+    DMADirection dir = (p->pid == USB_TOKEN_IN) ?
+        DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
     int i;
 
     for (i = 0; i < p->iov.niov; i++) {
-        cpu_physical_memory_unmap(p->iov.iov[i].iov_base,
-                                  p->iov.iov[i].iov_len, is_write,
-                                  p->iov.iov[i].iov_len);
+        dma_memory_unmap(sgl->dma, p->iov.iov[i].iov_base,
+                         p->iov.iov[i].iov_len, dir,
+                         p->iov.iov[i].iov_len);
     }
 }
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (8 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 09/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 11/13] pseries: Convert sPAPR TCEs to use generic IOMMU infrastructure David Gibson
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduard - Gabriel Munteanu, Richard Henderson, Michael S. Tsirkin,
	David Gibson

This patch adds the basic infrastructure necessary to emulate an IOMMU
visible to the guest.  The DMAContext structure is extended with
information and a callback describing the translation, and the various
DMA functions used by devices will now perform IOMMU translation using
this callback.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Conflicts:

	dma.h
---
 configure     |   12 ++++
 dma-helpers.c |  191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dma.h         |  125 +++++++++++++++++++++++++++++++-------
 hw/qdev-dma.h |    4 +-
 4 files changed, 307 insertions(+), 25 deletions(-)

diff --git a/configure b/configure
index 8b4e3c1..139419e 100755
--- a/configure
+++ b/configure
@@ -140,6 +140,7 @@ linux_aio=""
 cap_ng=""
 attr=""
 libattr=""
+iommu="yes"
 xfs=""
 
 vhost_net="no"
@@ -795,6 +796,10 @@ for opt do
   ;;
   --enable-vhost-net) vhost_net="yes"
   ;;
+  --enable-iommu) iommu="yes"
+  ;;
+  --disable-iommu) iommu="no"
+  ;;
   --disable-opengl) opengl="no"
   ;;
   --enable-opengl) opengl="yes"
@@ -1096,6 +1101,8 @@ echo "  --enable-docs            enable documentation build"
 echo "  --disable-docs           disable documentation build"
 echo "  --disable-vhost-net      disable vhost-net acceleration support"
 echo "  --enable-vhost-net       enable vhost-net acceleration support"
+echo "  --disable-iommu          disable IOMMU emulation support"
+echo "  --enable-iommu           enable IOMMU emulation support"
 echo "  --enable-trace-backend=B Set trace backend"
 echo "                           Available backends:" $("$source_path"/scripts/tracetool --list-backends)
 echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
@@ -2943,6 +2950,7 @@ echo "posix_madvise     $posix_madvise"
 echo "uuid support      $uuid"
 echo "libcap-ng support $cap_ng"
 echo "vhost-net support $vhost_net"
+echo "IOMMU support     $iommu"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice"
@@ -3828,6 +3836,10 @@ if test "$target_softmmu" = "yes" -a \( \
   echo "CONFIG_NEED_MMU=y" >> $config_target_mak
 fi
 
+if test "$iommu" = "yes" ; then
+  echo "CONFIG_IOMMU=y" >> $config_host_mak
+fi
+
 if test "$gprof" = "yes" ; then
   echo "TARGET_GPROF=yes" >> $config_target_mak
   if test "$target_linux_user" = "yes" ; then
diff --git a/dma-helpers.c b/dma-helpers.c
index 85a39b5..1d5165c 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -10,6 +10,9 @@
 #include "dma.h"
 #include "block_int.h"
 #include "trace.h"
+#include "range.h"
+
+/* #define DEBUG_IOMMU */
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
@@ -251,3 +254,191 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
+
+#ifdef CONFIG_IOMMU
+bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                            DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
+            return false;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return true;
+}
+
+int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                        void *buf, dma_addr_t len, DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
+            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen, dir);
+        if (err) {
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+
+    return 0;
+}
+
+int iommu_dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_zero context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen,
+                             DMA_DIRECTION_FROM_DEVICE);
+        if (err) {
+            return err;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_zero(paddr, plen);
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return 0;
+}
+
+typedef struct DMAMemoryMap DMAMemoryMap;
+struct DMAMemoryMap {
+    dma_addr_t              addr;
+    size_t                  len;
+    void                    *buf;
+    DMAInvalidateMapFunc    *invalidate;
+    void                    *invalidate_opaque;
+
+    QLIST_ENTRY(DMAMemoryMap) list;
+};
+
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn)
+{
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_context_init(%p, %p)\n", dma, fn);
+#endif
+    dma->translate = fn;
+    QLIST_INIT(&dma->memory_maps);
+}
+
+void dma_invalidate_memory_range(DMAContext *dma,
+                                 dma_addr_t addr, dma_addr_t len)
+{
+    DMAMemoryMap *map;
+
+    QLIST_FOREACH(map, &dma->memory_maps, list) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            map->invalidate(map->invalidate_opaque);
+            QLIST_REMOVE(map, list);
+            free(map);
+        }
+    }
+}
+
+void *iommu_dma_memory_map(DMAContext *dma, DMAInvalidateMapFunc *cb,
+                           void *cb_opaque, dma_addr_t addr, dma_addr_t *len,
+                           DMADirection dir)
+{
+    int err;
+    target_phys_addr_t paddr, plen;
+    void *buf;
+
+    plen = *len;
+    err = dma->translate(dma, addr, &paddr, &plen, dir);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * If this is true, the virtual region is contiguous,
+     * but the translated physical region isn't. We just
+     * clamp *len, much like cpu_physical_memory_map() does.
+     */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    buf = cpu_physical_memory_map(paddr, &plen,
+                                  dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = plen;
+
+    /* We treat maps as remote TLBs to cope with stuff like AIO. */
+    if (cb) {
+        DMAMemoryMap *map;
+
+        map = g_malloc(sizeof(DMAMemoryMap));
+        map->addr = addr;
+        map->len = *len;
+        map->buf = buf;
+        map->invalidate = cb;
+        map->invalidate_opaque = cb_opaque;
+
+        QLIST_INSERT_HEAD(&dma->memory_maps, map, list);
+    }
+
+    return buf;
+}
+
+void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
+                            DMADirection dir, dma_addr_t access_len)
+{
+    DMAMemoryMap *map;
+
+    cpu_physical_memory_unmap(buffer, len,
+                              dir == DMA_DIRECTION_FROM_DEVICE,
+                              access_len);
+
+    QLIST_FOREACH(map, &dma->memory_maps, list) {
+        if ((map->buf == buffer) && (map->len == len)) {
+            QLIST_REMOVE(map, list);
+            free(map);
+        }
+    }
+}
+#endif /* CONFIG_IOMMU */
diff --git a/dma.h b/dma.h
index a66e3d7..1163242 100644
--- a/dma.h
+++ b/dma.h
@@ -15,6 +15,7 @@
 #include "hw/hw.h"
 #include "block.h"
 
+typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 typedef enum {
@@ -31,30 +32,82 @@ struct QEMUSGList {
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
+
+#ifdef CONFIG_IOMMU
+/*
+ * When an IOMMU is present, bus addresses become distinct from
+ * CPU/memory physical addresses and may be a different size.  Because
+ * the IOVA size depends more on the bus than on the platform, we more
+ * or less have to treat these as 64-bit always to cover all (or at
+ * least most) cases.
+ */
+typedef uint64_t dma_addr_t;
+
+#define DMA_ADDR_BITS 64
+#define DMA_ADDR_FMT "%" PRIx64
+#else
 typedef target_phys_addr_t dma_addr_t;
 
 #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
+#endif
+
+typedef int DMATranslateFunc(DMAContext *dma,
+                             dma_addr_t addr,
+                             target_phys_addr_t *paddr,
+                             target_phys_addr_t *len,
+                             DMADirection dir);
+
+typedef struct DMAContext {
+#ifdef CONFIG_IOMMU
+    DMATranslateFunc *translate;
+    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
+#endif
+} DMAContext;
+
+#ifdef CONFIG_IOMMU
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return !!dma;
+}
+#else
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return false;
+}
+#endif
 
 typedef void DMAInvalidateMapFunc(void *);
 
 /* Checks that the given range of addresses is valid for DMA.  This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
-                                    dma_addr_t len, DMADirection dir)
+bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                            DMADirection dir);
+static inline bool dma_memory_valid(DMAContext *dma,
+                                    dma_addr_t addr, dma_addr_t len,
+                                    DMADirection dir)
 {
-    /* Stub version, with no iommu we assume all bus addresses are valid */
-    return true;
+    if (!dma_has_iommu(dma)) {
+        return true;
+    } else {
+        return iommu_dma_memory_valid(dma, addr, len, dir);
+    }
 }
 
+int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                        void *buf, dma_addr_t len, DMADirection dir);
 static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
                                 void *buf, dma_addr_t len, DMADirection dir)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
-                           dir == DMA_DIRECTION_FROM_DEVICE);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_rw(addr, buf, len,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+        return 0;
+    } else {
+        return iommu_dma_memory_rw(dma, addr, buf, len, dir);
+    }
 }
 
 static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
@@ -70,35 +123,55 @@ static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
                          DMA_DIRECTION_FROM_DEVICE);
 }
 
+int iommu_dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len);
 static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
                                   dma_addr_t len)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_zero(addr, len);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_zero(addr, len);
+        return 0;
+    } else {
+        return iommu_dma_memory_zero(dma, addr, len);
+    }
 }
 
+void *iommu_dma_memory_map(DMAContext *dma,
+                           DMAInvalidateMapFunc *cb, void *opaque,
+                           dma_addr_t addr, dma_addr_t *len,
+                           DMADirection dir);
 static inline void *dma_memory_map(DMAContext *dma,
-                                   DMAInvalidateMapFunc *cb, void *opaque,
+                                   DMAInvalidateMapFunc *cb, void *cb_opaque,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
-    target_phys_addr_t xlen = *len;
-    void *p;
-
-    p = cpu_physical_memory_map(addr, &xlen,
-                                dir == DMA_DIRECTION_FROM_DEVICE);
-    *len = xlen;
-    return p;
+    if (!dma_has_iommu(dma)) {
+        target_phys_addr_t xlen = *len;
+        void *p;
+
+        p = cpu_physical_memory_map(addr, &xlen,
+                                    dir == DMA_DIRECTION_FROM_DEVICE);
+        *len = xlen;
+        return p;
+    } else {
+        return iommu_dma_memory_map(dma, cb, cb_opaque, addr, len, dir);
+    }
 }
 
+void iommu_dma_memory_unmap(DMAContext *dma,
+                            void *buffer, dma_addr_t len,
+                            DMADirection dir, dma_addr_t access_len);
 static inline void dma_memory_unmap(DMAContext *dma,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
-                                     dir == DMA_DIRECTION_FROM_DEVICE,
-                                     access_len);
+    if (!dma_has_iommu(dma)) {
+        return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
+                                         dir == DMA_DIRECTION_FROM_DEVICE,
+                                         access_len);
+    } else {
+        iommu_dma_memory_unmap(dma, buffer, len, dir, access_len);
+    }
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
@@ -139,6 +212,14 @@ DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
+#ifdef CONFIG_IOMMU
+
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn);
+void dma_invalidate_memory_range(DMAContext *dma,
+                                 dma_addr_t addr, dma_addr_t len);
+
+#endif /* CONFIG_IOMMU */
+
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
diff --git a/hw/qdev-dma.h b/hw/qdev-dma.h
index e407771..92a404d 100644
--- a/hw/qdev-dma.h
+++ b/hw/qdev-dma.h
@@ -1,4 +1,2 @@
-#include "qdev-addr.h"
-
 #define DEFINE_PROP_DMAADDR(_n, _s, _f, _d)                               \
-    DEFINE_PROP_TADDR(_n, _s, _f, _d)
+    DEFINE_PROP_HEX64(_n, _s, _f, _d)
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 11/13] pseries: Convert sPAPR TCEs to use generic IOMMU infrastructure
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (9 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 12/13] iommu: Allow PCI to use " David Gibson
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alex Graf, David Gibson

The pseries platform already contains an IOMMU implementation, since it is
essential for the platform's paravirtualized VIO devices.  This IOMMU
support is currently built into the implementation of the VIO "bus" and
the various VIO devices.

This patch converts this code to make use of the new common IOMMU
infrastructure.

Cc: Alex Graf <agraf@suse.de>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 Makefile.target  |    2 +-
 configure        |    2 +-
 hw/spapr.c       |    3 +
 hw/spapr.h       |   16 +++
 hw/spapr_iommu.c |  242 ++++++++++++++++++++++++++++++++++++++++++++++
 hw/spapr_llan.c  |   61 ++++++------
 hw/spapr_vio.c   |  282 ++++--------------------------------------------------
 hw/spapr_vio.h   |   71 +++++++-------
 hw/spapr_vscsi.c |   24 +++--
 target-ppc/kvm.c |    4 +-
 10 files changed, 365 insertions(+), 342 deletions(-)
 create mode 100644 hw/spapr_iommu.c

diff --git a/Makefile.target b/Makefile.target
index 63cf769..61b4973 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -266,7 +266,7 @@ obj-ppc-y += ppc_oldworld.o
 # NewWorld PowerMac
 obj-ppc-y += ppc_newworld.o
 # IBM pSeries (sPAPR)
-obj-ppc-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
+obj-ppc-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o spapr_iommu.o
 obj-ppc-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
 obj-ppc-$(CONFIG_PSERIES) += spapr_pci.o device-hotplug.o pci-hotplug.o
 # PowerPC 4xx boards
diff --git a/configure b/configure
index 139419e..c637db9 100755
--- a/configure
+++ b/configure
@@ -3667,7 +3667,7 @@ case "$target_arch2" in
       fi
     fi
 esac
-if test "$target_arch2" = "ppc64" -a "$fdt" = "yes"; then
+if test "$target_arch2" = "ppc64" -a "$fdt" = "yes" -a "$iommu" = "yes" ; then
   echo "CONFIG_PSERIES=y" >> $config_target_mak
 fi
 if test "$target_bigendian" = "yes" ; then
diff --git a/hw/spapr.c b/hw/spapr.c
index bfaf260..a66681a 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -626,6 +626,9 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     spapr->icp = xics_system_init(XICS_IRQS);
     spapr->next_irq = 16;
 
+    /* Set up IOMMU */
+    spapr_iommu_init();
+
     /* Set up VIO bus */
     spapr->vio_bus = spapr_vio_bus_init();
 
diff --git a/hw/spapr.h b/hw/spapr.h
index 11160b0..d11f618 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -319,4 +319,20 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
 int spapr_rtas_device_tree_setup(void *fdt, target_phys_addr_t rtas_addr,
                                  target_phys_addr_t rtas_size);
 
+#define SPAPR_TCE_PAGE_SHIFT   12
+#define SPAPR_TCE_PAGE_SIZE    (1ULL << SPAPR_TCE_PAGE_SHIFT)
+#define SPAPR_TCE_PAGE_MASK    (SPAPR_TCE_PAGE_SIZE - 1)
+
+typedef struct sPAPRTCE {
+    uint64_t tce;
+} sPAPRTCE;
+
+#define SPAPR_VIO_BASE_LIOBN    0x00000000
+
+void spapr_iommu_init(void);
+DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
+void spapr_tce_free(DMAContext *dma);
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+                 DMAContext *dma);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
new file mode 100644
index 0000000..96e2708
--- /dev/null
+++ b/hw/spapr_iommu.c
@@ -0,0 +1,242 @@
+/*
+ * QEMU sPAPR IOMMU (TCE) code
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw.h"
+#include "kvm.h"
+#include "qdev.h"
+#include "kvm_ppc.h"
+#include "dma.h"
+
+#include "hw/spapr.h"
+
+#include <libfdt.h>
+
+/* #define DEBUG_TCE */
+
+enum sPAPRTCEAccess {
+    SPAPR_TCE_FAULT = 0,
+    SPAPR_TCE_RO = 1,
+    SPAPR_TCE_WO = 2,
+    SPAPR_TCE_RW = 3,
+};
+
+typedef struct sPAPRTCETable sPAPRTCETable;
+
+struct sPAPRTCETable {
+    DMAContext dma;
+    uint32_t liobn;
+    uint32_t window_size;
+    sPAPRTCE *table;
+    int fd;
+    QLIST_ENTRY(sPAPRTCETable) list;
+};
+
+
+QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables;
+
+static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
+{
+    sPAPRTCETable *tcet;
+
+    QLIST_FOREACH(tcet, &spapr_tce_tables, list) {
+        if (tcet->liobn == liobn) {
+            return tcet;
+        }
+    }
+
+    return NULL;
+}
+
+static int spapr_tce_translate(DMAContext *dma,
+                               dma_addr_t addr,
+                               target_phys_addr_t *paddr,
+                               target_phys_addr_t *len,
+                               DMADirection dir)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE)
+        ? SPAPR_TCE_WO : SPAPR_TCE_RO;
+    uint64_t tce;
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, "spapr_tce_translate liobn=0x%" PRIx32 " addr=0x"
+            DMA_ADDR_FMT "\n", tcet->liobn, addr);
+#endif
+
+    /* Check if we are in bound */
+    if (addr >= tcet->window_size) {
+#ifdef DEBUG_TCE
+        fprintf(stderr, "spapr_tce_translate out of bounds\n");
+#endif
+        return -EFAULT;
+    }
+
+    tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce;
+
+    /* Check TCE */
+    if (!(tce & access)) {
+        return -EPERM;
+    }
+
+    /* How much til end of page ? */
+    *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1;
+
+    /* Translate */
+    *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) |
+        (addr & SPAPR_TCE_PAGE_MASK);
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, " ->  *paddr=0x" TARGET_FMT_plx ", *len=0x"
+            TARGET_FMT_plx "\n", *paddr, *len);
+#endif
+
+    return 0;
+}
+
+DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
+{
+    sPAPRTCETable *tcet;
+
+    if (!window_size) {
+        return NULL;
+    }
+
+    tcet = g_malloc0(sizeof(*tcet));
+    dma_context_init(&tcet->dma, spapr_tce_translate);
+
+    tcet->liobn = liobn;
+    tcet->window_size = window_size;
+
+    if (kvm_enabled()) {
+        tcet->table = kvmppc_create_spapr_tce(liobn,
+                                              window_size,
+                                              &tcet->fd);
+    }
+
+    if (!tcet->table) {
+        size_t table_size = (window_size >> SPAPR_TCE_PAGE_SHIFT)
+            * sizeof(sPAPRTCE);
+        tcet->table = g_malloc0(table_size);
+    }
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, "spapr_iommu: New TCE table, liobn=0x%x, context @ %p, "
+            "table @ %p, fd=%d\n", liobn, &tcet->dma, tcet->table, tcet->fd);
+#endif
+
+    QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
+
+    return &tcet->dma;
+}
+
+void spapr_tce_free(DMAContext *dma)
+{
+
+    if (dma) {
+        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+
+        QLIST_REMOVE(tcet, list);
+
+        if (!kvm_enabled() ||
+            (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
+                                     tcet->window_size) != 0)) {
+            g_free(tcet->table);
+        }
+
+        g_free(tcet);
+    }
+}
+
+
+static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce = args[2];
+    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+    sPAPRTCE *tcep;
+
+    if (liobn & 0xFFFFFFFF00000000ULL) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
+                      TARGET_FMT_lx "\n", liobn);
+        return H_PARAMETER;
+    }
+    if (!tcet) {
+        hcall_dprintf("spapr_vio_put_tce on non-existent LIOBN "
+                      TARGET_FMT_lx "\n", liobn);
+        return H_PARAMETER;
+    }
+
+    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
+
+#ifdef DEBUG_TCE
+    fprintf(stderr, "spapr_vio_put_tce on liobn=" TARGET_FMT_lx /*%s*/
+            "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
+            liobn, /*dev->qdev.id, */ioba, tce);
+#endif
+
+    if (ioba >= tcet->window_size) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-boards IOBA 0x"
+                      TARGET_FMT_lx "\n", ioba);
+        return H_PARAMETER;
+    }
+
+    tcep = tcet->table + (ioba >> SPAPR_TCE_PAGE_SHIFT);
+    tcep->tce = tce;
+
+    return H_SUCCESS;
+}
+
+void spapr_iommu_init(void)
+{
+    QLIST_INIT(&spapr_tce_tables);
+
+    /* hcall-tce */
+    spapr_register_hypercall(H_PUT_TCE, h_put_tce);
+}
+
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+                 DMAContext *dma)
+{
+    if (dma) {
+        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+        uint32_t dma_prop[] = {cpu_to_be32(tcet->liobn),
+                               0, 0,
+                               0, cpu_to_be32(tcet->window_size)};
+        int ret;
+
+        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = fdt_setprop(fdt, node_off, propname, dma_prop,
+                          sizeof(dma_prop));
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
diff --git a/hw/spapr_llan.c b/hw/spapr_llan.c
index cfc7778..736ec6d 100644
--- a/hw/spapr_llan.c
+++ b/hw/spapr_llan.c
@@ -71,7 +71,7 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RXQ_BD_OFF      0
 #define VLAN_FILTER_BD_OFF   8
 #define VLAN_RX_BDS_OFF      16
-#define VLAN_MAX_BUFS        ((SPAPR_VIO_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
+#define VLAN_MAX_BUFS        ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
 
 typedef struct VIOsPAPRVLANDevice {
     VIOsPAPRDevice sdev;
@@ -95,7 +95,7 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
 {
     VIOsPAPRDevice *sdev = DO_UPCAST(NICState, nc, nc)->opaque;
     VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
-    vlan_bd_t rxq_bd = ldq_tce(sdev, dev->buf_list + VLAN_RXQ_BD_OFF);
+    vlan_bd_t rxq_bd = vio_ldq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF);
     vlan_bd_t bd;
     int buf_ptr = dev->use_buf_ptr;
     uint64_t handle;
@@ -114,11 +114,11 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
 
     do {
         buf_ptr += 8;
-        if (buf_ptr >= SPAPR_VIO_TCE_PAGE_SIZE) {
+        if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
             buf_ptr = VLAN_RX_BDS_OFF;
         }
 
-        bd = ldq_tce(sdev, dev->buf_list + buf_ptr);
+        bd = vio_ldq(sdev, dev->buf_list + buf_ptr);
         dprintf("use_buf_ptr=%d bd=0x%016llx\n",
                 buf_ptr, (unsigned long long)bd);
     } while ((!(bd & VLAN_BD_VALID) || (VLAN_BD_LEN(bd) < (size + 8)))
@@ -132,12 +132,12 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
     /* Remove the buffer from the pool */
     dev->rx_bufs--;
     dev->use_buf_ptr = buf_ptr;
-    stq_tce(sdev, dev->buf_list + dev->use_buf_ptr, 0);
+    vio_stq(sdev, dev->buf_list + dev->use_buf_ptr, 0);
 
     dprintf("Found buffer: ptr=%d num=%d\n", dev->use_buf_ptr, dev->rx_bufs);
 
     /* Transfer the packet data */
-    if (spapr_tce_dma_write(sdev, VLAN_BD_ADDR(bd) + 8, buf, size) < 0) {
+    if (spapr_vio_dma_write(sdev, VLAN_BD_ADDR(bd) + 8, buf, size) < 0) {
         return -1;
     }
 
@@ -149,23 +149,23 @@ static ssize_t spapr_vlan_receive(VLANClientState *nc, const uint8_t *buf,
         control ^= VLAN_RXQC_TOGGLE;
     }
 
-    handle = ldq_tce(sdev, VLAN_BD_ADDR(bd));
-    stq_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 8, handle);
-    stw_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 4, size);
-    sth_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 2, 8);
-    stb_tce(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr, control);
+    handle = vio_ldq(sdev, VLAN_BD_ADDR(bd));
+    vio_stq(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 8, handle);
+    vio_stl(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 4, size);
+    vio_sth(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 2, 8);
+    vio_stb(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr, control);
 
     dprintf("wrote rxq entry (ptr=0x%llx): 0x%016llx 0x%016llx\n",
             (unsigned long long)dev->rxq_ptr,
-            (unsigned long long)ldq_tce(sdev, VLAN_BD_ADDR(rxq_bd) +
+            (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) +
                                         dev->rxq_ptr),
-            (unsigned long long)ldq_tce(sdev, VLAN_BD_ADDR(rxq_bd) +
+            (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) +
                                         dev->rxq_ptr + 8));
 
     dev->rxq_ptr += 16;
     if (dev->rxq_ptr >= VLAN_BD_LEN(rxq_bd)) {
         dev->rxq_ptr = 0;
-        stq_tce(sdev, dev->buf_list + VLAN_RXQ_BD_OFF, rxq_bd ^ VLAN_BD_TOGGLE);
+        vio_stq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF, rxq_bd ^ VLAN_BD_TOGGLE);
     }
 
     if (sdev->signal_state & 1) {
@@ -246,8 +246,10 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd,
         return -1;
     }
 
-    if (spapr_vio_check_tces(&dev->sdev, VLAN_BD_ADDR(bd),
-                             VLAN_BD_LEN(bd), SPAPR_TCE_RW) != 0) {
+    if (!spapr_vio_dma_valid(&dev->sdev, VLAN_BD_ADDR(bd),
+                             VLAN_BD_LEN(bd), DMA_DIRECTION_FROM_DEVICE)
+        || !spapr_vio_dma_valid(&dev->sdev, VLAN_BD_ADDR(bd),
+                                VLAN_BD_LEN(bd), DMA_DIRECTION_TO_DEVICE)) {
         return -1;
     }
 
@@ -277,15 +279,15 @@ static target_ulong h_register_logical_lan(CPUPPCState *env,
         return H_RESOURCE;
     }
 
-    if (check_bd(dev, VLAN_VALID_BD(buf_list, SPAPR_VIO_TCE_PAGE_SIZE),
-                 SPAPR_VIO_TCE_PAGE_SIZE) < 0) {
+    if (check_bd(dev, VLAN_VALID_BD(buf_list, SPAPR_TCE_PAGE_SIZE),
+                 SPAPR_TCE_PAGE_SIZE) < 0) {
         hcall_dprintf("Bad buf_list 0x" TARGET_FMT_lx " for "
                       "H_REGISTER_LOGICAL_LAN\n", buf_list);
         return H_PARAMETER;
     }
 
-    filter_list_bd = VLAN_VALID_BD(filter_list, SPAPR_VIO_TCE_PAGE_SIZE);
-    if (check_bd(dev, filter_list_bd, SPAPR_VIO_TCE_PAGE_SIZE) < 0) {
+    filter_list_bd = VLAN_VALID_BD(filter_list, SPAPR_TCE_PAGE_SIZE);
+    if (check_bd(dev, filter_list_bd, SPAPR_TCE_PAGE_SIZE) < 0) {
         hcall_dprintf("Bad filter_list 0x" TARGET_FMT_lx " for "
                       "H_REGISTER_LOGICAL_LAN\n", filter_list);
         return H_PARAMETER;
@@ -303,17 +305,17 @@ static target_ulong h_register_logical_lan(CPUPPCState *env,
     rec_queue &= ~VLAN_BD_TOGGLE;
 
     /* Initialize the buffer list */
-    stq_tce(sdev, buf_list, rec_queue);
-    stq_tce(sdev, buf_list + 8, filter_list_bd);
-    spapr_tce_dma_zero(sdev, buf_list + VLAN_RX_BDS_OFF,
-                       SPAPR_VIO_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF);
+    vio_stq(sdev, buf_list, rec_queue);
+    vio_stq(sdev, buf_list + 8, filter_list_bd);
+    spapr_vio_dma_zero(sdev, buf_list + VLAN_RX_BDS_OFF,
+                       SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF);
     dev->add_buf_ptr = VLAN_RX_BDS_OFF - 8;
     dev->use_buf_ptr = VLAN_RX_BDS_OFF - 8;
     dev->rx_bufs = 0;
     dev->rxq_ptr = 0;
 
     /* Initialize the receive queue */
-    spapr_tce_dma_zero(sdev, VLAN_BD_ADDR(rec_queue), VLAN_BD_LEN(rec_queue));
+    spapr_vio_dma_zero(sdev, VLAN_BD_ADDR(rec_queue), VLAN_BD_LEN(rec_queue));
 
     dev->isopen = 1;
     return H_SUCCESS;
@@ -374,14 +376,14 @@ static target_ulong h_add_logical_lan_buffer(CPUPPCState *env,
 
     do {
         dev->add_buf_ptr += 8;
-        if (dev->add_buf_ptr >= SPAPR_VIO_TCE_PAGE_SIZE) {
+        if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
             dev->add_buf_ptr = VLAN_RX_BDS_OFF;
         }
 
-        bd = ldq_tce(sdev, dev->buf_list + dev->add_buf_ptr);
+        bd = vio_ldq(sdev, dev->buf_list + dev->add_buf_ptr);
     } while (bd & VLAN_BD_VALID);
 
-    stq_tce(sdev, dev->buf_list + dev->add_buf_ptr, buf);
+    vio_stq(sdev, dev->buf_list + dev->add_buf_ptr, buf);
 
     dev->rx_bufs++;
 
@@ -447,7 +449,7 @@ static target_ulong h_send_logical_lan(CPUPPCState *env, sPAPREnvironment *spapr
     lbuf = alloca(total_len);
     p = lbuf;
     for (i = 0; i < nbufs; i++) {
-        ret = spapr_tce_dma_read(sdev, VLAN_BD_ADDR(bufs[i]),
+        ret = spapr_vio_dma_read(sdev, VLAN_BD_ADDR(bufs[i]),
                                  p, VLAN_BD_LEN(bufs[i]));
         if (ret < 0) {
             return ret;
@@ -492,6 +494,7 @@ static void spapr_vlan_class_init(ObjectClass *klass, void *data)
     k->dt_compatible = "IBM,l-lan";
     k->signal_mask = 0x1;
     dc->props = spapr_vlan_properties;
+    k->rtce_window_size = 0x10000000;
 }
 
 static TypeInfo spapr_vlan_info = {
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index dbf5a90..3158c88 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -39,7 +39,6 @@
 #endif /* CONFIG_FDT */
 
 /* #define DEBUG_SPAPR */
-/* #define DEBUG_TCE */
 
 #ifdef DEBUG_SPAPR
 #define dprintf(fmt, ...) \
@@ -141,26 +140,9 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
         }
     }
 
-    if (dev->rtce_window_size) {
-        uint32_t dma_prop[] = {cpu_to_be32(dev->reg),
-                               0, 0,
-                               0, cpu_to_be32(dev->rtce_window_size)};
-
-        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2);
-        if (ret < 0) {
-            return ret;
-        }
-
-        ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2);
-        if (ret < 0) {
-            return ret;
-        }
-
-        ret = fdt_setprop(fdt, node_off, "ibm,my-dma-window", dma_prop,
-                          sizeof(dma_prop));
-        if (ret < 0) {
-            return ret;
-        }
+    ret = spapr_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->dma);
+    if (ret < 0) {
+        return ret;
     }
 
     if (pc->devnode) {
@@ -175,234 +157,6 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
 #endif /* CONFIG_FDT */
 
 /*
- * RTCE handling
- */
-
-static void rtce_init(VIOsPAPRDevice *dev)
-{
-    size_t size = (dev->rtce_window_size >> SPAPR_VIO_TCE_PAGE_SHIFT)
-        * sizeof(VIOsPAPR_RTCE);
-
-    if (size) {
-        dev->rtce_table = kvmppc_create_spapr_tce(dev->reg,
-                                                  dev->rtce_window_size,
-                                                  &dev->kvmtce_fd);
-
-        if (!dev->rtce_table) {
-            dev->rtce_table = g_malloc0(size);
-        }
-    }
-}
-
-static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
-                              target_ulong opcode, target_ulong *args)
-{
-    target_ulong liobn = args[0];
-    target_ulong ioba = args[1];
-    target_ulong tce = args[2];
-    VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, liobn);
-    VIOsPAPR_RTCE *rtce;
-
-    if (!dev) {
-        hcall_dprintf("spapr_vio_put_tce on non-existent LIOBN "
-                      TARGET_FMT_lx "\n", liobn);
-        return H_PARAMETER;
-    }
-
-    ioba &= ~(SPAPR_VIO_TCE_PAGE_SIZE - 1);
-
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_vio_put_tce on %s  ioba 0x" TARGET_FMT_lx
-            "  TCE 0x" TARGET_FMT_lx "\n", dev->qdev.id, ioba, tce);
-#endif
-
-    if (ioba >= dev->rtce_window_size) {
-        hcall_dprintf("spapr_vio_put_tce on out-of-boards IOBA 0x"
-                      TARGET_FMT_lx "\n", ioba);
-        return H_PARAMETER;
-    }
-
-    rtce = dev->rtce_table + (ioba >> SPAPR_VIO_TCE_PAGE_SHIFT);
-    rtce->tce = tce;
-
-    return H_SUCCESS;
-}
-
-int spapr_vio_check_tces(VIOsPAPRDevice *dev, target_ulong ioba,
-                         target_ulong len, enum VIOsPAPR_TCEAccess access)
-{
-    int start, end, i;
-
-    start = ioba >> SPAPR_VIO_TCE_PAGE_SHIFT;
-    end = (ioba + len - 1) >> SPAPR_VIO_TCE_PAGE_SHIFT;
-
-    for (i = start; i <= end; i++) {
-        if ((dev->rtce_table[i].tce & access) != access) {
-#ifdef DEBUG_TCE
-            fprintf(stderr, "FAIL on %d\n", i);
-#endif
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-int spapr_tce_dma_write(VIOsPAPRDevice *dev, uint64_t taddr, const void *buf,
-                        uint32_t size)
-{
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_tce_dma_write taddr=0x%llx size=0x%x\n",
-            (unsigned long long)taddr, size);
-#endif
-
-    /* Check for bypass */
-    if (dev->flags & VIO_PAPR_FLAG_DMA_BYPASS) {
-        cpu_physical_memory_write(taddr, buf, size);
-        return 0;
-    }
-
-    while (size) {
-        uint64_t tce;
-        uint32_t lsize;
-        uint64_t txaddr;
-
-        /* Check if we are in bound */
-        if (taddr >= dev->rtce_window_size) {
-#ifdef DEBUG_TCE
-            fprintf(stderr, "spapr_tce_dma_write out of bounds\n");
-#endif
-            return H_DEST_PARM;
-        }
-        tce = dev->rtce_table[taddr >> SPAPR_VIO_TCE_PAGE_SHIFT].tce;
-
-        /* How much til end of page ? */
-        lsize = MIN(size, ((~taddr) & SPAPR_VIO_TCE_PAGE_MASK) + 1);
-
-        /* Check TCE */
-        if (!(tce & 2)) {
-            return H_DEST_PARM;
-        }
-
-        /* Translate */
-        txaddr = (tce & ~SPAPR_VIO_TCE_PAGE_MASK) |
-            (taddr & SPAPR_VIO_TCE_PAGE_MASK);
-
-#ifdef DEBUG_TCE
-        fprintf(stderr, " -> write to txaddr=0x%llx, size=0x%x\n",
-                (unsigned long long)txaddr, lsize);
-#endif
-
-        /* Do it */
-        cpu_physical_memory_write(txaddr, buf, lsize);
-        buf += lsize;
-        taddr += lsize;
-        size -= lsize;
-    }
-    return 0;
-}
-
-int spapr_tce_dma_zero(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t size)
-{
-    /* FIXME: allocating a temp buffer is nasty, but just stepping
-     * through writing zeroes is awkward.  This will do for now. */
-    uint8_t zeroes[size];
-
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_tce_dma_zero taddr=0x%llx size=0x%x\n",
-            (unsigned long long)taddr, size);
-#endif
-
-    memset(zeroes, 0, size);
-    return spapr_tce_dma_write(dev, taddr, zeroes, size);
-}
-
-void stb_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint8_t val)
-{
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-void sth_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint16_t val)
-{
-    val = tswap16(val);
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-
-void stw_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t val)
-{
-    val = tswap32(val);
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-void stq_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint64_t val)
-{
-    val = tswap64(val);
-    spapr_tce_dma_write(dev, taddr, &val, sizeof(val));
-}
-
-int spapr_tce_dma_read(VIOsPAPRDevice *dev, uint64_t taddr, void *buf,
-                       uint32_t size)
-{
-#ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_tce_dma_write taddr=0x%llx size=0x%x\n",
-            (unsigned long long)taddr, size);
-#endif
-
-    /* Check for bypass */
-    if (dev->flags & VIO_PAPR_FLAG_DMA_BYPASS) {
-        cpu_physical_memory_read(taddr, buf, size);
-        return 0;
-    }
-
-    while (size) {
-        uint64_t tce;
-        uint32_t lsize;
-        uint64_t txaddr;
-
-        /* Check if we are in bound */
-        if (taddr >= dev->rtce_window_size) {
-#ifdef DEBUG_TCE
-            fprintf(stderr, "spapr_tce_dma_read out of bounds\n");
-#endif
-            return H_DEST_PARM;
-        }
-        tce = dev->rtce_table[taddr >> SPAPR_VIO_TCE_PAGE_SHIFT].tce;
-
-        /* How much til end of page ? */
-        lsize = MIN(size, ((~taddr) & SPAPR_VIO_TCE_PAGE_MASK) + 1);
-
-        /* Check TCE */
-        if (!(tce & 1)) {
-            return H_DEST_PARM;
-        }
-
-        /* Translate */
-        txaddr = (tce & ~SPAPR_VIO_TCE_PAGE_MASK) |
-            (taddr & SPAPR_VIO_TCE_PAGE_MASK);
-
-#ifdef DEBUG_TCE
-        fprintf(stderr, " -> write to txaddr=0x%llx, size=0x%x\n",
-                (unsigned long long)txaddr, lsize);
-#endif
-        /* Do it */
-        cpu_physical_memory_read(txaddr, buf, lsize);
-        buf += lsize;
-        taddr += lsize;
-        size -= lsize;
-    }
-    return H_SUCCESS;
-}
-
-uint64_t ldq_tce(VIOsPAPRDevice *dev, uint64_t taddr)
-{
-    uint64_t val;
-
-    spapr_tce_dma_read(dev, taddr, &val, sizeof(val));
-    return tswap64(val);
-}
-
-/*
  * CRQ handling
  */
 static target_ulong h_reg_crq(CPUPPCState *env, sPAPREnvironment *spapr,
@@ -525,7 +279,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
     }
 
     /* Maybe do a fast path for KVM just writing to the pages */
-    rc = spapr_tce_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
+    rc = spapr_vio_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
     if (rc) {
         return rc;
     }
@@ -533,7 +287,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
         return 1;
     }
 
-    rc = spapr_tce_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
                              &crq[8], 8);
     if (rc) {
         return rc;
@@ -541,7 +295,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
     kvmppc_eieio();
 
-    rc = spapr_tce_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
     if (rc) {
         return rc;
     }
@@ -559,13 +313,13 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    dev->flags &= ~VIO_PAPR_FLAG_DMA_BYPASS;
+    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 
-    if (dev->rtce_table) {
-        size_t size = (dev->rtce_window_size >> SPAPR_VIO_TCE_PAGE_SHIFT)
-            * sizeof(VIOsPAPR_RTCE);
-        memset(dev->rtce_table, 0, size);
+    if (dev->dma) {
+        spapr_tce_free(dev->dma);
     }
+    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
 
     dev->crq.qladdr = 0;
     dev->crq.qsize = 0;
@@ -592,9 +346,13 @@ static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
     if (enable) {
-        dev->flags |= VIO_PAPR_FLAG_DMA_BYPASS;
+        spapr_tce_free(dev->dma);
+        dev->dma = NULL;
     } else {
-        dev->flags &= ~VIO_PAPR_FLAG_DMA_BYPASS;
+        VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+
+        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
     }
 
     rtas_st(rets, 0, 0);
@@ -653,6 +411,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 {
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
     char *id;
     int ret;
 
@@ -675,7 +434,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
         return -1;
     }
 
-    rtce_init(dev);
+    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
 
     return pc->init(dev);
 }
@@ -722,9 +481,6 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
     /* hcall-vio */
     spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
 
-    /* hcall-tce */
-    spapr_register_hypercall(H_PUT_TCE, h_put_tce);
-
     /* hcall-crq */
     spapr_register_hypercall(H_REG_CRQ, h_reg_crq);
     spapr_register_hypercall(H_FREE_CRQ, h_free_crq);
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index d8527be..61032ab 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -21,16 +21,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#define SPAPR_VIO_TCE_PAGE_SHIFT   12
-#define SPAPR_VIO_TCE_PAGE_SIZE    (1ULL << SPAPR_VIO_TCE_PAGE_SHIFT)
-#define SPAPR_VIO_TCE_PAGE_MASK    (SPAPR_VIO_TCE_PAGE_SIZE - 1)
-
-enum VIOsPAPR_TCEAccess {
-    SPAPR_TCE_FAULT = 0,
-    SPAPR_TCE_RO = 1,
-    SPAPR_TCE_WO = 2,
-    SPAPR_TCE_RW = 3,
-};
+#include "dma.h"
 
 #define SPAPR_VTY_BASE_ADDRESS     0x30000000
 
@@ -44,10 +35,6 @@ enum VIOsPAPR_TCEAccess {
 
 struct VIOsPAPRDevice;
 
-typedef struct VIOsPAPR_RTCE {
-    uint64_t tce;
-} VIOsPAPR_RTCE;
-
 typedef struct VIOsPAPR_CRQ {
     uint64_t qladdr;
     uint32_t qsize;
@@ -63,6 +50,7 @@ typedef struct VIOsPAPRDeviceClass {
 
     const char *dt_name, *dt_type, *dt_compatible;
     target_ulong signal_mask;
+    uint32_t rtce_window_size;
     int (*init)(VIOsPAPRDevice *dev);
     void (*hcalls)(VIOsPAPRBus *bus);
     int (*devnode)(VIOsPAPRDevice *dev, void *fdt, int node_off);
@@ -72,20 +60,15 @@ struct VIOsPAPRDevice {
     DeviceState qdev;
     uint32_t reg;
     uint32_t flags;
-#define VIO_PAPR_FLAG_DMA_BYPASS        0x1
     qemu_irq qirq;
     uint32_t vio_irq_num;
     target_ulong signal_state;
-    uint32_t rtce_window_size;
-    VIOsPAPR_RTCE *rtce_table;
-    int kvmtce_fd;
     VIOsPAPR_CRQ crq;
+    DMAContext *dma;
 };
 
 #define DEFINE_SPAPR_PROPERTIES(type, field, default_reg, default_dma_window) \
-        DEFINE_PROP_UINT32("reg", type, field.reg, default_reg), \
-        DEFINE_PROP_UINT32("dma-window", type, field.rtce_window_size, \
-                           default_dma_window)
+        DEFINE_PROP_UINT32("reg", type, field.reg, default_reg)
 
 struct VIOsPAPRBus {
     BusState bus;
@@ -102,20 +85,38 @@ extern int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus);
 
 extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode);
 
-int spapr_vio_check_tces(VIOsPAPRDevice *dev, target_ulong ioba,
-                         target_ulong len,
-                         enum VIOsPAPR_TCEAccess access);
-
-int spapr_tce_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
-                       void *buf, uint32_t size);
-int spapr_tce_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
-                        const void *buf, uint32_t size);
-int spapr_tce_dma_zero(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t size);
-void stb_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint8_t val);
-void sth_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint16_t val);
-void stw_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint32_t val);
-void stq_tce(VIOsPAPRDevice *dev, uint64_t taddr, uint64_t val);
-uint64_t ldq_tce(VIOsPAPRDevice *dev, uint64_t taddr);
+static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
+                                       uint32_t size, DMADirection dir)
+{
+    return dma_memory_valid(dev->dma, taddr, size, dir);
+}
+
+static inline int spapr_vio_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
+                                     void *buf, uint32_t size)
+{
+    return (dma_memory_read(dev->dma, taddr, buf, size) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+static inline int spapr_vio_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
+                                      const void *buf, uint32_t size)
+{
+    return (dma_memory_write(dev->dma, taddr, buf, size) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+static inline int spapr_vio_dma_zero(VIOsPAPRDevice *dev, uint64_t taddr,
+                                     uint32_t size)
+{
+    return (dma_memory_zero(dev->dma, taddr, size) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+#define vio_stb(_dev, _addr, _val) (stb_dma((_dev)->dma, (_addr), (_val)))
+#define vio_sth(_dev, _addr, _val) (stw_be_dma((_dev)->dma, (_addr), (_val)))
+#define vio_stl(_dev, _addr, _val) (stl_be_dma((_dev)->dma, (_addr), (_val)))
+#define vio_stq(_dev, _addr, _val) (stq_be_dma((_dev)->dma, (_addr), (_val)))
+#define vio_ldq(_dev, _addr) (ldq_be_dma((_dev)->dma, (_addr)))
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index 2167017..4a54211 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -169,7 +169,7 @@ static int vscsi_send_iu(VSCSIState *s, vscsi_req *req,
     long rc, rc1;
 
     /* First copy the SRP */
-    rc = spapr_tce_dma_write(&s->vdev, req->crq.s.IU_data_ptr,
+    rc = spapr_vio_dma_write(&s->vdev, req->crq.s.IU_data_ptr,
                              &req->iu, length);
     if (rc) {
         fprintf(stderr, "vscsi_send_iu: DMA write failure !\n");
@@ -285,9 +285,9 @@ static int vscsi_srp_direct_data(VSCSIState *s, vscsi_req *req,
     llen = MIN(len, md->len);
     if (llen) {
         if (req->writing) { /* writing = to device = reading from memory */
-            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_read(&s->vdev, md->va, buf, llen);
         } else {
-            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_write(&s->vdev, md->va, buf, llen);
         }
     }
     md->len -= llen;
@@ -333,10 +333,11 @@ static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req,
             md = req->cur_desc = &req->ext_desc;
             dprintf("VSCSI:   Reading desc from 0x%llx\n",
                     (unsigned long long)td->va);
-            rc = spapr_tce_dma_read(&s->vdev, td->va, md,
+            rc = spapr_vio_dma_read(&s->vdev, td->va, md,
                                     sizeof(struct srp_direct_buf));
             if (rc) {
-                dprintf("VSCSI: tce_dma_read -> %d reading ext_desc\n", rc);
+                dprintf("VSCSI: spapr_vio_dma_read -> %d reading ext_desc\n",
+                        rc);
                 break;
             }
             vscsi_swap_desc(md);
@@ -349,12 +350,12 @@ static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req,
         /* Perform transfer */
         llen = MIN(len, md->len);
         if (req->writing) { /* writing = to device = reading from memory */
-            rc = spapr_tce_dma_read(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_read(&s->vdev, md->va, buf, llen);
         } else {
-            rc = spapr_tce_dma_write(&s->vdev, md->va, buf, llen);
+            rc = spapr_vio_dma_write(&s->vdev, md->va, buf, llen);
         }
         if (rc) {
-            dprintf("VSCSI: tce_dma_r/w(%d) -> %d\n", req->writing, rc);
+            dprintf("VSCSI: spapr_vio_dma_r/w(%d) -> %d\n", req->writing, rc);
             break;
         }
         dprintf("VSCSI:     data: %02x %02x %02x %02x...\n",
@@ -732,7 +733,7 @@ static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
     sinfo = &req->iu.mad.adapter_info;
 
 #if 0 /* What for ? */
-    rc = spapr_tce_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer),
+    rc = spapr_vio_dma_read(&s->vdev, be64_to_cpu(sinfo->buffer),
                             &info, be16_to_cpu(sinfo->common.length));
     if (rc) {
         fprintf(stderr, "vscsi_send_adapter_info: DMA read failure !\n");
@@ -746,7 +747,7 @@ static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
     info.os_type = cpu_to_be32(2);
     info.port_max_txu[0] = cpu_to_be32(VSCSI_MAX_SECTORS << 9);
 
-    rc = spapr_tce_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer),
+    rc = spapr_vio_dma_write(&s->vdev, be64_to_cpu(sinfo->buffer),
                              &info, be16_to_cpu(sinfo->common.length));
     if (rc)  {
         fprintf(stderr, "vscsi_send_adapter_info: DMA write failure !\n");
@@ -808,7 +809,7 @@ static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq)
     }
 
     /* XXX Handle failure differently ? */
-    if (spapr_tce_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu,
+    if (spapr_vio_dma_read(&s->vdev, crq->s.IU_data_ptr, &req->iu,
                            crq->s.IU_length)) {
         fprintf(stderr, "vscsi_got_payload: DMA read failure !\n");
         g_free(req);
@@ -964,6 +965,7 @@ static void spapr_vscsi_class_init(ObjectClass *klass, void *data)
     k->dt_compatible = "IBM,v-scsi";
     k->signal_mask = 0x00000001;
     dc->props = spapr_vscsi_properties;
+    k->rtce_window_size = 0x10000000;
 }
 
 static TypeInfo spapr_vscsi_info = {
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 724f4c7..141edd9 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -858,7 +858,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
         return NULL;
     }
 
-    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
+    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
     /* FIXME: round this up to page size */
 
     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
@@ -881,7 +881,7 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
         return -1;
     }
 
-    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE)*sizeof(VIOsPAPR_RTCE);
+    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
     if ((munmap(table, len) < 0) ||
         (close(fd) < 0)) {
         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 12/13] iommu: Allow PCI to use IOMMU infrastructure
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (10 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 11/13] pseries: Convert sPAPR TCEs to use generic IOMMU infrastructure David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 13/13] pseries: Implement IOMMU and DMA for PAPR PCI devices David Gibson
  2012-03-22 13:50 ` [Qemu-devel] [0/13] RFC: Guest visible IOMMU Wei Wang
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduard - Gabriel Munteanu, Richard Henderson, Michael S. Tsirkin,
	David Gibson

This patch adds some hooks to let PCI devices and busses use the new IOMMU
infrastructure.  When IOMMU support is enabled, each PCI device now
contains a DMAContext * which is used by the pci_dma_*() wrapper functions.

By default, the contexts are initialized to NULL, assuming no IOMMU.
However the platform or host bridge code which sets up the PCI bus can use
pci_setup_iommu() to set a function which will determine the correct
DMAContext for a given PCI device.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 hw/pci.c           |   13 +++++++++++++
 hw/pci.h           |   13 ++++++++++++-
 hw/pci_internals.h |    4 ++++
 3 files changed, 29 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index ed8ec99..eb1c456 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -761,6 +761,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
         return NULL;
     }
     pci_dev->bus = bus;
+#ifdef CONFIG_IOMMU
+    if (bus->dma_context_fn) {
+        pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn);
+    }
+#endif
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
@@ -2004,6 +2009,14 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
     k->bus_info = &pci_bus_info;
 }
 
+#ifdef CONFIG_IOMMU
+void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque)
+{
+    bus->dma_context_fn = fn;
+    bus->dma_context_opaque = opaque;
+}
+#endif
+
 static TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/hw/pci.h b/hw/pci.h
index 41dcd05..1273dc3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -170,6 +170,7 @@ typedef struct PCIDeviceClass {
 
 struct PCIDevice {
     DeviceState qdev;
+
     /* PCI config space */
     uint8_t *config;
 
@@ -191,6 +192,9 @@ struct PCIDevice {
     uint32_t devfn;
     char name[64];
     PCIIORegion io_regions[PCI_NUM_REGIONS];
+#ifdef CONFIG_IOMMU
+    DMAContext *dma;
+#endif
 
     /* do not access the following fields */
     PCIConfigReadFunc *config_read;
@@ -311,6 +315,10 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
+typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int);
+
+void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque);
+
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
 {
@@ -547,8 +555,11 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
 /* DMA access functions */
 static inline DMAContext *pci_dma_context(PCIDevice *dev)
 {
-    /* Stub for when we have no PCI iommu support */
+#ifdef CONFIG_IOMMU
+    return dev->dma;
+#else
     return NULL;
+#endif
 }
 
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index 96690b7..b6b7a0e 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -16,6 +16,10 @@ extern struct BusInfo pci_bus_info;
 
 struct PCIBus {
     BusState qbus;
+#ifdef CONFIG_IOMMU
+    PCIDMAContextFunc dma_context_fn;
+    void *dma_context_opaque;
+#endif
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH 13/13] pseries: Implement IOMMU and DMA for PAPR PCI devices
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (11 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 12/13] iommu: Allow PCI to use " David Gibson
@ 2012-03-22  2:14 ` David Gibson
  2012-03-22 13:50 ` [Qemu-devel] [0/13] RFC: Guest visible IOMMU Wei Wang
  13 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-22  2:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alex Graf, David Gibson

Currently the pseries machine emulation does not support DMA for emulated
PCI devices, because the PAPR spec always requires a (guest visible,
paravirtualized) IOMMU which was not implemented.  Now that we have
infrastructure for IOMMU emulation, we can correct this and allow PCI DMA
for pseries.

With the existing PAPR IOMMU code used for VIO devices, this is almost
trivial. We use a single DMAContext for each (virtual) PCI host bridge,
which is the usual configuration on real PAPR machines (which often have
_many_ PCI host bridges).

Cc: Alex Graf <agraf@suse.de>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/spapr.h     |    1 +
 hw/spapr_pci.c |   15 +++++++++++++++
 hw/spapr_pci.h |    1 +
 3 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/hw/spapr.h b/hw/spapr.h
index d11f618..291546f 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -328,6 +328,7 @@ typedef struct sPAPRTCE {
 } sPAPRTCE;
 
 #define SPAPR_VIO_BASE_LIOBN    0x00000000
+#define SPAPR_PCI_BASE_LIOBN    0x80000000
 
 void spapr_iommu_init(void);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 77f3e52..76e74dc 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -201,12 +201,21 @@ static const MemoryRegionOps spapr_io_ops = {
 /*
  * PHB PCI device
  */
+static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
+                                            int devfn)
+{
+    sPAPRPHBState *phb = opaque;
+
+    return phb->dma;
+}
+
 static int spapr_phb_init(SysBusDevice *s)
 {
     sPAPRPHBState *phb = FROM_SYSBUS(sPAPRPHBState, s);
     char *namebuf;
     int i;
     PCIBus *bus;
+    uint32_t liobn;
 
     phb->dtbusname = g_strdup_printf("pci@%" PRIx64, phb->buid);
     namebuf = alloca(strlen(phb->dtbusname) + 32);
@@ -247,6 +256,10 @@ static int spapr_phb_init(SysBusDevice *s)
                            PCI_DEVFN(0, 0), SPAPR_PCI_NUM_LSI);
     phb->host_state.bus = bus;
 
+    liobn = SPAPR_PCI_BASE_LIOBN | (pci_find_domain(bus) << 16);
+    phb->dma = spapr_tce_new_dma_context(liobn, 0x40000000);
+    pci_setup_iommu(bus, spapr_pci_dma_context_fn, phb);
+
     QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
 
     /* Initialize the LSI table */
@@ -404,6 +417,8 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                      7 * sizeof(interrupt_map[0])));
 
+    spapr_dma_dt(fdt, bus_off, "ibm,dma-window", phb->dma);
+
     return 0;
 }
 
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 039f85b..153a968 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -39,6 +39,7 @@ typedef struct sPAPRPHBState {
     MemoryRegion memspace, iospace;
     target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size;
     MemoryRegion memwindow, iowindow;
+    DMAContext *dma;
 
     struct {
         uint32_t dt_irq;
-- 
1.7.9.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions David Gibson
@ 2012-03-22 10:00   ` Gerd Hoffmann
  2012-03-23  2:05     ` David Gibson
  0 siblings, 1 reply; 19+ messages in thread
From: Gerd Hoffmann @ 2012-03-22 10:00 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-devel

  Hi,

> -#include "hw/qdev-addr.h"
> +//#include "hw/qdev-addr.h"

Minor nit: Just zap it instead of leaving it in commented out.

Looks fine otherwise.

Acked-by: Gerd Hoffmann <kraxel@redhat.com>

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 06/13] usb-ohci: Use universal DMA helper functions
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 06/13] usb-ohci: Use " David Gibson
@ 2012-03-22 10:01   ` Gerd Hoffmann
  0 siblings, 0 replies; 19+ messages in thread
From: Gerd Hoffmann @ 2012-03-22 10:01 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-devel, Michael S. Tsirkin

On 03/22/12 03:14, David Gibson wrote:
> This patch converts it to use the new universal DMA helper functions.
> In the PCI case, it obtains its DMAContext from pci_dma_context(), in
> the SysBus case, it uses NULL - i.e. assumes for now that there will
> be no IOMMU translation for a SysBus OHCI.

Acked-by: Gerd Hoffmann <kraxel@redhat.com>

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [0/13] RFC: Guest visible IOMMU
  2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
                   ` (12 preceding siblings ...)
  2012-03-22  2:14 ` [Qemu-devel] [PATCH 13/13] pseries: Implement IOMMU and DMA for PAPR PCI devices David Gibson
@ 2012-03-22 13:50 ` Wei Wang
  2012-03-23  2:02   ` David Gibson
  13 siblings, 1 reply; 19+ messages in thread
From: Wei Wang @ 2012-03-22 13:50 UTC (permalink / raw)
  To: David Gibson; +Cc: Joerg Roedel, mst, qemu-devel, eduard.munteanu

On 03/22/2012 03:14 AM, David Gibson wrote:
> Here's another version of my series implementing the nevessary
> framework for emulation of guest-visible IOMMUs.  I've incorporated
> the feedback from the last posting.  I'm fairly confident that
> preliminary patches 1-3 are ready to go in, the rest might still need
> more consideration.
>
> In particular, if anyone has thoughts on how to handle the case where
> an IOMMU mapping is invalidated by the guest where a qemu-side device
> currently has that piece of IOVA-space dma_memory_map()ed, I'd be
> interested to hear it.
>
>
>

Hi David,
I am Xen developer. Currently I am working on a hardware based iommu 
emulation for amd iommu v2 systems (we need this to enable iommuv2 
drivers in guest OS and therefore to passthroug gpgpu to hvm guest). In 
this work, only a virtual iommu device has to be registered on qemu bus 
and dma translations are still performed by iommu hardware. So I wonder 
if it is possible that I could integrate my work with yours. I had a 
proof-of-concept patch for qemu:

http://lists.xen.org/archives/html/xen-devel/2011-12/msg01269.html

my last post was:
http://lists.xen.org/archives/html/xen-devel/2012-03/msg00591.html

Thanks and looking forward to your thoughts
Wei

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [0/13] RFC: Guest visible IOMMU
  2012-03-22 13:50 ` [Qemu-devel] [0/13] RFC: Guest visible IOMMU Wei Wang
@ 2012-03-23  2:02   ` David Gibson
  0 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-23  2:02 UTC (permalink / raw)
  To: Wei Wang; +Cc: Joerg Roedel, mst, qemu-devel, eduard.munteanu

On Thu, Mar 22, 2012 at 02:50:25PM +0100, Wei Wang wrote:
> On 03/22/2012 03:14 AM, David Gibson wrote:
> >Here's another version of my series implementing the nevessary
> >framework for emulation of guest-visible IOMMUs.  I've incorporated
> >the feedback from the last posting.  I'm fairly confident that
> >preliminary patches 1-3 are ready to go in, the rest might still need
> >more consideration.
> >
> >In particular, if anyone has thoughts on how to handle the case where
> >an IOMMU mapping is invalidated by the guest where a qemu-side device
> >currently has that piece of IOVA-space dma_memory_map()ed, I'd be
> >interested to hear it.
> >
> >
> >
> 
> Hi David,
> I am Xen developer. Currently I am working on a hardware based iommu
> emulation for amd iommu v2 systems (we need this to enable iommuv2
> drivers in guest OS and therefore to passthroug gpgpu to hvm guest).
> In this work, only a virtual iommu device has to be registered on
> qemu bus and dma translations are still performed by iommu hardware.
> So I wonder if it is possible that I could integrate my work with
> yours. I had a proof-of-concept patch for qemu:
> 
> http://lists.xen.org/archives/html/xen-devel/2011-12/msg01269.html
> 
> my last post was:
> http://lists.xen.org/archives/html/xen-devel/2012-03/msg00591.html

I'd be very glad to see AMD IOMMU support integrated with my
infrastructure.  Eduard - Gabriel's original patch series included an
AMD IOMMU emulation, but since I'm not really familiar with that
IOMMU, and don't have the hardware to test it, I didn't feel confident
to attempt porting it to my new infrastructure.

I notice that you seem to be aiming mostly at pass-through.
Pass-through has some complications of its own - this patch series was
aimed firstly at IOMMU emulation, although I'm also concerned with
pass-through (we're currently struggling with how best to handle
pass-through for pSeries machines).

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions
  2012-03-22 10:00   ` Gerd Hoffmann
@ 2012-03-23  2:05     ` David Gibson
  0 siblings, 0 replies; 19+ messages in thread
From: David Gibson @ 2012-03-23  2:05 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: qemu-devel

On Thu, Mar 22, 2012 at 11:00:57AM +0100, Gerd Hoffmann wrote:
>   Hi,
> 
> > -#include "hw/qdev-addr.h"
> > +//#include "hw/qdev-addr.h"
> 
> Minor nit: Just zap it instead of leaving it in commented out.

Oops, fixed.

> Looks fine otherwise.
> 
> Acked-by: Gerd Hoffmann <kraxel@redhat.com>
> 
> cheers,
>   Gerd
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-03-23  2:20 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-22  2:14 [Qemu-devel] [0/13] RFC: Guest visible IOMMU David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 01/13] Use DMADirection type for dma_bdrv_io David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 02/13] Better support for dma_addr_t variables David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 03/13] usb-xhci: Use PCI DMA helper functions David Gibson
2012-03-22 10:00   ` Gerd Hoffmann
2012-03-23  2:05     ` David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 04/13] Implement cpu_physical_memory_zero() David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 05/13] iommu: Add universal DMA helper functions David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 06/13] usb-ohci: Use " David Gibson
2012-03-22 10:01   ` Gerd Hoffmann
2012-03-22  2:14 ` [Qemu-devel] [PATCH 07/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 08/13] ide/ahci: Use universal DMA helper functions David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 09/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 11/13] pseries: Convert sPAPR TCEs to use generic IOMMU infrastructure David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 12/13] iommu: Allow PCI to use " David Gibson
2012-03-22  2:14 ` [Qemu-devel] [PATCH 13/13] pseries: Implement IOMMU and DMA for PAPR PCI devices David Gibson
2012-03-22 13:50 ` [Qemu-devel] [0/13] RFC: Guest visible IOMMU Wei Wang
2012-03-23  2:02   ` David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.