All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: "Aviv B.D" <bd.aviv@gmail.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>,
	qemu-devel@nongnu.org, Peter Xu <peterx@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>
Subject: Re: [Qemu-devel] [PATCH v3 3/3] IOMMU: Integrate between VFIO and vIOMMU to support device assignment
Date: Thu, 26 May 2016 14:58:44 -0600	[thread overview]
Message-ID: <20160526145844.552b21fb@t450s.home> (raw)
In-Reply-To: <20160523115342.636a5164@ul30vt.home>

On Mon, 23 May 2016 11:53:42 -0600
Alex Williamson <alex.williamson@redhat.com> wrote:

> On Sat, 21 May 2016 19:19:50 +0300
> "Aviv B.D" <bd.aviv@gmail.com> wrote:
> 
> > From: "Aviv Ben-David" <bd.aviv@gmail.com>
> >   
> 
> Some commentary about the changes necessary to achieve $SUBJECT would
> be nice here.
> 
> > Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
> > ---
> >  hw/i386/intel_iommu.c          | 69 ++++++++++++++++++++++++++++++++++++++++--
> >  hw/i386/intel_iommu_internal.h |  2 ++
> >  hw/vfio/common.c               | 11 +++++--
> >  include/hw/i386/intel_iommu.h  |  4 +++
> >  include/hw/vfio/vfio-common.h  |  1 +
> >  5 files changed, 81 insertions(+), 6 deletions(-)
> > 
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 410f810..128ec7c 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -43,6 +43,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
> >  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
> >  #endif
> >  
> > +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> > +                                    uint8_t devfn, VTDContextEntry *ce);
> > +
> >  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
> >                              uint64_t wmask, uint64_t w1cmask)
> >  {
> > @@ -126,6 +129,22 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
> >      return new_val;
> >  }
> >  
> > +static int vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn, uint16_t * domain_id)
> > +{
> > +    VTDContextEntry ce;
> > +    int ret_fr;
> > +
> > +    assert(domain_id);
> > +
> > +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> > +    if (ret_fr){
> > +        return -1;
> > +    }
> > +
> > +    *domain_id =  VTD_CONTEXT_ENTRY_DID(ce.hi);
> > +    return 0;
> > +}
> > +
> >  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
> >                                          uint64_t clear, uint64_t mask)
> >  {
> > @@ -724,9 +743,6 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> >      }
> >  
> >      if (!vtd_context_entry_present(ce)) {
> > -        VTD_DPRINTF(GENERAL,
> > -                    "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> > -                    "is not present", devfn, bus_num);
> >          return -VTD_FR_CONTEXT_ENTRY_P;
> >      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
> >                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> > @@ -1033,18 +1049,58 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
> >                                  &domain_id);
> >  }
> >  
> > +static void vtd_iotlb_page_invalidate_vfio(IntelIOMMUState *s, uint16_t domain_id,
> > +                                      hwaddr addr, uint8_t am)
> > +{
> > +    VFIOGuestIOMMU * giommu;
> > +  
> 
> VT-d parsing VFIO private data structures, nope this is not a good idea.
> 
> > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);  
> 
> VT-d needs to keep track of its own address spaces and call the iommu
> notifier, it should have no visibility whatsoever that there are vfio
> devices attached.
> 
> > +        uint16_t vfio_domain_id; 
> > +        int ret = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &vfio_domain_id);
> > +        int i=0;
> > +        if (!ret && domain_id == vfio_domain_id){
> > +            IOMMUTLBEntry entry; 
> > +            
> > +            /* do vfio unmap */
> > +            VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> > +            entry.target_as = NULL;
> > +            entry.iova = addr & VTD_PAGE_MASK_4K;
> > +            entry.translated_addr = 0;
> > +            entry.addr_mask = ~VTD_PAGE_MASK(VTD_PAGE_SHIFT_4K + am);
> > +            entry.perm = IOMMU_NONE;
> > +            memory_region_notify_iommu(giommu->iommu, entry);
> > +       
> > +            /* do vfio map */
> > +            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
> > +            /* call to vtd_iommu_translate */
> > +            for (i = 0; i < (1 << am); i++, addr+=(1 << VTD_PAGE_SHIFT_4K)){
> > +                IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, IOMMU_NO_FAIL); 
> > +                if (entry.perm != IOMMU_NONE){
> > +                    memory_region_notify_iommu(giommu->iommu, entry);
> > +                }
> > +            }
> > +        }
> > +    }
> > +}
> > +


I think I see what you're trying to do here, find the VTDAddressSpaces
that vfio knows about and determine which are affected by an
invalidation, but I think it really just represents a flaw in the VT-d
code not really matching real hardware.  As I understand the real
hardware, per-device translations use the bus:devfn to get the context
entry.  The context entry contains both the domain_id and a pointer to
the page table.  Note that VT-d requires that domain_id and page table
pointers are consistent for all entries, there cannot be two separate
context entries with the same domain_id that point to different page
tables. So really, VTDAddressSpace should be based at the domain, not
the context entry.  Multiple context entries can point to the same
domain, and thus the same VTDAddressSpace.  Doing so would make the
invalidation trivial: simply look up the VTDAddressSpace based on the
domain_id, get the MemoryRegion, and fire off
memory_region_notify_iommu()s, which then get {un}mapped through vfio
without any direct interaction.  Unfortunately I think that means that
intel-iommu needs some data structure surgery.

With the current code, I think the best we could do would be to look
through every context for matching domain_ids.  For each one, use that
bus:devfn to get the VTDAddressSpace and thus the MemoryRegion and
call a notify for each.  That's not only an inefficient lookup, but
requires a notify per matching bus:devfn since each uses a separate
VTDAddressSpace when we should really only need a notify per domain_id. 

Also, I haven't figured it out yet, but I think we're also going to
need to actually populate the MemoryRegion rather than just use it to
send notifies, otherwise replay won't work, which means that a device
added to a domain with existing mappings wouldn't work.  Thanks,

Alex

  reply	other threads:[~2016-05-26 20:58 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-21 16:19 [Qemu-devel] [PATCH v3 0/3] IOMMU: Add Support to VFIO devices with vIOMMU present Aviv B.D
2016-05-21 16:19 ` [Qemu-devel] [PATCH v3 1/3] IOMMU: add VTD_CAP_CM to vIOMMU capability exposed to guest Aviv B.D
2016-05-21 16:42   ` Jan Kiszka
2016-06-02  8:44     ` Peter Xu
2016-06-02 13:00       ` Alex Williamson
2016-06-02 13:14         ` Jan Kiszka
2016-06-02 13:17           ` Jan Kiszka
2016-06-02 16:15           ` Michael S. Tsirkin
2016-06-06  5:04           ` Peter Xu
2016-06-06 13:11             ` Alex Williamson
2016-06-06 13:43               ` Peter Xu
2016-06-06 17:02                 ` Alex Williamson
2016-06-07  3:20                   ` Peter Xu
2016-06-07  3:58                     ` Alex Williamson
2016-06-07  5:00                       ` Peter Xu
2016-06-07  5:21                       ` Huang, Kai
2016-06-07 18:46                         ` Alex Williamson
2016-06-07 22:39                           ` Huang, Kai
2016-05-24  8:14   ` Jason Wang
2016-05-24  9:25     ` Jan Kiszka
2016-05-28 16:12       ` Aviv B.D.
2016-05-28 16:34         ` Kiszka, Jan
2016-05-21 16:19 ` [Qemu-devel] [PATCH v3 2/3] IOMMU: change iommu_op->translate's is_write to flags, add support to NO_FAIL flag mode Aviv B.D
2016-06-06  5:04   ` Peter Xu
2016-05-21 16:19 ` [Qemu-devel] [PATCH v3 3/3] IOMMU: Integrate between VFIO and vIOMMU to support device assignment Aviv B.D
2016-05-23 17:53   ` Alex Williamson
2016-05-26 20:58     ` Alex Williamson [this message]
2016-05-28 10:52       ` Aviv B.D.
2016-05-28 16:02         ` Alex Williamson
2016-05-28 16:10           ` Aviv B.D.
2016-05-28 17:39             ` Alex Williamson
2016-05-28 18:14               ` Aviv B.D.
2016-05-28 19:48                 ` Alex Williamson
2016-06-02 13:09                   ` Aviv B.D.
2016-06-02 13:34                     ` Alex Williamson
2016-06-06  8:09                       ` Peter Xu
2016-06-06 18:21                         ` Alex Williamson
2016-06-07 13:20                           ` Peter Xu
2016-06-06  7:38     ` Peter Xu
2016-06-06 17:30       ` Alex Williamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160526145844.552b21fb@t450s.home \
    --to=alex.williamson@redhat.com \
    --cc=bd.aviv@gmail.com \
    --cc=jan.kiszka@siemens.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.