All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pci: Add support for multiple DMA aliases
@ 2016-01-18 11:59 Jacek Lawrynowicz
  2016-01-18 16:07 ` Jacek Lawrynowicz
  0 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2016-01-18 11:59 UTC (permalink / raw)
  To: linux-pci; +Cc: bhelgaas, dwmw2, jroedel, jacek.lawrynowicz

This patch solves IOMMU support issues with PCIe non-transparent bridges
that use Requester ID look-up tables (LUT), e.g. PEX8733. Before exiting
the bridge, packet's RID is rewritten according to LUT programmed by
a driver. Modified packets are then passed to a destination bus and
processed upstream. The problem is that such packets seem to come from
non-existent nodes that are hidden behind NTB and are not discoverable
by a destination node, so IOMMU discards them. Adding DMA alias for a
given LUT entry allows IOMMU to create a proper mapping that enables
inter-node communication.

The current DMA alias implementation supports only a single alias, so it's
not possible to connect more than two nodes when IOMMU is enabled. This
implementation enables all possible aliases on a given bus (256) that
are stored in a bitset. Alias devfn is directly translated to a bit
number. The bitset is not allocated for devices that have no need for
DMA aliases.

More details can be found in the following article:
http://www.plxtech.com/files/pdf/technical/expresslane/RTC_Enabling%20MulitHostSystemDesigns.pdf

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@intel.com>
---
 drivers/iommu/iommu.c |  8 ++++----
 drivers/pci/pci.c     | 19 +++++++++++++++++++
 drivers/pci/probe.c   |  1 +
 drivers/pci/quirks.c  | 15 ++++++---------
 drivers/pci/search.c  | 14 +++++++++-----
 include/linux/pci.h   | 15 ++++++++-------
 6 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index abae363..98ae7ff 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -686,10 +686,10 @@ static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
 			continue;
 
 		/* We alias them or they alias us */
-		if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
-		     pdev->dma_alias_devfn == tmp->devfn) ||
-		    ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
-		     tmp->dma_alias_devfn == pdev->devfn)) {
+		if ((pdev->dma_alias_mask &&
+		     test_bit(tmp->devfn, pdev->dma_alias_mask)) ||
+		    ((tmp->dma_alias_mask &&
+		     test_bit(pdev->devfn, tmp->dma_alias_mask)))) {
 
 			group = get_pci_alias_group(tmp, devfns);
 			if (group) {
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 49e3715..5b27d65 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4568,6 +4568,25 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode,
 	return 0;
 }
 
+/**
+ * pci_add_dma_alias - Add a devfn DMA alias for the given device
+ * @dev: the PCI device for which the alias is added
+ * @devfn: alias slot and function, used as the bit number in dma_alias_mask
+ *
+ * Allocates dma_alias_mask on first use; warns only if that allocation fails.
+ */
+void pci_add_dma_alias(struct pci_dev *dev, u8 devfn)
+{
+	if (!dev->dma_alias_mask)
+		dev->dma_alias_mask = kcalloc(BITS_TO_LONGS(U8_MAX),
+					      sizeof(long), GFP_KERNEL);
+	if (dev->dma_alias_mask)
+		set_bit(devfn, dev->dma_alias_mask);
+	else
+		dev_warn(&dev->dev, "Unable to allocate DMA alias mask.\n");
+}
+EXPORT_SYMBOL_GPL(pci_add_dma_alias);
+
 bool pci_device_is_present(struct pci_dev *pdev)
 {
 	u32 v;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 32b9f1b..5da4dd3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1501,6 +1501,7 @@ static void pci_release_dev(struct device *dev)
 	pcibios_release_device(pci_dev);
 	pci_bus_put(pci_dev->bus);
 	kfree(pci_dev->driver_override);
+	kfree(pci_dev->dma_alias_mask);
 	kfree(pci_dev);
 }
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 83e93d7..3ed1f9a 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3579,8 +3579,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, int probe)
 static void quirk_dma_func0_alias(struct pci_dev *dev)
 {
 	if (PCI_FUNC(dev->devfn) != 0) {
-		dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
-		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
 	}
 }
 
@@ -3595,8 +3594,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias);
 static void quirk_dma_func1_alias(struct pci_dev *dev)
 {
 	if (PCI_FUNC(dev->devfn) != 1) {
-		dev->dma_alias_devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 1);
-		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+		pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 1));
 	}
 }
 
@@ -3660,11 +3658,10 @@ static void quirk_fixed_dma_alias(struct pci_dev *dev)
 
 	id = pci_match_id(fixed_dma_alias_tbl, dev);
 	if (id) {
-		dev->dma_alias_devfn = id->driver_data;
-		dev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
-		dev_info(&dev->dev, "Enabling fixed DMA alias to %02x.%d\n",
-			 PCI_SLOT(dev->dma_alias_devfn),
-			 PCI_FUNC(dev->dma_alias_devfn));
+		pci_add_dma_alias(dev, id->driver_data);
+		dev_info(&dev->dev, "Enabling fixed DMA alias to %02lx.%ld\n",
+			 PCI_SLOT(id->driver_data),
+			 PCI_FUNC(id->driver_data));
 	}
 }
 
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index a20ce7d..33e0f03 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -40,11 +40,15 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 	 * If the device is broken and uses an alias requester ID for
 	 * DMA, iterate over that too.
 	 */
-	if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) {
-		ret = fn(pdev, PCI_DEVID(pdev->bus->number,
-					 pdev->dma_alias_devfn), data);
-		if (ret)
-			return ret;
+	if (unlikely(pdev->dma_alias_mask)) {
+		u8 devfn;
+
+		for_each_set_bit(devfn, pdev->dma_alias_mask, U8_MAX) {
+			ret = fn(pdev, PCI_DEVID(pdev->bus->number, devfn),
+				 data);
+			if (ret)
+				return ret;
+		}
 	}
 
 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f9f79ad..6200175 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -172,16 +172,14 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
 	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
-	/* Flag to indicate the device uses dma_alias_devfn */
-	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
 	/* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */
-	PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5),
+	PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 4),
 	/* Do not use bus resets for device */
-	PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6),
+	PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 5),
 	/* Do not use PM reset even if device advertises NoSoftRst- */
-	PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
+	PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 6),
 	/* Get VPD from function 0 VPD */
-	PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
+	PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 7),
 };
 
 enum pci_irq_reroute_variant {
@@ -279,7 +277,7 @@ struct pci_dev {
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;		/* which interrupt pin this device uses */
 	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
-	u8		dma_alias_devfn;/* devfn of DMA alias, if any */
+	unsigned long	*dma_alias_mask;/* mask of enabled devfn aliases */
 
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
@@ -1229,6 +1227,9 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
 
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, u32 flags);
+
+void pci_add_dma_alias(struct pci_dev *dev, u8 devfn);
+
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
 #include <linux/pci-dma.h>
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread
* Re: [PATCH v4 3/6] PCI: Add support for multiple DMA aliases
@ 2016-02-29 22:44 Bjorn Helgaas
  2016-03-03 14:22   ` Jacek Lawrynowicz
  0 siblings, 1 reply; 16+ messages in thread
From: Bjorn Helgaas @ 2016-02-29 22:44 UTC (permalink / raw)
  To: Lawrynowicz, Jacek
  Cc: Bjorn Helgaas, linux-pci, Alex Williamson, Joerg Roedel,
	David Woodhouse, iommu

On Thu, Feb 25, 2016 at 03:41:51PM +0000, Lawrynowicz, Jacek wrote:
> > -----Original Message-----
> > From: Bjorn Helgaas [mailto:helgaas@kernel.org]
> > Sent: Thursday, February 25, 2016 3:39 PM
> > To: Bjorn Helgaas <bhelgaas@google.com>
> > Cc: Lawrynowicz, Jacek <jacek.lawrynowicz@intel.com>; linux-
> > pci@vger.kernel.org; Alex Williamson <alex.williamson@redhat.com>; Joerg
> > Roedel <jroedel@suse.de>; David Woodhouse <dwmw2@infradead.org>;
> > iommu@lists.linux-foundation.org
> > Subject: Re: [PATCH v4 3/6] PCI: Add support for multiple DMA aliases
> > 
> > On Wed, Feb 24, 2016 at 01:44:06PM -0600, Bjorn Helgaas wrote:
> > > From: Jacek Lawrynowicz <jacek.lawrynowicz@intel.com>
> > >
> > > <Insert changelog here>
> > 
> > (Sorry, I should have copied this changelog in the patch; I copied
> > this manually from your v3 posting):
> > 
> > > This patch solves IOMMU support issues with PCIe non-transparent bridges
> > > that use Requester ID look-up tables (LUT), e.g. PEX8733. Before exiting
> > > the bridge, packet's RID is rewritten according to LUT programmed by
> > > a driver. Modified packets are then passed to a destination bus and
> > > processed upstream. The problem is that such packets seem to come from
> > > non-existent nodes that are hidden behind NTB and are not discoverable
> > > by a destination node, so IOMMU discards them. Adding DMA alias for a
> > > given LUT entry allows IOMMU to create a proper mapping that enables
> > > inter-node communication.
> > 
> > A specific example here would help me understand.  Here's how I
> > understand this (correct me if I'm wrong): We're talking about a DMA
> > packet being forwarded upstream from an NTB.  The NTB uses the LUT to
> > rewrite the RID in the DMA packet.  The new RID from the LUT is
> > unknown to the IOMMU, so it discards the DMA packet.
> 
> Yes, this is exactly the problem.
> 
> > > The current DMA alias implementation supports only single alias, so it's
> > > not possible to connect more than two nodes when IOMMU is enabled. This
> > > implementation enables all possible aliases on a given bus (256) that
> > > are stored in a bitset. Alias devfn is directly translated to a bit
> > > number. The bitset is not allocated for devices that have no need for
> > > DMA aliases.
> > 
> > I think "two nodes" is referring to two PCIe devices on the other side
> > of the NTB.  You want DMA packets from those devices to have different
> > RIDs so the IOMMU can distinguish them.
> 
> Right.
> 
> > The LUT entries basically create aliases of the NTB (one alias for
> > each device beyond the NTB).  Your quirk uses pci_add_dma_alias(), and
> > the aliases are all on the same bus as the NTB itself.
> > 
> > The quirk adds PCI_DEVFN(0x10, 0x0), PCI_DEVFN(0x11, 0x0), and
> > PCI_DEVFN(0x12, 0x0).  Shouldn't there be some connection between this
> > and the LUT programming?  I assume the LUT is programmed to correspond
> > to those aliases.  Does this mean you're limited to three devices
> > beyond the NTB?
> 
> Yes, there is an indirect connection between LUT table and devfns used in the
> quirk.
> Dev part is an offset in the LUT table and function is taken from the device
> behind the NTB.
> So the driver can only change the dev part by using different LUT offsets.
> We don't plan to modify this quirk. The number of PCIe devices beyond single
> x200 card NTB will not change.
> Two are used by x200 CPU (host bridge & root port) and one is used by x200 DMA
> engine.
> I'm not sure introducing some dependencies to make sure the offsets are set
> correctly is really worth it.

I'd like at least a comment that points to the specific x200 code that
must coordinate with this.

> So regarding the improvements in the patch description, you want me to update
> and repost it?

Yes, please.

> BTW I posted x200 DMA driver (the client for this change) on DMA list:
> https://lkml.org/lkml/2016/2/9/287
> I'm working on integrating review comments and hope to get it included in 4.6.

What about my questions on the code itself, below?

> > > ---
> > >  drivers/iommu/iommu.c |   17 ++++++++++-------
> > >  drivers/pci/pci.c     |   11 +++++++++--
> > >  drivers/pci/probe.c   |    1 +
> > >  drivers/pci/search.c  |   14 +++++++++-----
> > >  include/linux/pci.h   |    4 +---
> > >  5 files changed, 30 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > index 0e3b009..a214e19 100644
> > > --- a/drivers/iommu/iommu.c
> > > +++ b/drivers/iommu/iommu.c
> > > @@ -659,9 +659,15 @@ static struct iommu_group
> > *get_pci_function_alias_group(struct pci_dev *pdev,
> > >  	return NULL;
> > >  }
> > >
> > > +static bool dma_alias_is_enabled(struct pci_dev *dev, u8 devfn)
> > > +{
> > > +	return dev->dma_alias_mask &&
> > > +	       test_bit(devfn, dev->dma_alias_mask);
> > > +}
> > > +
> > >  /*
> > > - * Look for aliases to or from the given device for exisiting groups.  The
> > > - * dma_alias_devfn only supports aliases on the same bus, therefore the
> > search
> > > + * Look for aliases to or from the given device for existing groups. DMA
> > > + * aliases are only supported on the same bus, therefore the search
> > 
> > I'm trying to reconcile this statement that "DMA aliases are only
> > supported on the same bus" (which was there even before this patch)
> > with the fact that pci_for_each_dma_alias() does not have that
> > limitation.
> > 
> > >   * space is quite small (especially since we're really only looking at pcie
> > >   * device, and therefore only expect multiple slots on the root complex or
> > >   * downstream switch ports).  It's conceivable though that a pair of
> > > @@ -686,11 +692,8 @@ static struct iommu_group *get_pci_alias_group(struct
> > pci_dev *pdev,
> > >  			continue;
> > >
> > >  		/* We alias them or they alias us */
> > > -		if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)
> > &&
> > > -		     pdev->dma_alias_devfn == tmp->devfn) ||
> > > -		    ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
> > > -		     tmp->dma_alias_devfn == pdev->devfn)) {
> > > -
> > > +		if (dma_alias_is_enabled(pdev, tmp->devfn) ||
> > > +		    dma_alias_is_enabled(tmp, pdev->devfn)) {
> > >  			group = get_pci_alias_group(tmp, devfns);
> > 
> > We basically have this:
> > 
> >   for_each_pci_dev(tmp) {
> >     if (<pdev and tmp are DMA aliases>)
> >       group = get_pci_alias_group();
> >       ...
> >   }
> > 
> > > The DMA alias stuff relies on PCI internals, so it doesn't
> > seem quite right to use things like PCI_DEV_FLAGS_DMA_ALIAS_DEVFN and
> > dma_alias_devfn here in the IOMMU code.
> > 
> > I'm trying to figure out why we don't do something like the following
> > instead:
> > 
> >   callback(struct pci_dev *pdev, u16 alias, void *opaque)
> >   {
> >     struct iommu_group *group;
> > 
> >     group = get_pci_alias_group();
> >     if (group)
> >       return group;
> > 
> >     return 0;
> >   }
> > 
> >   pci_for_each_dma_alias(pdev, callback, ...);
> > 
> > Is the existing code some sort of optimization, e.g., checking
> > PCI_DEV_FLAGS_DMA_ALIAS_DEVFN is cheaper than using
> > pci_for_each_dma_alias()?
> > 
> > It seems like this won't work for some very unlikely but theoretically
> > possible topologies, e.g.,
> > 
> >   PCIe Root Complex/IOMMU
> >     PCIe switch A
> >       PCIe to conventional PCI bridge
> >         PCI to PCIe Root Complex
> > 	  PCIe NTB
> > 
> > Here, I think the IOMMU will only see RIDs from PCIe switch A, but the
> > current code only looks at DMA aliases that are on the same bus as the
> > PCIe NTB.  Wouldn't using pci_for_each_dma_alias() handle this
> > correctly?
> > 
> > >  			if (group) {
> > >  				pci_dev_put(tmp);



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-03-03 14:22 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-18 11:59 [PATCH] pci: Add support for multiple DMA aliases Jacek Lawrynowicz
2016-01-18 16:07 ` Jacek Lawrynowicz
2016-01-19  3:33   ` Bjorn Helgaas
2016-01-19  9:21     ` Lawrynowicz, Jacek
2016-01-19 20:12     ` Bjorn Helgaas
2016-01-19 21:04       ` Alex Williamson
2016-01-19 21:39         ` Bjorn Helgaas
2016-01-20 15:02           ` Lawrynowicz, Jacek
2016-01-20 17:46             ` Bjorn Helgaas
2016-01-21  9:39               ` David Woodhouse
2016-01-21 15:22                 ` Bjorn Helgaas
2016-01-21 15:32                   ` David Woodhouse
2016-01-26 10:15                     ` Lawrynowicz, Jacek
2016-01-21 12:43               ` Lawrynowicz, Jacek
2016-02-29 22:44 [PATCH v4 3/6] PCI: " Bjorn Helgaas
2016-03-03 14:22 ` [PATCH] " Jacek Lawrynowicz
2016-03-03 14:22   ` Jacek Lawrynowicz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.