All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bjorn Helgaas <helgaas@kernel.org>
To: Logan Gunthorpe <logang@deltatee.com>
Cc: linux-kernel@vger.kernel.org, linux-ntb@googlegroups.com,
	linux-pci@vger.kernel.org, iommu@lists.linux-foundation.org,
	linux-kselftest@vger.kernel.org, Jon Mason <jdmason@kudzu.us>,
	Joerg Roedel <joro@8bytes.org>, Allen Hubbe <allenbh@gmail.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Serge Semin <fancer.lancer@gmail.com>,
	Eric Pilmore <epilmore@gigaio.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Marc Zyngier <marc.zyngier@arm.com>
Subject: Re: [PATCH 2/9] PCI/MSI: Support allocating virtual MSI interrupts
Date: Thu, 31 Jan 2019 16:39:32 -0600	[thread overview]
Message-ID: <20190131223932.GR229773@google.com> (raw)
In-Reply-To: <20190131185656.17972-3-logang@deltatee.com>

[+cc Thomas, Marc]

On Thu, Jan 31, 2019 at 11:56:49AM -0700, Logan Gunthorpe wrote:
> For NTB devices, we want to be able to trigger MSI interrupts
> through a memory window. In these cases we may want to use
> more interrupts than the NTB PCI device has available in its MSI-X
> table.
> 
> We allow for this by creating a new 'virtual' interrupt. These
> interrupts are allocated as usual but are not programmed into the
> MSI-X table (as there may not be space for them).
> 
> The MSI address and data will then handled through an NTB MSI library
> introduced later in this series.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Cc: Bjorn Helgaas <bhelgaas@google.com>

I assume you'll merge this along with the rest of the series, so:

Acked-by: Bjorn Helgaas <bhelgaas@google.com>

Minor question and typo below.

> ---
>  drivers/pci/msi.c   | 51 +++++++++++++++++++++++++++++++++++++--------
>  include/linux/msi.h |  1 +
>  include/linux/pci.h |  9 ++++++++
>  3 files changed, 52 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 4c0b47867258..145587da686c 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
>  
>  static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  {
> +	if (desc->msi_attrib.is_virtual)
> +		return NULL;
> +
>  	return desc->mask_base +
>  		desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
>  }
> @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
>  {
>  	u32 mask_bits = desc->masked;
> +	void __iomem *desc_addr;
>  
>  	if (pci_msi_ignore_mask)
>  		return 0;
> +	desc_addr = pci_msix_desc_addr(desc);
> +	if (!desc_addr)
> +		return 0;
>  
>  	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
>  	if (flag)
>  		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
> -	writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL);
> +
> +	writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
>  
>  	return mask_bits;
>  }
> @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  	if (entry->msi_attrib.is_msix) {
>  		void __iomem *base = pci_msix_desc_addr(entry);
>  
> +		if (!base) {
> +			WARN_ON(1);
> +			return;
> +		}
> +
>  		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
>  		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
>  		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
> @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  	} else if (entry->msi_attrib.is_msix) {
>  		void __iomem *base = pci_msix_desc_addr(entry);
>  
> +		if (!base)
> +			goto skip;
> +
>  		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
>  		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
>  		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
> @@ -327,6 +343,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  					      msg->data);
>  		}
>  	}
> +
> +skip:
>  	entry->msg = *msg;
>  }
>  
> @@ -550,6 +568,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
>  
>  	entry->msi_attrib.is_msix	= 0;
>  	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
> +	entry->msi_attrib.is_virtual    = 0;
>  	entry->msi_attrib.entry_nr	= 0;
>  	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
>  	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
> @@ -674,6 +693,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
>  	struct irq_affinity_desc *curmsk, *masks = NULL;
>  	struct msi_desc *entry;
>  	int ret, i;
> +	int vec_count = pci_msix_vec_count(dev);
>  
>  	if (affd)
>  		masks = irq_create_affinity_masks(nvec, affd);
> @@ -696,6 +716,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
>  			entry->msi_attrib.entry_nr = entries[i].entry;
>  		else
>  			entry->msi_attrib.entry_nr = i;
> +
> +		entry->msi_attrib.is_virtual =
> +			entry->msi_attrib.entry_nr >= vec_count;
> +
>  		entry->msi_attrib.default_irq	= dev->irq;
>  		entry->mask_base		= base;
>  
> @@ -714,12 +738,19 @@ static void msix_program_entries(struct pci_dev *dev,
>  {
>  	struct msi_desc *entry;
>  	int i = 0;
> +	void __iomem *desc_addr;
>  
>  	for_each_pci_msi_entry(entry, dev) {
>  		if (entries)
>  			entries[i++].vector = entry->irq;
> -		entry->masked = readl(pci_msix_desc_addr(entry) +
> -				PCI_MSIX_ENTRY_VECTOR_CTRL);
> +
> +		desc_addr = pci_msix_desc_addr(entry);
> +		if (desc_addr)
> +			entry->masked = readl(desc_addr +
> +					      PCI_MSIX_ENTRY_VECTOR_CTRL);
> +		else
> +			entry->masked = 0;
> +
>  		msix_mask_irq(entry, 1);
>  	}
>  }
> @@ -932,7 +963,8 @@ int pci_msix_vec_count(struct pci_dev *dev)
>  EXPORT_SYMBOL(pci_msix_vec_count);
>  
>  static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
> -			     int nvec, const struct irq_affinity *affd)
> +			     int nvec, const struct irq_affinity *affd,
> +			     int flags)
>  {
>  	int nr_entries;
>  	int i, j;
> @@ -943,7 +975,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
>  	nr_entries = pci_msix_vec_count(dev);
>  	if (nr_entries < 0)
>  		return nr_entries;
> -	if (nvec > nr_entries)
> +	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
>  		return nr_entries;
>  
>  	if (entries) {
> @@ -1086,7 +1118,8 @@ EXPORT_SYMBOL(pci_enable_msi);
>  
>  static int __pci_enable_msix_range(struct pci_dev *dev,
>  				   struct msix_entry *entries, int minvec,
> -				   int maxvec, const struct irq_affinity *affd)
> +				   int maxvec, const struct irq_affinity *affd,
> +				   int flags)
>  {
>  	int rc, nvec = maxvec;
>  
> @@ -1110,7 +1143,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
>  				return -ENOSPC;
>  		}
>  
> -		rc = __pci_enable_msix(dev, entries, nvec, affd);
> +		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
>  		if (rc == 0)
>  			return nvec;
>  
> @@ -1141,7 +1174,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
>  int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>  		int minvec, int maxvec)
>  {
> -	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
> +	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
>  
> @@ -1181,7 +1214,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
>  
>  	if (flags & PCI_IRQ_MSIX) {
>  		msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
> -						    max_vecs, affd);
> +						    max_vecs, affd, flags);
>  		if (msix_vecs > 0)
>  			return msix_vecs;
>  	}
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 784fb52b9900..6458ab049852 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -88,6 +88,7 @@ struct msi_desc {
>  				__u8	multi_cap	: 3;
>  				__u8	maskbit		: 1;
>  				__u8	is_64		: 1;
> +				__u8    is_virtual      : 1;

You did the right thing by using the same style as what's already
here, but does anybody know why are we using __u8 and __u16 here?

Those typedefs are in include/uapi/asm-generic/int-l64.h, which
suggests they're for things exported to user space, but I don't think
that's the case here, so I'm wondering if we could someday replace
these with u8 and u16.  Obviously that wouldn't be part of *this*
series.

>  				__u16	entry_nr;
>  				unsigned default_irq;
>  			} msi_attrib;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 65f1d8c2f082..ce0815c2c498 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1352,6 +1352,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
>  #define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
>  #define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
>  #define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
> +
> +/*
> + * Virtual interrupts allow for more interrupts to be allocated
> + * than the device has interrupts for. These are not programmed
> + * into the devices MSI-X table and must be handled by some

s/devices/device's/

> + * other driver means.
> + */
> +#define PCI_IRQ_VIRTUAL		(1 << 4)
> +
>  #define PCI_IRQ_ALL_TYPES \
>  	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
>  
> -- 
> 2.19.0
> 

WARNING: multiple messages have this Message-ID (diff)
From: helgaas at kernel.org (Bjorn Helgaas)
Subject: [PATCH 2/9] PCI/MSI: Support allocating virtual MSI interrupts
Date: Thu, 31 Jan 2019 16:39:32 -0600	[thread overview]
Message-ID: <20190131223932.GR229773@google.com> (raw)
In-Reply-To: <20190131185656.17972-3-logang@deltatee.com>

[+cc Thomas, Marc]

On Thu, Jan 31, 2019 at 11:56:49AM -0700, Logan Gunthorpe wrote:
> For NTB devices, we want to be able to trigger MSI interrupts
> through a memory window. In these cases we may want to use
> more interrupts than the NTB PCI device has available in its MSI-X
> table.
> 
> We allow for this by creating a new 'virtual' interrupt. These
> interrupts are allocated as usual but are not programmed into the
> MSI-X table (as there may not be space for them).
> 
> The MSI address and data will then handled through an NTB MSI library
> introduced later in this series.
> 
> Signed-off-by: Logan Gunthorpe <logang at deltatee.com>
> Cc: Bjorn Helgaas <bhelgaas at google.com>

I assume you'll merge this along with the rest of the series, so:

Acked-by: Bjorn Helgaas <bhelgaas at google.com>

Minor question and typo below.

> ---
>  drivers/pci/msi.c   | 51 +++++++++++++++++++++++++++++++++++++--------
>  include/linux/msi.h |  1 +
>  include/linux/pci.h |  9 ++++++++
>  3 files changed, 52 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 4c0b47867258..145587da686c 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
>  
>  static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  {
> +	if (desc->msi_attrib.is_virtual)
> +		return NULL;
> +
>  	return desc->mask_base +
>  		desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
>  }
> @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
>  {
>  	u32 mask_bits = desc->masked;
> +	void __iomem *desc_addr;
>  
>  	if (pci_msi_ignore_mask)
>  		return 0;
> +	desc_addr = pci_msix_desc_addr(desc);
> +	if (!desc_addr)
> +		return 0;
>  
>  	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
>  	if (flag)
>  		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
> -	writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL);
> +
> +	writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
>  
>  	return mask_bits;
>  }
> @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  	if (entry->msi_attrib.is_msix) {
>  		void __iomem *base = pci_msix_desc_addr(entry);
>  
> +		if (!base) {
> +			WARN_ON(1);
> +			return;
> +		}
> +
>  		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
>  		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
>  		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
> @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  	} else if (entry->msi_attrib.is_msix) {
>  		void __iomem *base = pci_msix_desc_addr(entry);
>  
> +		if (!base)
> +			goto skip;
> +
>  		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
>  		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
>  		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
> @@ -327,6 +343,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  					      msg->data);
>  		}
>  	}
> +
> +skip:
>  	entry->msg = *msg;
>  }
>  
> @@ -550,6 +568,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
>  
>  	entry->msi_attrib.is_msix	= 0;
>  	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
> +	entry->msi_attrib.is_virtual    = 0;
>  	entry->msi_attrib.entry_nr	= 0;
>  	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
>  	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
> @@ -674,6 +693,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
>  	struct irq_affinity_desc *curmsk, *masks = NULL;
>  	struct msi_desc *entry;
>  	int ret, i;
> +	int vec_count = pci_msix_vec_count(dev);
>  
>  	if (affd)
>  		masks = irq_create_affinity_masks(nvec, affd);
> @@ -696,6 +716,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
>  			entry->msi_attrib.entry_nr = entries[i].entry;
>  		else
>  			entry->msi_attrib.entry_nr = i;
> +
> +		entry->msi_attrib.is_virtual =
> +			entry->msi_attrib.entry_nr >= vec_count;
> +
>  		entry->msi_attrib.default_irq	= dev->irq;
>  		entry->mask_base		= base;
>  
> @@ -714,12 +738,19 @@ static void msix_program_entries(struct pci_dev *dev,
>  {
>  	struct msi_desc *entry;
>  	int i = 0;
> +	void __iomem *desc_addr;
>  
>  	for_each_pci_msi_entry(entry, dev) {
>  		if (entries)
>  			entries[i++].vector = entry->irq;
> -		entry->masked = readl(pci_msix_desc_addr(entry) +
> -				PCI_MSIX_ENTRY_VECTOR_CTRL);
> +
> +		desc_addr = pci_msix_desc_addr(entry);
> +		if (desc_addr)
> +			entry->masked = readl(desc_addr +
> +					      PCI_MSIX_ENTRY_VECTOR_CTRL);
> +		else
> +			entry->masked = 0;
> +
>  		msix_mask_irq(entry, 1);
>  	}
>  }
> @@ -932,7 +963,8 @@ int pci_msix_vec_count(struct pci_dev *dev)
>  EXPORT_SYMBOL(pci_msix_vec_count);
>  
>  static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
> -			     int nvec, const struct irq_affinity *affd)
> +			     int nvec, const struct irq_affinity *affd,
> +			     int flags)
>  {
>  	int nr_entries;
>  	int i, j;
> @@ -943,7 +975,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
>  	nr_entries = pci_msix_vec_count(dev);
>  	if (nr_entries < 0)
>  		return nr_entries;
> -	if (nvec > nr_entries)
> +	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
>  		return nr_entries;
>  
>  	if (entries) {
> @@ -1086,7 +1118,8 @@ EXPORT_SYMBOL(pci_enable_msi);
>  
>  static int __pci_enable_msix_range(struct pci_dev *dev,
>  				   struct msix_entry *entries, int minvec,
> -				   int maxvec, const struct irq_affinity *affd)
> +				   int maxvec, const struct irq_affinity *affd,
> +				   int flags)
>  {
>  	int rc, nvec = maxvec;
>  
> @@ -1110,7 +1143,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
>  				return -ENOSPC;
>  		}
>  
> -		rc = __pci_enable_msix(dev, entries, nvec, affd);
> +		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
>  		if (rc == 0)
>  			return nvec;
>  
> @@ -1141,7 +1174,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
>  int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>  		int minvec, int maxvec)
>  {
> -	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
> +	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
>  
> @@ -1181,7 +1214,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
>  
>  	if (flags & PCI_IRQ_MSIX) {
>  		msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
> -						    max_vecs, affd);
> +						    max_vecs, affd, flags);
>  		if (msix_vecs > 0)
>  			return msix_vecs;
>  	}
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 784fb52b9900..6458ab049852 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -88,6 +88,7 @@ struct msi_desc {
>  				__u8	multi_cap	: 3;
>  				__u8	maskbit		: 1;
>  				__u8	is_64		: 1;
> +				__u8    is_virtual      : 1;

You did the right thing by using the same style as what's already
here, but does anybody know why are we using __u8 and __u16 here?

Those typedefs are in include/uapi/asm-generic/int-l64.h, which
suggests they're for things exported to user space, but I don't think
that's the case here, so I'm wondering if we could someday replace
these with u8 and u16.  Obviously that wouldn't be part of *this*
series.

>  				__u16	entry_nr;
>  				unsigned default_irq;
>  			} msi_attrib;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 65f1d8c2f082..ce0815c2c498 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1352,6 +1352,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
>  #define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
>  #define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
>  #define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
> +
> +/*
> + * Virtual interrupts allow for more interrupts to be allocated
> + * than the device has interrupts for. These are not programmed
> + * into the devices MSI-X table and must be handled by some

s/devices/device's/

> + * other driver means.
> + */
> +#define PCI_IRQ_VIRTUAL		(1 << 4)
> +
>  #define PCI_IRQ_ALL_TYPES \
>  	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
>  
> -- 
> 2.19.0
> 

WARNING: multiple messages have this Message-ID (diff)
From: helgaas@kernel.org (Bjorn Helgaas)
Subject: [PATCH 2/9] PCI/MSI: Support allocating virtual MSI interrupts
Date: Thu, 31 Jan 2019 16:39:32 -0600	[thread overview]
Message-ID: <20190131223932.GR229773@google.com> (raw)
Message-ID: <20190131223932.hN5lDqbvPZHm09uN4CHfN8FDjM6Qoo0HNT0rcmnKVrc@z> (raw)
In-Reply-To: <20190131185656.17972-3-logang@deltatee.com>

[+cc Thomas, Marc]

On Thu, Jan 31, 2019@11:56:49AM -0700, Logan Gunthorpe wrote:
> For NTB devices, we want to be able to trigger MSI interrupts
> through a memory window. In these cases we may want to use
> more interrupts than the NTB PCI device has available in its MSI-X
> table.
> 
> We allow for this by creating a new 'virtual' interrupt. These
> interrupts are allocated as usual but are not programmed into the
> MSI-X table (as there may not be space for them).
> 
> The MSI address and data will then handled through an NTB MSI library
> introduced later in this series.
> 
> Signed-off-by: Logan Gunthorpe <logang at deltatee.com>
> Cc: Bjorn Helgaas <bhelgaas at google.com>

I assume you'll merge this along with the rest of the series, so:

Acked-by: Bjorn Helgaas <bhelgaas at google.com>

Minor question and typo below.

> ---
>  drivers/pci/msi.c   | 51 +++++++++++++++++++++++++++++++++++++--------
>  include/linux/msi.h |  1 +
>  include/linux/pci.h |  9 ++++++++
>  3 files changed, 52 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 4c0b47867258..145587da686c 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
>  
>  static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  {
> +	if (desc->msi_attrib.is_virtual)
> +		return NULL;
> +
>  	return desc->mask_base +
>  		desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
>  }
> @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
>  {
>  	u32 mask_bits = desc->masked;
> +	void __iomem *desc_addr;
>  
>  	if (pci_msi_ignore_mask)
>  		return 0;
> +	desc_addr = pci_msix_desc_addr(desc);
> +	if (!desc_addr)
> +		return 0;
>  
>  	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
>  	if (flag)
>  		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
> -	writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL);
> +
> +	writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
>  
>  	return mask_bits;
>  }
> @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  	if (entry->msi_attrib.is_msix) {
>  		void __iomem *base = pci_msix_desc_addr(entry);
>  
> +		if (!base) {
> +			WARN_ON(1);
> +			return;
> +		}
> +
>  		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
>  		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
>  		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
> @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  	} else if (entry->msi_attrib.is_msix) {
>  		void __iomem *base = pci_msix_desc_addr(entry);
>  
> +		if (!base)
> +			goto skip;
> +
>  		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
>  		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
>  		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
> @@ -327,6 +343,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>  					      msg->data);
>  		}
>  	}
> +
> +skip:
>  	entry->msg = *msg;
>  }
>  
> @@ -550,6 +568,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
>  
>  	entry->msi_attrib.is_msix	= 0;
>  	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
> +	entry->msi_attrib.is_virtual    = 0;
>  	entry->msi_attrib.entry_nr	= 0;
>  	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
>  	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
> @@ -674,6 +693,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
>  	struct irq_affinity_desc *curmsk, *masks = NULL;
>  	struct msi_desc *entry;
>  	int ret, i;
> +	int vec_count = pci_msix_vec_count(dev);
>  
>  	if (affd)
>  		masks = irq_create_affinity_masks(nvec, affd);
> @@ -696,6 +716,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
>  			entry->msi_attrib.entry_nr = entries[i].entry;
>  		else
>  			entry->msi_attrib.entry_nr = i;
> +
> +		entry->msi_attrib.is_virtual =
> +			entry->msi_attrib.entry_nr >= vec_count;
> +
>  		entry->msi_attrib.default_irq	= dev->irq;
>  		entry->mask_base		= base;
>  
> @@ -714,12 +738,19 @@ static void msix_program_entries(struct pci_dev *dev,
>  {
>  	struct msi_desc *entry;
>  	int i = 0;
> +	void __iomem *desc_addr;
>  
>  	for_each_pci_msi_entry(entry, dev) {
>  		if (entries)
>  			entries[i++].vector = entry->irq;
> -		entry->masked = readl(pci_msix_desc_addr(entry) +
> -				PCI_MSIX_ENTRY_VECTOR_CTRL);
> +
> +		desc_addr = pci_msix_desc_addr(entry);
> +		if (desc_addr)
> +			entry->masked = readl(desc_addr +
> +					      PCI_MSIX_ENTRY_VECTOR_CTRL);
> +		else
> +			entry->masked = 0;
> +
>  		msix_mask_irq(entry, 1);
>  	}
>  }
> @@ -932,7 +963,8 @@ int pci_msix_vec_count(struct pci_dev *dev)
>  EXPORT_SYMBOL(pci_msix_vec_count);
>  
>  static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
> -			     int nvec, const struct irq_affinity *affd)
> +			     int nvec, const struct irq_affinity *affd,
> +			     int flags)
>  {
>  	int nr_entries;
>  	int i, j;
> @@ -943,7 +975,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
>  	nr_entries = pci_msix_vec_count(dev);
>  	if (nr_entries < 0)
>  		return nr_entries;
> -	if (nvec > nr_entries)
> +	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
>  		return nr_entries;
>  
>  	if (entries) {
> @@ -1086,7 +1118,8 @@ EXPORT_SYMBOL(pci_enable_msi);
>  
>  static int __pci_enable_msix_range(struct pci_dev *dev,
>  				   struct msix_entry *entries, int minvec,
> -				   int maxvec, const struct irq_affinity *affd)
> +				   int maxvec, const struct irq_affinity *affd,
> +				   int flags)
>  {
>  	int rc, nvec = maxvec;
>  
> @@ -1110,7 +1143,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
>  				return -ENOSPC;
>  		}
>  
> -		rc = __pci_enable_msix(dev, entries, nvec, affd);
> +		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
>  		if (rc == 0)
>  			return nvec;
>  
> @@ -1141,7 +1174,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
>  int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>  		int minvec, int maxvec)
>  {
> -	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
> +	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
>  
> @@ -1181,7 +1214,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
>  
>  	if (flags & PCI_IRQ_MSIX) {
>  		msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
> -						    max_vecs, affd);
> +						    max_vecs, affd, flags);
>  		if (msix_vecs > 0)
>  			return msix_vecs;
>  	}
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 784fb52b9900..6458ab049852 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -88,6 +88,7 @@ struct msi_desc {
>  				__u8	multi_cap	: 3;
>  				__u8	maskbit		: 1;
>  				__u8	is_64		: 1;
> +				__u8    is_virtual      : 1;

You did the right thing by using the same style as what's already
here, but does anybody know why are we using __u8 and __u16 here?

Those typedefs are in include/uapi/asm-generic/int-l64.h, which
suggests they're for things exported to user space, but I don't think
that's the case here, so I'm wondering if we could someday replace
these with u8 and u16.  Obviously that wouldn't be part of *this*
series.

>  				__u16	entry_nr;
>  				unsigned default_irq;
>  			} msi_attrib;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 65f1d8c2f082..ce0815c2c498 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1352,6 +1352,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
>  #define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
>  #define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
>  #define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
> +
> +/*
> + * Virtual interrupts allow for more interrupts to be allocated
> + * than the device has interrupts for. These are not programmed
> + * into the devices MSI-X table and must be handled by some

s/devices/device's/

> + * other driver means.
> + */
> +#define PCI_IRQ_VIRTUAL		(1 << 4)
> +
>  #define PCI_IRQ_ALL_TYPES \
>  	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
>  
> -- 
> 2.19.0
> 

  reply	other threads:[~2019-01-31 22:39 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-31 18:56 [PATCH 0/9] Support using MSI interrupts in ntb_transport Logan Gunthorpe
2019-01-31 18:56 ` Logan Gunthorpe
2019-01-31 18:56 ` logang
2019-01-31 18:56 ` [PATCH 1/9] iommu/vt-d: Allow interrupts from the entire bus for aliased devices Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-02-01 16:44   ` Joerg Roedel
2019-02-01 16:44     ` Joerg Roedel
2019-02-01 16:44     ` joro
2019-02-01 17:27     ` Logan Gunthorpe
2019-02-01 17:27       ` Logan Gunthorpe
2019-02-01 17:27       ` logang
2019-02-05 19:19       ` Jacob Pan
2019-02-05 19:19         ` Jacob Pan
2019-02-05 19:19         ` jacob.jun.pan
2019-02-05 20:40         ` Logan Gunthorpe
2019-02-05 20:40           ` Logan Gunthorpe
2019-02-05 20:40           ` logang
2019-02-05 23:58           ` Jacob Pan
2019-02-05 23:58             ` Jacob Pan
2019-02-05 23:58             ` jacob.jun.pan
2019-01-31 18:56 ` [PATCH 2/9] PCI/MSI: Support allocating virtual MSI interrupts Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 22:39   ` Bjorn Helgaas [this message]
2019-01-31 22:39     ` Bjorn Helgaas
2019-01-31 22:39     ` helgaas
2019-01-31 22:52     ` Logan Gunthorpe
2019-01-31 22:52       ` Logan Gunthorpe
2019-01-31 22:52       ` logang
2019-02-01 19:23       ` Bjorn Helgaas
2019-02-01 19:23         ` Bjorn Helgaas
2019-02-01 19:23         ` helgaas
2019-01-31 18:56 ` [PATCH 3/9] PCI/switchtec: Add module parameter to request more interrupts Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 18:56 ` [PATCH 4/9] NTB: Introduce functions to calculate multi-port resource index Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 18:56 ` [PATCH 5/9] NTB: Rename ntb.c to support multiple source files in the module Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 18:56 ` [PATCH 6/9] NTB: Introduce MSI library Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 18:56 ` [PATCH 7/9] NTB: Introduce NTB MSI Test Client Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 18:56 ` [PATCH 8/9] NTB: Add ntb_msi_test support to ntb_test Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 18:56 ` [PATCH 9/9] NTB: Add MSI interrupt support to ntb_transport Logan Gunthorpe
2019-01-31 18:56   ` Logan Gunthorpe
2019-01-31 18:56   ` logang
2019-01-31 20:20 ` [PATCH 0/9] Support using MSI interrupts in ntb_transport Dave Jiang
2019-01-31 20:20   ` Dave Jiang
2019-01-31 20:20   ` dave.jiang
2019-01-31 20:48   ` Logan Gunthorpe
2019-01-31 20:48     ` Logan Gunthorpe
2019-01-31 20:48     ` logang
2019-01-31 20:58     ` Dave Jiang
2019-01-31 20:58       ` Dave Jiang
2019-01-31 20:58       ` Dave Jiang
2019-01-31 20:58       ` dave.jiang
2019-01-31 22:39       ` Logan Gunthorpe
2019-01-31 22:39         ` Logan Gunthorpe
2019-01-31 22:39         ` Logan Gunthorpe
2019-01-31 22:39         ` logang
2019-01-31 22:46         ` Dave Jiang
2019-01-31 22:46           ` Dave Jiang
2019-01-31 22:46           ` Dave Jiang
2019-01-31 22:46           ` dave.jiang
2019-01-31 23:41           ` Logan Gunthorpe
2019-01-31 23:41             ` Logan Gunthorpe
2019-01-31 23:41             ` logang
2019-01-31 23:48             ` Dave Jiang
2019-01-31 23:48               ` Dave Jiang
2019-01-31 23:48               ` dave.jiang
2019-01-31 23:52               ` Logan Gunthorpe
2019-01-31 23:52                 ` Logan Gunthorpe
2019-01-31 23:52                 ` logang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190131223932.GR229773@google.com \
    --to=helgaas@kernel.org \
    --cc=allenbh@gmail.com \
    --cc=dave.jiang@intel.com \
    --cc=epilmore@gigaio.com \
    --cc=fancer.lancer@gmail.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jdmason@kudzu.us \
    --cc=joro@8bytes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-ntb@googlegroups.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=logang@deltatee.com \
    --cc=marc.zyngier@arm.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.