All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sysfs: add per pci device msi[x] irq listing
@ 2011-09-14 18:36 Neil Horman
  2011-09-15 14:40 ` Greg KH
                   ` (3 more replies)
  0 siblings, 4 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-14 18:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Neil Horman, Greg Kroah-Hartman, Jesse Barnes

So a while back, I wanted to provide a way for irqbalance (and other apps) to
definitively map irqs to devices, which, for msi[x] irqs is currently not really
possible in user space.  My first attempt wen't not so well:
https://lkml.org/lkml/2011/4/21/308

It was plauged by the same issues that prior attempts were, namely that it
violated the one-file-one-value sysfs rule.  I wandered off but have recently
come back to this.  I've got a new implementation here that exports a new
subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
a variable number of numbered subdirectories, in which the number represents an
msi irq.  Each numbered subdirectory contains attributes for that irq, which
currently is only the mode it is operating in (msi vs. msix).  I think fits
within the constraints sysfs requires, and will allow irqbalance to properly map
msi irqs to devices without have to rely on rickety, best guess methods like
interface name matching.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Greg Kroah-Hartman <gregkh@suse.de>
CC: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-devices-msi_irqs |   17 ++++
 drivers/pci/msi.c                                |  100 ++++++++++++++++++++++
 include/linux/msi.h                              |    3 +
 include/linux/pci.h                              |    1 +
 4 files changed, 121 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-msi_irqs

diff --git a/Documentation/ABI/testing/sysfs-devices-msi_irqs b/Documentation/ABI/testing/sysfs-devices-msi_irqs
new file mode 100644
index 0000000..dab3a52
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-msi_irqs
@@ -0,0 +1,17 @@
+What:		/sys/devices/.../power/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		subdirectories, with each subdirectory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered subdirectory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vecotor named by
+		the parent directory is in (msi vs. msix)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2f10328..3eaf2c0 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -322,6 +322,7 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -402,6 +403,88 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+static struct kobj_type msi_irq_ktype = {
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -453,6 +536,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -573,6 +663,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -731,6 +827,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -829,6 +927,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced..ce93a34 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b..fff3961 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -332,6 +332,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_IOV
-- 
1.7.6


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing
  2011-09-14 18:36 [PATCH] sysfs: add per pci device msi[x] irq listing Neil Horman
@ 2011-09-15 14:40 ` Greg KH
  2011-09-15 15:07   ` Neil Horman
  2011-09-15 20:08 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v2) Neil Horman
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 39+ messages in thread
From: Greg KH @ 2011-09-15 14:40 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Jesse Barnes

On Wed, Sep 14, 2011 at 02:36:53PM -0400, Neil Horman wrote:
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-devices-msi_irqs
> @@ -0,0 +1,17 @@
> +What:		/sys/devices/.../power/

I don't think this is the file that you are describing here :)

And I see you have not tried to remove a PCI device with this patch
running, right?

Please do so and properly fix up the messages that the kernel spits at
you.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing
  2011-09-15 14:40 ` Greg KH
@ 2011-09-15 15:07   ` Neil Horman
  0 siblings, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-15 15:07 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel, Jesse Barnes

On Thu, Sep 15, 2011 at 04:40:15PM +0200, Greg KH wrote:
> On Wed, Sep 14, 2011 at 02:36:53PM -0400, Neil Horman wrote:
> > --- /dev/null
> > +++ b/Documentation/ABI/testing/sysfs-devices-msi_irqs
> > @@ -0,0 +1,17 @@
> > +What:		/sys/devices/.../power/
> 
> I don't think this is the file that you are describing here :)
> 
Yeah, you're right, thats what I get for copying an existing entry and assuming
there was only one devices subdir :)

> And I see you have not tried to remove a PCI device with this patch
> running, right?
> 
> Please do so and properly fix up the messages that the kernel spits at
> you.
> 
Hadn't considered doing that, no, I'll give it a shot and repost fixes.

Thanks
Neil


^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH] sysfs: add per pci device msi[x] irq listing (v2)
  2011-09-14 18:36 [PATCH] sysfs: add per pci device msi[x] irq listing Neil Horman
  2011-09-15 14:40 ` Greg KH
@ 2011-09-15 20:08 ` Neil Horman
  2011-09-16  8:36   ` Greg KH
  2011-09-19 15:47 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v3) Neil Horman
  2011-09-29 14:38 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v4) Neil Horman
  3 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-15 20:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: Neil Horman, Greg Kroah-Hartman, Jesse Barnes

So a while back, I wanted to provide a way for irqbalance (and other apps) to
definitively map irqs to devices, which, for msi[x] irqs is currently not really
possible in user space.  My first attempt wen't not so well:
https://lkml.org/lkml/2011/4/21/308

It was plauged by the same issues that prior attempts were, namely that it
violated the one-file-one-value sysfs rule.  I wandered off but have recently
come back to this.  I've got a new implementation here that exports a new
subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
a variable number of numbered subdirectories, in which the number represents an
msi irq.  Each numbered subdirectory contains attributes for that irq, which
currently is only the mode it is operating in (msi vs. msix).  I think fits
within the constraints sysfs requires, and will allow irqbalance to properly map
msi irqs to devices without having to rely on rickety, best guess methods like
interface name matching.

Change Notes:

(v2)
Fixed up Documentation to put new sysfs interface descriptions in the right
place, as per request by Greg K-H

Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
exactly right, but looking at the crash (triggered by echo 1 >
/sys/class/net/eth0/device/remove), it looked as though we were freeing the
pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
such it seemed most appropriate to hold references on the pci_dev for each msi
irq sysfs object that we create, and release them on free accordingly.  With
this change in place, I can remove, and add (via rescan) msi enabled devices
ad-nauseum without a panic.  Again thanks to Greg K-H

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Greg Kroah-Hartman <gregkh@suse.de>
CC: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |   18 ++++++
 drivers/pci/msi.c                       |  102 +++++++++++++++++++++++++++++++
 include/linux/msi.h                     |    3 +
 include/linux/pci.h                     |    1 +
 4 files changed, 124 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf2..699da99 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		subdirectories, with each subdirectory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered subdirectory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vecotor named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2f10328..13a59a5 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -322,6 +322,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_put(&entry->kobj);
+		pci_dev_put(dev);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -402,6 +404,89 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+static struct kobj_type msi_irq_ktype = {
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -453,6 +538,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -573,6 +665,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -731,6 +829,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -829,6 +929,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced..ce93a34 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b..fff3961 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -332,6 +332,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_IOV
-- 
1.7.6


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v2)
  2011-09-15 20:08 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v2) Neil Horman
@ 2011-09-16  8:36   ` Greg KH
  2011-09-16 10:57     ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Greg KH @ 2011-09-16  8:36 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Jesse Barnes

On Thu, Sep 15, 2011 at 04:08:38PM -0400, Neil Horman wrote:
> +static struct kobj_type msi_irq_ktype = {
> +	.sysfs_ops = &msi_irq_sysfs_ops,
> +	.default_attrs = msi_irq_default_attrs,
> +};

You still haven't successfully removed the kobject associated with this
kobj_type.  Otherwise you would have seen the error messages in the
kernel log and fixed up what it told you to fix.

Please do so.

greg k-h

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v2)
  2011-09-16  8:36   ` Greg KH
@ 2011-09-16 10:57     ` Neil Horman
  2011-09-16 13:23       ` Greg KH
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-16 10:57 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel, Jesse Barnes

On Fri, Sep 16, 2011 at 10:36:46AM +0200, Greg KH wrote:
> On Thu, Sep 15, 2011 at 04:08:38PM -0400, Neil Horman wrote:
> > +static struct kobj_type msi_irq_ktype = {
> > +	.sysfs_ops = &msi_irq_sysfs_ops,
> > +	.default_attrs = msi_irq_default_attrs,
> > +};
> 
> You still haven't successfully removed the kobject associated with this
> kobj_type.  Otherwise you would have seen the error messages in the
> kernel log and fixed up what it told you to fix.
> 
> Please do so.
> 
Well, I'm not using kobject_del, you're right, I'm just using a kobject_put, and
I swear I'm not getting any error messages about it:
[root@amd-dinar-01 eth0]# echo 1 > ./device/remove
[root@amd-dinar-01 eth0]# dmesg | tasil -10
[   17.673050] RPC: Registered tcp transport module.
[   17.677759] RPC: Registered tcp NFSv4.1 backchannel transport module.
[   17.702913] SELinux: initialized (dev rpc_pipefs, type rpc_pipefs), uses
genfs_contexts
[   20.060280] bnx2 0000:01:00.0: eth0: NIC Copper Link is Up, 1000 Mbps full
duplex
[   20.070903] ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
[   20.106296] bnx2 0000:01:00.1: eth1: NIC Copper Link is Up, 1000 Mbps full
duplex
[   20.116857] ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready
[   30.850059] eth1: no IPv6 routers present

I'm not sure why that would be, but theres definately no warning about it.
Either way, I'll gladly make the change to use kobject_del

Regards
Neil


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v2)
  2011-09-16 10:57     ` Neil Horman
@ 2011-09-16 13:23       ` Greg KH
  2011-09-16 13:32         ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Greg KH @ 2011-09-16 13:23 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Jesse Barnes

On Fri, Sep 16, 2011 at 06:57:16AM -0400, Neil Horman wrote:
> On Fri, Sep 16, 2011 at 10:36:46AM +0200, Greg KH wrote:
> > On Thu, Sep 15, 2011 at 04:08:38PM -0400, Neil Horman wrote:
> > > +static struct kobj_type msi_irq_ktype = {
> > > +	.sysfs_ops = &msi_irq_sysfs_ops,
> > > +	.default_attrs = msi_irq_default_attrs,
> > > +};
> > 
> > You still haven't successfully removed the kobject associated with this
> > kobj_type.  Otherwise you would have seen the error messages in the
> > kernel log and fixed up what it told you to fix.
> > 
> > Please do so.
> > 
> Well, I'm not using kobject_del, you're right, I'm just using a kobject_put, and
> I swear I'm not getting any error messages about it:

Then you really are not ever freeing that kobject, so there's a
reference counting bug in your code which you need to fix.

You will notice the real problem once you try to finally release that
kobject, trust me :)

greg k-h

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v2)
  2011-09-16 13:23       ` Greg KH
@ 2011-09-16 13:32         ` Neil Horman
  2011-09-16 16:12           ` Bjorn Helgaas
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-16 13:32 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel, Jesse Barnes

On Fri, Sep 16, 2011 at 03:23:19PM +0200, Greg KH wrote:
> On Fri, Sep 16, 2011 at 06:57:16AM -0400, Neil Horman wrote:
> > On Fri, Sep 16, 2011 at 10:36:46AM +0200, Greg KH wrote:
> > > On Thu, Sep 15, 2011 at 04:08:38PM -0400, Neil Horman wrote:
> > > > +static struct kobj_type msi_irq_ktype = {
> > > > +	.sysfs_ops = &msi_irq_sysfs_ops,
> > > > +	.default_attrs = msi_irq_default_attrs,
> > > > +};
> > > 
> > > You still haven't successfully removed the kobject associated with this
> > > kobj_type.  Otherwise you would have seen the error messages in the
> > > kernel log and fixed up what it told you to fix.
> > > 
> > > Please do so.
> > > 
> > Well, I'm not using kobject_del, you're right, I'm just using a kobject_put, and
> > I swear I'm not getting any error messages about it:
> 
> Then you really are not ever freeing that kobject, so there's a
> reference counting bug in your code which you need to fix.
> 
> You will notice the real problem once you try to finally release that
> kobject, trust me :)
> 
Ok, copy that, I'll continue to dig, thanks!
Neil

> greg k-h
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v2)
  2011-09-16 13:32         ` Neil Horman
@ 2011-09-16 16:12           ` Bjorn Helgaas
  0 siblings, 0 replies; 39+ messages in thread
From: Bjorn Helgaas @ 2011-09-16 16:12 UTC (permalink / raw)
  To: Neil Horman; +Cc: Greg KH, linux-kernel, Jesse Barnes, linux-pci

[+cc linux-pci@vger.kernel.org]

On Fri, Sep 16, 2011 at 7:32 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> On Fri, Sep 16, 2011 at 03:23:19PM +0200, Greg KH wrote:
>> On Fri, Sep 16, 2011 at 06:57:16AM -0400, Neil Horman wrote:
>> > On Fri, Sep 16, 2011 at 10:36:46AM +0200, Greg KH wrote:
>> > > On Thu, Sep 15, 2011 at 04:08:38PM -0400, Neil Horman wrote:
>> > > > +static struct kobj_type msi_irq_ktype = {
>> > > > +       .sysfs_ops = &msi_irq_sysfs_ops,
>> > > > +       .default_attrs = msi_irq_default_attrs,
>> > > > +};
>> > >
>> > > You still haven't successfully removed the kobject associated with this
>> > > kobj_type.  Otherwise you would have seen the error messages in the
>> > > kernel log and fixed up what it told you to fix.
>> > >
>> > > Please do so.
>> > >
>> > Well, I'm not using kobject_del, you're right, I'm just using a kobject_put, and
>> > I swear I'm not getting any error messages about it:
>>
>> Then you really are not ever freeing that kobject, so there's a
>> reference counting bug in your code which you need to fix.
>>
>> You will notice the real problem once you try to finally release that
>> kobject, trust me :)
>>
> Ok, copy that, I'll continue to dig, thanks!
> Neil
>
>> greg k-h
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-14 18:36 [PATCH] sysfs: add per pci device msi[x] irq listing Neil Horman
  2011-09-15 14:40 ` Greg KH
  2011-09-15 20:08 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v2) Neil Horman
@ 2011-09-19 15:47 ` Neil Horman
  2011-09-19 17:14   ` Greg KH
                     ` (2 more replies)
  2011-09-29 14:38 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v4) Neil Horman
  3 siblings, 3 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-19 15:47 UTC (permalink / raw)
  To: linux-kernel; +Cc: Neil Horman, Greg Kroah-Hartman, Jesse Barnes, linux-pci

So a while back, I wanted to provide a way for irqbalance (and other apps) to
definitively map irqs to devices, which, for msi[x] irqs is currently not really
possible in user space.  My first attempt wen't not so well:
https://lkml.org/lkml/2011/4/21/308

It was plauged by the same issues that prior attempts were, namely that it
violated the one-file-one-value sysfs rule.  I wandered off but have recently
come back to this.  I've got a new implementation here that exports a new
subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
a variable number of numbered subdirectories, in which the number represents an
msi irq.  Each numbered subdirectory contains attributes for that irq, which
currently is only the mode it is operating in (msi vs. msix).  I think fits
within the constraints sysfs requires, and will allow irqbalance to properly map
msi irqs to devices without having to rely on rickety, best guess methods like
interface name matching.

Change Notes:

(v2)
Fixed up Documentation to put new sysfs interface descriptions in the right
place, as per request by Greg K-H

Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
exactly right, but looking at the crash (triggered by echo 1 >
/sys/class/net/eth0/device/remove), it looked as though we were freeing the
pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
such it seemed most appropriate to hold references on the pci_dev for each msi
irq sysfs object that we create, and release them on free accordingly.  With
this change in place, I can remove, and add (via rescan) msi enabled devices
ad-nauseum without a panic.  Again thanks to Greg K-H

(v3)
As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
producing any errors on remove, but only because I had a refcounting problem,
and my new sysfs objects were left orphaned with a dangling refcount.  I've
fixed that, added a release method to the new ktype, which now drops the
reference I hold on the pci_dev for us and I've validated that all objects I've
created, along with the parent directory and pci device are cleaned up and freed
by enabling the kobject dyanic_debug set and observing the appropriate release
calls.  I can provide the logs if anyone wants to review them specifically.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Greg Kroah-Hartman <gregkh@suse.de>
CC: Jesse Barnes <jbarnes@virtuousgeek.org>
CC: linux-pci@vger.kernel.org
---
 Documentation/ABI/testing/sysfs-bus-pci |   18 +++++
 drivers/pci/msi.c                       |  111 +++++++++++++++++++++++++++++++
 include/linux/msi.h                     |    3 +
 include/linux/pci.h                     |    1 +
 4 files changed, 133 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf2..699da99 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		subdirectories, with each subdirectory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered subdirectory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vecotor named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2f10328..73613e2 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -322,6 +322,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -402,6 +404,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -453,6 +547,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -573,6 +674,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -731,6 +838,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -829,6 +938,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced..ce93a34 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b..fff3961 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -332,6 +332,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_IOV
-- 
1.7.6.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-19 15:47 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v3) Neil Horman
@ 2011-09-19 17:14   ` Greg KH
  2011-09-19 17:33     ` Neil Horman
  2011-09-22 10:49   ` Konrad Rzeszutek Wilk
  2011-09-22 13:54   ` Matthew Wilcox
  2 siblings, 1 reply; 39+ messages in thread
From: Greg KH @ 2011-09-19 17:14 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Jesse Barnes, linux-pci

On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> So a while back, I wanted to provide a way for irqbalance (and other apps) to
> definitively map irqs to devices, which, for msi[x] irqs is currently not really
> possible in user space.  My first attempt wen't not so well:
> https://lkml.org/lkml/2011/4/21/308
> 
> It was plauged by the same issues that prior attempts were, namely that it
> violated the one-file-one-value sysfs rule.  I wandered off but have recently
> come back to this.  I've got a new implementation here that exports a new
> subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> a variable number of numbered subdirectories, in which the number represents an
> msi irq.  Each numbered subdirectory contains attributes for that irq, which
> currently is only the mode it is operating in (msi vs. msix).  I think fits
> within the constraints sysfs requires, and will allow irqbalance to properly map
> msi irqs to devices without having to rely on rickety, best guess methods like
> interface name matching.
> 
> Change Notes:
> 
> (v2)
> Fixed up Documentation to put new sysfs interface descriptions in the right
> place, as per request by Greg K-H
> 
> Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
> exactly right, but looking at the crash (triggered by echo 1 >
> /sys/class/net/eth0/device/remove), it looked as though we were freeing the
> pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
> such it seemed most appropriate to hold references on the pci_dev for each msi
> irq sysfs object that we create, and release them on free accordingly.  With
> this change in place, I can remove, and add (via rescan) msi enabled devices
> ad-nauseum without a panic.  Again thanks to Greg K-H
> 
> (v3)
> As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
> producing any errors on remove, but only because I had a refcounting problem,
> and my new sysfs objects were left orphaned with a dangling refcount.  I've
> fixed that, added a release method to the new ktype, which now drops the
> reference I hold on the pci_dev for us and I've validated that all objects I've
> created, along with the parent directory and pci device are cleaned up and freed
> by enabling the kobject dyanic_debug set and observing the appropriate release
> calls.  I can provide the logs if anyone wants to review them specifically.

Wonderful, thanks for doing this, the code now looks fine to me, and if
the PCI maintainer has no objections to it, feel free to add my:
	Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
to it.

nice job, thanks for sticking with it.

greg k-h

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-19 17:14   ` Greg KH
@ 2011-09-19 17:33     ` Neil Horman
  0 siblings, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-19 17:33 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel, Jesse Barnes, linux-pci

On Mon, Sep 19, 2011 at 10:14:11AM -0700, Greg KH wrote:
> On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > possible in user space.  My first attempt wen't not so well:
> > https://lkml.org/lkml/2011/4/21/308
> > 
> > It was plauged by the same issues that prior attempts were, namely that it
> > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > come back to this.  I've got a new implementation here that exports a new
> > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > a variable number of numbered subdirectories, in which the number represents an
> > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > within the constraints sysfs requires, and will allow irqbalance to properly map
> > msi irqs to devices without having to rely on rickety, best guess methods like
> > interface name matching.
> > 
> > Change Notes:
> > 
> > (v2)
> > Fixed up Documentation to put new sysfs interface descriptions in the right
> > place, as per request by Greg K-H
> > 
> > Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
> > exactly right, but looking at the crash (triggered by echo 1 >
> > /sys/class/net/eth0/device/remove), it looked as though we were freeing the
> > pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
> > such it seemed most appropriate to hold references on the pci_dev for each msi
> > irq sysfs object that we create, and release them on free accordingly.  With
> > this change in place, I can remove, and add (via rescan) msi enabled devices
> > ad-nauseum without a panic.  Again thanks to Greg K-H
> > 
> > (v3)
> > As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
> > producing any errors on remove, but only because I had a refcounting problem,
> > and my new sysfs objects were left orphaned with a dangling refcount.  I've
> > fixed that, added a release method to the new ktype, which now drops the
> > reference I hold on the pci_dev for us and I've validated that all objects I've
> > created, along with the parent directory and pci device are cleaned up and freed
> > by enabling the kobject dyanic_debug set and observing the appropriate release
> > calls.  I can provide the logs if anyone wants to review them specifically.
> 
> Wonderful, thanks for doing this, the code now looks fine to me, and if
> the PCI maintainer has no objections to it, feel free to add my:
> 	Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
> to it.
> 
> nice job, thanks for sticking with it.
> 
No problem, thanks for your reviews!
Neil

> greg k-h
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-19 15:47 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v3) Neil Horman
  2011-09-19 17:14   ` Greg KH
@ 2011-09-22 10:49   ` Konrad Rzeszutek Wilk
  2011-09-22 10:57     ` Neil Horman
  2011-09-22 13:17     ` Neil Horman
  2011-09-22 13:54   ` Matthew Wilcox
  2 siblings, 2 replies; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-09-22 10:49 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> So a while back, I wanted to provide a way for irqbalance (and other apps) to
> definitively map irqs to devices, which, for msi[x] irqs is currently not really
> possible in user space.  My first attempt wen't not so well:
> https://lkml.org/lkml/2011/4/21/308
> 
> It was plauged by the same issues that prior attempts were, namely that it
> violated the one-file-one-value sysfs rule.  I wandered off but have recently
> come back to this.  I've got a new implementation here that exports a new
> subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> a variable number of numbered subdirectories, in which the number represents an
> msi irq.  Each numbered subdirectory contains attributes for that irq, which
> currently is only the mode it is operating in (msi vs. msix).  I think fits
> within the constraints sysfs requires, and will allow irqbalance to properly map
> msi irqs to devices without having to rely on rickety, best guess methods like
> interface name matching.

Are there irqbalance patches that correspond to this? Where would they be available?

> 
> Change Notes:
> 
> (v2)
> Fixed up Documentation to put new sysfs interface descriptions in the right
> place, as per request by Greg K-H
> 
> Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
> exactly right, but looking at the crash (triggered by echo 1 >
> /sys/class/net/eth0/device/remove), it looked as though we were freeing the
> pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
> such it seemed most appropriate to hold references on the pci_dev for each msi
> irq sysfs object that we create, and release them on free accordingly.  With
> this change in place, I can remove, and add (via rescan) msi enabled devices
> ad-nauseum without a panic.  Again thanks to Greg K-H
> 
> (v3)
> As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
> producing any errors on remove, but only because I had a refcounting problem,
> and my new sysfs objects were left orphaned with a dangling refcount.  I've
> fixed that, added a release method to the new ktype, which now drops the
> reference I hold on the pci_dev for us and I've validated that all objects I've
> created, along with the parent directory and pci device are cleaned up and freed
> by enabling the kobject dyanic_debug set and observing the appropriate release
> calls.  I can provide the logs if anyone wants to review them specifically.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> CC: Greg Kroah-Hartman <gregkh@suse.de>
> CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> CC: linux-pci@vger.kernel.org
> ---
>  Documentation/ABI/testing/sysfs-bus-pci |   18 +++++
>  drivers/pci/msi.c                       |  111 +++++++++++++++++++++++++++++++
>  include/linux/msi.h                     |    3 +
>  include/linux/pci.h                     |    1 +
>  4 files changed, 133 insertions(+), 0 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
> index 349ecf2..699da99 100644
> --- a/Documentation/ABI/testing/sysfs-bus-pci
> +++ b/Documentation/ABI/testing/sysfs-bus-pci
> @@ -66,6 +66,24 @@ Description:
>  		re-discover previously removed devices.
>  		Depends on CONFIG_HOTPLUG.
>  
> +What:		/sys/bus/pci/devices/.../msi_irqs/
> +Date:		September, 2011
> +Contact:	Neil Horman <nhorman@tuxdriver.com>
> +Description:
> +		The /sys/devices/.../msi_irqs directory contains a variable set
> +		subdirectories, with each subdirectory being named after a
> +		corresponding msi irq vector allocated to that device.  Each
> +		numbered subdirectory N contains attributes of that irq.
> +		Note that this directory is not created for device drivers which
> +		do not support msi irqs
> +
> +What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
> +Date:		September 2011
> +Contact:	Neil Horman <nhorman@tuxdriver.com>
> +Description:
> +		This attribute indicates the mode that the irq vecotor named by

vector
> +		the parent directory is in (msi vs. msix)
> +
>  What:		/sys/bus/pci/devices/.../remove
>  Date:		January 2009
>  Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index 2f10328..73613e2 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -322,6 +322,8 @@ static void free_msi_irqs(struct pci_dev *dev)
>  			if (list_is_last(&entry->list, &dev->msi_list))
>  				iounmap(entry->mask_base);
>  		}
> +		kobject_del(&entry->kobj);
> +		kobject_put(&entry->kobj);
>  		list_del(&entry->list);
>  		kfree(entry);
>  	}
> @@ -402,6 +404,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(pci_restore_msi_state);
>  
> +
> +#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
> +#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
> +
> +struct msi_attribute {
> +	struct attribute        attr;
> +	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
> +			char *buf);
> +	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
> +			 const char *buf, size_t count);
> +};
> +
> +static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
> +			     char *buf)
> +{
> +	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
> +}
> +
> +static ssize_t msi_irq_attr_show(struct kobject *kobj,
> +				 struct attribute *attr, char *buf)
> +{
> +	struct msi_attribute *attribute = to_msi_attr(attr);
> +	struct msi_desc *entry = to_msi_desc(kobj);
> +
> +	if (!attribute->show)
> +		return -EIO;
> +
> +	return attribute->show(entry, attribute, buf);
> +}
> +
> +static const struct sysfs_ops msi_irq_sysfs_ops = {
> +	.show = msi_irq_attr_show,
> +};
> +
> +static struct msi_attribute mode_attribute =
> +	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
> +
> +
> +struct attribute *msi_irq_default_attrs[] = {
> +	&mode_attribute.attr,
> +	NULL
> +};
> +
> +void msi_kobj_release(struct kobject *kobj)
> +{
> +	struct msi_desc *entry = to_msi_desc(kobj);
> +
> +	pci_dev_put(entry->dev);
> +}
> +
> +static struct kobj_type msi_irq_ktype = {
> +	.release = msi_kobj_release,
> +	.sysfs_ops = &msi_irq_sysfs_ops,
> +	.default_attrs = msi_irq_default_attrs,
> +};
> +
> +static int populate_msi_sysfs(struct pci_dev *pdev)

So,  are there any cases where CONFIG_SYSFS is turned off and
CONFIG_MSI is set? Should there be some #ifdef CONFIG_SYSFS
magic tricks?

> +{
> +	struct msi_desc *entry;
> +	struct kobject *kobj;
> +	int ret;
> +	int count = 0;
> +
> +	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
> +	if (!pdev->msi_kset)
> +		return -ENOMEM;
> +
> +	list_for_each_entry(entry, &pdev->msi_list, list) {
> +		kobj = &entry->kobj;
> +		kobj->kset = pdev->msi_kset;
> +		pci_dev_get(pdev);
> +		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
> +				     "%u", entry->irq);
> +		if (ret)
> +			goto out_unroll;
> +
> +		count++;
> +	}
> +
> +	return 0;
> +
> +out_unroll:
> +	list_for_each_entry(entry, &pdev->msi_list, list) {
> +		if (!count)
> +			break;
> +		kobject_del(&entry->kobj);
> +		kobject_put(&entry->kobj);
> +		count--;
> +	}
> +	return ret;
> +}
> +
>  /**
>   * msi_capability_init - configure device's MSI capability structure
>   * @dev: pointer to the pci_dev data structure of MSI device function
> @@ -453,6 +547,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
>  		return ret;
>  	}
>  
> +	ret = populate_msi_sysfs(dev);
> +	if (ret) {
> +		msi_mask_irq(entry, mask, ~mask);
> +		free_msi_irqs(dev);
> +		return ret;
> +	}
> +

That is rather draconian way of doing it. I mean if the SysFS entries
can't be created, then abonden the whole thing? Why not just WARN
and continue on without creating the SysFS entries?

>  	/* Set MSI enabled bits	 */
>  	pci_intx_for_msi(dev, 0);
>  	msi_set_enable(dev, pos, 1);
> @@ -573,6 +674,12 @@ static int msix_capability_init(struct pci_dev *dev,
>  
>  	msix_program_entries(dev, entries);
>  
> +	ret = populate_msi_sysfs(dev);
> +	if (ret) {
> +		ret = 0;

Why the reset to zero?

> +		goto error;
> +	}
> +
>  	/* Set MSI-X enabled bits and unmask the function */
>  	pci_intx_for_msi(dev, 0);
>  	dev->msix_enabled = 1;
> @@ -731,6 +838,8 @@ void pci_disable_msi(struct pci_dev *dev)
>  
>  	pci_msi_shutdown(dev);
>  	free_msi_irqs(dev);
> +	kset_unregister(dev->msi_kset);
> +	dev->msi_kset = NULL;
>  }
>  EXPORT_SYMBOL(pci_disable_msi);
>  
> @@ -829,6 +938,8 @@ void pci_disable_msix(struct pci_dev *dev)
>  
>  	pci_msix_shutdown(dev);
>  	free_msi_irqs(dev);
> +	kset_unregister(dev->msi_kset);
> +	dev->msi_kset = NULL;
>  }
>  EXPORT_SYMBOL(pci_disable_msix);
>  
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 05acced..ce93a34 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -1,6 +1,7 @@
>  #ifndef LINUX_MSI_H
>  #define LINUX_MSI_H
>  
> +#include <linux/kobject.h>
>  #include <linux/list.h>
>  
>  struct msi_msg {
> @@ -44,6 +45,8 @@ struct msi_desc {
>  
>  	/* Last set MSI message */
>  	struct msi_msg msg;
> +
> +	struct kobject kobj;
>  };
>  
>  /*
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index f27893b..fff3961 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -332,6 +332,7 @@ struct pci_dev {
>  	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
>  #ifdef CONFIG_PCI_MSI
>  	struct list_head msi_list;
> +	struct kset *msi_kset;

Probably should be guarded by CONFIG_SYSFS

>  #endif
>  	struct pci_vpd *vpd;
>  #ifdef CONFIG_PCI_IOV
> -- 
> 1.7.6.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-22 10:49   ` Konrad Rzeszutek Wilk
@ 2011-09-22 10:57     ` Neil Horman
  2011-09-22 11:10       ` Konrad Rzeszutek Wilk
  2011-09-22 13:17     ` Neil Horman
  1 sibling, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-22 10:57 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Thu, Sep 22, 2011 at 06:49:02AM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > possible in user space.  My first attempt wen't not so well:
> > https://lkml.org/lkml/2011/4/21/308
> > 
> > It was plauged by the same issues that prior attempts were, namely that it
> > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > come back to this.  I've got a new implementation here that exports a new
> > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > a variable number of numbered subdirectories, in which the number represents an
> > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > within the constraints sysfs requires, and will allow irqbalance to properly map
> > msi irqs to devices without having to rely on rickety, best guess methods like
> > interface name matching.
> 
> Are there irqbalance patches that correspond to this? Where would they be available?
> 
I've got them here locally, shemminger and I are testing them out, when I'm comfortable with
them, I'll be comitting them to the public repository at code.google.com

Note, the changes to support this kernel update in irqbalance is also comming
with a major gutting that I'm doing of the daemon.  Things like making workload
bias adjustments based on packets received per interface, since theres no
guaranteed 1:1 correlation between network interfaces and irqs.  The point being
that while irq identification will definately be fixed, you might notice other
behavioral differences that we may or may not want to address.  Bug reports
would be greatly appreciated at the project site.

Thanks!
Neil


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-22 10:57     ` Neil Horman
@ 2011-09-22 11:10       ` Konrad Rzeszutek Wilk
  2011-09-22 13:21         ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-09-22 11:10 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Thu, Sep 22, 2011 at 06:57:06AM -0400, Neil Horman wrote:
> On Thu, Sep 22, 2011 at 06:49:02AM -0400, Konrad Rzeszutek Wilk wrote:
> > On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > > possible in user space.  My first attempt wen't not so well:
> > > https://lkml.org/lkml/2011/4/21/308
> > > 
> > > It was plauged by the same issues that prior attempts were, namely that it
> > > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > > come back to this.  I've got a new implementation here that exports a new
> > > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > > a variable number of numbered subdirectories, in which the number represents an
> > > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > > within the constraints sysfs requires, and will allow irqbalance to properly map
> > > msi irqs to devices without having to rely on rickety, best guess methods like
> > > interface name matching.
> > 
> > Are there irqbalance patches that correspond to this? Where would they be available?
> > 
> I've got them here locally, shemminger and I are testing them out, when I'm comfortable with
> them, I'll be comitting them to the public repository at code.google.com

Right.. asking b/c it might be a good idea to include that in the git
description of the patch.

BTW, I also had some question on the patch itself - not sure if you had a chance
to read them.
> 
> Note, the changes to support this kernel update in irqbalance is also comming
> with a major gutting that I'm doing of the daemon.  Things like making workload
> bias adjustments based on packets received per interface, since theres no
> guaranteed 1:1 correlation between network interfaces and irqs.  The point being
> that while irq identification will definately be fixed, you might notice other
> behavioral differences that we may or may not want to address.  Bug reports
> would be greatly appreciated at the project site.
> 
> Thanks!
> Neil

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-22 10:49   ` Konrad Rzeszutek Wilk
  2011-09-22 10:57     ` Neil Horman
@ 2011-09-22 13:17     ` Neil Horman
  1 sibling, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-22 13:17 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Thu, Sep 22, 2011 at 06:49:02AM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > possible in user space.  My first attempt wen't not so well:
> > https://lkml.org/lkml/2011/4/21/308
> > 
> > It was plauged by the same issues that prior attempts were, namely that it
> > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > come back to this.  I've got a new implementation here that exports a new
> > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > a variable number of numbered subdirectories, in which the number represents an
> > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > within the constraints sysfs requires, and will allow irqbalance to properly map
> > msi irqs to devices without having to rely on rickety, best guess methods like
> > interface name matching.
> 
> Are there irqbalance patches that correspond to this? Where would they be available?
> 
Sorry, missed your other in-line comments, responses below

><snip>
> > +static int populate_msi_sysfs(struct pci_dev *pdev)
> 
> So,  are there any cases where CONFIG_SYSFS is turned off and
> CONFIG_MSI is set? Should there be some #ifdef CONFIG_SYSFS
> magic tricks?
> 
Its not needed.  The kobject calls are always built in unconditionally and are
used to generate udev events.  As part of that kobject registration, sysfs files
are created, and the ifdefs for sysfs dependencies are handled internally at the
sysfs api calls.  Even if their not defined though, we still want to create the
kobjects for the uevent generation.

> > +
> 
> That is rather draconian way of doing it. I mean if the SysFS entries
> can't be created, then abonden the whole thing? Why not just WARN
> and continue on without creating the SysFS entries?
> 
Because it just seems like its asking for trouble.  If a system is so memory
constrained that it can't create kobjects for an msi interrupt, we're going to
run into other problems anyway, I'd rather not have the device being setup as a
warning, than a quiet missing set of irqs in sysfs.

> >  	/* Set MSI enabled bits	 */
> >  	pci_intx_for_msi(dev, 0);
> >  	msi_set_enable(dev, pos, 1);
> > @@ -573,6 +674,12 @@ static int msix_capability_init(struct pci_dev *dev,
> >  
> >  	msix_program_entries(dev, entries);
> >  
> > +	ret = populate_msi_sysfs(dev);
> > +	if (ret) {
> > +		ret = 0;
> 
> Why the reset to zero?
> 
look at the meaning of the return code.  As I noted above, I didn't want to be
able to allocate the msi interrupts at all if the sysfs setup failed.  Zero is
the proper value to jump to the error label with there to indicate that.  It
means we we didn't allocate any msi interrupts.

> > +++ b/include/linux/pci.h
> > @@ -332,6 +332,7 @@ struct pci_dev {
> >  	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
> >  #ifdef CONFIG_PCI_MSI
> >  	struct list_head msi_list;
> > +	struct kset *msi_kset;
> 
> Probably should be guarded by CONFIG_SYSFS
> 
Nope, see above.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-22 11:10       ` Konrad Rzeszutek Wilk
@ 2011-09-22 13:21         ` Neil Horman
  0 siblings, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-22 13:21 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Thu, Sep 22, 2011 at 07:10:46AM -0400, Konrad Rzeszutek Wilk wrote:
> On Thu, Sep 22, 2011 at 06:57:06AM -0400, Neil Horman wrote:
> > On Thu, Sep 22, 2011 at 06:49:02AM -0400, Konrad Rzeszutek Wilk wrote:
> > > On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > > > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > > > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > > > possible in user space.  My first attempt wen't not so well:
> > > > https://lkml.org/lkml/2011/4/21/308
> > > > 
> > > > It was plauged by the same issues that prior attempts were, namely that it
> > > > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > > > come back to this.  I've got a new implementation here that exports a new
> > > > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > > > a variable number of numbered subdirectories, in which the number represents an
> > > > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > > > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > > > within the constraints sysfs requires, and will allow irqbalance to properly map
> > > > msi irqs to devices without having to rely on rickety, best guess methods like
> > > > interface name matching.
> > > 
> > > Are there irqbalance patches that correspond to this? Where would they be available?
> > > 
> > I've got them here locally, shemminger and I are testing them out, when I'm comfortable with
> > them, I'll be comitting them to the public repository at code.google.com
> 
> Right.. asking b/c it might be a good idea to include that in the git
> description of the patch.
> 
meh, its generally useful, just so happens I'm updating irqbalance to use it.
irqd, and all the other vendor specific irqbalance solutions can also make use
of it.  I was going to mention the relationship in the irqbalance changelog
Neil



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-19 15:47 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v3) Neil Horman
  2011-09-19 17:14   ` Greg KH
  2011-09-22 10:49   ` Konrad Rzeszutek Wilk
@ 2011-09-22 13:54   ` Matthew Wilcox
  2011-09-22 14:32     ` Neil Horman
  2 siblings, 1 reply; 39+ messages in thread
From: Matthew Wilcox @ 2011-09-22 13:54 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> So a while back, I wanted to provide a way for irqbalance (and other apps) to
> definitively map irqs to devices, which, for msi[x] irqs is currently not really
> possible in user space.  My first attempt wen't not so well:
> https://lkml.org/lkml/2011/4/21/308
> 
> It was plauged by the same issues that prior attempts were, namely that it
> violated the one-file-one-value sysfs rule.  I wandered off but have recently
> come back to this.  I've got a new implementation here that exports a new
> subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> a variable number of numbered subdirectories, in which the number represents an
> msi irq.  Each numbered subdirectory contains attributes for that irq, which
> currently is only the mode it is operating in (msi vs. msix).  I think fits
> within the constraints sysfs requires, and will allow irqbalance to properly map
> msi irqs to devices without having to rely on rickety, best guess methods like
> interface name matching.

This approach feels like building bigger rockets instead of a space
elevator :-)

What we need is to allow device drivers to ask for per-CPU interrupts,
and implement them in terms of MSI-X.  I've made a couple of stabs at
implementing this, but haven't got anything working yet.  It would solve
a number of problems:

1. NUMA cacheline fetch.  At the moment, desc->istate gets modified by
handle_edge_irq.  handle_percpu_irq doesn't need to worry about any
of that stuff, so doesn't touch desc->istate.  I've heard this is a
significant problem for the high-speed networking people.

2. /proc/interrupts is unmanagable on large machines.  There are hundreds
of interrupts and dozens of CPUs.  This would go a long way to reducing
the number of rows in the table (doesn't do anything about the columns).

ie instead of this:

 79:          0          0          0          0          0          0          0          0   PCI-MSI-edge      eth1
 80:          0          0    9275611          0          0          0          0          0   PCI-MSI-edge      eth1-TxRx-0
 81:          0          0    9275611          0          0          0          0          0   PCI-MSI-edge      eth1-TxRx-1
 82:          0          0          0          0    9275611          0          0          0   PCI-MSI-edge      eth1-TxRx-2
 83:          0          0          0          0    9275611          0          0          0   PCI-MSI-edge      eth1-TxRx-3
 84:          0          0          0          0          0    9275611          0          0   PCI-MSI-edge      eth1-TxRx-4
 85:          0          0          0          0          0    9275611          0          0   PCI-MSI-edge      eth1-TxRx-5
 86:          0          0          0          0          0          0    9275611          0   PCI-MSI-edge      eth1-TxRx-6
 87:          0          0          0          0          0          0    9275611          0   PCI-MSI-edge      eth1-TxRx-7

We'd get this:

 79:          0          0          0          0          0          0          0          0   PCI-MSI-edge      eth1
 80:    9275611    9275611    9275611    9275611    9275611    9275611    9275611    9275611   PCI-MSI-edge      eth1-TxRx

3. /proc/irq/x/smp_affinity actually makes sense again.  It can be a
mask of which interrupts are active instead of being a degenerate case
in which only the lowest set bit is actually honoured.

4. Easier to manage for the device driver.  All it needs is to call
request_percpu_irq(...) instead of trying to figure out how many
threads/cores/numa nodes/... there are in the machine, and how many
other multi-interrupt devices there are; and thus how many interrupts
it should allocate.  That can be left to the interrupt core which at
least has a chance of getting it right.

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-22 13:54   ` Matthew Wilcox
@ 2011-09-22 14:32     ` Neil Horman
  2011-09-28 22:18       ` Bjorn Helgaas
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-22 14:32 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, linux-pci

On Thu, Sep 22, 2011 at 07:54:28AM -0600, Matthew Wilcox wrote:
> On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > possible in user space.  My first attempt wen't not so well:
> > https://lkml.org/lkml/2011/4/21/308
> > 
> > It was plauged by the same issues that prior attempts were, namely that it
> > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > come back to this.  I've got a new implementation here that exports a new
> > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > a variable number of numbered subdirectories, in which the number represents an
> > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > within the constraints sysfs requires, and will allow irqbalance to properly map
> > msi irqs to devices without having to rely on rickety, best guess methods like
> > interface name matching.
> 
> This approach feels like building bigger rockets instead of a space
> elevator :-)
> 
In which case your comments make me think that you're trying to build the
Death Star instead of buying more tie fighters :)
https://docs.google.com/viewer?url=http://www.dau.mil/pubscats/ATL%20Docs/Sep-Oct11/Ward.pdf

> What we need is to allow device drivers to ask for per-CPU interrupts,
> and implement them in terms of MSI-X.  I've made a couple of stabs at
> implementing this, but haven't got anything working yet.  It would solve
Yes, IIRC you were trying to do this the first time I proposed this:
https://lkml.org/lkml/2011/4/21/315

> a number of problems:
> 
Thats great, I don't see how this precludes what I'm trying to do here.  All
this patch does is expose a definitive relationship between msi irqs and the pci
devices that allocate them.  The kernel internal model used to allocate msi
interrupts can change, the kobject creation and removal just has to change with
it (presumably to create and destroy the msi irq kobjects when the individual
irqs are allocated/freed, rather than in a batch).  I don't see why we should
block enhancements to the existing msi implementation until you get new model
sorted, especially when this feature works equally well, despite the model we
use internally.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-22 14:32     ` Neil Horman
@ 2011-09-28 22:18       ` Bjorn Helgaas
  2011-09-29  0:42         ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Bjorn Helgaas @ 2011-09-28 22:18 UTC (permalink / raw)
  To: Neil Horman
  Cc: Matthew Wilcox, linux-kernel, Greg Kroah-Hartman, Jesse Barnes,
	linux-pci

On Thu, Sep 22, 2011 at 8:32 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
>
> On Thu, Sep 22, 2011 at 07:54:28AM -0600, Matthew Wilcox wrote:
> > On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > > possible in user space.  My first attempt wen't not so well:
> > > https://lkml.org/lkml/2011/4/21/308
> > >
> > > It was plauged by the same issues that prior attempts were, namely that it
> > > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > > come back to this.  I've got a new implementation here that exports a new
> > > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > > a variable number of numbered subdirectories, in which the number represents an
> > > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > > within the constraints sysfs requires, and will allow irqbalance to properly map
> > > msi irqs to devices without having to rely on rickety, best guess methods like
> > > interface name matching.
> >
> > This approach feels like building bigger rockets instead of a space
> > elevator :-)
> >
> In which case your comments make me think that you're trying to build the
> Death Star instead of buying more tie fighters :)
> https://docs.google.com/viewer?url=http://www.dau.mil/pubscats/ATL%20Docs/Sep-Oct11/Ward.pdf
>
> > What we need is to allow device drivers to ask for per-CPU interrupts,
> > and implement them in terms of MSI-X.  I've made a couple of stabs at
> > implementing this, but haven't got anything working yet.  It would solve
> Yes, IIRC you were trying to do this the first time I proposed this:
> https://lkml.org/lkml/2011/4/21/315
>
> > a number of problems:
> >
> Thats great, I don't see how this precludes what I'm trying to do here.  All
> this patch does is expose a definitive relationship between msi irqs and the pci
> devices that allocate them.  The kernel internal model used to allocate msi
> interrupts can change, the kobject creation and removal just has to change with
> it (presumably to create and destroy the msi irq kobjects when the individual
> irqs are allocated/freed, rather than in a batch).  I don't see why we should
> block enhancements to the existing msi implementation until you get new model
> sorted, especially when this feature works equally well, despite the model we
> use internally.

Matthew, I don't understand this issue well enough to know whether
Neil's patch would get in the way of your planned enhancements, or
whether it would be baggage we won't want to maintain forever.  As far
as I can tell, the patch exposes an (IRQ -> device) mapping, which
would still be meaningful even with per-CPU interrupts.  Can you
educate me?

Neil, why do you propose doing this just for MSI IRQs?  I would think
it'd be useful information for *all* IRQs, regardless of type, and
that exposing the mapping for all IRQs would make it easier for tools.

Also, you have a nice long changelog, but in five years, the details
about previous attempts and changes between v1/v2/v3 will be useless.
Can you replace it with a short summary of what the patch *does*,
maybe something along the lines of the
Documentation/ABI/testing/sysfs-bus-pci update?  (BTW, that update
contains a couple typos -- "set subdirectories," "vecotor")

Bjorn

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-28 22:18       ` Bjorn Helgaas
@ 2011-09-29  0:42         ` Neil Horman
  2011-09-29  4:40           ` Bjorn Helgaas
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-29  0:42 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Matthew Wilcox, linux-kernel, Greg Kroah-Hartman, Jesse Barnes,
	linux-pci

On Wed, Sep 28, 2011 at 04:18:55PM -0600, Bjorn Helgaas wrote:
> On Thu, Sep 22, 2011 at 8:32 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > On Thu, Sep 22, 2011 at 07:54:28AM -0600, Matthew Wilcox wrote:
> > > On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > > > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > > > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > > > possible in user space.  My first attempt wen't not so well:
> > > > https://lkml.org/lkml/2011/4/21/308
> > > >
> > > > It was plauged by the same issues that prior attempts were, namely that it
> > > > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > > > come back to this.  I've got a new implementation here that exports a new
> > > > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > > > a variable number of numbered subdirectories, in which the number represents an
> > > > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > > > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > > > within the constraints sysfs requires, and will allow irqbalance to properly map
> > > > msi irqs to devices without having to rely on rickety, best guess methods like
> > > > interface name matching.
> > >
> > > This approach feels like building bigger rockets instead of a space
> > > elevator :-)
> > >
> > In which case your comments make me think that you're trying to build the
> > Death Star instead of buying more tie fighters :)
> > https://docs.google.com/viewer?url=http://www.dau.mil/pubscats/ATL%20Docs/Sep-Oct11/Ward.pdf
> >
> > > What we need is to allow device drivers to ask for per-CPU interrupts,
> > > and implement them in terms of MSI-X.  I've made a couple of stabs at
> > > implementing this, but haven't got anything working yet.  It would solve
> > Yes, IIRC you were trying to do this the first time I proposed this:
> > https://lkml.org/lkml/2011/4/21/315
> >
> > > a number of problems:
> > >
> > Thats great, I don't see how this precludes what I'm trying to do here.  All
> > this patch does is expose a definitive relationship between msi irqs and the pci
> > devices that allocate them.  The kernel internal model used to allocate msi
> > interrupts can change, the kobject creation and removal just has to change with
> > it (presumably to create and destroy the msi irq kobjects when the individual
> > irqs are allocated/freed, rather than in a batch).  I don't see why we should
> > block enhancements to the existing msi implementation until you get new model
> > sorted, especially when this feature works equally well, despite the model we
> > use internally.
> 
> Matthew, I don't understand this issue well enough to know whether
> Neil's patch would get in the way of your planned enhancements, or
> whether it would be baggage we won't want to maintain forever.  As far
> as I can tell, the patch exposes an (IRQ -> device) mapping, which
> would still be meaningful even with per-CPU interrupts.  Can you
> educate me?
> 
Thats my view on the subject, to which I think I commented.  Matthews
enhancements are perfectly reasonable, but they're orthogonal to these changes.
Regardless of the way they're allocated (matthews changes), theres still an
association between the irq and the device (my changes)

> Neil, why do you propose doing this just for MSI IRQs?  I would think
> it'd be useful information for *all* IRQs, regardless of type, and
> that exposing the mapping for all IRQs would make it easier for tools.
> 
Because legacy (non-msi) irqs are already ostensibly exposed via
/proc/bus/pci/devices/.../irq.  So non-msi irqs are already covered.

> Also, you have a nice long changelog, but in five years, the details
> about previous attempts and changes between v1/v2/v3 will be useless.
> Can you replace it with a short summary of what the patch *does*,
> maybe something along the lines of the
> Documentation/ABI/testing/sysfs-bus-pci update?  (BTW, that update
> contains a couple typos -- "set subdirectories," "vecotor")
> 
Yeah, I can repost a v4 with a more concise changelog.  I'll post it tomorrow.

Thanks!
NEil

> Bjorn
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-29  0:42         ` Neil Horman
@ 2011-09-29  4:40           ` Bjorn Helgaas
  2011-09-29 13:07             ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Bjorn Helgaas @ 2011-09-29  4:40 UTC (permalink / raw)
  To: Neil Horman
  Cc: Matthew Wilcox, linux-kernel, Greg Kroah-Hartman, Jesse Barnes,
	linux-pci

On Wed, Sep 28, 2011 at 6:42 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
>
> On Wed, Sep 28, 2011 at 04:18:55PM -0600, Bjorn Helgaas wrote:
> > On Thu, Sep 22, 2011 at 8:32 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > >
> > > On Thu, Sep 22, 2011 at 07:54:28AM -0600, Matthew Wilcox wrote:
> > > > On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > > > > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > > > > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > > > > possible in user space.  My first attempt wen't not so well:
> > > > > https://lkml.org/lkml/2011/4/21/308
> > > > >
> > > > > It was plauged by the same issues that prior attempts were, namely that it
> > > > > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > > > > come back to this.  I've got a new implementation here that exports a new
> > > > > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > > > > a variable number of numbered subdirectories, in which the number represents an
> > > > > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > > > > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > > > > within the constraints sysfs requires, and will allow irqbalance to properly map
> > > > > msi irqs to devices without having to rely on rickety, best guess methods like
> > > > > interface name matching.
> > > >
> > > > This approach feels like building bigger rockets instead of a space
> > > > elevator :-)
> > > >
> > > In which case your comments make me think that you're trying to build the
> > > Death Star instead of buying more tie fighters :)
> > > https://docs.google.com/viewer?url=http://www.dau.mil/pubscats/ATL%20Docs/Sep-Oct11/Ward.pdf
> > >
> > > > What we need is to allow device drivers to ask for per-CPU interrupts,
> > > > and implement them in terms of MSI-X.  I've made a couple of stabs at
> > > > implementing this, but haven't got anything working yet.  It would solve
> > > Yes, IIRC you were trying to do this the first time I proposed this:
> > > https://lkml.org/lkml/2011/4/21/315
> > >
> > > > a number of problems:
> > > >
> > > Thats great, I don't see how this precludes what I'm trying to do here.  All
> > > this patch does is expose a definitive relationship between msi irqs and the pci
> > > devices that allocate them.  The kernel internal model used to allocate msi
> > > interrupts can change, the kobject creation and removal just has to change with
> > > it (presumably to create and destroy the msi irq kobjects when the individual
> > > irqs are allocated/freed, rather than in a batch).  I don't see why we should
> > > block enhancements to the existing msi implementation until you get new model
> > > sorted, especially when this feature works equally well, despite the model we
> > > use internally.
> >
> > Matthew, I don't understand this issue well enough to know whether
> > Neil's patch would get in the way of your planned enhancements, or
> > whether it would be baggage we won't want to maintain forever.  As far
> > as I can tell, the patch exposes an (IRQ -> device) mapping, which
> > would still be meaningful even with per-CPU interrupts.  Can you
> > educate me?
> >
> Thats my view on the subject, to which I think I commented.  Matthews
> enhancements are perfectly reasonable, but they're orthogonal to these changes.
> Regardless of the way they're allocated (matthews changes), theres still an
> association between the irq and the device (my changes)
>
> > Neil, why do you propose doing this just for MSI IRQs?  I would think
> > it'd be useful information for *all* IRQs, regardless of type, and
> > that exposing the mapping for all IRQs would make it easier for tools.
> >
> Because legacy (non-msi) irqs are already ostensibly exposed via
> /proc/bus/pci/devices/.../irq.  So non-msi irqs are already covered.

But that's a different mechanism, in a different directory hierarchy.
It seems like it could be easier for user-space if all types of IRQs
were exposed uniformly in sysfs, even if we had the leftover /proc/
stuff that only covers non-MSI IRQs.  I guess one could argue that we
shouldn't have non-MSI IRQs in both places, since we can never remove
the /proc stuff anyway.

Bjorn

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v3)
  2011-09-29  4:40           ` Bjorn Helgaas
@ 2011-09-29 13:07             ` Neil Horman
  0 siblings, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-29 13:07 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Matthew Wilcox, linux-kernel, Greg Kroah-Hartman, Jesse Barnes,
	linux-pci

On Wed, Sep 28, 2011 at 10:40:43PM -0600, Bjorn Helgaas wrote:
> On Wed, Sep 28, 2011 at 6:42 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > On Wed, Sep 28, 2011 at 04:18:55PM -0600, Bjorn Helgaas wrote:
> > > On Thu, Sep 22, 2011 at 8:32 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > > >
> > > > On Thu, Sep 22, 2011 at 07:54:28AM -0600, Matthew Wilcox wrote:
> > > > > On Mon, Sep 19, 2011 at 11:47:15AM -0400, Neil Horman wrote:
> > > > > > So a while back, I wanted to provide a way for irqbalance (and other apps) to
> > > > > > definitively map irqs to devices, which, for msi[x] irqs is currently not really
> > > > > > possible in user space.  My first attempt wen't not so well:
> > > > > > https://lkml.org/lkml/2011/4/21/308
> > > > > >
> > > > > > It was plauged by the same issues that prior attempts were, namely that it
> > > > > > violated the one-file-one-value sysfs rule.  I wandered off but have recently
> > > > > > come back to this.  I've got a new implementation here that exports a new
> > > > > > subdirectory for every pci device,  called msi_irqs.  This subdirectory contanis
> > > > > > a variable number of numbered subdirectories, in which the number represents an
> > > > > > msi irq.  Each numbered subdirectory contains attributes for that irq, which
> > > > > > currently is only the mode it is operating in (msi vs. msix).  I think fits
> > > > > > within the constraints sysfs requires, and will allow irqbalance to properly map
> > > > > > msi irqs to devices without having to rely on rickety, best guess methods like
> > > > > > interface name matching.
> > > > >
> > > > > This approach feels like building bigger rockets instead of a space
> > > > > elevator :-)
> > > > >
> > > > In which case your comments make me think that you're trying to build the
> > > > Death Star instead of buying more tie fighters :)
> > > > https://docs.google.com/viewer?url=http://www.dau.mil/pubscats/ATL%20Docs/Sep-Oct11/Ward.pdf
> > > >
> > > > > What we need is to allow device drivers to ask for per-CPU interrupts,
> > > > > and implement them in terms of MSI-X.  I've made a couple of stabs at
> > > > > implementing this, but haven't got anything working yet.  It would solve
> > > > Yes, IIRC you were trying to do this the first time I proposed this:
> > > > https://lkml.org/lkml/2011/4/21/315
> > > >
> > > > > a number of problems:
> > > > >
> > > > Thats great, I don't see how this precludes what I'm trying to do here.  All
> > > > this patch does is expose a definitive relationship between msi irqs and the pci
> > > > devices that allocate them.  The kernel internal model used to allocate msi
> > > > interrupts can change, the kobject creation and removal just has to change with
> > > > it (presumably to create and destroy the msi irq kobjects when the individual
> > > > irqs are allocated/freed, rather than in a batch).  I don't see why we should
> > > > block enhancements to the existing msi implementation until you get new model
> > > > sorted, especially when this feature works equally well, despite the model we
> > > > use internally.
> > >
> > > Matthew, I don't understand this issue well enough to know whether
> > > Neil's patch would get in the way of your planned enhancements, or
> > > whether it would be baggage we won't want to maintain forever.  As far
> > > as I can tell, the patch exposes an (IRQ -> device) mapping, which
> > > would still be meaningful even with per-CPU interrupts.  Can you
> > > educate me?
> > >
> > Thats my view on the subject, to which I think I commented.  Matthews
> > enhancements are perfectly reasonable, but they're orthogonal to these changes.
> > Regardless of the way they're allocated (matthews changes), theres still an
> > association between the irq and the device (my changes)
> >
> > > Neil, why do you propose doing this just for MSI IRQs?  I would think
> > > it'd be useful information for *all* IRQs, regardless of type, and
> > > that exposing the mapping for all IRQs would make it easier for tools.
> > >
> > Because legacy (non-msi) irqs are already ostensibly exposed via
> > /proc/bus/pci/devices/.../irq.  So non-msi irqs are already covered.
> 
> But that's a different mechanism, in a different directory hierarchy.
Its not in a different hierarchy, we have:
/sys/bus/pci/devices/<device bus:dev:fn>/irq
for legacy devices, and with this patch we now additionally have:
/sys/bus/pci/devices/device bus:dev:fn/msi_irqs/
for msi irqs

And yes, its a different mechanism, but they're different mechanisms in the
kernel.  The legacy irqs are communicated via pci config space and exposed with
all the other pci config space items in the device directory.  Since a device
may have a variable number of msi irqs, who's vectors are allocated at run time
by the OS, it makes sense to put them in a kset in a separate subdirectory off
the device to avoid polluting the device directory with a bunch of numbered
subdirs.  I suppose we could look at ways of merging the two together, but I
don't really see that as necessecary in any way, especially given that when
using msi irqs, that pci config space irq value is left unset.

> It seems like it could be easier for user-space if all types of IRQs
> were exposed uniformly in sysfs, even if we had the leftover /proc/
Not really.  The big user of this information is daemons like irqbalance, and it
took me 5 minutes to write the code to do the parsing of both legacy and
msi_irqs.  In fact it was kind of useful to have them separate, since the path
implied the type of irq (legacy vs msi[x]).  Its available at:
http://code.google.com/p/irqbalance
if you want to look at it.

I understand that merging the two might be nice, but its really unnecessecary,
from a user space perspective.

> stuff that only covers non-MSI IRQs.  I guess one could argue that we
> shouldn't have non-MSI IRQs in both places, since we can never remove
> the /proc stuff anyway.
> 
The /proc/interrupts file covers both legacy and msi interrupts.  The reason I
want to add the msi code here to allow us to draw a definitive connection
between a given msi interrupt and its pci device without having to do haphazzard
guessing based on device names or other strings.

Neil


> Bjorn
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-14 18:36 [PATCH] sysfs: add per pci device msi[x] irq listing Neil Horman
                   ` (2 preceding siblings ...)
  2011-09-19 15:47 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v3) Neil Horman
@ 2011-09-29 14:38 ` Neil Horman
  2011-09-29 14:51   ` Greg KH
  2011-09-30 12:32   ` Stefan Richter
  3 siblings, 2 replies; 39+ messages in thread
From: Neil Horman @ 2011-09-29 14:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: Neil Horman, Greg Kroah-Hartman, Jesse Barnes, Bjorn Helgaas, linux-pci

This patch adds a per-pci-device subdirectory in sysfs called:
/sys/bus/pci/devices/<device>/msi_irqs

This sub-directory exports the set of msi vectors allocated by a given
pci device, by creating a numbered sub-directory for each vector beneath
msi_irqs.  For each vector various attributes can be exported.  Currently the
only attribute is called mode, which tracks the operational mode of that vector
(msi vs. msix)

---

Change Notes:

(v2)
Fixed up Documentation to put new sysfs interface descriptions in the right
place, as per request by Greg K-H

Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
exactly right, but looking at the crash (triggered by echo 1 >
/sys/class/net/eth0/device/remove), it looked as though we were freeing the
pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
such it seemed most appropriate to hold references on the pci_dev for each msi
irq sysfs object that we create, and release them on free accordingly.  With
this change in place, I can remove, and add (via rescan) msi enabled devices
ad-nauseum without a panic.  Again thanks to Greg K-H

(v3)
As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
producing any errors on remove, but only because I had a refcounting problem,
and my new sysfs objects were left orphaned with a dangling refcount.  I've
fixed that, added a release method to the new ktype, which now drops the
reference I hold on the pci_dev for us and I've validated that all objects I've
created, along with the parent directory and pci device are cleaned up and freed
by enabling the kobject dyanic_debug set and observing the appropriate release
calls.  I can provide the logs if anyone wants to review them specifically.

(v4)
Fixed up some spelling mistakes, and added a scissors line with a good
commitlog, so that git-am drops all the version logging

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Greg Kroah-Hartman <gregkh@suse.de>
CC: Jesse Barnes <jbarnes@virtuousgeek.org>
CC: Bjorn Helgaas <bhelgaas@google.com>
CC: linux-pci@vger.kernel.org
---
 Documentation/ABI/testing/sysfs-bus-pci |   18 +++++
 drivers/pci/msi.c                       |  111 +++++++++++++++++++++++++++++++
 include/linux/msi.h                     |    3 +
 include/linux/pci.h                     |    1 +
 4 files changed, 133 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf2..2e445b7 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		sub-directories, with each sub-directory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered sub-directory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vector named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2f10328..73613e2 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -322,6 +322,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -402,6 +404,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -453,6 +547,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -573,6 +674,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -731,6 +838,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -829,6 +938,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced..ce93a34 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b..fff3961 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -332,6 +332,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_IOV
-- 
1.7.6.2


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-29 14:38 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v4) Neil Horman
@ 2011-09-29 14:51   ` Greg KH
  2011-09-30 12:32   ` Stefan Richter
  1 sibling, 0 replies; 39+ messages in thread
From: Greg KH @ 2011-09-29 14:51 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, Jesse Barnes, Bjorn Helgaas, linux-pci

On Thu, Sep 29, 2011 at 10:38:49AM -0400, Neil Horman wrote:
> This patch adds a per-pci-device subdirectory in sysfs called:
> /sys/bus/pci/devices/<device>/msi_irqs
> 
> This sub-directory exports the set of msi vectors allocated by a given
> pci device, by creating a numbered sub-directory for each vector beneath
> msi_irqs.  For each vector various attributes can be exported.  Currently the
> only attribute is called mode, which tracks the operational mode of that vector
> (msi vs. msix)
> 
> ---
> 
> Change Notes:
> 
> (v2)
> Fixed up Documentation to put new sysfs interface descriptions in the right
> place, as per request by Greg K-H
> 
> Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
> exactly right, but looking at the crash (triggered by echo 1 >
> /sys/class/net/eth0/device/remove), it looked as though we were freeing the
> pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
> such it seemed most appropriate to hold references on the pci_dev for each msi
> irq sysfs object that we create, and release them on free accordingly.  With
> this change in place, I can remove, and add (via rescan) msi enabled devices
> ad-nauseum without a panic.  Again thanks to Greg K-H
> 
> (v3)
> As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
> producing any errors on remove, but only because I had a refcounting problem,
> and my new sysfs objects were left orphaned with a dangling refcount.  I've
> fixed that, added a release method to the new ktype, which now drops the
> reference I hold on the pci_dev for us and I've validated that all objects I've
> created, along with the parent directory and pci device are cleaned up and freed
> by enabling the kobject dyanic_debug set and observing the appropriate release
> calls.  I can provide the logs if anyone wants to review them specifically.
> 
> (v4)
> Fixed up some spelling mistakes, and added a scissors line with a good
> commitlog, so that git-am drops all the version logging
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> CC: Greg Kroah-Hartman <gregkh@suse.de>

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-29 14:38 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v4) Neil Horman
  2011-09-29 14:51   ` Greg KH
@ 2011-09-30 12:32   ` Stefan Richter
  2011-09-30 15:33     ` Neil Horman
  1 sibling, 1 reply; 39+ messages in thread
From: Stefan Richter @ 2011-09-30 12:32 UTC (permalink / raw)
  To: Neil Horman
  Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, Bjorn Helgaas, linux-pci

On Sep 29 Neil Horman wrote:
> (v4)
> Fixed up some spelling mistakes, and added a scissors line with a good
> commitlog, so that git-am drops all the version logging
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> CC: Greg Kroah-Hartman <gregkh@suse.de>
> CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> CC: Bjorn Helgaas <bhelgaas@google.com>
> CC: linux-pci@vger.kernel.org

It drops these last 5 lines then too, doesn't it?

> --- a/Documentation/ABI/testing/sysfs-bus-pci
> +++ b/Documentation/ABI/testing/sysfs-bus-pci
> @@ -66,6 +66,24 @@ Description:
>  		re-discover previously removed devices.
>  		Depends on CONFIG_HOTPLUG.
>  
> +What:		/sys/bus/pci/devices/.../msi_irqs/
> +Date:		September, 2011
> +Contact:	Neil Horman <nhorman@tuxdriver.com>
> +Description:
> +		The /sys/devices/.../msi_irqs directory contains a variable set
> +		sub-directories, with each sub-directory being named after a
> +		corresponding msi irq vector allocated to that device.

"contains a variable set _of_ sub-directories"?
-- 
Stefan Richter
-=====-==-== =--= ====-
http://arcgraph.de/sr/

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-30 12:32   ` Stefan Richter
@ 2011-09-30 15:33     ` Neil Horman
  2011-09-30 16:33       ` Bjorn Helgaas
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-30 15:33 UTC (permalink / raw)
  To: Stefan Richter
  Cc: linux-kernel, Greg Kroah-Hartman, Jesse Barnes, Bjorn Helgaas, linux-pci

On Fri, Sep 30, 2011 at 02:32:11PM +0200, Stefan Richter wrote:
> On Sep 29 Neil Horman wrote:
> > (v4)
> > Fixed up some spelling mistakes, and added a scissors line with a good
> > commitlog, so that git-am drops all the version logging
> > 
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > CC: Greg Kroah-Hartman <gregkh@suse.de>
> > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > CC: Bjorn Helgaas <bhelgaas@google.com>
> > CC: linux-pci@vger.kernel.org
> 
> It drops these last 5 lines then too, doesn't it?
> 
I would have thought so, but I tested a git-format-patch / git-am operation here
on a temporary branch, and it worked properly.

> > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > @@ -66,6 +66,24 @@ Description:
> >  		re-discover previously removed devices.
> >  		Depends on CONFIG_HOTPLUG.
> >  
> > +What:		/sys/bus/pci/devices/.../msi_irqs/
> > +Date:		September, 2011
> > +Contact:	Neil Horman <nhorman@tuxdriver.com>
> > +Description:
> > +		The /sys/devices/.../msi_irqs directory contains a variable set
> > +		sub-directories, with each sub-directory being named after a
> > +		corresponding msi irq vector allocated to that device.
> 
> "contains a variable set _of_ sub-directories"?
Gah, bad ispell foo on my part.  Bjorn, can you add that in, or shall I repost?
Neil

> -- 
> Stefan Richter
> -=====-==-== =--= ====-
> http://arcgraph.de/sr/
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-30 15:33     ` Neil Horman
@ 2011-09-30 16:33       ` Bjorn Helgaas
  2011-09-30 16:54         ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Bjorn Helgaas @ 2011-09-30 16:33 UTC (permalink / raw)
  To: Neil Horman
  Cc: Stefan Richter, linux-kernel, Greg Kroah-Hartman, Jesse Barnes,
	linux-pci

On Fri, Sep 30, 2011 at 9:33 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
>
> On Fri, Sep 30, 2011 at 02:32:11PM +0200, Stefan Richter wrote:
> > On Sep 29 Neil Horman wrote:
> > > (v4)
> > > Fixed up some spelling mistakes, and added a scissors line with a good
> > > commitlog, so that git-am drops all the version logging
> > >
> > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > > CC: Greg Kroah-Hartman <gregkh@suse.de>
> > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > CC: Bjorn Helgaas <bhelgaas@google.com>
> > > CC: linux-pci@vger.kernel.org
> >
> > It drops these last 5 lines then too, doesn't it?
> >
> I would have thought so, but I tested a git-format-patch / git-am operation here
> on a temporary branch, and it worked properly.
>
> > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > @@ -66,6 +66,24 @@ Description:
> > >             re-discover previously removed devices.
> > >             Depends on CONFIG_HOTPLUG.
> > >
> > > +What:              /sys/bus/pci/devices/.../msi_irqs/
> > > +Date:              September, 2011
> > > +Contact:   Neil Horman <nhorman@tuxdriver.com>
> > > +Description:
> > > +           The /sys/devices/.../msi_irqs directory contains a variable set
> > > +           sub-directories, with each sub-directory being named after a
> > > +           corresponding msi irq vector allocated to that device.
> >
> > "contains a variable set _of_ sub-directories"?
> Gah, bad ispell foo on my part.  Bjorn, can you add that in, or shall I repost?

I'll try to remember to add that in.

Bjorn

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-30 16:33       ` Bjorn Helgaas
@ 2011-09-30 16:54         ` Neil Horman
  2011-10-06 15:36           ` Jesse Barnes
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-09-30 16:54 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Stefan Richter, linux-kernel, Greg Kroah-Hartman, Jesse Barnes,
	linux-pci

On Fri, Sep 30, 2011 at 10:33:36AM -0600, Bjorn Helgaas wrote:
> On Fri, Sep 30, 2011 at 9:33 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > On Fri, Sep 30, 2011 at 02:32:11PM +0200, Stefan Richter wrote:
> > > On Sep 29 Neil Horman wrote:
> > > > (v4)
> > > > Fixed up some spelling mistakes, and added a scissors line with a good
> > > > commitlog, so that git-am drops all the version logging
> > > >
> > > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > > > CC: Greg Kroah-Hartman <gregkh@suse.de>
> > > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > CC: Bjorn Helgaas <bhelgaas@google.com>
> > > > CC: linux-pci@vger.kernel.org
> > >
> > > It drops these last 5 lines then too, doesn't it?
> > >
> > I would have thought so, but I tested a git-format-patch / git-am operation here
> > on a temporary branch, and it worked properly.
> >
> > > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > > @@ -66,6 +66,24 @@ Description:
> > > >             re-discover previously removed devices.
> > > >             Depends on CONFIG_HOTPLUG.
> > > >
> > > > +What:              /sys/bus/pci/devices/.../msi_irqs/
> > > > +Date:              September, 2011
> > > > +Contact:   Neil Horman <nhorman@tuxdriver.com>
> > > > +Description:
> > > > +           The /sys/devices/.../msi_irqs directory contains a variable set
> > > > +           sub-directories, with each sub-directory being named after a
> > > > +           corresponding msi irq vector allocated to that device.
> > >
> > > "contains a variable set _of_ sub-directories"?
> > Gah, bad ispell foo on my part.  Bjorn, can you add that in, or shall I repost?
> 
> I'll try to remember to add that in.
> 
Thank you, apologies for my poor writing skills :)
Neil

> Bjorn
> 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-09-30 16:54         ` Neil Horman
@ 2011-10-06 15:36           ` Jesse Barnes
  2011-10-06 17:12             ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Jesse Barnes @ 2011-10-06 15:36 UTC (permalink / raw)
  To: Neil Horman
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci

[-- Attachment #1: Type: text/plain, Size: 2117 bytes --]

On Fri, 30 Sep 2011 12:54:03 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> On Fri, Sep 30, 2011 at 10:33:36AM -0600, Bjorn Helgaas wrote:
> > On Fri, Sep 30, 2011 at 9:33 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > >
> > > On Fri, Sep 30, 2011 at 02:32:11PM +0200, Stefan Richter wrote:
> > > > On Sep 29 Neil Horman wrote:
> > > > > (v4)
> > > > > Fixed up some spelling mistakes, and added a scissors line with a good
> > > > > commitlog, so that git-am drops all the version logging
> > > > >
> > > > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > > > > CC: Greg Kroah-Hartman <gregkh@suse.de>
> > > > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > CC: Bjorn Helgaas <bhelgaas@google.com>
> > > > > CC: linux-pci@vger.kernel.org
> > > >
> > > > It drops these last 5 lines then too, doesn't it?
> > > >
> > > I would have thought so, but I tested a git-format-patch / git-am operation here
> > > on a temporary branch, and it worked properly.
> > >
> > > > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > > > @@ -66,6 +66,24 @@ Description:
> > > > >             re-discover previously removed devices.
> > > > >             Depends on CONFIG_HOTPLUG.
> > > > >
> > > > > +What:              /sys/bus/pci/devices/.../msi_irqs/
> > > > > +Date:              September, 2011
> > > > > +Contact:   Neil Horman <nhorman@tuxdriver.com>
> > > > > +Description:
> > > > > +           The /sys/devices/.../msi_irqs directory contains a variable set
> > > > > +           sub-directories, with each sub-directory being named after a
> > > > > +           corresponding msi irq vector allocated to that device.
> > > >
> > > > "contains a variable set _of_ sub-directories"?
> > > Gah, bad ispell foo on my part.  Bjorn, can you add that in, or shall I repost?
> > 
> > I'll try to remember to add that in.
> > 
> Thank you, apologies for my poor writing skills :)
> Neil

Is there a final patch available for me to queue up?

-- 
Jesse Barnes, Intel Open Source Technology Center

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-10-06 15:36           ` Jesse Barnes
@ 2011-10-06 17:12             ` Neil Horman
  2011-10-06 17:57               ` Bjorn Helgaas
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-10-06 17:12 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci

On Thu, Oct 06, 2011 at 08:36:07AM -0700, Jesse Barnes wrote:
> On Fri, 30 Sep 2011 12:54:03 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Fri, Sep 30, 2011 at 10:33:36AM -0600, Bjorn Helgaas wrote:
> > > On Fri, Sep 30, 2011 at 9:33 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > > >
> > > > On Fri, Sep 30, 2011 at 02:32:11PM +0200, Stefan Richter wrote:
> > > > > On Sep 29 Neil Horman wrote:
> > > > > > (v4)
> > > > > > Fixed up some spelling mistakes, and added a scissors line with a good
> > > > > > commitlog, so that git-am drops all the version logging
> > > > > >
> > > > > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > > > > > CC: Greg Kroah-Hartman <gregkh@suse.de>
> > > > > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > > CC: Bjorn Helgaas <bhelgaas@google.com>
> > > > > > CC: linux-pci@vger.kernel.org
> > > > >
> > > > > It drops these last 5 lines then too, doesn't it?
> > > > >
> > > > I would have thought so, but I tested a git-format-patch / git-am operation here
> > > > on a temporary branch, and it worked properly.
> > > >
> > > > > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > > > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > > > > @@ -66,6 +66,24 @@ Description:
> > > > > >             re-discover previously removed devices.
> > > > > >             Depends on CONFIG_HOTPLUG.
> > > > > >
> > > > > > +What:              /sys/bus/pci/devices/.../msi_irqs/
> > > > > > +Date:              September, 2011
> > > > > > +Contact:   Neil Horman <nhorman@tuxdriver.com>
> > > > > > +Description:
> > > > > > +           The /sys/devices/.../msi_irqs directory contains a variable set
> > > > > > +           sub-directories, with each sub-directory being named after a
> > > > > > +           corresponding msi irq vector allocated to that device.
> > > > >
> > > > > "contains a variable set _of_ sub-directories"?
> > > > Gah, bad ispell foo on my part.  Bjorn, can you add that in, or shall I repost?
> > > 
> > > I'll try to remember to add that in.
> > > 
> > Thank you, apologies for my poor writing skills :)
> > Neil
> 
> Is there a final patch available for me to queue up?
> 
I was under the impression that v4 was the version to queue up.  Stefan had
found that I left the word of out of the documentation at one location but Bjorn
was kind enough to offer to fix it up on commit.

Regards
Neil




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v4)
  2011-10-06 17:12             ` Neil Horman
@ 2011-10-06 17:57               ` Bjorn Helgaas
  2011-10-06 18:08                 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v5) Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Bjorn Helgaas @ 2011-10-06 17:57 UTC (permalink / raw)
  To: Neil Horman
  Cc: Jesse Barnes, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci

On Thu, Oct 6, 2011 at 11:12 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
>
> On Thu, Oct 06, 2011 at 08:36:07AM -0700, Jesse Barnes wrote:
> > On Fri, 30 Sep 2011 12:54:03 -0400
> > Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > > On Fri, Sep 30, 2011 at 10:33:36AM -0600, Bjorn Helgaas wrote:
> > > > On Fri, Sep 30, 2011 at 9:33 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > > > >
> > > > > On Fri, Sep 30, 2011 at 02:32:11PM +0200, Stefan Richter wrote:
> > > > > > On Sep 29 Neil Horman wrote:
> > > > > > > (v4)
> > > > > > > Fixed up some spelling mistakes, and added a scissors line with a good
> > > > > > > commitlog, so that git-am drops all the version logging
> > > > > > >
> > > > > > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > > > > > > CC: Greg Kroah-Hartman <gregkh@suse.de>
> > > > > > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > > > CC: Bjorn Helgaas <bhelgaas@google.com>
> > > > > > > CC: linux-pci@vger.kernel.org
> > > > > >
> > > > > > It drops these last 5 lines then too, doesn't it?
> > > > > >
> > > > > I would have thought so, but I tested a git-format-patch / git-am operation here
> > > > > on a temporary branch, and it worked properly.
> > > > >
> > > > > > > --- a/Documentation/ABI/testing/sysfs-bus-pci
> > > > > > > +++ b/Documentation/ABI/testing/sysfs-bus-pci
> > > > > > > @@ -66,6 +66,24 @@ Description:
> > > > > > >             re-discover previously removed devices.
> > > > > > >             Depends on CONFIG_HOTPLUG.
> > > > > > >
> > > > > > > +What:              /sys/bus/pci/devices/.../msi_irqs/
> > > > > > > +Date:              September, 2011
> > > > > > > +Contact:   Neil Horman <nhorman@tuxdriver.com>
> > > > > > > +Description:
> > > > > > > +           The /sys/devices/.../msi_irqs directory contains a variable set
> > > > > > > +           sub-directories, with each sub-directory being named after a
> > > > > > > +           corresponding msi irq vector allocated to that device.
> > > > > >
> > > > > > "contains a variable set _of_ sub-directories"?
> > > > > Gah, bad ispell foo on my part.  Bjorn, can you add that in, or shall I repost?
> > > >
> > > > I'll try to remember to add that in.
> > > >
> > > Thank you, apologies for my poor writing skills :)
> > > Neil
> >
> > Is there a final patch available for me to queue up?
> >
> I was under the impression that v4 was the version to queue up.  Stefan had
> found that I left the word of out of the documentation at one location but Bjorn
> was kind enough to offer to fix it up on commit.

I intended to set up a git tree for the merge window, but Jesse came
back from vacation, so I don't think I'll do that after all.

Can you just repost it with the typo correction for Jesse?

Bjorn

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-10-06 17:57               ` Bjorn Helgaas
@ 2011-10-06 18:08                 ` Neil Horman
  2011-10-14 16:21                   ` Jesse Barnes
  0 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-10-06 18:08 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Jesse Barnes, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci

This patch adds a per-pci-device subdirectory in sysfs called:
/sys/bus/pci/devices/<device>/msi_irqs

This sub-directory exports the set of msi vectors allocated by a given
pci device, by creating a numbered sub-directory for each vector beneath
msi_irqs.  For each vector various attributes can be exported.  Currently the
only attribute is called mode, which tracks the operational mode of that vector
(msi vs. msix)

---

Change Notes:

(v2)
Fixed up Documentation to put new sysfs interface descriptions in the right
place, as per request by Greg K-H

Fixed up oops that resulted from removing pci device.  Not 100% sure I did this
exactly right, but looking at the crash (triggered by echo 1 >
/sys/class/net/eth0/device/remove), it looked as though we were freeing the
pci_dev struct prior to all sysfs objects releasing their use of the device.  AS
such it seemed most appropriate to hold references on the pci_dev for each msi
irq sysfs object that we create, and release them on free accordingly.  With
this change in place, I can remove, and add (via rescan) msi enabled devices
ad-nauseum without a panic.  Again thanks to Greg K-H

(v3)
As per Gregs suggestion, I looked further and noted that in fact, yes, it wasn't
producing any errors on remove, but only because I had a refcounting problem,
and my new sysfs objects were left orphaned with a dangling refcount.  I've
fixed that, added a release method to the new ktype, which now drops the
reference I hold on the pci_dev for us and I've validated that all objects I've
created, along with the parent directory and pci device are cleaned up and freed
by enabling the kobject dyanic_debug set and observing the appropriate release
calls.  I can provide the logs if anyone wants to review them specifically.

(v4)
Fixed up some spelling mistakes, and added a scissors line with a good
commitlog, so that git-am drops all the version logging

(v5)
Repost for Jesse with typo corrected

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>

CC: Jesse Barnes <jbarnes@virtuousgeek.org>
CC: Bjorn Helgaas <bhelgaas@google.com>
CC: linux-pci@vger.kernel.org
---
 Documentation/ABI/testing/sysfs-bus-pci |   18 +++++
 drivers/pci/msi.c                       |  111 +++++++++++++++++++++++++++++++
 include/linux/msi.h                     |    3 +
 include/linux/pci.h                     |    1 +
 4 files changed, 133 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf2..2e445b7 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		of sub-directories, with each sub-directory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered sub-directory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vector named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2f10328..73613e2 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -322,6 +322,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -402,6 +404,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -453,6 +547,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -573,6 +674,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -731,6 +838,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -829,6 +938,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced..ce93a34 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index f27893b..fff3961 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -332,6 +332,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_IOV
-- 
1.7.6.2



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-10-06 18:08                 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v5) Neil Horman
@ 2011-10-14 16:21                   ` Jesse Barnes
  2011-10-14 16:40                     ` Greg KH
                                       ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Jesse Barnes @ 2011-10-14 16:21 UTC (permalink / raw)
  To: Neil Horman
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci, Matthew Wilcox

[-- Attachment #1: Type: text/plain, Size: 1183 bytes --]

On Thu, 6 Oct 2011 14:08:18 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> This patch adds a per-pci-device subdirectory in sysfs called:
> /sys/bus/pci/devices/<device>/msi_irqs
> 
> This sub-directory exports the set of msi vectors allocated by a given
> pci device, by creating a numbered sub-directory for each vector beneath
> msi_irqs.  For each vector various attributes can be exported.  Currently the
> only attribute is called mode, which tracks the operational mode of that vector
> (msi vs. msix)
> 

Ok this adds new ABI in the form of sysfs files so I want to make sure
it's ok.

Matthew has dreams of a more sophisticated MSI-X management scheme, but
it sounds to me like that will mostly affect the driver interfaces and
shouldn't be incompatible with the sysfs scheme you propose here.

If so, great, we can pull this in and maybe one day someone will
implement better MSI-X support.

If not, I'd like to hear objections now; Neil has been responsive and
fixed several issues.  I don't want to block his patch unless we have
something concrete that it conflicts with.

Thanks,
-- 
Jesse Barnes, Intel Open Source Technology Center

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-10-14 16:21                   ` Jesse Barnes
@ 2011-10-14 16:40                     ` Greg KH
  2011-10-14 17:31                     ` Neil Horman
  2011-11-01 16:47                     ` Neil Horman
  2 siblings, 0 replies; 39+ messages in thread
From: Greg KH @ 2011-10-14 16:40 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Neil Horman, Bjorn Helgaas, Stefan Richter, linux-kernel,
	linux-pci, Matthew Wilcox

On Fri, Oct 14, 2011 at 09:21:48AM -0700, Jesse Barnes wrote:
> On Thu, 6 Oct 2011 14:08:18 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > This patch adds a per-pci-device subdirectory in sysfs called:
> > /sys/bus/pci/devices/<device>/msi_irqs
> > 
> > This sub-directory exports the set of msi vectors allocated by a given
> > pci device, by creating a numbered sub-directory for each vector beneath
> > msi_irqs.  For each vector various attributes can be exported.  Currently the
> > only attribute is called mode, which tracks the operational mode of that vector
> > (msi vs. msix)
> > 
> 
> Ok this adds new ABI in the form of sysfs files so I want to make sure
> it's ok.
> 
> Matthew has dreams of a more sophisticated MSI-X management scheme, but
> it sounds to me like that will mostly affect the driver interfaces and
> shouldn't be incompatible with the sysfs scheme you propose here.
> 
> If so, great, we can pull this in and maybe one day someone will
> implement better MSI-X support.
> 
> If not, I'd like to hear objections now; Neil has been responsive and
> fixed several issues.  I don't want to block his patch unless we have
> something concrete that it conflicts with.

Again, no objection from me at all.

greg k-h

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-10-14 16:21                   ` Jesse Barnes
  2011-10-14 16:40                     ` Greg KH
@ 2011-10-14 17:31                     ` Neil Horman
  2011-11-01 16:47                     ` Neil Horman
  2 siblings, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-10-14 17:31 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci, Matthew Wilcox

On Fri, Oct 14, 2011 at 09:21:48AM -0700, Jesse Barnes wrote:
> On Thu, 6 Oct 2011 14:08:18 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > This patch adds a per-pci-device subdirectory in sysfs called:
> > /sys/bus/pci/devices/<device>/msi_irqs
> > 
> > This sub-directory exports the set of msi vectors allocated by a given
> > pci device, by creating a numbered sub-directory for each vector beneath
> > msi_irqs.  For each vector various attributes can be exported.  Currently the
> > only attribute is called mode, which tracks the operational mode of that vector
> > (msi vs. msix)
> > 
> 
> Ok this adds new ABI in the form of sysfs files so I want to make sure
> it's ok.
> 
> Matthew has dreams of a more sophisticated MSI-X management scheme, but
> it sounds to me like that will mostly affect the driver interfaces and
> shouldn't be incompatible with the sysfs scheme you propose here.
> 
> If so, great, we can pull this in and maybe one day someone will
> implement better MSI-X support.
> 
> If not, I'd like to hear objections now; Neil has been responsive and
> fixed several issues.  I don't want to block his patch unless we have
> something concrete that it conflicts with.
> 
> Thanks,
> -- 
> Jesse Barnes, Intel Open Source Technology Center


Thanks Jesse, Greg, Bjorn, I appreciate all the input here.
Neil
 

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-10-14 16:21                   ` Jesse Barnes
  2011-10-14 16:40                     ` Greg KH
  2011-10-14 17:31                     ` Neil Horman
@ 2011-11-01 16:47                     ` Neil Horman
  2011-11-01 16:58                       ` Jesse Barnes
  2 siblings, 1 reply; 39+ messages in thread
From: Neil Horman @ 2011-11-01 16:47 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci, Matthew Wilcox

On Fri, Oct 14, 2011 at 09:21:48AM -0700, Jesse Barnes wrote:
> On Thu, 6 Oct 2011 14:08:18 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > This patch adds a per-pci-device subdirectory in sysfs called:
> > /sys/bus/pci/devices/<device>/msi_irqs
> > 
> > This sub-directory exports the set of msi vectors allocated by a given
> > pci device, by creating a numbered sub-directory for each vector beneath
> > msi_irqs.  For each vector various attributes can be exported.  Currently the
> > only attribute is called mode, which tracks the operational mode of that vector
> > (msi vs. msix)
> > 
> 
> Ok this adds new ABI in the form of sysfs files so I want to make sure
> it's ok.
> 
> Matthew has dreams of a more sophisticated MSI-X management scheme, but
> it sounds to me like that will mostly affect the driver interfaces and
> shouldn't be incompatible with the sysfs scheme you propose here.
> 
> If so, great, we can pull this in and maybe one day someone will
> implement better MSI-X support.
> 
> If not, I'd like to hear objections now; Neil has been responsive and
> fixed several issues.  I don't want to block his patch unless we have
> something concrete that it conflicts with.
> 
> Thanks,
> -- 
> Jesse Barnes, Intel Open Source Technology Center
Ping Jesse, has something gone awry here? I've not seen this show up in your
tree yet.

Thanks & Regards
Neil




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-11-01 16:47                     ` Neil Horman
@ 2011-11-01 16:58                       ` Jesse Barnes
  2011-11-01 18:05                         ` Neil Horman
  0 siblings, 1 reply; 39+ messages in thread
From: Jesse Barnes @ 2011-11-01 16:58 UTC (permalink / raw)
  To: Neil Horman
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci, Matthew Wilcox

[-- Attachment #1: Type: text/plain, Size: 1746 bytes --]

On Tue, 1 Nov 2011 12:47:26 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> On Fri, Oct 14, 2011 at 09:21:48AM -0700, Jesse Barnes wrote:
> > On Thu, 6 Oct 2011 14:08:18 -0400
> > Neil Horman <nhorman@tuxdriver.com> wrote:
> > 
> > > This patch adds a per-pci-device subdirectory in sysfs called:
> > > /sys/bus/pci/devices/<device>/msi_irqs
> > > 
> > > This sub-directory exports the set of msi vectors allocated by a given
> > > pci device, by creating a numbered sub-directory for each vector beneath
> > > msi_irqs.  For each vector various attributes can be exported.  Currently the
> > > only attribute is called mode, which tracks the operational mode of that vector
> > > (msi vs. msix)
> > > 
> > 
> > Ok this adds new ABI in the form of sysfs files so I want to make sure
> > it's ok.
> > 
> > Matthew has dreams of a more sophisticated MSI-X management scheme, but
> > it sounds to me like that will mostly affect the driver interfaces and
> > shouldn't be incompatible with the sysfs scheme you propose here.
> > 
> > If so, great, we can pull this in and maybe one day someone will
> > implement better MSI-X support.
> > 
> > If not, I'd like to hear objections now; Neil has been responsive and
> > fixed several issues.  I don't want to block his patch unless we have
> > something concrete that it conflicts with.
> > 
> > Thanks,
> > -- 
> > Jesse Barnes, Intel Open Source Technology Center
> Ping Jesse, has something gone awry here? I've not seen this show up in your
> tree yet.

It's sitting in my queue and I've timed out waiting for objections.  So
I'll put it in my -next tree shortly.  Thanks for your patience Neil.

-- 
Jesse Barnes, Intel Open Source Technology Center

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] sysfs: add per pci device msi[x] irq listing (v5)
  2011-11-01 16:58                       ` Jesse Barnes
@ 2011-11-01 18:05                         ` Neil Horman
  0 siblings, 0 replies; 39+ messages in thread
From: Neil Horman @ 2011-11-01 18:05 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Bjorn Helgaas, Stefan Richter, linux-kernel, Greg Kroah-Hartman,
	linux-pci, Matthew Wilcox

On Tue, Nov 01, 2011 at 09:58:33AM -0700, Jesse Barnes wrote:
> On Tue, 1 Nov 2011 12:47:26 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Fri, Oct 14, 2011 at 09:21:48AM -0700, Jesse Barnes wrote:
> > > On Thu, 6 Oct 2011 14:08:18 -0400
> > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > 
> > > > This patch adds a per-pci-device subdirectory in sysfs called:
> > > > /sys/bus/pci/devices/<device>/msi_irqs
> > > > 
> > > > This sub-directory exports the set of msi vectors allocated by a given
> > > > pci device, by creating a numbered sub-directory for each vector beneath
> > > > msi_irqs.  For each vector various attributes can be exported.  Currently the
> > > > only attribute is called mode, which tracks the operational mode of that vector
> > > > (msi vs. msix)
> > > > 
> > > 
> > > Ok this adds new ABI in the form of sysfs files so I want to make sure
> > > it's ok.
> > > 
> > > Matthew has dreams of a more sophisticated MSI-X management scheme, but
> > > it sounds to me like that will mostly affect the driver interfaces and
> > > shouldn't be incompatible with the sysfs scheme you propose here.
> > > 
> > > If so, great, we can pull this in and maybe one day someone will
> > > implement better MSI-X support.
> > > 
> > > If not, I'd like to hear objections now; Neil has been responsive and
> > > fixed several issues.  I don't want to block his patch unless we have
> > > something concrete that it conflicts with.
> > > 
> > > Thanks,
> > > -- 
> > > Jesse Barnes, Intel Open Source Technology Center
> > Ping Jesse, has something gone awry here? I've not seen this show up in your
> > tree yet.
> 
> It's sitting in my queue and I've timed out waiting for objections.  So
> I'll put it in my -next tree shortly.  Thanks for your patience Neil.
> 
No problem, just wanted to make sure it didn't get lost in the move back to
kernel.org
Thanks!
Neil

> -- 
> Jesse Barnes, Intel Open Source Technology Center



^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2011-11-01 18:06 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-14 18:36 [PATCH] sysfs: add per pci device msi[x] irq listing Neil Horman
2011-09-15 14:40 ` Greg KH
2011-09-15 15:07   ` Neil Horman
2011-09-15 20:08 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v2) Neil Horman
2011-09-16  8:36   ` Greg KH
2011-09-16 10:57     ` Neil Horman
2011-09-16 13:23       ` Greg KH
2011-09-16 13:32         ` Neil Horman
2011-09-16 16:12           ` Bjorn Helgaas
2011-09-19 15:47 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v3) Neil Horman
2011-09-19 17:14   ` Greg KH
2011-09-19 17:33     ` Neil Horman
2011-09-22 10:49   ` Konrad Rzeszutek Wilk
2011-09-22 10:57     ` Neil Horman
2011-09-22 11:10       ` Konrad Rzeszutek Wilk
2011-09-22 13:21         ` Neil Horman
2011-09-22 13:17     ` Neil Horman
2011-09-22 13:54   ` Matthew Wilcox
2011-09-22 14:32     ` Neil Horman
2011-09-28 22:18       ` Bjorn Helgaas
2011-09-29  0:42         ` Neil Horman
2011-09-29  4:40           ` Bjorn Helgaas
2011-09-29 13:07             ` Neil Horman
2011-09-29 14:38 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v4) Neil Horman
2011-09-29 14:51   ` Greg KH
2011-09-30 12:32   ` Stefan Richter
2011-09-30 15:33     ` Neil Horman
2011-09-30 16:33       ` Bjorn Helgaas
2011-09-30 16:54         ` Neil Horman
2011-10-06 15:36           ` Jesse Barnes
2011-10-06 17:12             ` Neil Horman
2011-10-06 17:57               ` Bjorn Helgaas
2011-10-06 18:08                 ` [PATCH] sysfs: add per pci device msi[x] irq listing (v5) Neil Horman
2011-10-14 16:21                   ` Jesse Barnes
2011-10-14 16:40                     ` Greg KH
2011-10-14 17:31                     ` Neil Horman
2011-11-01 16:47                     ` Neil Horman
2011-11-01 16:58                       ` Jesse Barnes
2011-11-01 18:05                         ` Neil Horman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.