All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] PCI: Data corruption happening due to race condition
@ 2018-06-25 10:10 Hari Vyas
  2018-06-25 10:37 ` Lukas Wunner
  0 siblings, 1 reply; 12+ messages in thread
From: Hari Vyas @ 2018-06-25 10:10 UTC (permalink / raw)
  To: bhelgaas; +Cc: linux-pci, ray.jui, Hari Vyas

When a pci device is detected, a variable is_added is set to
1 in pci device structure and proc, sys entries are created.

When a pci device is removed, first is_added is checked for one
and then device is detached with clearing of proc and sys
entries and at end, is_added is set to 0.

is_added and is_busmaster are bit fields in pci_dev structure
sharing same memory location.

A strange issue was observed with multiple times removal and
rescan of a pcie nvme device using sysfs commands where is_added
flag was observed as zero instead of one while removing device
and proc,sys entries are not cleared.  This causes issue in
later device addition with warning message "proc_dir_entry"
already registered.

Debugging revealed a race condition between pcie core driver
enabling is_added bit(pci_bus_add_device()) and nvme driver
reset work-queue enabling is_busmaster bit (by pci_set_master()).
As both fields are not handled in atomic manner and that clears
is_added bit.

Fix protects is_added and is_busmaster bits updation by a spin
locking mechanism.

Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
---
 drivers/pci/bus.c        | 3 +++
 drivers/pci/pci-driver.c | 2 ++
 drivers/pci/pci.c        | 7 +++++++
 drivers/pci/remove.c     | 5 +++++
 include/linux/pci.h      | 1 +
 5 files changed, 18 insertions(+)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 35b7fc8..61d389d 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -310,6 +310,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
 void pci_bus_add_device(struct pci_dev *dev)
 {
 	int retval;
+	unsigned long flags;
 
 	/*
 	 * Can not put in pci_device_add yet because resources
@@ -330,7 +331,9 @@ void pci_bus_add_device(struct pci_dev *dev)
 		return;
 	}
 
+	spin_lock_irqsave(&dev->lock, flags);
 	dev->is_added = 1;
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index c125d53..547bcd7 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -123,6 +123,8 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 		pdev->subsystem_device = subdevice;
 		pdev->class = class;
 
+		spin_lock_init(&pdev->lock);
+
 		if (pci_match_id(pdrv->id_table, pdev))
 			retval = -EEXIST;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba7..bcb1f96 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1649,6 +1649,7 @@ void pci_disable_enabled_device(struct pci_dev *dev)
 void pci_disable_device(struct pci_dev *dev)
 {
 	struct pci_devres *dr;
+	unsigned long flags;
 
 	dr = find_pci_dr(dev);
 	if (dr)
@@ -1662,7 +1663,9 @@ void pci_disable_device(struct pci_dev *dev)
 
 	do_pci_disable_device(dev);
 
+	spin_lock_irqsave(&dev->lock, flags);
 	dev->is_busmaster = 0;
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 EXPORT_SYMBOL(pci_disable_device);
 
@@ -3664,6 +3667,7 @@ void __iomem *devm_pci_remap_cfg_resource(struct device *dev,
 static void __pci_set_master(struct pci_dev *dev, bool enable)
 {
 	u16 old_cmd, cmd;
+	unsigned long flags;
 
 	pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
 	if (enable)
@@ -3675,7 +3679,10 @@ static void __pci_set_master(struct pci_dev *dev, bool enable)
 			enable ? "enabling" : "disabling");
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
+
+	spin_lock_irqsave(&dev->lock, flags);
 	dev->is_busmaster = enable;
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 
 /**
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 6f072ea..ff57bd6 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -17,13 +17,18 @@ static void pci_free_resources(struct pci_dev *dev)
 
 static void pci_stop_dev(struct pci_dev *dev)
 {
+	unsigned long flags;
+
 	pci_pme_active(dev, false);
 
 	if (dev->is_added) {
 		device_release_driver(&dev->dev);
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
+
+		spin_lock_irqsave(&dev->lock, flags);
 		dev->is_added = 0;
+		spin_unlock_irqrestore(&dev->lock, flags);
 	}
 
 	if (dev->bus->self)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b..6940825 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -365,6 +365,7 @@ struct pci_dev {
 
 	bool		match_driver;		/* Skip attaching driver */
 
+	spinlock_t	lock;			/* Protect is_added and is_busmaster */
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	multifunction:1;	/* Multi-function device */
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] PCI: Data corruption happening due to race condition
  2018-06-25 10:10 [PATCH] PCI: Data corruption happening due to race condition Hari Vyas
@ 2018-06-25 10:37 ` Lukas Wunner
  2018-06-25 10:57   ` Hari Vyas
  0 siblings, 1 reply; 12+ messages in thread
From: Lukas Wunner @ 2018-06-25 10:37 UTC (permalink / raw)
  To: Hari Vyas; +Cc: bhelgaas, linux-pci, ray.jui

On Mon, Jun 25, 2018 at 03:40:46PM +0530, Hari Vyas wrote:
> When a pci device is detected, a variable is_added is set to
> 1 in pci device structure and proc, sys entries are created.
> 
> When a pci device is removed, first is_added is checked for one
> and then device is detached with clearing of proc and sys
> entries and at end, is_added is set to 0.
> 
> is_added and is_busmaster are bit fields in pci_dev structure
> sharing same memory location.
> 
> A strange issue was observed with multiple times removal and
> rescan of a pcie nvme device using sysfs commands where is_added
> flag was observed as zero instead of one while removing device
> and proc,sys entries are not cleared.

Where exactly was is_added incorrectly observed as 0?  Normally
addition and removal of devices are serialized using
pci_lock_rescan_remove(), maybe this is missing somewhere?

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] PCI: Data corruption happening due to race condition
  2018-06-25 10:37 ` Lukas Wunner
@ 2018-06-25 10:57   ` Hari Vyas
  2018-06-25 11:15     ` Lukas Wunner
  0 siblings, 1 reply; 12+ messages in thread
From: Hari Vyas @ 2018-06-25 10:57 UTC (permalink / raw)
  To: Lukas Wunner; +Cc: bhelgaas, linux-pci, Ray Jui

Hi Lukas,
	This issue is happening  with multiple times device removal and
rescan from sysfs. Card is not removed physically.
	Is_added bit is set after device attach which probe nvme driver.
NVMe driver starts one workqueue and that one is calling pci_set_master()
to set is_busmaster bit.
	With multiple times device removal and rescan from sysfs,  race
condition is observed and is_added bit is over-written to 0 from workqueue
started by NVMe driver.

	Hope it clarifies concern.

Sequence 1:
pci_bus_add_device()
{
    device_attach();
   ...
    dev->is_added = 1;
}

Sequence 2:
nvme_probe()
{
...
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
...
}
nvme_reset_work()--->nvme_pci_enable()-->pci_set_master()-->__pci_set_mast
er(true)-->dev->is_busmaster = enable

Regards,
Hari

-----Original Message-----
From: Lukas Wunner [mailto:lukas@wunner.de]
Sent: Monday, June 25, 2018 4:08 PM
To: Hari Vyas <hari.vyas@broadcom.com>
Cc: bhelgaas@google.com; linux-pci@vger.kernel.org; ray.jui@broadcom.com
Subject: Re: [PATCH] PCI: Data corruption happening due to race condition

On Mon, Jun 25, 2018 at 03:40:46PM +0530, Hari Vyas wrote:
> When a pci device is detected, a variable is_added is set to
> 1 in pci device structure and proc, sys entries are created.
>
> When a pci device is removed, first is_added is checked for one and
> then device is detached with clearing of proc and sys entries and at
> end, is_added is set to 0.
>
> is_added and is_busmaster are bit fields in pci_dev structure sharing
> same memory location.
>
> A strange issue was observed with multiple times removal and rescan of
> a pcie nvme device using sysfs commands where is_added flag was
> observed as zero instead of one while removing device and proc,sys
> entries are not cleared.

Where exactly was is_added incorrectly observed as 0?  Normally addition
and removal of devices are serialized using pci_lock_rescan_remove(),
maybe this is missing somewhere?

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] PCI: Data corruption happening due to race condition
  2018-06-25 10:57   ` Hari Vyas
@ 2018-06-25 11:15     ` Lukas Wunner
  2018-06-26 10:17       ` Hari Vyas
  0 siblings, 1 reply; 12+ messages in thread
From: Lukas Wunner @ 2018-06-25 11:15 UTC (permalink / raw)
  To: Hari Vyas; +Cc: bhelgaas, linux-pci, Ray Jui

On Mon, Jun 25, 2018 at 04:27:37PM +0530, Hari Vyas wrote:
> 	This issue is happening  with multiple times device removal and
> rescan from sysfs. Card is not removed physically.
> 	Is_added bit is set after device attach which probe nvme driver.
> NVMe driver starts one workqueue and that one is calling pci_set_master()
> to set is_busmaster bit.
> 	With multiple times device removal and rescan from sysfs,  race
> condition is observed and is_added bit is over-written to 0 from workqueue
> started by NVMe driver.

Could you add a dump_stack() to pci_bus_add_device() and pci_stop_dev()
where the is_added bit is modified, reproduce the issue and attach the
resulting dmesg output to a newly opened bug on bugzilla.kernel.org?

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] PCI: Data corruption happening due to race condition
  2018-06-25 11:15     ` Lukas Wunner
@ 2018-06-26 10:17       ` Hari Vyas
  2018-06-26 11:53         ` Lukas Wunner
  0 siblings, 1 reply; 12+ messages in thread
From: Hari Vyas @ 2018-06-26 10:17 UTC (permalink / raw)
  To: Lukas Wunner; +Cc: Bjorn Helgaas, linux-pci, Ray Jui

On Mon, Jun 25, 2018 at 4:45 PM, Lukas Wunner <lukas@wunner.de> wrote:
> On Mon, Jun 25, 2018 at 04:27:37PM +0530, Hari Vyas wrote:
>>       This issue is happening  with multiple times device removal and
>> rescan from sysfs. Card is not removed physically.
>>       Is_added bit is set after device attach which probe nvme driver.
>> NVMe driver starts one workqueue and that one is calling pci_set_master()
>> to set is_busmaster bit.
>>       With multiple times device removal and rescan from sysfs,  race
>> condition is observed and is_added bit is over-written to 0 from workqueue
>> started by NVMe driver.
>
> Could you add a dump_stack() to pci_bus_add_device() and pci_stop_dev()
> where the is_added bit is modified, reproduce the issue and attach the
> resulting dmesg output to a newly opened bug on bugzilla.kernel.org?
>

I have raised a Bug 200283 - PCI: Data corruption happening due to a
race condition.
Please note that is_added bit is lost in pci_bus_add_device() and
__pci_set_master()
functions. Added dump_stack() with one additional debug message to
print cpu-id in
pci_set_master() is called from a CPU3 workqueue and pci_bus_add_device() from
CPU2 sysfs call.

root@bcm958742k:~# echo 1 > /sys/bus/pci/devices/0002\:01\:00.0/remove
[   32.385389] nvme nvme0: failed to set APST feature (-19)
root@bcm958742k:~# echo 1 > /sys/bus/pci/rescan
[   38.916435] pci 0002:01:00.0: BAR 0: assigned [mem
0x500000000-0x500003fff 64bit]
[   38.924822] nvme nvme0: pci function 0002:01:00.0

[   38.929702] nvme 0002:01:00.0: pci_bus_add_device:333 cpu=2


[   38.929705] CPU: 2 PID: 2360 Comm: sh Not tainted
4.17.0-02102-gdcfa25a-dirty #106
[   38.943259] Hardware name: Stingray Combo SVK (BCM958742K) (DT)


[   38.943267] nvme 0002:01:00.0: __pci_set_master:3681 cpu=3


[   38.949366] Call trace:
[   38.949375]  dump_backtrace+0x0/0x1b8
[   38.949377]  show_stack+0x14/0x1c
[   38.964748]  dump_stack+0x90/0xb0
[   38.968168]  pci_bus_add_device+0xbc/0xe0
[   38.972303]  pci_bus_add_devices+0x44/0x90
[   38.976527]  pci_bus_add_devices+0x74/0x90
[   38.980751]  pci_rescan_bus+0x2c/0x3c
[   38.984529]  bus_rescan_store+0x7c/0xa0
[   38.988485]  bus_attr_store+0x20/0x34
[   38.992264]  sysfs_kf_write+0x40/0x50
[   38.996040]  kernfs_fop_write+0xcc/0x1cc
[   39.000087]  __vfs_write+0x40/0x154
[   39.003683]  vfs_write+0xa8/0x198
[   39.007102]  ksys_write+0x58/0xbc
[   39.010520]  sys_write+0xc/0x14
[   39.013758]  __sys_trace_return+0x0/0x4

[   39.017714] CPU: 3 PID: 50 Comm: kworker/u16:1 Not tainted
4.17.0-02102-gdcfa25a-dirty #106
[   39.026329] Hardware name: Stingray Combo SVK (BCM958742K) (DT)
[   39.026336] Workqueue: nvme-reset-wq nvme_reset_work
[   39.037561] Call trace:
[   39.037563]  dump_backtrace+0x0/0x1b8
[   39.037564]  show_stack+0x14/0x1c
[   39.037566]  dump_stack+0x90/0xb0
[   39.037569]  __pci_set_master+0xd4/0x130
[   39.043866]  pci_set_master+0x18/0x2c
[   39.043867]  nvme_reset_work+0x110/0x14a4
[   39.050702]  process_one_work+0x12c/0x29c
[   39.050704]  worker_thread+0x13c/0x410
[   39.058522]  kthread+0xfc/0x128
[   39.058524]  ret_from_fork+0x10/0x18
root@bcm958742k:~#


> Thanks,
>
> Lukas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] PCI: Data corruption happening due to race condition
  2018-06-26 10:17       ` Hari Vyas
@ 2018-06-26 11:53         ` Lukas Wunner
  2018-06-27  9:38           ` [PATCH v1] " Hari Vyas
       [not found]           ` <CAM5rFu-Sb5Vhvy19GKesV00=tf0+7Q8hByU11=4F9MVhoO7nWA@mail.gmail.com>
  0 siblings, 2 replies; 12+ messages in thread
From: Lukas Wunner @ 2018-06-26 11:53 UTC (permalink / raw)
  To: Hari Vyas; +Cc: Bjorn Helgaas, linux-pci, Ray Jui

On Tue, Jun 26, 2018 at 03:47:43PM +0530, Hari Vyas wrote:
> On Mon, Jun 25, 2018 at 4:45 PM, Lukas Wunner <lukas@wunner.de> wrote:
> > On Mon, Jun 25, 2018 at 04:27:37PM +0530, Hari Vyas wrote:
> >>       This issue is happening  with multiple times device removal and
> >> rescan from sysfs. Card is not removed physically.
> >>       Is_added bit is set after device attach which probe nvme driver.
> >> NVMe driver starts one workqueue and that one is calling pci_set_master()
> >> to set is_busmaster bit.
> >>       With multiple times device removal and rescan from sysfs,  race
> >> condition is observed and is_added bit is over-written to 0 from workqueue
> >> started by NVMe driver.
> >
> > Could you add a dump_stack() to pci_bus_add_device() and pci_stop_dev()
> > where the is_added bit is modified, reproduce the issue and attach the
> > resulting dmesg output to a newly opened bug on bugzilla.kernel.org?
> >
> 
> I have raised a Bug 200283 - PCI: Data corruption happening due to a
> race condition.

Thanks for taking the time to open the bug and provide more detailed
information.

So the upshot seems to be that is_added and is_busmaster end up in
the same word and two CPUs perform a read-modify-write wherein one
CPU clobbers the result of the other CPU.

While a spinlock may do the job, I think a better solution would be
to move is_added to the priv_flags bitmap in struct pci_dev.  The
is_added flag is internal to the PCI core and anything outside has
no business dealing with it.

(Assuming arch/powerpc/kernel/pci-common.c can also be considered
part of the PCI core.)

The flags in priv_flags are defined in drivers/pci/pci.h, so far
there's only one for PCI_DEV_DISCONNECTED which was introduced by
89ee9f768.  That commit also introduced accessors, personally I
don't think that's necessary for the few places in the PCI core
that the new PCI_DEV_ADDED flag would be used and I'd just update
those sites to set or test the bit directly.

Moving the is_added flag should already fix the race with is_busmaster.
It may be worth making is_busmaster a bitmap flag as well, but
priv_flags might not be suitable because the flag is also queried
by various drivers.  I'll defer that decision to Bjorn.

HTH,

Lukas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v1] PCI: Data corruption happening due to race condition
  2018-06-26 11:53         ` Lukas Wunner
@ 2018-06-27  9:38           ` Hari Vyas
  2018-06-27 16:27             ` Ray Jui
       [not found]           ` <CAM5rFu-Sb5Vhvy19GKesV00=tf0+7Q8hByU11=4F9MVhoO7nWA@mail.gmail.com>
  1 sibling, 1 reply; 12+ messages in thread
From: Hari Vyas @ 2018-06-27  9:38 UTC (permalink / raw)
  To: bhelgaas; +Cc: linux-pci, ray.jui, Hari Vyas

When a pci device is detected, a variable is_added is set to
1 in pci device structure and proc, sys entries are created.

When a pci device is removed, first is_added is checked for one
and then device is detached with clearing of proc and sys
entries and at end, is_added is set to 0.

is_added and is_busmaster are bit fields in pci_dev structure
sharing same memory location.

A strange issue was observed with multiple times removal and
rescan of a pcie nvme device using sysfs commands where is_added
flag was observed as zero instead of one while removing device
and proc,sys entries are not cleared.  This causes issue in
later device addition with warning message "proc_dir_entry"
already registered.

Debugging revealed a race condition between pcie core driver
enabling is_added bit(pci_bus_add_device()) and nvme driver
reset work-queue enabling is_busmaster bit (by pci_set_master()).
As both fields are not handled in atomic manner and that clears
is_added bit.

Fix protects is_added and is_busmaster bits updation by a spin
locking mechanism.

Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
---
 drivers/pci/bus.c        | 3 +++
 drivers/pci/pci-driver.c | 2 ++
 drivers/pci/pci.c        | 7 +++++++
 drivers/pci/probe.c      | 1 +
 drivers/pci/remove.c     | 5 +++++
 include/linux/pci.h      | 1 +
 6 files changed, 19 insertions(+)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 35b7fc8..61d389d 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -310,6 +310,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
 void pci_bus_add_device(struct pci_dev *dev)
 {
 	int retval;
+	unsigned long flags;
 
 	/*
 	 * Can not put in pci_device_add yet because resources
@@ -330,7 +331,9 @@ void pci_bus_add_device(struct pci_dev *dev)
 		return;
 	}
 
+	spin_lock_irqsave(&dev->lock, flags);
 	dev->is_added = 1;
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index c125d53..547bcd7 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -123,6 +123,8 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 		pdev->subsystem_device = subdevice;
 		pdev->class = class;
 
+		spin_lock_init(&pdev->lock);
+
 		if (pci_match_id(pdrv->id_table, pdev))
 			retval = -EEXIST;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba7..bcb1f96 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1649,6 +1649,7 @@ void pci_disable_enabled_device(struct pci_dev *dev)
 void pci_disable_device(struct pci_dev *dev)
 {
 	struct pci_devres *dr;
+	unsigned long flags;
 
 	dr = find_pci_dr(dev);
 	if (dr)
@@ -1662,7 +1663,9 @@ void pci_disable_device(struct pci_dev *dev)
 
 	do_pci_disable_device(dev);
 
+	spin_lock_irqsave(&dev->lock, flags);
 	dev->is_busmaster = 0;
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 EXPORT_SYMBOL(pci_disable_device);
 
@@ -3664,6 +3667,7 @@ void __iomem *devm_pci_remap_cfg_resource(struct device *dev,
 static void __pci_set_master(struct pci_dev *dev, bool enable)
 {
 	u16 old_cmd, cmd;
+	unsigned long flags;
 
 	pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
 	if (enable)
@@ -3675,7 +3679,10 @@ static void __pci_set_master(struct pci_dev *dev, bool enable)
 			enable ? "enabling" : "disabling");
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
 	}
+
+	spin_lock_irqsave(&dev->lock, flags);
 	dev->is_busmaster = enable;
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 
 /**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e3..9203b88 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2102,6 +2102,7 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
 	INIT_LIST_HEAD(&dev->bus_list);
 	dev->dev.type = &pci_dev_type;
 	dev->bus = pci_bus_get(bus);
+	spin_lock_init(&dev->lock);
 
 	return dev;
 }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 6f072ea..ff57bd6 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -17,13 +17,18 @@ static void pci_free_resources(struct pci_dev *dev)
 
 static void pci_stop_dev(struct pci_dev *dev)
 {
+	unsigned long flags;
+
 	pci_pme_active(dev, false);
 
 	if (dev->is_added) {
 		device_release_driver(&dev->dev);
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
+
+		spin_lock_irqsave(&dev->lock, flags);
 		dev->is_added = 0;
+		spin_unlock_irqrestore(&dev->lock, flags);
 	}
 
 	if (dev->bus->self)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b..6940825 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -365,6 +365,7 @@ struct pci_dev {
 
 	bool		match_driver;		/* Skip attaching driver */
 
+	spinlock_t	lock;			/* Protect is_added and is_busmaster */
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	multifunction:1;	/* Multi-function device */
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] PCI: Data corruption happening due to race condition
       [not found]             ` <20180627124920.GA27447@wunner.de>
@ 2018-06-27 13:00               ` Hari Vyas
  0 siblings, 0 replies; 12+ messages in thread
From: Hari Vyas @ 2018-06-27 13:00 UTC (permalink / raw)
  To: Lukas Wunner, Bjorn Helgaas, linux-pci, Ray Jui

On Wed, Jun 27, 2018 at 6:19 PM, Lukas Wunner <lukas@wunner.de> wrote:
> Hi Hari,
>
> please cc the list when replying so that others can comment.
>
apologize. My mistake.
> On Wed, Jun 27, 2018 at 03:45:26PM +0530, Hari Vyas wrote:
>> On Tue, Jun 26, 2018 at 5:23 PM, Lukas Wunner <lukas@wunner.de> wrote:
>> > While a spinlock may do the job, I think a better solution would be
>> > to move is_added to the priv_flags bitmap in struct pci_dev.  The
>> > is_added flag is internal to the PCI core and anything outside has
>> > no business dealing with it.
>> >
>> > (Assuming arch/powerpc/kernel/pci-common.c can also be considered
>> > part of the PCI core.)
>>
>> Simple grep is showing that
>> is_added is being used outside in 1) arch/powerpc/platforms/powernv/
>> pci-ioda.c 2) arch/powerpc/platforms/pseries/setup.c files.
>
> I'd #include "../../../../drivers/pci/pci.h" in the few arch-specific
> files, there's some precedent for that, see
>
> git grep '#include .*\.\./\.\./\.\./\.\.' -- :/arch
>
> (I'd mention that precedent in the commit message or below the three
> dashes, in the latter case it doesn't become part of the commit message
> but it helps reviewers understand what's going on.)
>
>>> > The flags in priv_flags are defined in drivers/pci/pci.h, so far
>> > there's only one for PCI_DEV_DISCONNECTED which was introduced by
>> > 89ee9f768.  That commit also introduced accessors, personally I
>> > don't think that's necessary for the few places in the PCI core
>> > that the new PCI_DEV_ADDED flag would be used and I'd just update
>> > those sites to set or test the bit directly.
>> >
>> Agree. Issue needs to be just fixed in good way. Let me know if priv_flags
>> approach needs to be taken for is_added flag.
>
> I'd definitely recommend using priv_flags and the atomic bitops
> test_bit() and set_bit() (see Documentation/atomic_bitops.txt),
> it's less code than using a spinlock and nominally reduces the
> size of struct pci_dev by 1 bit (modulo padding by the compiler).
> spinlock is for when you have several function calls which need
> to be executed atomically.
>
Okay. Will update code (move is_added:1  to priv_flags), test and revert back.
Should fix concern but better to verify.
>
> Thanks,
>
> Lukas

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v1] PCI: Data corruption happening due to race condition
  2018-06-27  9:38           ` [PATCH v1] " Hari Vyas
@ 2018-06-27 16:27             ` Ray Jui
  2018-06-27 16:32               ` Hari Vyas
  0 siblings, 1 reply; 12+ messages in thread
From: Ray Jui @ 2018-06-27 16:27 UTC (permalink / raw)
  To: Hari Vyas, bhelgaas; +Cc: linux-pci

Are you re-sending v1 version of this patch or this is actually v2?

Thanks,

Ray

On 6/27/2018 2:38 AM, Hari Vyas wrote:
> When a pci device is detected, a variable is_added is set to
> 1 in pci device structure and proc, sys entries are created.
> 
> When a pci device is removed, first is_added is checked for one
> and then device is detached with clearing of proc and sys
> entries and at end, is_added is set to 0.
> 
> is_added and is_busmaster are bit fields in pci_dev structure
> sharing same memory location.
> 
> A strange issue was observed with multiple times removal and
> rescan of a pcie nvme device using sysfs commands where is_added
> flag was observed as zero instead of one while removing device
> and proc,sys entries are not cleared.  This causes issue in
> later device addition with warning message "proc_dir_entry"
> already registered.
> 
> Debugging revealed a race condition between pcie core driver
> enabling is_added bit(pci_bus_add_device()) and nvme driver
> reset work-queue enabling is_busmaster bit (by pci_set_master()).
> As both fields are not handled in atomic manner and that clears
> is_added bit.
> 
> Fix protects is_added and is_busmaster bits updation by a spin
> locking mechanism.
> 
> Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
> ---
>   drivers/pci/bus.c        | 3 +++
>   drivers/pci/pci-driver.c | 2 ++
>   drivers/pci/pci.c        | 7 +++++++
>   drivers/pci/probe.c      | 1 +
>   drivers/pci/remove.c     | 5 +++++
>   include/linux/pci.h      | 1 +
>   6 files changed, 19 insertions(+)
> 
> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
> index 35b7fc8..61d389d 100644
> --- a/drivers/pci/bus.c
> +++ b/drivers/pci/bus.c
> @@ -310,6 +310,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
>   void pci_bus_add_device(struct pci_dev *dev)
>   {
>   	int retval;
> +	unsigned long flags;
>   
>   	/*
>   	 * Can not put in pci_device_add yet because resources
> @@ -330,7 +331,9 @@ void pci_bus_add_device(struct pci_dev *dev)
>   		return;
>   	}
>   
> +	spin_lock_irqsave(&dev->lock, flags);
>   	dev->is_added = 1;
> +	spin_unlock_irqrestore(&dev->lock, flags);
>   }
>   EXPORT_SYMBOL_GPL(pci_bus_add_device);
>   
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index c125d53..547bcd7 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -123,6 +123,8 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
>   		pdev->subsystem_device = subdevice;
>   		pdev->class = class;
>   
> +		spin_lock_init(&pdev->lock);
> +
>   		if (pci_match_id(pdrv->id_table, pdev))
>   			retval = -EEXIST;
>   
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 97acba7..bcb1f96 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -1649,6 +1649,7 @@ void pci_disable_enabled_device(struct pci_dev *dev)
>   void pci_disable_device(struct pci_dev *dev)
>   {
>   	struct pci_devres *dr;
> +	unsigned long flags;
>   
>   	dr = find_pci_dr(dev);
>   	if (dr)
> @@ -1662,7 +1663,9 @@ void pci_disable_device(struct pci_dev *dev)
>   
>   	do_pci_disable_device(dev);
>   
> +	spin_lock_irqsave(&dev->lock, flags);
>   	dev->is_busmaster = 0;
> +	spin_unlock_irqrestore(&dev->lock, flags);
>   }
>   EXPORT_SYMBOL(pci_disable_device);
>   
> @@ -3664,6 +3667,7 @@ void __iomem *devm_pci_remap_cfg_resource(struct device *dev,
>   static void __pci_set_master(struct pci_dev *dev, bool enable)
>   {
>   	u16 old_cmd, cmd;
> +	unsigned long flags;
>   
>   	pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
>   	if (enable)
> @@ -3675,7 +3679,10 @@ static void __pci_set_master(struct pci_dev *dev, bool enable)
>   			enable ? "enabling" : "disabling");
>   		pci_write_config_word(dev, PCI_COMMAND, cmd);
>   	}
> +
> +	spin_lock_irqsave(&dev->lock, flags);
>   	dev->is_busmaster = enable;
> +	spin_unlock_irqrestore(&dev->lock, flags);
>   }
>   
>   /**
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index ac876e3..9203b88 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2102,6 +2102,7 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
>   	INIT_LIST_HEAD(&dev->bus_list);
>   	dev->dev.type = &pci_dev_type;
>   	dev->bus = pci_bus_get(bus);
> +	spin_lock_init(&dev->lock);
>   
>   	return dev;
>   }
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 6f072ea..ff57bd6 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -17,13 +17,18 @@ static void pci_free_resources(struct pci_dev *dev)
>   
>   static void pci_stop_dev(struct pci_dev *dev)
>   {
> +	unsigned long flags;
> +
>   	pci_pme_active(dev, false);
>   
>   	if (dev->is_added) {
>   		device_release_driver(&dev->dev);
>   		pci_proc_detach_device(dev);
>   		pci_remove_sysfs_dev_files(dev);
> +
> +		spin_lock_irqsave(&dev->lock, flags);
>   		dev->is_added = 0;
> +		spin_unlock_irqrestore(&dev->lock, flags);
>   	}
>   
>   	if (dev->bus->self)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 340029b..6940825 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -365,6 +365,7 @@ struct pci_dev {
>   
>   	bool		match_driver;		/* Skip attaching driver */
>   
> +	spinlock_t	lock;			/* Protect is_added and is_busmaster */
>   	unsigned int	transparent:1;		/* Subtractive decode bridge */
>   	unsigned int	multifunction:1;	/* Multi-function device */
>   
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v1] PCI: Data corruption happening due to race condition
  2018-06-27 16:27             ` Ray Jui
@ 2018-06-27 16:32               ` Hari Vyas
  2018-06-27 16:36                 ` Ray Jui
  0 siblings, 1 reply; 12+ messages in thread
From: Hari Vyas @ 2018-06-27 16:32 UTC (permalink / raw)
  To: Ray Jui; +Cc: Bjorn Helgaas, linux-pci

On Wed, Jun 27, 2018 at 9:57 PM, Ray Jui <ray.jui@broadcom.com> wrote:
> Are you re-sending v1 version of this patch or this is actually v2?
>
This is actually v2. Forgot to give version in first patch.
There was spin lock initialization issue with v1 which was not caught
in our environment.
In any case, I am trying out suggested priv_flag approach and testing it.
Hopefully that should be final version.
> Thanks,
>
> Ray
>
>
> On 6/27/2018 2:38 AM, Hari Vyas wrote:
>>
>> When a pci device is detected, a variable is_added is set to
>> 1 in pci device structure and proc, sys entries are created.
>>
>> When a pci device is removed, first is_added is checked for one
>> and then device is detached with clearing of proc and sys
>> entries and at end, is_added is set to 0.
>>
>> is_added and is_busmaster are bit fields in pci_dev structure
>> sharing same memory location.
>>
>> A strange issue was observed with multiple times removal and
>> rescan of a pcie nvme device using sysfs commands where is_added
>> flag was observed as zero instead of one while removing device
>> and proc,sys entries are not cleared.  This causes issue in
>> later device addition with warning message "proc_dir_entry"
>> already registered.
>>
>> Debugging revealed a race condition between pcie core driver
>> enabling is_added bit(pci_bus_add_device()) and nvme driver
>> reset work-queue enabling is_busmaster bit (by pci_set_master()).
>> As both fields are not handled in atomic manner and that clears
>> is_added bit.
>>
>> Fix protects is_added and is_busmaster bits updation by a spin
>> locking mechanism.
>>
>> Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
>> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>> ---
>>   drivers/pci/bus.c        | 3 +++
>>   drivers/pci/pci-driver.c | 2 ++
>>   drivers/pci/pci.c        | 7 +++++++
>>   drivers/pci/probe.c      | 1 +
>>   drivers/pci/remove.c     | 5 +++++
>>   include/linux/pci.h      | 1 +
>>   6 files changed, 19 insertions(+)
>>
>> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
>> index 35b7fc8..61d389d 100644
>> --- a/drivers/pci/bus.c
>> +++ b/drivers/pci/bus.c
>> @@ -310,6 +310,7 @@ void __weak pcibios_bus_add_device(struct pci_dev
>> *pdev) { }
>>   void pci_bus_add_device(struct pci_dev *dev)
>>   {
>>         int retval;
>> +       unsigned long flags;
>>         /*
>>          * Can not put in pci_device_add yet because resources
>> @@ -330,7 +331,9 @@ void pci_bus_add_device(struct pci_dev *dev)
>>                 return;
>>         }
>>   +     spin_lock_irqsave(&dev->lock, flags);
>>         dev->is_added = 1;
>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>   }
>>   EXPORT_SYMBOL_GPL(pci_bus_add_device);
>>   diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
>> index c125d53..547bcd7 100644
>> --- a/drivers/pci/pci-driver.c
>> +++ b/drivers/pci/pci-driver.c
>> @@ -123,6 +123,8 @@ static ssize_t new_id_store(struct device_driver
>> *driver, const char *buf,
>>                 pdev->subsystem_device = subdevice;
>>                 pdev->class = class;
>>   +             spin_lock_init(&pdev->lock);
>> +
>>                 if (pci_match_id(pdrv->id_table, pdev))
>>                         retval = -EEXIST;
>>   diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 97acba7..bcb1f96 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -1649,6 +1649,7 @@ void pci_disable_enabled_device(struct pci_dev *dev)
>>   void pci_disable_device(struct pci_dev *dev)
>>   {
>>         struct pci_devres *dr;
>> +       unsigned long flags;
>>         dr = find_pci_dr(dev);
>>         if (dr)
>> @@ -1662,7 +1663,9 @@ void pci_disable_device(struct pci_dev *dev)
>>         do_pci_disable_device(dev);
>>   +     spin_lock_irqsave(&dev->lock, flags);
>>         dev->is_busmaster = 0;
>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>   }
>>   EXPORT_SYMBOL(pci_disable_device);
>>   @@ -3664,6 +3667,7 @@ void __iomem *devm_pci_remap_cfg_resource(struct
>> device *dev,
>>   static void __pci_set_master(struct pci_dev *dev, bool enable)
>>   {
>>         u16 old_cmd, cmd;
>> +       unsigned long flags;
>>         pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
>>         if (enable)
>> @@ -3675,7 +3679,10 @@ static void __pci_set_master(struct pci_dev *dev,
>> bool enable)
>>                         enable ? "enabling" : "disabling");
>>                 pci_write_config_word(dev, PCI_COMMAND, cmd);
>>         }
>> +
>> +       spin_lock_irqsave(&dev->lock, flags);
>>         dev->is_busmaster = enable;
>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>   }
>>     /**
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index ac876e3..9203b88 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -2102,6 +2102,7 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
>>         INIT_LIST_HEAD(&dev->bus_list);
>>         dev->dev.type = &pci_dev_type;
>>         dev->bus = pci_bus_get(bus);
>> +       spin_lock_init(&dev->lock);
>>         return dev;
>>   }
>> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
>> index 6f072ea..ff57bd6 100644
>> --- a/drivers/pci/remove.c
>> +++ b/drivers/pci/remove.c
>> @@ -17,13 +17,18 @@ static void pci_free_resources(struct pci_dev *dev)
>>     static void pci_stop_dev(struct pci_dev *dev)
>>   {
>> +       unsigned long flags;
>> +
>>         pci_pme_active(dev, false);
>>         if (dev->is_added) {
>>                 device_release_driver(&dev->dev);
>>                 pci_proc_detach_device(dev);
>>                 pci_remove_sysfs_dev_files(dev);
>> +
>> +               spin_lock_irqsave(&dev->lock, flags);
>>                 dev->is_added = 0;
>> +               spin_unlock_irqrestore(&dev->lock, flags);
>>         }
>>         if (dev->bus->self)
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 340029b..6940825 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -365,6 +365,7 @@ struct pci_dev {
>>         bool            match_driver;           /* Skip attaching driver
>> */
>>   +     spinlock_t      lock;                   /* Protect is_added and
>> is_busmaster */
>>         unsigned int    transparent:1;          /* Subtractive decode
>> bridge */
>>         unsigned int    multifunction:1;        /* Multi-function device
>> */
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v1] PCI: Data corruption happening due to race condition
  2018-06-27 16:32               ` Hari Vyas
@ 2018-06-27 16:36                 ` Ray Jui
  2018-06-28 11:23                   ` Hari Vyas
  0 siblings, 1 reply; 12+ messages in thread
From: Ray Jui @ 2018-06-27 16:36 UTC (permalink / raw)
  To: Hari Vyas; +Cc: Bjorn Helgaas, linux-pci



On 6/27/2018 9:32 AM, Hari Vyas wrote:
> On Wed, Jun 27, 2018 at 9:57 PM, Ray Jui <ray.jui@broadcom.com> wrote:
>> Are you re-sending v1 version of this patch or this is actually v2?
>>
> This is actually v2. Forgot to give version in first patch.

You may choose to specify [PATCH v1]: or simply [PATCH]: on your first 
version of the patch, so what you did in the first version was okay.

Now, prefixing the 2nd version of the patch with v1 is just wrong and 
causes confusions. Can you fix that?

In addition, the Reviewed-by filed on me needs to be removed on v2 of 
this patch, since I've never reviewed since you changed it from v1.

> There was spin lock initialization issue with v1 which was not caught
> in our environment.
> In any case, I am trying out suggested priv_flag approach and testing it.
> Hopefully that should be final version.
>> Thanks,
>>
>> Ray
>>
>>
>> On 6/27/2018 2:38 AM, Hari Vyas wrote:
>>>
>>> When a pci device is detected, a variable is_added is set to
>>> 1 in pci device structure and proc, sys entries are created.
>>>
>>> When a pci device is removed, first is_added is checked for one
>>> and then device is detached with clearing of proc and sys
>>> entries and at end, is_added is set to 0.
>>>
>>> is_added and is_busmaster are bit fields in pci_dev structure
>>> sharing same memory location.
>>>
>>> A strange issue was observed with multiple times removal and
>>> rescan of a pcie nvme device using sysfs commands where is_added
>>> flag was observed as zero instead of one while removing device
>>> and proc,sys entries are not cleared.  This causes issue in
>>> later device addition with warning message "proc_dir_entry"
>>> already registered.
>>>
>>> Debugging revealed a race condition between pcie core driver
>>> enabling is_added bit(pci_bus_add_device()) and nvme driver
>>> reset work-queue enabling is_busmaster bit (by pci_set_master()).
>>> As both fields are not handled in atomic manner and that clears
>>> is_added bit.
>>>
>>> Fix protects is_added and is_busmaster bits updation by a spin
>>> locking mechanism.
>>>
>>> Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
>>> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>>> ---
>>>    drivers/pci/bus.c        | 3 +++
>>>    drivers/pci/pci-driver.c | 2 ++
>>>    drivers/pci/pci.c        | 7 +++++++
>>>    drivers/pci/probe.c      | 1 +
>>>    drivers/pci/remove.c     | 5 +++++
>>>    include/linux/pci.h      | 1 +
>>>    6 files changed, 19 insertions(+)
>>>
>>> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
>>> index 35b7fc8..61d389d 100644
>>> --- a/drivers/pci/bus.c
>>> +++ b/drivers/pci/bus.c
>>> @@ -310,6 +310,7 @@ void __weak pcibios_bus_add_device(struct pci_dev
>>> *pdev) { }
>>>    void pci_bus_add_device(struct pci_dev *dev)
>>>    {
>>>          int retval;
>>> +       unsigned long flags;
>>>          /*
>>>           * Can not put in pci_device_add yet because resources
>>> @@ -330,7 +331,9 @@ void pci_bus_add_device(struct pci_dev *dev)
>>>                  return;
>>>          }
>>>    +     spin_lock_irqsave(&dev->lock, flags);
>>>          dev->is_added = 1;
>>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>>    }
>>>    EXPORT_SYMBOL_GPL(pci_bus_add_device);
>>>    diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
>>> index c125d53..547bcd7 100644
>>> --- a/drivers/pci/pci-driver.c
>>> +++ b/drivers/pci/pci-driver.c
>>> @@ -123,6 +123,8 @@ static ssize_t new_id_store(struct device_driver
>>> *driver, const char *buf,
>>>                  pdev->subsystem_device = subdevice;
>>>                  pdev->class = class;
>>>    +             spin_lock_init(&pdev->lock);
>>> +
>>>                  if (pci_match_id(pdrv->id_table, pdev))
>>>                          retval = -EEXIST;
>>>    diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>>> index 97acba7..bcb1f96 100644
>>> --- a/drivers/pci/pci.c
>>> +++ b/drivers/pci/pci.c
>>> @@ -1649,6 +1649,7 @@ void pci_disable_enabled_device(struct pci_dev *dev)
>>>    void pci_disable_device(struct pci_dev *dev)
>>>    {
>>>          struct pci_devres *dr;
>>> +       unsigned long flags;
>>>          dr = find_pci_dr(dev);
>>>          if (dr)
>>> @@ -1662,7 +1663,9 @@ void pci_disable_device(struct pci_dev *dev)
>>>          do_pci_disable_device(dev);
>>>    +     spin_lock_irqsave(&dev->lock, flags);
>>>          dev->is_busmaster = 0;
>>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>>    }
>>>    EXPORT_SYMBOL(pci_disable_device);
>>>    @@ -3664,6 +3667,7 @@ void __iomem *devm_pci_remap_cfg_resource(struct
>>> device *dev,
>>>    static void __pci_set_master(struct pci_dev *dev, bool enable)
>>>    {
>>>          u16 old_cmd, cmd;
>>> +       unsigned long flags;
>>>          pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
>>>          if (enable)
>>> @@ -3675,7 +3679,10 @@ static void __pci_set_master(struct pci_dev *dev,
>>> bool enable)
>>>                          enable ? "enabling" : "disabling");
>>>                  pci_write_config_word(dev, PCI_COMMAND, cmd);
>>>          }
>>> +
>>> +       spin_lock_irqsave(&dev->lock, flags);
>>>          dev->is_busmaster = enable;
>>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>>    }
>>>      /**
>>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>>> index ac876e3..9203b88 100644
>>> --- a/drivers/pci/probe.c
>>> +++ b/drivers/pci/probe.c
>>> @@ -2102,6 +2102,7 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
>>>          INIT_LIST_HEAD(&dev->bus_list);
>>>          dev->dev.type = &pci_dev_type;
>>>          dev->bus = pci_bus_get(bus);
>>> +       spin_lock_init(&dev->lock);
>>>          return dev;
>>>    }
>>> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
>>> index 6f072ea..ff57bd6 100644
>>> --- a/drivers/pci/remove.c
>>> +++ b/drivers/pci/remove.c
>>> @@ -17,13 +17,18 @@ static void pci_free_resources(struct pci_dev *dev)
>>>      static void pci_stop_dev(struct pci_dev *dev)
>>>    {
>>> +       unsigned long flags;
>>> +
>>>          pci_pme_active(dev, false);
>>>          if (dev->is_added) {
>>>                  device_release_driver(&dev->dev);
>>>                  pci_proc_detach_device(dev);
>>>                  pci_remove_sysfs_dev_files(dev);
>>> +
>>> +               spin_lock_irqsave(&dev->lock, flags);
>>>                  dev->is_added = 0;
>>> +               spin_unlock_irqrestore(&dev->lock, flags);
>>>          }
>>>          if (dev->bus->self)
>>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>>> index 340029b..6940825 100644
>>> --- a/include/linux/pci.h
>>> +++ b/include/linux/pci.h
>>> @@ -365,6 +365,7 @@ struct pci_dev {
>>>          bool            match_driver;           /* Skip attaching driver
>>> */
>>>    +     spinlock_t      lock;                   /* Protect is_added and
>>> is_busmaster */
>>>          unsigned int    transparent:1;          /* Subtractive decode
>>> bridge */
>>>          unsigned int    multifunction:1;        /* Multi-function device
>>> */
>>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v1] PCI: Data corruption happening due to race condition
  2018-06-27 16:36                 ` Ray Jui
@ 2018-06-28 11:23                   ` Hari Vyas
  0 siblings, 0 replies; 12+ messages in thread
From: Hari Vyas @ 2018-06-28 11:23 UTC (permalink / raw)
  To: Ray Jui; +Cc: Bjorn Helgaas, linux-pci

On Wed, Jun 27, 2018 at 10:06 PM, Ray Jui <ray.jui@broadcom.com> wrote:
>
>
> On 6/27/2018 9:32 AM, Hari Vyas wrote:
>>
>> On Wed, Jun 27, 2018 at 9:57 PM, Ray Jui <ray.jui@broadcom.com> wrote:
>>>
>>> Are you re-sending v1 version of this patch or this is actually v2?
>>>
>> This is actually v2. Forgot to give version in first patch.
>
>
> You may choose to specify [PATCH v1]: or simply [PATCH]: on your first
> version of the patch, so what you did in the first version was okay.
>
> Now, prefixing the 2nd version of the patch with v1 is just wrong and causes
> confusions. Can you fix that?
>
> In addition, the Reviewed-by filed on me needs to be removed on v2 of this
> patch, since I've never reviewed since you changed it from v1.
>
>
One spin_lock_init() was missing so just added for new patch. As that patch
should be obsolete so will take care about versioning in upcoming patch.
I have tested with priv_flag changes and needed to modify following files.
Changes fixing current scenario concern but eventually looks like  all pci_dev
relevant bit fields needs to be protected using atomic operation
keeping future in mind
and some cleanup in other modules using pci_dev structure fields.

I will  probably need to  raise three separate patches i.e. one for
powerpc/ files,
driver/pci/*.c and include/linux.h;driver/pci/pci.h. Probably need
to include powerpc maintainer too.

modified:   arch/powerpc/kernel/pci-common.c
modified:   arch/powerpc/platforms/powernv/pci-ioda.c
modified:   arch/powerpc/platforms/pseries/setup.c
modified:   drivers/pci/bus.c
modified:   drivers/pci/hotplug/acpiphp_glue.c
modified:   drivers/pci/pci-driver.c
modified:   drivers/pci/pci.c
modified:   drivers/pci/pci.h
modified:   drivers/pci/probe.c
modified:   drivers/pci/remove.c
modified:   include/linux/pci.h

>> There was spin lock initialization issue with v1 which was not caught
>> in our environment.
>> In any case, I am trying out suggested priv_flag approach and testing it.
>> Hopefully that should be final version.
>>>
>>> Thanks,
>>>
>>> Ray
>>>
>>>
>>> On 6/27/2018 2:38 AM, Hari Vyas wrote:
>>>>
>>>>
>>>> When a pci device is detected, a variable is_added is set to
>>>> 1 in pci device structure and proc, sys entries are created.
>>>>
>>>> When a pci device is removed, first is_added is checked for one
>>>> and then device is detached with clearing of proc and sys
>>>> entries and at end, is_added is set to 0.
>>>>
>>>> is_added and is_busmaster are bit fields in pci_dev structure
>>>> sharing same memory location.
>>>>
>>>> A strange issue was observed with multiple times removal and
>>>> rescan of a pcie nvme device using sysfs commands where is_added
>>>> flag was observed as zero instead of one while removing device
>>>> and proc,sys entries are not cleared.  This causes issue in
>>>> later device addition with warning message "proc_dir_entry"
>>>> already registered.
>>>>
>>>> Debugging revealed a race condition between pcie core driver
>>>> enabling is_added bit(pci_bus_add_device()) and nvme driver
>>>> reset work-queue enabling is_busmaster bit (by pci_set_master()).
>>>> As both fields are not handled in atomic manner and that clears
>>>> is_added bit.
>>>>
>>>> Fix protects is_added and is_busmaster bits updation by a spin
>>>> locking mechanism.
>>>>
>>>> Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
>>>> Reviewed-by: Ray Jui <ray.jui@broadcom.com>
>>>> ---
>>>>    drivers/pci/bus.c        | 3 +++
>>>>    drivers/pci/pci-driver.c | 2 ++
>>>>    drivers/pci/pci.c        | 7 +++++++
>>>>    drivers/pci/probe.c      | 1 +
>>>>    drivers/pci/remove.c     | 5 +++++
>>>>    include/linux/pci.h      | 1 +
>>>>    6 files changed, 19 insertions(+)
>>>>
>>>> diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
>>>> index 35b7fc8..61d389d 100644
>>>> --- a/drivers/pci/bus.c
>>>> +++ b/drivers/pci/bus.c
>>>> @@ -310,6 +310,7 @@ void __weak pcibios_bus_add_device(struct pci_dev
>>>> *pdev) { }
>>>>    void pci_bus_add_device(struct pci_dev *dev)
>>>>    {
>>>>          int retval;
>>>> +       unsigned long flags;
>>>>          /*
>>>>           * Can not put in pci_device_add yet because resources
>>>> @@ -330,7 +331,9 @@ void pci_bus_add_device(struct pci_dev *dev)
>>>>                  return;
>>>>          }
>>>>    +     spin_lock_irqsave(&dev->lock, flags);
>>>>          dev->is_added = 1;
>>>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(pci_bus_add_device);
>>>>    diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
>>>> index c125d53..547bcd7 100644
>>>> --- a/drivers/pci/pci-driver.c
>>>> +++ b/drivers/pci/pci-driver.c
>>>> @@ -123,6 +123,8 @@ static ssize_t new_id_store(struct device_driver
>>>> *driver, const char *buf,
>>>>                  pdev->subsystem_device = subdevice;
>>>>                  pdev->class = class;
>>>>    +             spin_lock_init(&pdev->lock);
>>>> +
>>>>                  if (pci_match_id(pdrv->id_table, pdev))
>>>>                          retval = -EEXIST;
>>>>    diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>>>> index 97acba7..bcb1f96 100644
>>>> --- a/drivers/pci/pci.c
>>>> +++ b/drivers/pci/pci.c
>>>> @@ -1649,6 +1649,7 @@ void pci_disable_enabled_device(struct pci_dev
>>>> *dev)
>>>>    void pci_disable_device(struct pci_dev *dev)
>>>>    {
>>>>          struct pci_devres *dr;
>>>> +       unsigned long flags;
>>>>          dr = find_pci_dr(dev);
>>>>          if (dr)
>>>> @@ -1662,7 +1663,9 @@ void pci_disable_device(struct pci_dev *dev)
>>>>          do_pci_disable_device(dev);
>>>>    +     spin_lock_irqsave(&dev->lock, flags);
>>>>          dev->is_busmaster = 0;
>>>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>>>    }
>>>>    EXPORT_SYMBOL(pci_disable_device);
>>>>    @@ -3664,6 +3667,7 @@ void __iomem
>>>> *devm_pci_remap_cfg_resource(struct
>>>> device *dev,
>>>>    static void __pci_set_master(struct pci_dev *dev, bool enable)
>>>>    {
>>>>          u16 old_cmd, cmd;
>>>> +       unsigned long flags;
>>>>          pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
>>>>          if (enable)
>>>> @@ -3675,7 +3679,10 @@ static void __pci_set_master(struct pci_dev *dev,
>>>> bool enable)
>>>>                          enable ? "enabling" : "disabling");
>>>>                  pci_write_config_word(dev, PCI_COMMAND, cmd);
>>>>          }
>>>> +
>>>> +       spin_lock_irqsave(&dev->lock, flags);
>>>>          dev->is_busmaster = enable;
>>>> +       spin_unlock_irqrestore(&dev->lock, flags);
>>>>    }
>>>>      /**
>>>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>>>> index ac876e3..9203b88 100644
>>>> --- a/drivers/pci/probe.c
>>>> +++ b/drivers/pci/probe.c
>>>> @@ -2102,6 +2102,7 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
>>>>          INIT_LIST_HEAD(&dev->bus_list);
>>>>          dev->dev.type = &pci_dev_type;
>>>>          dev->bus = pci_bus_get(bus);
>>>> +       spin_lock_init(&dev->lock);
>>>>          return dev;
>>>>    }
>>>> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
>>>> index 6f072ea..ff57bd6 100644
>>>> --- a/drivers/pci/remove.c
>>>> +++ b/drivers/pci/remove.c
>>>> @@ -17,13 +17,18 @@ static void pci_free_resources(struct pci_dev *dev)
>>>>      static void pci_stop_dev(struct pci_dev *dev)
>>>>    {
>>>> +       unsigned long flags;
>>>> +
>>>>          pci_pme_active(dev, false);
>>>>          if (dev->is_added) {
>>>>                  device_release_driver(&dev->dev);
>>>>                  pci_proc_detach_device(dev);
>>>>                  pci_remove_sysfs_dev_files(dev);
>>>> +
>>>> +               spin_lock_irqsave(&dev->lock, flags);
>>>>                  dev->is_added = 0;
>>>> +               spin_unlock_irqrestore(&dev->lock, flags);
>>>>          }
>>>>          if (dev->bus->self)
>>>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>>>> index 340029b..6940825 100644
>>>> --- a/include/linux/pci.h
>>>> +++ b/include/linux/pci.h
>>>> @@ -365,6 +365,7 @@ struct pci_dev {
>>>>          bool            match_driver;           /* Skip attaching
>>>> driver
>>>> */
>>>>    +     spinlock_t      lock;                   /* Protect is_added and
>>>> is_busmaster */
>>>>          unsigned int    transparent:1;          /* Subtractive decode
>>>> bridge */
>>>>          unsigned int    multifunction:1;        /* Multi-function
>>>> device
>>>> */
>>>>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-06-28 11:23 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-25 10:10 [PATCH] PCI: Data corruption happening due to race condition Hari Vyas
2018-06-25 10:37 ` Lukas Wunner
2018-06-25 10:57   ` Hari Vyas
2018-06-25 11:15     ` Lukas Wunner
2018-06-26 10:17       ` Hari Vyas
2018-06-26 11:53         ` Lukas Wunner
2018-06-27  9:38           ` [PATCH v1] " Hari Vyas
2018-06-27 16:27             ` Ray Jui
2018-06-27 16:32               ` Hari Vyas
2018-06-27 16:36                 ` Ray Jui
2018-06-28 11:23                   ` Hari Vyas
     [not found]           ` <CAM5rFu-Sb5Vhvy19GKesV00=tf0+7Q8hByU11=4F9MVhoO7nWA@mail.gmail.com>
     [not found]             ` <20180627124920.GA27447@wunner.de>
2018-06-27 13:00               ` [PATCH] " Hari Vyas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.