* [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode
@ 2018-06-08  9:27 Vadim Lomovtsev
  2018-06-10 19:35 ` David Miller
  0 siblings, 1 reply; 5+ messages in thread
From: Vadim Lomovtsev @ 2018-06-08  9:27 UTC (permalink / raw)
  To: davem, rric, sgoutham, linux-arm-kernel, netdev, linux-kernel
  Cc: dnelson, Vadim Lomovtsev

From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>

For each network interface, the Linux network stack issues an ndo_set_rx_mode
call to configure MAC address filters (e.g. for multicast filtering).
The ThunderX NICVF driver currently has only one ordered workqueue to process
such requests for all VFs.

Because of that, a subsequent call to ndo_set_rx_mode can corrupt the data
currently in use by nicvf_set_rx_mode_task, which in turn can cause the
following issue:
[...]
[   48.978341] Unable to handle kernel paging request at virtual address 1fffff0000000000
[   48.986275] Mem abort info:
[   48.989058]   Exception class = DABT (current EL), IL = 32 bits
[   48.994965]   SET = 0, FnV = 0
[   48.998020]   EA = 0, S1PTW = 0
[   49.001152] Data abort info:
[   49.004022]   ISV = 0, ISS = 0x00000004
[   49.007869]   CM = 0, WnR = 0
[   49.010826] [1fffff0000000000] address between user and kernel address ranges
[   49.017963] Internal error: Oops: 96000004 [#1] SMP
[...]
[   49.072138] task: ffff800fdd675400 task.stack: ffff000026440000
[   49.078051] PC is at prefetch_freepointer.isra.37+0x28/0x3c
[   49.083613] LR is at kmem_cache_alloc_trace+0xc8/0x1fc
[...]
[   49.272684] [<ffff0000082738f0>] prefetch_freepointer.isra.37+0x28/0x3c
[   49.279286] [<ffff000008276bc8>] kmem_cache_alloc_trace+0xc8/0x1fc
[   49.285455] [<ffff0000082c0c0c>] alloc_fdtable+0x78/0x134
[   49.290841] [<ffff0000082c15c0>] dup_fd+0x254/0x2f4
[   49.295709] [<ffff0000080d1954>] copy_process.isra.38.part.39+0x64c/0x1168
[   49.302572] [<ffff0000080d264c>] _do_fork+0xfc/0x3b0
[   49.307524] [<ffff0000080d29e8>] SyS_clone+0x44/0x50
[...]

This patch prevents such concurrent data writes by protecting the work
arguments with a spinlock.
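
For illustration, the unsynchronized flow being fixed looks roughly like the
sketch below (condensed from nicvf_set_rx_mode() and the old
nicvf_set_rx_mode_task(); the interleaving comments are illustrative and not
taken from the driver):

	/* Both sides touch nic->rx_mode_work with no synchronization. */

	/* ndo_set_rx_mode (may run again at any time): */
	nic->rx_mode_work.mc = mc_list;		/* replaces the pointer ... */
	nic->rx_mode_work.mode = mode;
	queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 2 * HZ);

	/* nicvf_set_rx_mode_task (may still be running on the old data): */
	for (idx = 0; idx < vf_work->mc->count; idx++) {
		/* ... while this loop re-reads vf_work->mc each iteration */
		mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
		mbx.xcast.data.mac = vf_work->mc->mc[idx];
		nicvf_send_msg_to_pf(nic, &mbx);
	}
	kfree(vf_work->mc);	/* can free a list that a re-queued run will
				 * still dereference, corrupting the slab as
				 * in the oops above */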

Reported-by: Dean Nelson <dnelson@redhat.com>
Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
---
 drivers/net/ethernet/cavium/thunder/nic.h        |  2 +
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 50 +++++++++++++++++-------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 448d1fafc827..f4d81765221e 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -325,6 +325,8 @@ struct nicvf {
 	struct tasklet_struct	qs_err_task;
 	struct work_struct	reset_task;
 	struct nicvf_work       rx_mode_work;
+	/* spinlock to protect workqueue arguments from concurrent access */
+	spinlock_t              rx_mode_wq_lock;
 
 	/* PTP timestamp */
 	struct cavium_ptp	*ptp_clock;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 7135db45927e..135766c4296b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1923,17 +1923,12 @@ static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
 	}
 }
 
-static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
+				     struct nicvf *nic)
 {
-	struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
-						  work.work);
-	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
 	union nic_mbx mbx = {};
 	int idx;
 
-	if (!vf_work)
-		return;
-
 	/* From the inside of VM code flow we have only 128 bits memory
 	 * available to send message to host's PF, so send all mc addrs
 	 * one by one, starting from flush command in case if kernel
@@ -1944,7 +1939,7 @@ static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
 	mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
 	nicvf_send_msg_to_pf(nic, &mbx);
 
-	if (vf_work->mode & BGX_XCAST_MCAST_FILTER) {
+	if (mode & BGX_XCAST_MCAST_FILTER) {
 		/* once enabling filtering, we need to signal to PF to add
 		 * its' own LMAC to the filter to accept packets for it.
 		 */
@@ -1954,23 +1949,46 @@ static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
 	}
 
 	/* check if we have any specific MACs to be added to PF DMAC filter */
-	if (vf_work->mc) {
+	if (mc_addrs) {
 		/* now go through kernel list of MACs and add them one by one */
-		for (idx = 0; idx < vf_work->mc->count; idx++) {
+		for (idx = 0; idx < mc_addrs->count; idx++) {
 			mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-			mbx.xcast.data.mac = vf_work->mc->mc[idx];
+			mbx.xcast.data.mac = mc_addrs->mc[idx];
 			nicvf_send_msg_to_pf(nic, &mbx);
 		}
-		kfree(vf_work->mc);
+		kfree(mc_addrs);
 	}
 
 	/* and finally set rx mode for PF accordingly */
 	mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
-	mbx.xcast.data.mode = vf_work->mode;
+	mbx.xcast.data.mode = mode;
 
 	nicvf_send_msg_to_pf(nic, &mbx);
 }
 
+static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+{
+	struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
+						  work.work);
+	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
+	u8 mode;
+	struct xcast_addr_list *mc;
+
+	if (!vf_work)
+		return;
+
+	/* Save message data locally to prevent them from
+	 * being overwritten by next ndo_set_rx_mode call().
+	 */
+	spin_lock(&nic->rx_mode_wq_lock);
+	mode = vf_work->mode;
+	mc = vf_work->mc;
+	vf_work->mc = NULL;
+	spin_unlock(&nic->rx_mode_wq_lock);
+
+	__nicvf_set_rx_mode_task(mode, mc, nic);
+}
+
 static void nicvf_set_rx_mode(struct net_device *netdev)
 {
 	struct nicvf *nic = netdev_priv(netdev);
@@ -2004,9 +2022,12 @@ static void nicvf_set_rx_mode(struct net_device *netdev)
 			}
 		}
 	}
+	spin_lock(&nic->rx_mode_wq_lock);
+	kfree(nic->rx_mode_work.mc);
 	nic->rx_mode_work.mc = mc_list;
 	nic->rx_mode_work.mode = mode;
-	queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 2 * HZ);
+	queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
+	spin_unlock(&nic->rx_mode_wq_lock);
 }
 
 static const struct net_device_ops nicvf_netdev_ops = {
@@ -2163,6 +2184,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	INIT_WORK(&nic->reset_task, nicvf_reset_task);
 
 	INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+	spin_lock_init(&nic->rx_mode_wq_lock);
 
 	err = register_netdev(netdev);
 	if (err) {
-- 
2.14.4


* Re: [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode
  2018-06-08  9:27 [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode Vadim Lomovtsev
@ 2018-06-10 19:35 ` David Miller
  2018-06-11 11:22   ` Dean Nelson
  0 siblings, 1 reply; 5+ messages in thread
From: David Miller @ 2018-06-10 19:35 UTC (permalink / raw)
  To: Vadim.Lomovtsev
  Cc: rric, sgoutham, linux-arm-kernel, netdev, linux-kernel, dnelson,
	Vadim.Lomovtsev

From: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
Date: Fri,  8 Jun 2018 02:27:59 -0700

> +	/* Save message data locally to prevent them from
> +	 * being overwritten by next ndo_set_rx_mode call().
> +	 */
> +	spin_lock(&nic->rx_mode_wq_lock);
> +	mode = vf_work->mode;
> +	mc = vf_work->mc;
> +	vf_work->mc = NULL;
> +	spin_unlock(&nic->rx_mode_wq_lock);

At the moment you drop this lock, the memory behind 'mc' can be
freed up by:

> +	spin_lock(&nic->rx_mode_wq_lock);
> +	kfree(nic->rx_mode_work.mc);

And you'll crash when you dereference it above via
__nicvf_set_rx_mode_task().


* Re: [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode
  2018-06-10 19:35 ` David Miller
@ 2018-06-11 11:22   ` Dean Nelson
  2018-06-12 22:25     ` David Miller
  0 siblings, 1 reply; 5+ messages in thread
From: Dean Nelson @ 2018-06-11 11:22 UTC (permalink / raw)
  To: David Miller, Vadim.Lomovtsev
  Cc: rric, sgoutham, linux-arm-kernel, netdev, linux-kernel, Vadim.Lomovtsev

On 06/10/2018 02:35 PM, David Miller wrote:
> From: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
> Date: Fri,  8 Jun 2018 02:27:59 -0700
> 
>> +	/* Save message data locally to prevent them from
>> +	 * being overwritten by next ndo_set_rx_mode call().
>> +	 */
>> +	spin_lock(&nic->rx_mode_wq_lock);
>> +	mode = vf_work->mode;
>> +	mc = vf_work->mc;
>> +	vf_work->mc = NULL;

If I'm reading this code correctly, I believe nic->rx_mode_work.mc will
have been set to NULL before the lock is dropped by
nicvf_set_rx_mode_task() and acquired by nicvf_set_rx_mode().


>> +	spin_unlock(&nic->rx_mode_wq_lock);
> 
> At the moment you drop this lock, the memory behind 'mc' can be
> freed up by:
> 
>> +	spin_lock(&nic->rx_mode_wq_lock);
>> +	kfree(nic->rx_mode_work.mc);

So the kfree() will be called with a NULL pointer and quickly return.


> 
> And you'll crash when you dereference it above via
> __nicvf_set_rx_mode_task().
> 

I believe the call to kfree() in nicvf_set_rx_mode() is there to free
up a mc_list that has been allocated by nicvf_set_rx_mode() during a
previous callback to the function, one that has not yet been processed
by nicvf_set_rx_mode_task().

In this way only the last 'unprocessed' callback to nicvf_set_rx_mode()
gets processed should there be multiple callbacks occurring between the
times the nicvf_set_rx_mode_task() runs.

In my testing with this patch, this is what I see happening.
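
To make that ordering concrete, here is the post-patch hand-off in outline
(condensed from the patch above, comments added; not meant as a standalone
build):

	/* Worker side (nicvf_set_rx_mode_task): detach the pending arguments
	 * under the lock, so the list it is about to walk can no longer be
	 * reached through nic->rx_mode_work.
	 */
	spin_lock(&nic->rx_mode_wq_lock);
	mode = vf_work->mode;
	mc = vf_work->mc;
	vf_work->mc = NULL;			/* worker now owns 'mc' */
	spin_unlock(&nic->rx_mode_wq_lock);

	__nicvf_set_rx_mode_task(mode, mc, nic);	/* kfree()s 'mc' when done */

	/* ndo callback side (nicvf_set_rx_mode): under the same lock, kfree()
	 * sees either NULL (the worker already detached the list) or a list
	 * from an earlier, still-unprocessed callback -- never the list the
	 * worker is currently using.
	 */
	spin_lock(&nic->rx_mode_wq_lock);
	kfree(nic->rx_mode_work.mc);
	nic->rx_mode_work.mc = mc_list;
	nic->rx_mode_work.mode = mode;
	queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
	spin_unlock(&nic->rx_mode_wq_lock);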


* Re: [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode
  2018-06-11 11:22   ` Dean Nelson
@ 2018-06-12 22:25     ` David Miller
  2018-06-13  9:15       ` Vadim Lomovtsev
  0 siblings, 1 reply; 5+ messages in thread
From: David Miller @ 2018-06-12 22:25 UTC (permalink / raw)
  To: dnelson
  Cc: Vadim.Lomovtsev, rric, sgoutham, linux-arm-kernel, netdev,
	linux-kernel, Vadim.Lomovtsev

From: Dean Nelson <dnelson@redhat.com>
Date: Mon, 11 Jun 2018 06:22:14 -0500

> On 06/10/2018 02:35 PM, David Miller wrote:
>> From: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
>> Date: Fri,  8 Jun 2018 02:27:59 -0700
>> 
>>> +	/* Save message data locally to prevent them from
>>> +	 * being overwritten by next ndo_set_rx_mode call().
>>> +	 */
>>> +	spin_lock(&nic->rx_mode_wq_lock);
>>> +	mode = vf_work->mode;
>>> +	mc = vf_work->mc;
>>> +	vf_work->mc = NULL;
> 
> If I'm reading this code correctly, I believe nic->rx_mode_work.mc
> will
> have been set to NULL before the lock is dropped by
> nicvf_set_rx_mode_task() and acquired by nicvf_set_rx_mode().
> 
> 
>>> +	spin_unlock(&nic->rx_mode_wq_lock);
>> At the moment you drop this lock, the memory behind 'mc' can be
>> freed up by:
>> 
>>> +	spin_lock(&nic->rx_mode_wq_lock);
>>> +	kfree(nic->rx_mode_work.mc);
> 
> So the kfree() will be called with a NULL pointer and quickly return.
> 
> 
>> And you'll crash when you dereference it above via
>> __nicvf_set_rx_mode_task().
>> 
> 
> I believe the call to kfree() in nicvf_set_rx_mode() is there to free
> up a mc_list that has been allocated by nicvf_set_rx_mode() during a
> previous callback to the function, one that has not yet been processed
> by nicvf_set_rx_mode_task().
> 
> In this way only the last 'unprocessed' callback to
> nicvf_set_rx_mode()
> gets processed should there be multiple callbacks occurring between
> the
> times the nicvf_set_rx_mode_task() runs.
> 
> In my testing with this patch, this is what I see happening.

You're right, my bad.

Patch applied.


* Re: [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode
  2018-06-12 22:25     ` David Miller
@ 2018-06-13  9:15       ` Vadim Lomovtsev
  0 siblings, 0 replies; 5+ messages in thread
From: Vadim Lomovtsev @ 2018-06-13  9:15 UTC (permalink / raw)
  To: David Miller
  Cc: dnelson, rric, sgoutham, linux-arm-kernel, netdev, linux-kernel,
	Vadim.Lomovtsev

Sorry for the delay.

On Tue, Jun 12, 2018 at 03:25:40PM -0700, David Miller wrote:
> From: Dean Nelson <dnelson@redhat.com>
> Date: Mon, 11 Jun 2018 06:22:14 -0500
> 
> > On 06/10/2018 02:35 PM, David Miller wrote:
> >> From: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
> >> Date: Fri,  8 Jun 2018 02:27:59 -0700
> >> 
> >>> +	/* Save message data locally to prevent them from
> >>> +	 * being overwritten by next ndo_set_rx_mode call().
> >>> +	 */
> >>> +	spin_lock(&nic->rx_mode_wq_lock);
> >>> +	mode = vf_work->mode;
> >>> +	mc = vf_work->mc;
> >>> +	vf_work->mc = NULL;
> > 
> > If I'm reading this code correctly, I believe nic->rx_mode_work.mc
> > will
> > have been set to NULL before the lock is dropped by
> > nicvf_set_rx_mode_task() and acquired by nicvf_set_rx_mode().
> > 
> > 
> >>> +	spin_unlock(&nic->rx_mode_wq_lock);
> >> At the moment you drop this lock, the memory behind 'mc' can be
> >> freed up by:
> >> 
> >>> +	spin_lock(&nic->rx_mode_wq_lock);
> >>> +	kfree(nic->rx_mode_work.mc);
> > 
> > So the kfree() will be called with a NULL pointer and quickly return.
> > 
> > 
> >> And you'll crash when you dereference it above via
> >> __nicvf_set_rx_mode_task().
> >> 
> > 
> > I believe the call to kfree() in nicvf_set_rx_mode() is there to free
> > up a mc_list that has been allocated by nicvf_set_rx_mode() during a
> > previous callback to the function, one that has not yet been processed
> > by nicvf_set_rx_mode_task().
> > 
> > In this way only the last 'unprocessed' callback to
> > nicvf_set_rx_mode()
> > gets processed should there be multiple callbacks occurring between
> > the
> > times the nicvf_set_rx_mode_task() runs.
> > 
> > In my testing with this patch, this is what I see happening.
> 
> You're right, my bad.
> 
> Patch applied.

Thank you for your time.

WBR,
Vadim
