* [PATCH] Multiqueue macvtap driver
@ 2010-07-31 13:57 Krishna Kumar
2010-07-31 19:18 ` Ben Hutchings
0 siblings, 1 reply; 5+ messages in thread
From: Krishna Kumar @ 2010-07-31 13:57 UTC (permalink / raw)
To: davem, arnd; +Cc: netdev, Krishna Kumar, mst
From: Krishna Kumar <krkumar2@in.ibm.com>
Implement multiqueue facility for macvtap driver. The idea is that
a macvtap device can be opened multiple times and the fd's can be
used to register eg, as backend for vhost.
Please review.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
drivers/net/macvtap.c | 85 ++++++++++++++++++++++++++++-------
include/linux/if_macvlan.h | 11 ++++
2 files changed, 78 insertions(+), 18 deletions(-)
diff -ruNp org/include/linux/if_macvlan.h new/include/linux/if_macvlan.h
--- org/include/linux/if_macvlan.h 2010-07-28 15:10:10.000000000 +0530
+++ new/include/linux/if_macvlan.h 2010-07-31 19:18:38.000000000 +0530
@@ -40,6 +40,14 @@ struct macvlan_rx_stats {
unsigned long rx_errors;
};
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/*
+ * Maximum times a macvtap device can be opened. This can be used to
+ * configure the number of receive queue, e.g. for multiqueue virtio.
+ */
+#define MAX_MACVTAP_QUEUES MIN(16, NR_CPUS)
+
struct macvlan_dev {
struct net_device *dev;
struct list_head list;
@@ -50,7 +58,8 @@ struct macvlan_dev {
enum macvlan_mode mode;
int (*receive)(struct sk_buff *skb);
int (*forward)(struct net_device *dev, struct sk_buff *skb);
- struct macvtap_queue *tap;
+ struct macvtap_queue *taps[MAX_MACVTAP_QUEUES];
+ int numvtaps;
};
static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff -ruNp org/drivers/net/macvtap.c new/drivers/net/macvtap.c
--- org/drivers/net/macvtap.c 2010-07-28 15:10:10.000000000 +0530
+++ new/drivers/net/macvtap.c 2010-07-31 19:07:22.000000000 +0530
@@ -84,26 +84,45 @@ static const struct proto_ops macvtap_so
static DEFINE_SPINLOCK(macvtap_lock);
/*
- * Choose the next free queue, for now there is only one
+ * get_slot: return a [unused/occupied] slot in vlan->taps[]:
+ * - if 'q' is NULL, return the first empty slot;
+ * - otherwise, return the slot this pointer occupies.
*/
+static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
+{
+ int i;
+
+ for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
+ if (rcu_dereference(vlan->taps[i]) == q)
+ return i;
+ }
+
+ /* Should never happen */
+ BUG_ON(1);
+}
+
static int macvtap_set_queue(struct net_device *dev, struct file *file,
struct macvtap_queue *q)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ int index;
int err = -EBUSY;
spin_lock(&macvtap_lock);
- if (rcu_dereference(vlan->tap))
+ if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
goto out;
err = 0;
+ index = get_slot(vlan, NULL);
rcu_assign_pointer(q->vlan, vlan);
- rcu_assign_pointer(vlan->tap, q);
+ rcu_assign_pointer(vlan->taps[index], q);
sock_hold(&q->sk);
q->file = file;
file->private_data = q;
+ vlan->numvtaps++;
+
out:
spin_unlock(&macvtap_lock);
return err;
@@ -124,9 +143,12 @@ static void macvtap_put_queue(struct mac
spin_lock(&macvtap_lock);
vlan = rcu_dereference(q->vlan);
if (vlan) {
- rcu_assign_pointer(vlan->tap, NULL);
+ int index = get_slot(vlan, q);
+
+ rcu_assign_pointer(vlan->taps[index], NULL);
rcu_assign_pointer(q->vlan, NULL);
sock_put(&q->sk);
+ --vlan->numvtaps;
}
spin_unlock(&macvtap_lock);
@@ -136,39 +158,68 @@ static void macvtap_put_queue(struct mac
}
/*
- * Since we only support one queue, just dereference the pointer.
+ * Select a queue based on the rxq of the device on which this packet
+ * arrived. If the incoming device is not mq, then use our cpu number
+ * to select a queue. vlan->numvtaps is cached in case it changes
+ * during the execution of this function.
*/
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
struct sk_buff *skb)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ struct macvtap_queue *tap = NULL;
+ int numvtaps = vlan->numvtaps;
+ u16 rxq;
+
+ if (!numvtaps)
+ return NULL;
+
+ if (likely(skb_rx_queue_recorded(skb))) {
+ rxq = skb_get_rx_queue(skb);
- return rcu_dereference(vlan->tap);
+ while (unlikely(rxq >= numvtaps))
+ rxq -= numvtaps;
+
+ tap = rcu_dereference(vlan->taps[rxq]);
+ }
+
+ if (unlikely(!tap)) {
+ rxq = smp_processor_id() % numvtaps;
+ tap = rcu_dereference(vlan->taps[rxq]);
+ }
+
+ return tap;
}
/*
* The net_device is going away, give up the reference
- * that it holds on the queue (all the queues one day)
- * and safely set the pointer from the queues to NULL.
+ * that it holds on all queues and safely set the pointer
+ * from the queues to NULL.
*/
static void macvtap_del_queues(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
- struct macvtap_queue *q;
+ struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES];
+ int i, j = 0;
+ /* macvtap_put_queue can free some slots, so go through all slots */
spin_lock(&macvtap_lock);
- q = rcu_dereference(vlan->tap);
- if (!q) {
- spin_unlock(&macvtap_lock);
- return;
+ for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) {
+ q = rcu_dereference(vlan->taps[i]);
+ if (q) {
+ qlist[j++] = q;
+ rcu_assign_pointer(vlan->taps[i], NULL);
+ rcu_assign_pointer(q->vlan, NULL);
+ vlan->numvtaps--;
+ }
}
-
- rcu_assign_pointer(vlan->tap, NULL);
- rcu_assign_pointer(q->vlan, NULL);
+ BUG_ON(vlan->numvtaps != 0);
spin_unlock(&macvtap_lock);
synchronize_rcu();
- sock_put(&q->sk);
+
+ for (--j; j >= 0; j--)
+ sock_put(&qlist[j]->sk);
}
/*
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Multiqueue macvtap driver
2010-07-31 13:57 [PATCH] Multiqueue macvtap driver Krishna Kumar
@ 2010-07-31 19:18 ` Ben Hutchings
2010-08-01 7:34 ` David Miller
0 siblings, 1 reply; 5+ messages in thread
From: Ben Hutchings @ 2010-07-31 19:18 UTC (permalink / raw)
To: Krishna Kumar; +Cc: davem, arnd, netdev, mst
On Sat, 2010-07-31 at 19:27 +0530, Krishna Kumar wrote:
[...]
> @@ -136,39 +158,68 @@ static void macvtap_put_queue(struct mac
> }
>
> /*
> - * Since we only support one queue, just dereference the pointer.
> + * Select a queue based on the rxq of the device on which this packet
> + * arrived. If the incoming device is not mq, then use our cpu number
> + * to select a queue. vlan->numvtaps is cached in case it changes
> + * during the execution of this function.
> */
[...]
This can result in reordering if a single-queue device's RX interrupt's
CPU affinity is changed. We generally try to avoid that. You should
really use or generate a flow hash. There is code for this in
net/core/dev.c:get_rps_cpu() which could be factored out into a separate
function.
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Multiqueue macvtap driver
2010-07-31 19:18 ` Ben Hutchings
@ 2010-08-01 7:34 ` David Miller
2010-08-02 12:37 ` Krishna Kumar2
0 siblings, 1 reply; 5+ messages in thread
From: David Miller @ 2010-08-01 7:34 UTC (permalink / raw)
To: bhutchings; +Cc: krkumar2, arnd, netdev, mst
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Sat, 31 Jul 2010 20:18:27 +0100
> On Sat, 2010-07-31 at 19:27 +0530, Krishna Kumar wrote:
> [...]
>> @@ -136,39 +158,68 @@ static void macvtap_put_queue(struct mac
>> }
>>
>> /*
>> - * Since we only support one queue, just dereference the pointer.
>> + * Select a queue based on the rxq of the device on which this packet
>> + * arrived. If the incoming device is not mq, then use our cpu number
>> + * to select a queue. vlan->numvtaps is cached in case it changes
>> + * during the execution of this function.
>> */
> [...]
>
> This can result in reordering if a single-queue device's RX interrupt's
> CPU affinity is changed. We generally try to avoid that. You should
> really use or generate a flow hash. There is code for this in
> net/core/dev.c:get_rps_cpu() which could be factored out into a separate
> function.
Agreed.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Multiqueue macvtap driver
2010-08-01 7:34 ` David Miller
@ 2010-08-02 12:37 ` Krishna Kumar2
2010-08-02 13:29 ` Ben Hutchings
0 siblings, 1 reply; 5+ messages in thread
From: Krishna Kumar2 @ 2010-08-02 12:37 UTC (permalink / raw)
To: David Miller; +Cc: arnd, bhutchings, mst, netdev
Thanks Ben & Dave. A question though - which of the following
is preferable for the macvtap driver:
1. Calculate flow and use that to find a queue; or
2. First check if skb_rx_queue_recorded is true and if so use
that; otherwise calculate the flow as in #1.
I guess #1 is better, since packets for a single flow will go to the
same queue even if they arrive on different rxqs of a mq driver.
But I want to make sure.
Thanks,
- KK
David Miller <davem@davemloft.net> wrote on 08/01/2010 01:04:06 PM:
> David Miller <davem@davemloft.net>
> 08/01/2010 01:04 PM
>
> To
>
> bhutchings@solarflare.com
>
> cc
>
> Krishna Kumar2/India/IBM@IBMIN, arnd@arndb.de,
> netdev@vger.kernel.org, mst@redhat.com
>
> Subject
>
> Re: [PATCH] Multiqueue macvtap driver
>
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Sat, 31 Jul 2010 20:18:27 +0100
>
> > On Sat, 2010-07-31 at 19:27 +0530, Krishna Kumar wrote:
> > [...]
> >> @@ -136,39 +158,68 @@ static void macvtap_put_queue(struct mac
> >> }
> >>
> >> /*
> >> - * Since we only support one queue, just dereference the pointer.
> >> + * Select a queue based on the rxq of the device on which this packet
> >> + * arrived. If the incoming device is not mq, then use our cpu number
> >> + * to select a queue. vlan->numvtaps is cached in case it changes
> >> + * during the execution of this function.
> >> */
> > [...]
> >
> > This can result in reordering if a single-queue device's RX interrupt's
> > CPU affinity is changed. We generally try to avoid that. You should
> > really use or generate a flow hash. There is code for this in
> > net/core/dev.c:get_rps_cpu() which could be factored out into a
separate
> > function.
>
> Agreed.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Multiqueue macvtap driver
2010-08-02 12:37 ` Krishna Kumar2
@ 2010-08-02 13:29 ` Ben Hutchings
0 siblings, 0 replies; 5+ messages in thread
From: Ben Hutchings @ 2010-08-02 13:29 UTC (permalink / raw)
To: Krishna Kumar2; +Cc: David Miller, arnd, mst, netdev
On Mon, 2010-08-02 at 18:07 +0530, Krishna Kumar2 wrote:
> Thanks Ben & Dave. A question though - which of the following
> is preferable for the macvtap driver:
>
> 1. Calculate flow and use that to find a queue; or
> 2. First check if skb_rx_queue_recorded is true and if so use
> that; otherwise calculate the flow as in #1.
>
> I guess #1 is better, since packets for a single flow will go to the
> same queue even if they arrive on different rxqs of a mq driver.
> But I want to make sure.
[...]
#2 is right. Just as macvtap should provide a stable flow to RX-queue
mapping, so should the drivers it interfaces with.
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-08-02 13:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-31 13:57 [PATCH] Multiqueue macvtap driver Krishna Kumar
2010-07-31 19:18 ` Ben Hutchings
2010-08-01 7:34 ` David Miller
2010-08-02 12:37 ` Krishna Kumar2
2010-08-02 13:29 ` Ben Hutchings
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.