All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] net: save RX queue number in sock for dev_pick_tx() use
@ 2010-08-24  9:01 Changli Gao
  2010-08-24 13:06 ` Eric Dumazet
  2010-08-24 16:45 ` Ben Hutchings
  0 siblings, 2 replies; 3+ messages in thread
From: Changli Gao @ 2010-08-24  9:01 UTC (permalink / raw)
  To: David S. Miller
  Cc: Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev,
	Changli Gao

For the packets sent out from a local server socket, we can use the queue
from which the packets from the client socket are received.

It may help on a TCP or UDP server. Because I don't have a multiqueue NIC,
I don't even test it.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
 include/net/sock.h  |   18 ++++++++++++++++++
 net/core/dev.c      |   25 ++++++++++++++++++-------
 net/ipv4/tcp_ipv4.c |    5 ++++-
 net/ipv4/udp.c      |    4 +++-
 4 files changed, 43 insertions(+), 9 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 100e43b..4e5f2f4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1231,6 +1231,24 @@ static inline int sk_tx_queue_get(const struct sock *sk)
 	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
+static inline void sk_rx_queue_save(struct sock *sk, struct sk_buff *skb)
+{
+	struct dst_entry *dst;
+	int rxqueue;
+
+	if (!skb_rx_queue_recorded(skb))
+		return;
+	rcu_read_lock();
+	dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+	if (dst && !dst->dev->netdev_ops->ndo_select_queue &&
+	    dst->dev == skb->dev) {
+		rxqueue = skb_get_rx_queue(skb);
+		if (rxqueue != sk_tx_queue_get(sk))
+			sk_tx_queue_set(sk, rxqueue);
+	}
+	rcu_read_unlock();
+}
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 859e30f..8dc1904 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2054,6 +2054,18 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 	return queue_index;
 }
 
+static inline void dev_save_tx_queue(struct sk_buff *skb, struct sock *sk,
+				     int queue_index)
+{
+	if (sk) {
+		struct dst_entry *dst;
+
+		dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+		if (dst && skb_dst(skb) == dst)
+			sk_tx_queue_set(sk, queue_index);
+	}
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
@@ -2071,14 +2083,13 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 			queue_index = 0;
 			if (dev->real_num_tx_queues > 1)
 				queue_index = skb_tx_hash(dev, skb);
-
-			if (sk) {
-				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
-
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
+			dev_save_tx_queue(skb, sk, queue_index);
 		}
+	} else if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		do {
+			queue_index -= dev->real_num_tx_queues;
+		} while (unlikely(queue_index >= dev->real_num_tx_queues));
+		dev_save_tx_queue(skb, sk, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0207662..b1c6d3c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1560,6 +1560,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		sock_rps_save_rxhash(sk, skb->rxhash);
+		sk_rx_queue_save(sk, skb);
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
@@ -1584,8 +1585,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 			return 0;
 		}
-	} else
+	} else {
 		sock_rps_save_rxhash(sk, skb->rxhash);
+		sk_rx_queue_save(sk, skb);
+	}
 
 
 	TCP_CHECK_TIMER(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 86e757e..e59f3db 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1264,8 +1264,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
-	if (inet_sk(sk)->inet_daddr)
+	if (inet_sk(sk)->inet_daddr) {
 		sock_rps_save_rxhash(sk, skb->rxhash);
+		sk_rx_queue_save(sk, skb);
+	}
 
 	rc = ip_queue_rcv_skb(sk, skb);
 	if (rc < 0) {

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH RFC] net: save RX queue number in sock for dev_pick_tx() use
  2010-08-24  9:01 [PATCH RFC] net: save RX queue number in sock for dev_pick_tx() use Changli Gao
@ 2010-08-24 13:06 ` Eric Dumazet
  2010-08-24 16:45 ` Ben Hutchings
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Dumazet @ 2010-08-24 13:06 UTC (permalink / raw)
  To: Changli Gao, Tom Herbert
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

Le mardi 24 août 2010 à 17:01 +0800, Changli Gao a écrit :
> For the packets sent out from a local server socket, we can use the queue
> from which the packets from the client socket are received.
> 
> It may help on a TCP or UDP server. Because I don't have a multiqueue NIC,
> I don't even test it.
> 

It may help ? or it may not...

> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  include/net/sock.h  |   18 ++++++++++++++++++
>  net/core/dev.c      |   25 ++++++++++++++++++-------
>  net/ipv4/tcp_ipv4.c |    5 ++++-
>  net/ipv4/udp.c      |    4 +++-
>  4 files changed, 43 insertions(+), 9 deletions(-)
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 100e43b..4e5f2f4 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1231,6 +1231,24 @@ static inline int sk_tx_queue_get(const struct sock *sk)
>  	return sk ? sk->sk_tx_queue_mapping : -1;
>  }
>  
> +static inline void sk_rx_queue_save(struct sock *sk, struct sk_buff *skb)
> +{
> +	struct dst_entry *dst;
> +	int rxqueue;
> +
> +	if (!skb_rx_queue_recorded(skb))
> +		return;
> +	rcu_read_lock();
> +	dst = rcu_dereference_check(sk->sk_dst_cache, 1);

rcu_dereference(sk->sk_dst_cache) is fine here.

> +	if (dst && !dst->dev->netdev_ops->ndo_select_queue &&
> +	    dst->dev == skb->dev) {
> +		rxqueue = skb_get_rx_queue(skb);
> +		if (rxqueue != sk_tx_queue_get(sk))
> +			sk_tx_queue_set(sk, rxqueue);
> +	}
> +	rcu_read_unlock();
> +}
> +

ipv6 not handled

Anyway, should we store a queue number, a rxhash, a cpu number, all
values ?

Tom Herbert last patch (XPS) is about selecting a txqueue given a cpu
number at sending time. In its use, no need to add logic at receive time
to remember rxqueue. But logic is wide (for a given NIC)

I do think we should really clarify the needs before writing hundred of
lines of code that we have to maintain for next decade...

For example, storing the cpu number of the cpu that received a UDP/TCP
frame would probably be nice, and using it as a key for xmit path queue
selection would probably be faster...

Probably we want a selectable way... A multi threaded UDP application,
receiving frames on a single socket, might need to select a single
txqueue (socket property), or multiple queues (based on packet content,
or sender cpu, or last receive cpu number, ...)




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH RFC] net: save RX queue number in sock for dev_pick_tx() use
  2010-08-24  9:01 [PATCH RFC] net: save RX queue number in sock for dev_pick_tx() use Changli Gao
  2010-08-24 13:06 ` Eric Dumazet
@ 2010-08-24 16:45 ` Ben Hutchings
  1 sibling, 0 replies; 3+ messages in thread
From: Ben Hutchings @ 2010-08-24 16:45 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

On Tue, 2010-08-24 at 17:01 +0800, Changli Gao wrote:
> For the packets sent out from a local server socket, we can use the queue
> from which the packets from the client socket are received.
> 
> It may help on a TCP or UDP server. Because I don't have a multiqueue NIC,
> I don't even test it.
[...]

There are some fairly cheap 1G multiqueue NICs around now (and even
several of the ARM embedded Ethernet controllers support multiqueue).

Could you compare this with my patch in
<http://thread.gmane.org/gmane.linux.network/158477>?

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-08-24 16:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-24  9:01 [PATCH RFC] net: save RX queue number in sock for dev_pick_tx() use Changli Gao
2010-08-24 13:06 ` Eric Dumazet
2010-08-24 16:45 ` Ben Hutchings

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.