* [PATCH net-next 1/4] tcp: tsq: add tsq_flags / tsq_enum
2016-12-02 18:25 [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
@ 2016-12-02 18:25 ` Eric Dumazet
2016-12-02 18:25 ` [PATCH net-next 2/4] tcp: tsq: remove one locked operation in tcp_wfree() Eric Dumazet
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2016-12-02 18:25 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet
This is a cleanup, to ease code review of following patches.
Old 'enum tsq_flags' is renamed, and a new enumeration is added
with the flags used in cmpxchg() operations as opposed to
single bit operations.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/linux/tcp.h | 11 ++++++++++-
net/ipv4/tcp_output.c | 16 ++++++++--------
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 00e0ee8f001f..c79ee070c56f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -363,7 +363,7 @@ struct tcp_sock {
u32 *saved_syn;
};
-enum tsq_flags {
+enum tsq_enum {
TSQ_THROTTLED,
TSQ_QUEUED,
TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */
@@ -374,6 +374,15 @@ enum tsq_flags {
*/
};
+enum tsq_flags {
+ TSQF_THROTTLED = (1UL << TSQ_THROTTLED),
+ TSQF_QUEUED = (1UL << TSQ_QUEUED),
+ TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED),
+ TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED),
+ TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED),
+ TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED),
+};
+
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
return (struct tcp_sock *)sk;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d3545d0cff75..ac55aefc881d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -784,10 +784,10 @@ static void tcp_tasklet_func(unsigned long data)
}
}
-#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
- (1UL << TCP_WRITE_TIMER_DEFERRED) | \
- (1UL << TCP_DELACK_TIMER_DEFERRED) | \
- (1UL << TCP_MTU_REDUCED_DEFERRED))
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
+ TCPF_WRITE_TIMER_DEFERRED | \
+ TCPF_DELACK_TIMER_DEFERRED | \
+ TCPF_MTU_REDUCED_DEFERRED)
/**
* tcp_release_cb - tcp release_sock() callback
* @sk: socket
@@ -808,7 +808,7 @@ void tcp_release_cb(struct sock *sk)
nflags = flags & ~TCP_DEFERRED_ALL;
} while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
- if (flags & (1UL << TCP_TSQ_DEFERRED))
+ if (flags & TCPF_TSQ_DEFERRED)
tcp_tsq_handler(sk);
/* Here begins the tricky part :
@@ -822,15 +822,15 @@ void tcp_release_cb(struct sock *sk)
*/
sock_release_ownership(sk);
- if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
+ if (flags & TCPF_WRITE_TIMER_DEFERRED) {
tcp_write_timer_handler(sk);
__sock_put(sk);
}
- if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
+ if (flags & TCPF_DELACK_TIMER_DEFERRED) {
tcp_delack_timer_handler(sk);
__sock_put(sk);
}
- if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
+ if (flags & TCPF_MTU_REDUCED_DEFERRED) {
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH net-next 2/4] tcp: tsq: remove one locked operation in tcp_wfree()
2016-12-02 18:25 [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
2016-12-02 18:25 ` [PATCH net-next 1/4] tcp: tsq: add tsq_flags / tsq_enum Eric Dumazet
@ 2016-12-02 18:25 ` Eric Dumazet
2016-12-02 18:25 ` [PATCH net-next 3/4] tcp: tsq: add shortcut in tcp_tasklet_func() Eric Dumazet
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2016-12-02 18:25 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet
Instead of atomically clear TSQ_THROTTLED and atomically set TSQ_QUEUED
bits, use one cmpxchg() to perform a single locked operation.
Since the following patch will also set TCP_TSQ_DEFERRED here,
this cmpxchg() will make this addition free.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/tcp_output.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ac55aefc881d..76be79437595 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -860,6 +860,7 @@ void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long flags, nval, oval;
int wmem;
/* Keep one reference on sk_wmem_alloc.
@@ -877,11 +878,17 @@ void tcp_wfree(struct sk_buff *skb)
if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
goto out;
- if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
- !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
- unsigned long flags;
+ for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) {
struct tsq_tasklet *tsq;
+ if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
+ goto out;
+
+ nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
+ nval = cmpxchg(&tp->tsq_flags, oval, nval);
+ if (nval != oval)
+ continue;
+
/* queue this socket to tasklet queue */
local_irq_save(flags);
tsq = this_cpu_ptr(&tsq_tasklet);
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH net-next 3/4] tcp: tsq: add shortcut in tcp_tasklet_func()
2016-12-02 18:25 [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
2016-12-02 18:25 ` [PATCH net-next 1/4] tcp: tsq: add tsq_flags / tsq_enum Eric Dumazet
2016-12-02 18:25 ` [PATCH net-next 2/4] tcp: tsq: remove one locked operation in tcp_wfree() Eric Dumazet
@ 2016-12-02 18:25 ` Eric Dumazet
2016-12-02 22:12 ` Eric Dumazet
2016-12-02 18:25 ` [PATCH net-next 4/4] tcp: tsq: avoid one atomic in tcp_wfree() Eric Dumazet
2016-12-02 20:57 ` [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
4 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2016-12-02 18:25 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet
Under high stress, I've seen tcp_tasklet_func() consuming
~700 usec, handling ~150 tcp sockets.
By setting TCP_TSQ_DEFERRED in tcp_wfree(), we give a chance
for other cpus/threads entering tcp_write_xmit() to grab it,
allowing tcp_tasklet_func() to skip sockets that already did
an xmit cycle.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/tcp_output.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 76be79437595..9143c52b3105 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -767,19 +767,19 @@ static void tcp_tasklet_func(unsigned long data)
list_for_each_safe(q, n, &list) {
tp = list_entry(q, struct tcp_sock, tsq_node);
list_del(&tp->tsq_node);
+ clear_bit(TSQ_QUEUED, &tp->tsq_flags);
sk = (struct sock *)tp;
- bh_lock_sock(sk);
-
- if (!sock_owned_by_user(sk)) {
- tcp_tsq_handler(sk);
- } else {
- /* defer the work to tcp_release_cb() */
- set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+ if (!sk->sk_lock.owned &&
+ test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) {
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk)) {
+ clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+ tcp_tsq_handler(sk);
+ }
+ bh_unlock_sock(sk);
}
- bh_unlock_sock(sk);
- clear_bit(TSQ_QUEUED, &tp->tsq_flags);
sk_free(sk);
}
}
@@ -884,7 +884,7 @@ void tcp_wfree(struct sk_buff *skb)
if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
goto out;
- nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
+ nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCP_TSQ_DEFERRED;
nval = cmpxchg(&tp->tsq_flags, oval, nval);
if (nval != oval)
continue;
@@ -2229,6 +2229,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
+ if (test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags))
+ clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
if (tcp_small_queue_check(sk, skb, 0))
break;
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH net-next 3/4] tcp: tsq: add shortcut in tcp_tasklet_func()
2016-12-02 18:25 ` [PATCH net-next 3/4] tcp: tsq: add shortcut in tcp_tasklet_func() Eric Dumazet
@ 2016-12-02 22:12 ` Eric Dumazet
0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2016-12-02 22:12 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David S . Miller, netdev
On Fri, 2016-12-02 at 10:25 -0800, Eric Dumazet wrote:
> Under high stress, I've seen tcp_tasklet_func() consuming
> ~700 usec, handling ~150 tcp sockets.
>
> By setting TCP_TSQ_DEFERRED in tcp_wfree(), we give a chance
> for other cpus/threads entering tcp_write_xmit() to grab it,
> allowing tcp_tasklet_func() to skip sockets that already did
> an xmit cycle.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
...
> @@ -884,7 +884,7 @@ void tcp_wfree(struct sk_buff *skb)
> if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
> goto out;
>
> - nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
> + nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCP_TSQ_DEFERRED;
Typo here...
Should be :
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
> nval = cmpxchg(&tp->tsq_flags, oval, nval);
> if (nval != oval)
> continue;
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH net-next 4/4] tcp: tsq: avoid one atomic in tcp_wfree()
2016-12-02 18:25 [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
` (2 preceding siblings ...)
2016-12-02 18:25 ` [PATCH net-next 3/4] tcp: tsq: add shortcut in tcp_tasklet_func() Eric Dumazet
@ 2016-12-02 18:25 ` Eric Dumazet
2016-12-02 20:57 ` [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
4 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2016-12-02 18:25 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet
Under high load, tcp_wfree() has an atomic operation trying
to schedule a tasklet over and over.
We can schedule it only if our per cpu list was empty.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/tcp_output.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9143c52b3105..999f7d583092 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -880,6 +880,7 @@ void tcp_wfree(struct sk_buff *skb)
for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) {
struct tsq_tasklet *tsq;
+ bool empty;
if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
goto out;
@@ -892,8 +893,10 @@ void tcp_wfree(struct sk_buff *skb)
/* queue this socket to tasklet queue */
local_irq_save(flags);
tsq = this_cpu_ptr(&tsq_tasklet);
+ empty = list_empty(&tsq->head);
list_add(&tp->tsq_node, &tsq->head);
- tasklet_schedule(&tsq->tasklet);
+ if (empty)
+ tasklet_schedule(&tsq->tasklet);
local_irq_restore(flags);
return;
}
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH net-next 0/4] tcp: tsq: performance series
2016-12-02 18:25 [PATCH net-next 0/4] tcp: tsq: performance series Eric Dumazet
` (3 preceding siblings ...)
2016-12-02 18:25 ` [PATCH net-next 4/4] tcp: tsq: avoid one atomic in tcp_wfree() Eric Dumazet
@ 2016-12-02 20:57 ` Eric Dumazet
4 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2016-12-02 20:57 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet
On Fri, Dec 2, 2016 at 10:25 AM, Eric Dumazet <edumazet@google.com> wrote:
> Under very high TX stress, CPU handling NIC TX completions can spend
> considerable amount of cycles handling TSQ (TCP Small Queues) logic.
>
> This patch series avoids some atomic operations, but more important
> patch is the 3rd one, allowing other cpus processing ACK packets and
> calling tcp_write_xmit() to grab TCP_TSQ_DEFERRED so that
> tcp_tasklet_func() can skip already processed sockets.
>
> This avoid lots of lock acquisitions and cache lines accesses,
> particularly under load.
>
Please do not merge this version.
I probably messed something, I need to make more tests.
Thanks.
^ permalink raw reply [flat|nested] 7+ messages in thread