* [PATCH net] ipv6: introduce tcp_v6_iif()
@ 2014-10-17 16:17 Eric Dumazet
2014-10-17 16:26 ` David Laight
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Eric Dumazet @ 2014-10-17 16:17 UTC (permalink / raw)
To: David Miller; +Cc: netdev
From: Eric Dumazet <edumazet@google.com>
Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
misses") added a regression for SO_BINDTODEVICE on IPv6.
This is because we still use inet6_iif() which expects that IP6 control
block is still at the beginning of skb->cb[]
This patch adds tcp_v6_iif() helper and uses it where necessary.
Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
parameter to it.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
---
include/net/inet6_hashtables.h | 5 +++--
include/net/tcp.h | 9 +++++++++
net/dccp/ipv6.c | 3 ++-
net/ipv6/syncookies.c | 2 +-
net/ipv6/tcp_ipv6.c | 26 +++++++++++++++-----------
5 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index ae0613544308..d1d272843b3b 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -80,7 +80,8 @@ static inline struct sock *__inet6_lookup(struct net *net,
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
const __be16 sport,
- const __be16 dport)
+ const __be16 dport,
+ int iif)
{
struct sock *sk = skb_steal_sock(skb);
@@ -90,7 +91,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
&ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
- inet6_iif(skb));
+ iif);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 74efeda994b3..12e895c52139 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -730,6 +730,15 @@ struct tcp_skb_cb {
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
+
+/* This is the variant of inet6_iif() that must be used by TCP,
+ * as TCP moves IP6CB into a different location in skb->cb[]
+ */
+static inline int tcp_v6_iif(const struct sk_buff *skb)
+{
+ return TCP_SKB_CB(skb)->header.h6.iif;
+}
+
/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ad2acfe1ca61..6bcaa33cd804 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -757,7 +757,8 @@ static int dccp_v6_rcv(struct sk_buff *skb)
/* Step 2:
* Look up flow ID in table and get corresponding socket */
sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
- dh->dccph_sport, dh->dccph_dport);
+ dh->dccph_sport, dh->dccph_dport,
+ inet6_iif(skb));
/*
* Step 2:
* If no socket ...
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9a2838e93cc5..2a86a0f00f2b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
+ ireq->ir_iif = tcp_v6_iif(skb);
ireq->ir_mark = inet_request_mark(sk, skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cf2e45ab2fa4..831495529b82 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sock_owned_by_user(sk))
goto out;
+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
&hdr->saddr, inet6_iif(skb));
if (!req)
@@ -738,7 +739,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
+ ireq->ir_iif = tcp_v6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
(ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
@@ -860,7 +861,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
- fl6.flowi6_oif = inet6_iif(skb);
+ fl6.flowi6_oif = tcp_v6_iif(skb);
else
fl6.flowi6_oif = oif;
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
@@ -918,7 +919,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
&tcp_hashinfo, &ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source), inet6_iif(skb));
+ ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
return;
@@ -1000,13 +1001,14 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
/* Find possible connection requests. */
req = inet6_csk_search_req(sk, &prev, th->source,
&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, inet6_iif(skb));
+ &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
if (req)
return tcp_check_req(sk, skb, req, prev, false);
nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
- &ipv6_hdr(skb)->saddr, th->source,
- &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
+ &ipv6_hdr(skb)->saddr, th->source,
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest),
+ tcp_v6_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1090,7 +1092,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = inet6_iif(skb);
+ newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
if (np->repflow)
@@ -1174,7 +1176,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
skb_set_owner_r(newnp->pktoptions, newsk);
}
newnp->opt = NULL;
- newnp->mcast_oif = inet6_iif(skb);
+ newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
if (np->repflow)
@@ -1360,7 +1362,7 @@ ipv6_pktoptions:
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = inet6_iif(opt_skb);
+ np->mcast_oif = tcp_v6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
@@ -1427,7 +1429,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
+ tcp_v6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1514,7 +1517,7 @@ do_time_wait:
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
- ntohs(th->dest), inet6_iif(skb));
+ ntohs(th->dest), tcp_v6_iif(skb));
if (sk2 != NULL) {
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule(tw, &tcp_death_row);
@@ -1553,6 +1556,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
if (th->doff < sizeof(struct tcphdr) / 4)
return;
+ /* Note : We use inet6_iif() here, not tcp_v6_iif() */
sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
^ permalink raw reply related [flat|nested] 9+ messages in thread
* RE: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 16:17 [PATCH net] ipv6: introduce tcp_v6_iif() Eric Dumazet
@ 2014-10-17 16:26 ` David Laight
2014-10-17 16:52 ` Eric Dumazet
2014-10-17 16:58 ` Cong Wang
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: David Laight @ 2014-10-17 16:26 UTC (permalink / raw)
To: 'Eric Dumazet', David Miller; +Cc: netdev
From: Eric Dumazet
> Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
> misses") added a regression for SO_BINDTODEVICE on IPv6.
>
> This is because we still use inet6_iif() which expects that IP6 control
> block is still at the beginning of skb->cb[]
>
> This patch adds tcp_v6_iif() helper and uses it where necessary.
>
> Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
> parameter to it.
...
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index cf2e45ab2fa4..831495529b82 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
> if (sock_owned_by_user(sk))
> goto out;
>
> + /* Note : We use inet6_iif() here, not tcp_v6_iif() */
> req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
> &hdr->saddr, inet6_iif(skb));
> if (!req)
That comment isn't particularly informative....
David
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 16:26 ` David Laight
@ 2014-10-17 16:52 ` Eric Dumazet
0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2014-10-17 16:52 UTC (permalink / raw)
To: David Laight; +Cc: David Miller, netdev
On Fri, 2014-10-17 at 16:26 +0000, David Laight wrote:
> From: Eric Dumazet
> > Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
> > misses") added a regression for SO_BINDTODEVICE on IPv6.
> >
> > This is because we still use inet6_iif() which expects that IP6 control
> > block is still at the beginning of skb->cb[]
> >
> > This patch adds tcp_v6_iif() helper and uses it where necessary.
> >
> > Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
> > parameter to it.
> ...
> > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> > index cf2e45ab2fa4..831495529b82 100644
> > --- a/net/ipv6/tcp_ipv6.c
> > +++ b/net/ipv6/tcp_ipv6.c
> > @@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
> > if (sock_owned_by_user(sk))
> > goto out;
> >
> > + /* Note : We use inet6_iif() here, not tcp_v6_iif() */
> > req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
> > &hdr->saddr, inet6_iif(skb));
> > if (!req)
>
> That comment isn't particularly informative....
It is showing I considered this spot, and the right thing here is to use
inet6_iif()
The commit changelog will give to curious people the reasons.
Adding fat comments is superseded with good changelog.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 16:17 [PATCH net] ipv6: introduce tcp_v6_iif() Eric Dumazet
2014-10-17 16:26 ` David Laight
@ 2014-10-17 16:58 ` Cong Wang
2014-10-17 18:19 ` Eric Dumazet
2014-10-18 0:02 ` Cong Wang
2014-10-18 3:48 ` David Miller
3 siblings, 1 reply; 9+ messages in thread
From: Cong Wang @ 2014-10-17 16:58 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
On Fri, Oct 17, 2014 at 9:17 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
> misses") added a regression for SO_BINDTODEVICE on IPv6.
>
> This is because we still use inet6_iif() which expects that IP6 control
> block is still at the beginning of skb->cb[]
>
> This patch adds tcp_v6_iif() helper and uses it where necessary.
>
> Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
> parameter to it.
>
I doubt we still need to store iif in IP6CB() since we have skb->skb_iif,
we can probably just make inet6_iif() be like inet_iif() so that IP6CB()->iif
can be just removed? Does this make any sense to you?
(I have a patch locally since I thought it should be target for net-next.)
Thanks.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 16:58 ` Cong Wang
@ 2014-10-17 18:19 ` Eric Dumazet
2014-10-18 0:00 ` Cong Wang
0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2014-10-17 18:19 UTC (permalink / raw)
To: Cong Wang; +Cc: David Miller, netdev
On Fri, 2014-10-17 at 09:58 -0700, Cong Wang wrote:
> I doubt we still need to store iif in IP6CB() since we have skb->skb_iif,
> we can probably just make inet6_iif() be like inet_iif() so that IP6CB()->iif
> can be just removed? Does this make any sense to you?
>
This makes sense and would gain 4 bytes in skb->cb[], so definitely
worth it.
> (I have a patch locally since I thought it should be target for net-next.)
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 18:19 ` Eric Dumazet
@ 2014-10-18 0:00 ` Cong Wang
0 siblings, 0 replies; 9+ messages in thread
From: Cong Wang @ 2014-10-18 0:00 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
On Fri, Oct 17, 2014 at 11:19 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2014-10-17 at 09:58 -0700, Cong Wang wrote:
>
>> I doubt we still need to store iif in IP6CB() since we have skb->skb_iif,
>> we can probably just make inet6_iif() be like inet_iif() so that IP6CB()->iif
>> can be just removed? Does this make any sense to you?
>>
>
> This makes sense and would gain 4 bytes in skb->cb[], so definitely
> worth it.
>
Hmm, after testing my patch, I found that some IPv6 code (scope_id) calls
inet6_iif() at socket layer too, where skb dst is already dropped. So we can't
simply use skb_dst() for all inet6_iif(). This is also why IPv4 is different.
Therefore, I think your patch is fine at least for net.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 16:17 [PATCH net] ipv6: introduce tcp_v6_iif() Eric Dumazet
2014-10-17 16:26 ` David Laight
2014-10-17 16:58 ` Cong Wang
@ 2014-10-18 0:02 ` Cong Wang
2014-10-18 3:48 ` David Miller
3 siblings, 0 replies; 9+ messages in thread
From: Cong Wang @ 2014-10-18 0:02 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
On Fri, Oct 17, 2014 at 9:17 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
> misses") added a regression for SO_BINDTODEVICE on IPv6.
>
> This is because we still use inet6_iif() which expects that IP6 control
> block is still at the beginning of skb->cb[]
>
> This patch adds tcp_v6_iif() helper and uses it where necessary.
>
> Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
> parameter to it.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
As I already explained in previous reply, this is probably the best solution
for net, so:
Acked-by: Cong Wang <cwang@twopensource.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-17 16:17 [PATCH net] ipv6: introduce tcp_v6_iif() Eric Dumazet
` (2 preceding siblings ...)
2014-10-18 0:02 ` Cong Wang
@ 2014-10-18 3:48 ` David Miller
2014-10-18 15:30 ` Eric Dumazet
3 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2014-10-18 3:48 UTC (permalink / raw)
To: eric.dumazet; +Cc: netdev
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 17 Oct 2014 09:17:20 -0700
> From: Eric Dumazet <edumazet@google.com>
>
> Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
> misses") added a regression for SO_BINDTODEVICE on IPv6.
>
> This is because we still use inet6_iif() which expects that IP6 control
> block is still at the beginning of skb->cb[]
>
> This patch adds tcp_v6_iif() helper and uses it where necessary.
>
> Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
> parameter to it.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Applied.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH net] ipv6: introduce tcp_v6_iif()
2014-10-18 3:48 ` David Miller
@ 2014-10-18 15:30 ` Eric Dumazet
0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2014-10-18 15:30 UTC (permalink / raw)
To: David Miller; +Cc: netdev
On Fri, 2014-10-17 at 23:48 -0400, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 17 Oct 2014 09:17:20 -0700
>
> > From: Eric Dumazet <edumazet@google.com>
> >
> > Commit 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line
> > misses") added a regression for SO_BINDTODEVICE on IPv6.
> >
> > This is because we still use inet6_iif() which expects that IP6 control
> > block is still at the beginning of skb->cb[]
> >
> > This patch adds tcp_v6_iif() helper and uses it where necessary.
> >
> > Because __inet6_lookup_skb() is used by TCP and DCCP, we add an iif
> > parameter to it.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
>
> Applied.
Oh well, I forgot IPV6 could be not present in .config
Will send a patch, sorry for this.
$ make M=net/ipv4
CC net/ipv4/route.o
In file included from net/ipv4/route.c:102:0:
include/net/tcp.h: In function ‘tcp_v6_iif’:
include/net/tcp.h:738:32: error: ‘union <anonymous>’ has no member named ‘h6’
return TCP_SKB_CB(skb)->header.h6.iif;
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2014-10-18 15:30 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-17 16:17 [PATCH net] ipv6: introduce tcp_v6_iif() Eric Dumazet
2014-10-17 16:26 ` David Laight
2014-10-17 16:52 ` Eric Dumazet
2014-10-17 16:58 ` Cong Wang
2014-10-17 18:19 ` Eric Dumazet
2014-10-18 0:00 ` Cong Wang
2014-10-18 0:02 ` Cong Wang
2014-10-18 3:48 ` David Miller
2014-10-18 15:30 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).