All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tcp: md5: fix md5 RST when both sides have listener
@ 2012-01-31  2:07 Shawn Lu
  2012-01-31  2:16 ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Shawn Lu @ 2012-01-31  2:07 UTC (permalink / raw)
  To: davem; +Cc: netdev, xiaoclu

TCP RST mechanism is broken in TCP md5(RFC2385). When
connection is gone, md5 key is lost, sending RST without
md5 hash is deem to ignored by peer. This can be a problem
since RST help protocal like bgp fast recove from peer crash.

In most case, users of tcp md5, such as bgp and ldp, have listeners
on both sides. md5 keys for peers are saved in listening socket.
When passive side connection is gone, we can still get md5 key
from listening socket. When active side of connection is gone,
we can try to find listening socket through source port, and
then md5 key.
we are not loosing sercuriy here: packet is valified checked with
md5 hash. No RST is generated if md5 hash doesn't match or no md5
key can be found.

Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
---
 net/ipv4/tcp_ipv4.c |   31 ++++++++++++++++++++++++++++++-
 net/ipv6/tcp_ipv6.c |   34 ++++++++++++++++++++++++++++++++--
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 337ba4c..b2358b4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -601,6 +601,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
+	 __u8 *hash_location = NULL;
+	unsigned char newhash[16];
+	int genhash;
 #endif
 	struct net *net;
 
@@ -631,7 +634,33 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+					    &tcp_hashinfo, ip_hdr(skb)->daddr,
+					    ntohs(th->source), inet_iif(skb));
+		/* don't send rst if it can't find key */
+		if (!sk)
+			return;
+		key = tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr);
+		if (!key)
+			return;
+		genhash = tcp_v4_md5_hash_skb(newhash, key,
+					      NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			return;
+
+	} else {
+		key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+	}
+
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3edd05a..1da99fd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1074,6 +1074,12 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+	__u8 *hash_location = NULL;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	unsigned char newhash[16];
+	int genhash;
+#endif
 
 	if (th->rst)
 		return;
@@ -1082,8 +1088,32 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		return;
 
 #ifdef CONFIG_TCP_MD5SIG
-	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+					   &tcp_hashinfo, &ipv6h->daddr,
+					   ntohs(th->source), inet6_iif(skb));
+		if (!sk)
+			return;
+
+		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+		if (!key)
+			return;
+
+		genhash = tcp_v6_md5_hash_skb(newhash, key,
+					      NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			return;
+	} else {
+		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+	}
 #endif
 
 	if (th->ack)
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31  2:07 [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
@ 2012-01-31  2:16 ` Eric Dumazet
  2012-01-31  2:37   ` Shawn Lu
  2012-01-31  8:39   ` Shawn Lu
  0 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-01-31  2:16 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le 31 janvier 2012 03:07, Shawn Lu <shawn.lu@ericsson.com> a écrit :
> TCP RST mechanism is broken in TCP md5(RFC2385). When
> connection is gone, md5 key is lost, sending RST without
> md5 hash is deem to ignored by peer. This can be a problem
> since RST help protocal like bgp fast recove from peer crash.
>
> In most case, users of tcp md5, such as bgp and ldp, have listeners
> on both sides. md5 keys for peers are saved in listening socket.
> When passive side connection is gone, we can still get md5 key
> from listening socket. When active side of connection is gone,
> we can try to find listening socket through source port, and
> then md5 key.
> we are not loosing sercuriy here: packet is valified checked with
> md5 hash. No RST is generated if md5 hash doesn't match or no md5
> key can be found.
>
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---
>  net/ipv4/tcp_ipv4.c |   31 ++++++++++++++++++++++++++++++-
>  net/ipv6/tcp_ipv6.c |   34 ++++++++++++++++++++++++++++++++--
>  2 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 337ba4c..b2358b4 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -601,6 +601,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
>        struct ip_reply_arg arg;
>  #ifdef CONFIG_TCP_MD5SIG
>        struct tcp_md5sig_key *key;
> +        __u8 *hash_location = NULL;
> +       unsigned char newhash[16];
> +       int genhash;
>  #endif
>        struct net *net;
>
> @@ -631,7 +634,33 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
>        arg.iov[0].iov_len  = sizeof(rep.th);
>
>  #ifdef CONFIG_TCP_MD5SIG
> -       key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
> +       hash_location = tcp_parse_md5sig_option(th);
> +       if (!sk && hash_location) {
> +               /*
> +                * active side is lost. Try to find listening socket through
> +                * source port, and then find md5 key through listening socket.
> +                * we are not loose security here:
> +                * Incoming packet is checked with md5 hash with finding key,
> +                * no RST generated if md5 hash doesn't match.
> +                */
> +               sk = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
> +                                           &tcp_hashinfo, ip_hdr(skb)->daddr,
> +                                           ntohs(th->source), inet_iif(skb));
> +               /* don't send rst if it can't find key */
> +               if (!sk)
> +                       return;

Now you have a socket in sk, you also are responsible to release the
refcount taken in __inet_lookup_listener()

> +               key = tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr);
> +               if (!key)
> +                       return;

leak refcount

> +               genhash = tcp_v4_md5_hash_skb(newhash, key,
> +                                             NULL, NULL, skb);
> +               if (genhash || memcmp(hash_location, newhash, 16) != 0)
> +                       return;

same leak

> +
> +       } else {
> +               key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
> +       }
> +
>        if (key) {
>                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
>                                   (TCPOPT_NOP << 16) |
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 3edd05a..1da99fd 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1074,6 +1074,12 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
>        const struct tcphdr *th = tcp_hdr(skb);
>        u32 seq = 0, ack_seq = 0;
>        struct tcp_md5sig_key *key = NULL;
> +#ifdef CONFIG_TCP_MD5SIG
> +       __u8 *hash_location = NULL;
> +       struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> +       unsigned char newhash[16];
> +       int genhash;
> +#endif
>
>        if (th->rst)
>                return;
> @@ -1082,8 +1088,32 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
>                return;
>
>  #ifdef CONFIG_TCP_MD5SIG
> -       if (sk)
> -               key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
> +       hash_location = tcp_parse_md5sig_option(th);
> +       if (!sk && hash_location) {
> +               /*
> +                * active side is lost. Try to find listening socket through
> +                * source port, and then find md5 key through listening socket.
> +                * we are not loose security here:
> +                * Incoming packet is checked with md5 hash with finding key,
> +                * no RST generated if md5 hash doesn't match.
> +                */
> +               sk = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
> +                                          &tcp_hashinfo, &ipv6h->daddr,
> +                                          ntohs(th->source), inet6_iif(skb));
> +               if (!sk)
> +                       return;
> +
> +               key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
> +               if (!key)
> +                       return;

same leak

> +
> +               genhash = tcp_v6_md5_hash_skb(newhash, key,
> +                                             NULL, NULL, skb);
> +               if (genhash || memcmp(hash_location, newhash, 16) != 0)
> +                       return;

same leak

> +       } else {
> +               key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
> +       }
>  #endif
>
>        if (th->ack)
> --
> 1.7.0.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31  2:16 ` Eric Dumazet
@ 2012-01-31  2:37   ` Shawn Lu
  2012-01-31  8:39   ` Shawn Lu
  1 sibling, 0 replies; 21+ messages in thread
From: Shawn Lu @ 2012-01-31  2:37 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiaoclu

Yes. Eric is right.  I will have to fix refcount problem.
Will send out another patch later on.  

-----Original Message-----
From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
Sent: Monday, January 30, 2012 6:16 PM
To: Shawn Lu
Cc: davem@davemloft.net; netdev@vger.kernel.org; xiaoclu@gmail.com
Subject: Re: [PATCH] tcp: md5: fix md5 RST when both sides have listener

Le 31 janvier 2012 03:07, Shawn Lu <shawn.lu@ericsson.com> a écrit :
> TCP RST mechanism is broken in TCP md5(RFC2385). When connection is 
> gone, md5 key is lost, sending RST without
> md5 hash is deem to ignored by peer. This can be a problem since RST 
> help protocal like bgp fast recove from peer crash.
>
> In most case, users of tcp md5, such as bgp and ldp, have listeners on 
> both sides. md5 keys for peers are saved in listening socket.
> When passive side connection is gone, we can still get md5 key from 
> listening socket. When active side of connection is gone, we can try 
> to find listening socket through source port, and then md5 key.
> we are not loosing sercuriy here: packet is valified checked with
> md5 hash. No RST is generated if md5 hash doesn't match or no md5 key 
> can be found.
>
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---
>  net/ipv4/tcp_ipv4.c |   31 ++++++++++++++++++++++++++++++-
>  net/ipv6/tcp_ipv6.c |   34 ++++++++++++++++++++++++++++++++--
>  2 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 
> 337ba4c..b2358b4 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -601,6 +601,9 @@ static void tcp_v4_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>        struct ip_reply_arg arg;
>  #ifdef CONFIG_TCP_MD5SIG
>        struct tcp_md5sig_key *key;
> +        __u8 *hash_location = NULL;
> +       unsigned char newhash[16];
> +       int genhash;
>  #endif
>        struct net *net;
>
> @@ -631,7 +634,33 @@ static void tcp_v4_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>        arg.iov[0].iov_len  = sizeof(rep.th);
>
>  #ifdef CONFIG_TCP_MD5SIG
> -       key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : 
> NULL;
> +       hash_location = tcp_parse_md5sig_option(th);
> +       if (!sk && hash_location) {
> +               /*
> +                * active side is lost. Try to find listening socket 
> + through
> +                * source port, and then find md5 key through listening socket.
> +                * we are not loose security here:
> +                * Incoming packet is checked with md5 hash with 
> + finding key,
> +                * no RST generated if md5 hash doesn't match.
> +                */
> +               sk = 
> + __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
> +                                           &tcp_hashinfo, 
> + ip_hdr(skb)->daddr,
> +                                           ntohs(th->source), 
> + inet_iif(skb));
> +               /* don't send rst if it can't find key */
> +               if (!sk)
> +                       return;

Now you have a socket in sk, you also are responsible to release the refcount taken in __inet_lookup_listener()

> +               key = tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr);
> +               if (!key)
> +                       return;

leak refcount

> +               genhash = tcp_v4_md5_hash_skb(newhash, key,
> +                                             NULL, NULL, skb);
> +               if (genhash || memcmp(hash_location, newhash, 16) != 
> + 0)
> +                       return;

same leak

> +
> +       } else {
> +               key = sk ? tcp_v4_md5_do_lookup(sk, 
> + ip_hdr(skb)->saddr) : NULL;
> +       }
> +
>        if (key) {
>                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
>                                   (TCPOPT_NOP << 16) | diff --git 
> a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3edd05a..1da99fd 
> 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1074,6 +1074,12 @@ static void tcp_v6_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>        const struct tcphdr *th = tcp_hdr(skb);
>        u32 seq = 0, ack_seq = 0;
>        struct tcp_md5sig_key *key = NULL;
> +#ifdef CONFIG_TCP_MD5SIG
> +       __u8 *hash_location = NULL;
> +       struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> +       unsigned char newhash[16];
> +       int genhash;
> +#endif
>
>        if (th->rst)
>                return;
> @@ -1082,8 +1088,32 @@ static void tcp_v6_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>                return;
>
>  #ifdef CONFIG_TCP_MD5SIG
> -       if (sk)
> -               key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
> +       hash_location = tcp_parse_md5sig_option(th);
> +       if (!sk && hash_location) {
> +               /*
> +                * active side is lost. Try to find listening socket 
> + through
> +                * source port, and then find md5 key through listening socket.
> +                * we are not loose security here:
> +                * Incoming packet is checked with md5 hash with 
> + finding key,
> +                * no RST generated if md5 hash doesn't match.
> +                */
> +               sk = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
> +                                          &tcp_hashinfo, 
> + &ipv6h->daddr,
> +                                          ntohs(th->source), 
> + inet6_iif(skb));
> +               if (!sk)
> +                       return;
> +
> +               key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
> +               if (!key)
> +                       return;

same leak

> +
> +               genhash = tcp_v6_md5_hash_skb(newhash, key,
> +                                             NULL, NULL, skb);
> +               if (genhash || memcmp(hash_location, newhash, 16) != 
> + 0)
> +                       return;

same leak

> +       } else {
> +               key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : 
> + NULL;
> +       }
>  #endif
>
>        if (th->ack)
> --
> 1.7.0.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in 
> the body of a message to majordomo@vger.kernel.org More majordomo info 
> at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31  2:16 ` Eric Dumazet
  2012-01-31  2:37   ` Shawn Lu
@ 2012-01-31  8:39   ` Shawn Lu
  2012-01-31  9:05     ` Eric Dumazet
  1 sibling, 1 reply; 21+ messages in thread
From: Shawn Lu @ 2012-01-31  8:39 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiaoclu

Resubmit after fixing the sk refcount leak problem pointed out by Eric.


TCP RST mechanism is broken in TCP md5(RFC2385).When
connection is gone, md5 key is lost, sending RST without
md5 hash is deem to ignored by peer. This can be a problem
since RST help protocal like bgp fast recove from peer crash.

In most case, users of tcp md5, such as bgp and ldp, have
listeners on both sides. md5 keys for peers are saved in
listening socket. When passive side connection is gone,
we can still get md5 key from listening socket. When active
side of connection is gone, we can try to find listening socket
through source port, and then md5 key.
we are not loosing sercuriy here: packet is valified checked with
md5 hash. No RST is generated if md5 hash doesn't match or no md5
key can be found.

Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
---
 net/ipv4/tcp_ipv4.c |   38 +++++++++++++++++++++++++++++++++++++-
 net/ipv6/tcp_ipv6.c |   41 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 337ba4c..6ed1c4a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -601,6 +601,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
+	 __u8 *hash_location = NULL;
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
 #endif
 	struct net *net;
 
@@ -631,7 +635,33 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+					    &tcp_hashinfo, ip_hdr(skb)->daddr,
+					    ntohs(th->source), inet_iif(skb));
+		/* don't send rst if it can't find key */
+		if (!sk1)
+			return;
+		key = tcp_v4_md5_do_lookup(sk1, ip_hdr(skb)->saddr);
+		if (!key)
+			goto release_sk1;
+		genhash = tcp_v4_md5_hash_skb(newhash, key,
+					      NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+
+	} else {
+		key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+	}
+
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -659,6 +689,12 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1)
+		sock_put(sk1);
+#endif
 }
 
 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3edd05a..32507e8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1074,6 +1074,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+	__u8 *hash_location = NULL;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
+#endif
 
 	if (th->rst)
 		return;
@@ -1082,8 +1089,32 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		return;
 
 #ifdef CONFIG_TCP_MD5SIG
-	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+					   &tcp_hashinfo, &ipv6h->daddr,
+					   ntohs(th->source), inet6_iif(skb));
+		if (!sk1)
+			return;
+
+		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v6_md5_hash_skb(newhash, key,
+					      NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+	}
 #endif
 
 	if (th->ack)
@@ -1093,6 +1124,12 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			  (th->doff << 2);
 
 	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1)
+		sock_put(sk1);
+#endif
 }
 
 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
--
1.7.0.4

-----Original Message-----
From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
Sent: Monday, January 30, 2012 6:16 PM
To: Shawn Lu
Cc: davem@davemloft.net; netdev@vger.kernel.org; xiaoclu@gmail.com
Subject: Re: [PATCH] tcp: md5: fix md5 RST when both sides have listener

Le 31 janvier 2012 03:07, Shawn Lu <shawn.lu@ericsson.com> a écrit :
> TCP RST mechanism is broken in TCP md5(RFC2385). When connection is 
> gone, md5 key is lost, sending RST without
> md5 hash is deem to ignored by peer. This can be a problem since RST 
> help protocal like bgp fast recove from peer crash.
>
> In most case, users of tcp md5, such as bgp and ldp, have listeners on 
> both sides. md5 keys for peers are saved in listening socket.
> When passive side connection is gone, we can still get md5 key from 
> listening socket. When active side of connection is gone, we can try 
> to find listening socket through source port, and then md5 key.
> we are not loosing sercuriy here: packet is valified checked with
> md5 hash. No RST is generated if md5 hash doesn't match or no md5 key 
> can be found.
>
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---
>  net/ipv4/tcp_ipv4.c |   31 ++++++++++++++++++++++++++++++-
>  net/ipv6/tcp_ipv6.c |   34 ++++++++++++++++++++++++++++++++--
>  2 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 
> 337ba4c..b2358b4 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -601,6 +601,9 @@ static void tcp_v4_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>        struct ip_reply_arg arg;
>  #ifdef CONFIG_TCP_MD5SIG
>        struct tcp_md5sig_key *key;
> +        __u8 *hash_location = NULL;
> +       unsigned char newhash[16];
> +       int genhash;
>  #endif
>        struct net *net;
>
> @@ -631,7 +634,33 @@ static void tcp_v4_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>        arg.iov[0].iov_len  = sizeof(rep.th);
>
>  #ifdef CONFIG_TCP_MD5SIG
> -       key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : 
> NULL;
> +       hash_location = tcp_parse_md5sig_option(th);
> +       if (!sk && hash_location) {
> +               /*
> +                * active side is lost. Try to find listening socket 
> + through
> +                * source port, and then find md5 key through listening socket.
> +                * we are not loose security here:
> +                * Incoming packet is checked with md5 hash with 
> + finding key,
> +                * no RST generated if md5 hash doesn't match.
> +                */
> +               sk = 
> + __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
> +                                           &tcp_hashinfo, 
> + ip_hdr(skb)->daddr,
> +                                           ntohs(th->source), 
> + inet_iif(skb));
> +               /* don't send rst if it can't find key */
> +               if (!sk)
> +                       return;

Now you have a socket in sk, you also are responsible to release the refcount taken in __inet_lookup_listener()

> +               key = tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr);
> +               if (!key)
> +                       return;

leak refcount

> +               genhash = tcp_v4_md5_hash_skb(newhash, key,
> +                                             NULL, NULL, skb);
> +               if (genhash || memcmp(hash_location, newhash, 16) != 
> + 0)
> +                       return;

same leak

> +
> +       } else {
> +               key = sk ? tcp_v4_md5_do_lookup(sk, 
> + ip_hdr(skb)->saddr) : NULL;
> +       }
> +
>        if (key) {
>                rep.opt[0] = htonl((TCPOPT_NOP << 24) |
>                                   (TCPOPT_NOP << 16) | diff --git 
> a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3edd05a..1da99fd 
> 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1074,6 +1074,12 @@ static void tcp_v6_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>        const struct tcphdr *th = tcp_hdr(skb);
>        u32 seq = 0, ack_seq = 0;
>        struct tcp_md5sig_key *key = NULL;
> +#ifdef CONFIG_TCP_MD5SIG
> +       __u8 *hash_location = NULL;
> +       struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> +       unsigned char newhash[16];
> +       int genhash;
> +#endif
>
>        if (th->rst)
>                return;
> @@ -1082,8 +1088,32 @@ static void tcp_v6_send_reset(struct sock *sk, 
> struct sk_buff *skb)
>                return;
>
>  #ifdef CONFIG_TCP_MD5SIG
> -       if (sk)
> -               key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
> +       hash_location = tcp_parse_md5sig_option(th);
> +       if (!sk && hash_location) {
> +               /*
> +                * active side is lost. Try to find listening socket 
> + through
> +                * source port, and then find md5 key through listening socket.
> +                * we are not loose security here:
> +                * Incoming packet is checked with md5 hash with 
> + finding key,
> +                * no RST generated if md5 hash doesn't match.
> +                */
> +               sk = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
> +                                          &tcp_hashinfo, 
> + &ipv6h->daddr,
> +                                          ntohs(th->source), 
> + inet6_iif(skb));
> +               if (!sk)
> +                       return;
> +
> +               key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
> +               if (!key)
> +                       return;

same leak

> +
> +               genhash = tcp_v6_md5_hash_skb(newhash, key,
> +                                             NULL, NULL, skb);
> +               if (genhash || memcmp(hash_location, newhash, 16) != 
> + 0)
> +                       return;

same leak

> +       } else {
> +               key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : 
> + NULL;
> +       }
>  #endif
>
>        if (th->ack)
> --
> 1.7.0.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in 
> the body of a message to majordomo@vger.kernel.org More majordomo info 
> at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31  8:39   ` Shawn Lu
@ 2012-01-31  9:05     ` Eric Dumazet
  2012-01-31 13:33       ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-01-31  9:05 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mardi 31 janvier 2012 à 03:39 -0500, Shawn Lu a écrit :
> Resubmit after fixing the sk refcount leak problem pointed out by Eric.
> 
> 
> TCP RST mechanism is broken in TCP md5(RFC2385).When
> connection is gone, md5 key is lost, sending RST without
> md5 hash is deem to ignored by peer. This can be a problem
> since RST help protocal like bgp fast recove from peer crash.
> 
> In most case, users of tcp md5, such as bgp and ldp, have
> listeners on both sides. md5 keys for peers are saved in
> listening socket. When passive side connection is gone,
> we can still get md5 key from listening socket. When active
> side of connection is gone, we can try to find listening socket
> through source port, and then md5 key.
> we are not loosing sercuriy here: packet is valified checked with
> md5 hash. No RST is generated if md5 hash doesn't match or no md5
> key can be found.
> 
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---
>  net/ipv4/tcp_ipv4.c |   38 +++++++++++++++++++++++++++++++++++++-
>  net/ipv6/tcp_ipv6.c |   41 +++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 76 insertions(+), 3 deletions(-)
> 
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 337ba4c..6ed1c4a 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -601,6 +601,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
>  	struct ip_reply_arg arg;
>  #ifdef CONFIG_TCP_MD5SIG
>  	struct tcp_md5sig_key *key;
> +	 __u8 *hash_location = NULL;
> +	unsigned char newhash[16];
> +	int genhash;
> +	struct sock *sk1 = NULL;
>  #endif
>  	struct net *net;
>  
> @@ -631,7 +635,33 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
>  	arg.iov[0].iov_len  = sizeof(rep.th);
>  
>  #ifdef CONFIG_TCP_MD5SIG
> -	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
> +	hash_location = tcp_parse_md5sig_option(th);
> +	if (!sk && hash_location) {
> +		/*
> +		 * active side is lost. Try to find listening socket through
> +		 * source port, and then find md5 key through listening socket.
> +		 * we are not loose security here:
> +		 * Incoming packet is checked with md5 hash with finding key,
> +		 * no RST generated if md5 hash doesn't match.
> +		 */
> +		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
> +					    &tcp_hashinfo, ip_hdr(skb)->daddr,
> +					    ntohs(th->source), inet_iif(skb));
> +		/* don't send rst if it can't find key */
> +		if (!sk1)
> +			return;
> +		key = tcp_v4_md5_do_lookup(sk1, ip_hdr(skb)->saddr);

Hmm... The second problem is that its not safe to call
tcp_v4_md5_do_lookup() on an unlocked socket.

And locking a listener is way too expensive, since a listener socket is
already a contention point.

An attacker could send forged tcp md5 packets to slow down a server.

A proper patch needs RCU conversion first.

> +		if (!key)
> +			goto release_sk1;
> +		genhash = tcp_v4_md5_hash_skb(newhash, key,
> +					      NULL, NULL, skb);
> +		if (genhash || memcmp(hash_location, newhash, 16) != 0)
> +			goto release_sk1;
> +
> +	} else {
> +		key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
> +	}
> +
>  	if (key) {
>  		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
>  				   (TCPOPT_NOP << 16) |
> @@ -659,6 +689,12 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
>  
>  	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
>  	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
> +
> +#ifdef CONFIG_TCP_MD5SIG
> +release_sk1:
> +	if (sk1)
> +		sock_put(sk1);
> +#endif
>  }

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31  9:05     ` Eric Dumazet
@ 2012-01-31 13:33       ` Eric Dumazet
  2012-01-31 15:18         ` [PATCH net-next] tcp: md5: rcu conversion Eric Dumazet
  2012-01-31 18:15         ` [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
  0 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-01-31 13:33 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mardi 31 janvier 2012 à 10:05 +0100, Eric Dumazet a écrit :

> Hmm... The second problem is that its not safe to call
> tcp_v4_md5_do_lookup() on an unlocked socket.
> 
> And locking a listener is way too expensive, since a listener socket is
> already a contention point.
> 
> An attacker could send forged tcp md5 packets to slow down a server.
> 
> A proper patch needs RCU conversion first.

I am working on this RCU conversion and big md5 cleanup, using a single
list of keys.

You can then add your fix on top of this work.


union tcp_md5_addr {
        struct in_addr  a4;
#if IS_ENABLED(CONFIG_IPV6)
        struct in6_addr a6;
#endif
};

/* - key database */
struct tcp_md5sig_key {
        struct hlist_node       node;
        u8                      keylen;
        u8                      family; /* AF_INET or AF_INET6 */
        union tcp_md5_addr      addr;
        u8                      key[TCP_MD5SIG_MAXKEYLEN];
        struct rcu_head         rcu;
};

/* - sock block */
struct tcp_md5sig_info {
        struct hlist_head       head;
};

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH net-next] tcp: md5: rcu conversion
  2012-01-31 13:33       ` Eric Dumazet
@ 2012-01-31 15:18         ` Eric Dumazet
  2012-01-31 15:38           ` Eric Dumazet
  2012-01-31 17:14           ` [PATCH net-next] tcp: md5: rcu conversion David Miller
  2012-01-31 18:15         ` [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
  1 sibling, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-01-31 15:18 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

In order to be able to support proper RST messages for TCP MD5 flows, we
need to allow access to MD5 keys without locking listener socket.

This conversion is a nice cleanup, and shrinks size of timewait sockets
by 80 bytes.

IPv6 code reuses generic code found in IPv4 instead of duplicating it.

Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Shawn Lu <shawn.lu@ericsson.com>
---
 include/linux/tcp.h      |    3 
 include/net/tcp.h        |   61 ++++-----
 net/ipv4/tcp_ipv4.c      |  227 +++++++++++++++----------------------
 net/ipv4/tcp_minisocks.c |   12 -
 net/ipv6/tcp_ipv6.c      |  173 +---------------------------
 5 files changed, 141 insertions(+), 335 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 46a85c9..c2025f1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -486,8 +486,7 @@ struct tcp_timewait_sock {
 	u32			  tw_ts_recent;
 	long			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
-	u16			  tw_md5_keylen;
-	u8			  tw_md5_key[TCP_MD5SIG_MAXKEYLEN];
+	struct tcp_md5sig_key	*tw_md5_key;
 #endif
 	/* Few sockets in timewait have cookies; in that case, then this
 	 * object holds a reference to them (tw_cookie_values->kref).
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3c90346..10ae4c7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1130,35 +1130,26 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
 /* MD5 Signature */
 struct crypto_hash;
 
+union tcp_md5_addr {
+	struct in_addr  a4;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct in6_addr	a6;
+#endif
+};
+
 /* - key database */
 struct tcp_md5sig_key {
-	u8			*key;
+	struct hlist_node	node;
 	u8			keylen;
-};
-
-struct tcp4_md5sig_key {
-	struct tcp_md5sig_key	base;
-	__be32			addr;
-};
-
-struct tcp6_md5sig_key {
-	struct tcp_md5sig_key	base;
-#if 0
-	u32			scope_id;	/* XXX */
-#endif
-	struct in6_addr		addr;
+	u8			family; /* AF_INET or AF_INET6 */
+	union tcp_md5_addr	addr;
+	u8			key[TCP_MD5SIG_MAXKEYLEN];
+	struct rcu_head		rcu;
 };
 
 /* - sock block */
 struct tcp_md5sig_info {
-	struct tcp4_md5sig_key	*keys4;
-#if IS_ENABLED(CONFIG_IPV6)
-	struct tcp6_md5sig_key	*keys6;
-	u32			entries6;
-	u32			alloced6;
-#endif
-	u32			entries4;
-	u32			alloced4;
+	struct hlist_head	head;
 };
 
 /* - pseudo header */
@@ -1195,19 +1186,25 @@ extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 			       const struct sock *sk,
 			       const struct request_sock *req,
 			       const struct sk_buff *skb);
-extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk,
-						 struct sock *addr_sk);
-extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey,
-			     u8 newkeylen);
-extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr);
+extern int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+			  int family, const u8 *newkey,
+			  u8 newkeylen, gfp_t gfp);
+extern int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
+			  int family);
+extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
+					 struct sock *addr_sk);
 
 #ifdef CONFIG_TCP_MD5SIG
-#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_keylen ? 		 \
-				 &(struct tcp_md5sig_key) {		 \
-					.key = (twsk)->tw_md5_key,	 \
-					.keylen = (twsk)->tw_md5_keylen, \
-				} : NULL)
+extern struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
+			const union tcp_md5_addr *addr, int family);
+#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
 #else
+static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
+					 const union tcp_md5_addr *addr,
+					 int family)
+{
+	return NULL;
+}
 #define tcp_twsk_md5_key(twsk)	NULL
 #endif
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 345e249..1d5fd82 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -90,16 +90,8 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
 
 #ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
-						   __be32 addr);
-static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
-#else
-static inline
-struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
-{
-	return NULL;
-}
 #endif
 
 struct inet_hashinfo tcp_hashinfo;
@@ -631,7 +623,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+	key = sk ? tcp_md5_do_lookup(sk,
+				     (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
+				     AF_INET) : NULL;
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -759,7 +753,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent,
 			0,
-			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
+			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+					  AF_INET),
 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 			ip_hdr(skb)->tos);
 }
@@ -876,146 +871,124 @@ static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
  */
 
 /* Find the Key structure for an address.  */
-static struct tcp_md5sig_key *
-			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
+					 const union tcp_md5_addr *addr,
+					 int family)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
+	struct tcp_md5sig_key *key;
+	struct hlist_node *pos;
+	unsigned int size = sizeof(struct in_addr);
 
-	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
+	if (!tp->md5sig_info)
 		return NULL;
-	for (i = 0; i < tp->md5sig_info->entries4; i++) {
-		if (tp->md5sig_info->keys4[i].addr == addr)
-			return &tp->md5sig_info->keys4[i].base;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6)
+		size = sizeof(struct in6_addr);
+#endif
+	hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) {
+		if (key->family != family)
+			continue;
+		if (!memcmp(&key->addr, addr, size))
+			return key;
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_md5_do_lookup);
 
 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 					 struct sock *addr_sk)
 {
-	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
+	union tcp_md5_addr *addr;
+
+	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
+	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 
 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 						      struct request_sock *req)
 {
-	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
+	union tcp_md5_addr *addr;
+
+	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 
 /* This can be called on a newly created socket, from other files */
-int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
-		      u8 *newkey, u8 newkeylen)
+int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 {
 	/* Add Key to the list */
 	struct tcp_md5sig_key *key;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp4_md5sig_key *keys;
+	struct tcp_md5sig_info *md5sig;
 
-	key = tcp_v4_md5_do_lookup(sk, addr);
+	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
 	if (key) {
 		/* Pre-existing entry - just update that one. */
-		kfree(key->key);
-		key->key = newkey;
+		memcpy(key->key, newkey, newkeylen);
 		key->keylen = newkeylen;
-	} else {
-		struct tcp_md5sig_info *md5sig;
-
-		if (!tp->md5sig_info) {
-			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
-						  GFP_ATOMIC);
-			if (!tp->md5sig_info) {
-				kfree(newkey);
-				return -ENOMEM;
-			}
-			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
-		}
+		return 0;
+	}
 
-		md5sig = tp->md5sig_info;
-		if (md5sig->entries4 == 0 &&
-		    tcp_alloc_md5sig_pool(sk) == NULL) {
-			kfree(newkey);
+	md5sig = tp->md5sig_info;
+	if (!md5sig) {
+		md5sig = kmalloc(sizeof(*md5sig), gfp);
+		if (!md5sig)
 			return -ENOMEM;
-		}
-
-		if (md5sig->alloced4 == md5sig->entries4) {
-			keys = kmalloc((sizeof(*keys) *
-					(md5sig->entries4 + 1)), GFP_ATOMIC);
-			if (!keys) {
-				kfree(newkey);
-				if (md5sig->entries4 == 0)
-					tcp_free_md5sig_pool();
-				return -ENOMEM;
-			}
 
-			if (md5sig->entries4)
-				memcpy(keys, md5sig->keys4,
-				       sizeof(*keys) * md5sig->entries4);
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+		INIT_HLIST_HEAD(&md5sig->head);
+		tp->md5sig_info = md5sig;
+	}
 
-			/* Free old key list, and reference new one */
-			kfree(md5sig->keys4);
-			md5sig->keys4 = keys;
-			md5sig->alloced4++;
-		}
-		md5sig->entries4++;
-		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
-		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
-		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
+	key = kmalloc(sizeof(*key), gfp);
+	if (!key)
+		return -ENOMEM;
+	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+		kfree(key);
+		return -ENOMEM;
 	}
+
+	memcpy(key->key, newkey, newkeylen);
+	key->keylen = newkeylen;
+	key->family = family;
+	memcpy(&key->addr, addr,
+	       (family == AF_INET6) ? sizeof(struct in6_addr) :
+				      sizeof(struct in_addr));
+	hlist_add_head_rcu(&key->node, &md5sig->head);
 	return 0;
 }
-EXPORT_SYMBOL(tcp_v4_md5_do_add);
+EXPORT_SYMBOL(tcp_md5_do_add);
 
-int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
+int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
-
-	for (i = 0; i < tp->md5sig_info->entries4; i++) {
-		if (tp->md5sig_info->keys4[i].addr == addr) {
-			/* Free the key */
-			kfree(tp->md5sig_info->keys4[i].base.key);
-			tp->md5sig_info->entries4--;
-
-			if (tp->md5sig_info->entries4 == 0) {
-				kfree(tp->md5sig_info->keys4);
-				tp->md5sig_info->keys4 = NULL;
-				tp->md5sig_info->alloced4 = 0;
-				tcp_free_md5sig_pool();
-			} else if (tp->md5sig_info->entries4 != i) {
-				/* Need to do some manipulation */
-				memmove(&tp->md5sig_info->keys4[i],
-					&tp->md5sig_info->keys4[i+1],
-					(tp->md5sig_info->entries4 - i) *
-					 sizeof(struct tcp4_md5sig_key));
-			}
-			return 0;
-		}
-	}
-	return -ENOENT;
+	struct tcp_md5sig_key *key;
+
+	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+	if (!key)
+		return -ENOENT;
+	hlist_del_rcu(&key->node);
+	kfree_rcu(key, rcu);
+	if (hlist_empty(&tp->md5sig_info->head))
+		tcp_free_md5sig_pool();
+	return 0;
 }
-EXPORT_SYMBOL(tcp_v4_md5_do_del);
+EXPORT_SYMBOL(tcp_md5_do_del);
 
-static void tcp_v4_clear_md5_list(struct sock *sk)
+void tcp_clear_md5_list(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig_key *key;
+	struct hlist_node *pos, *n;
 
-	/* Free each key, then the set of key keys,
-	 * the crypto element, and then decrement our
-	 * hold on the last resort crypto.
-	 */
-	if (tp->md5sig_info->entries4) {
-		int i;
-		for (i = 0; i < tp->md5sig_info->entries4; i++)
-			kfree(tp->md5sig_info->keys4[i].base.key);
-		tp->md5sig_info->entries4 = 0;
+	if (!hlist_empty(&tp->md5sig_info->head))
 		tcp_free_md5sig_pool();
-	}
-	if (tp->md5sig_info->keys4) {
-		kfree(tp->md5sig_info->keys4);
-		tp->md5sig_info->keys4 = NULL;
-		tp->md5sig_info->alloced4  = 0;
+	hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) {
+		hlist_del_rcu(&key->node);
+		kfree_rcu(key, rcu);
 	}
 }
 
@@ -1024,7 +997,6 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 {
 	struct tcp_md5sig cmd;
 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
-	u8 *newkey;
 
 	if (optlen < sizeof(cmd))
 		return -EINVAL;
@@ -1038,29 +1010,16 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
 		if (!tcp_sk(sk)->md5sig_info)
 			return -ENOENT;
-		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
+		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+				      AF_INET);
 	}
 
 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 		return -EINVAL;
 
-	if (!tcp_sk(sk)->md5sig_info) {
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_md5sig_info *p;
-
-		p = kzalloc(sizeof(*p), sk->sk_allocation);
-		if (!p)
-			return -EINVAL;
-
-		tp->md5sig_info = p;
-		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
-	}
-
-	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
-	if (!newkey)
-		return -ENOMEM;
-	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
-				 newkey, cmd.tcpm_keylen);
+	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
+			      GFP_KERNEL);
 }
 
 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
@@ -1086,7 +1045,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
 }
 
-static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
 {
 	struct tcp_md5sig_pool *hp;
@@ -1186,7 +1145,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
 	int genhash;
 	unsigned char newhash[16];
 
-	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
+					  AF_INET);
 	hash_location = tcp_parse_md5sig_option(th);
 
 	/* We've parsed the options - do we have a hash? */
@@ -1474,7 +1434,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
-	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
+	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
+				AF_INET);
 	if (key != NULL) {
 		/*
 		 * We're using one, so create a matching key
@@ -1482,10 +1443,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 * memory, then we end up not copying the key
 		 * across. Shucks.
 		 */
-		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
-		if (newkey != NULL)
-			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
-					  newkey, key->keylen);
+		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
+			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
 	}
 #endif
@@ -1934,7 +1893,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 #ifdef CONFIG_TCP_MD5SIG
 	/* Clean up the MD5 key list, if any */
 	if (tp->md5sig_info) {
-		tcp_v4_clear_md5_list(sk);
+		tcp_clear_md5_list(sk);
 		kfree(tp->md5sig_info);
 		tp->md5sig_info = NULL;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 550e755..3cabafb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -359,13 +359,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		 */
 		do {
 			struct tcp_md5sig_key *key;
-			memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key));
-			tcptw->tw_md5_keylen = 0;
+			tcptw->tw_md5_key = NULL;
 			key = tp->af_specific->md5_lookup(sk, sk);
 			if (key != NULL) {
-				memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
-				tcptw->tw_md5_keylen = key->keylen;
-				if (tcp_alloc_md5sig_pool(sk) == NULL)
+				tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+				if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL)
 					BUG();
 			}
 		} while (0);
@@ -405,8 +403,10 @@ void tcp_twsk_destructor(struct sock *sk)
 {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_timewait_sock *twsk = tcp_twsk(sk);
-	if (twsk->tw_md5_keylen)
+	if (twsk->tw_md5_key) {
 		tcp_free_md5sig_pool();
+		kfree_rcu(twsk->tw_md5_key, rcu);
+	}
 #endif
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f37769e..bec41f9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -540,19 +540,7 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
 						   const struct in6_addr *addr)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
-
-	BUG_ON(tp == NULL);
-
-	if (!tp->md5sig_info || !tp->md5sig_info->entries6)
-		return NULL;
-
-	for (i = 0; i < tp->md5sig_info->entries6; i++) {
-		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
-			return &tp->md5sig_info->keys6[i].base;
-	}
-	return NULL;
+	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
 }
 
 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
@@ -567,129 +555,11 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
 	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
 }
 
-static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
-			     char *newkey, u8 newkeylen)
-{
-	/* Add key to the list */
-	struct tcp_md5sig_key *key;
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp6_md5sig_key *keys;
-
-	key = tcp_v6_md5_do_lookup(sk, peer);
-	if (key) {
-		/* modify existing entry - just update that one */
-		kfree(key->key);
-		key->key = newkey;
-		key->keylen = newkeylen;
-	} else {
-		/* reallocate new list if current one is full. */
-		if (!tp->md5sig_info) {
-			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
-			if (!tp->md5sig_info) {
-				kfree(newkey);
-				return -ENOMEM;
-			}
-			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
-		}
-		if (tp->md5sig_info->entries6 == 0 &&
-			tcp_alloc_md5sig_pool(sk) == NULL) {
-			kfree(newkey);
-			return -ENOMEM;
-		}
-		if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
-			keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
-				       (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
-
-			if (!keys) {
-				kfree(newkey);
-				if (tp->md5sig_info->entries6 == 0)
-					tcp_free_md5sig_pool();
-				return -ENOMEM;
-			}
-
-			if (tp->md5sig_info->entries6)
-				memmove(keys, tp->md5sig_info->keys6,
-					(sizeof (tp->md5sig_info->keys6[0]) *
-					 tp->md5sig_info->entries6));
-
-			kfree(tp->md5sig_info->keys6);
-			tp->md5sig_info->keys6 = keys;
-			tp->md5sig_info->alloced6++;
-		}
-
-		tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer;
-		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
-		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
-
-		tp->md5sig_info->entries6++;
-	}
-	return 0;
-}
-
-static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
-
-	for (i = 0; i < tp->md5sig_info->entries6; i++) {
-		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
-			/* Free the key */
-			kfree(tp->md5sig_info->keys6[i].base.key);
-			tp->md5sig_info->entries6--;
-
-			if (tp->md5sig_info->entries6 == 0) {
-				kfree(tp->md5sig_info->keys6);
-				tp->md5sig_info->keys6 = NULL;
-				tp->md5sig_info->alloced6 = 0;
-				tcp_free_md5sig_pool();
-			} else {
-				/* shrink the database */
-				if (tp->md5sig_info->entries6 != i)
-					memmove(&tp->md5sig_info->keys6[i],
-						&tp->md5sig_info->keys6[i+1],
-						(tp->md5sig_info->entries6 - i)
-						* sizeof (tp->md5sig_info->keys6[0]));
-			}
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-static void tcp_v6_clear_md5_list (struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
-
-	if (tp->md5sig_info->entries6) {
-		for (i = 0; i < tp->md5sig_info->entries6; i++)
-			kfree(tp->md5sig_info->keys6[i].base.key);
-		tp->md5sig_info->entries6 = 0;
-		tcp_free_md5sig_pool();
-	}
-
-	kfree(tp->md5sig_info->keys6);
-	tp->md5sig_info->keys6 = NULL;
-	tp->md5sig_info->alloced6 = 0;
-
-	if (tp->md5sig_info->entries4) {
-		for (i = 0; i < tp->md5sig_info->entries4; i++)
-			kfree(tp->md5sig_info->keys4[i].base.key);
-		tp->md5sig_info->entries4 = 0;
-		tcp_free_md5sig_pool();
-	}
-
-	kfree(tp->md5sig_info->keys4);
-	tp->md5sig_info->keys4 = NULL;
-	tp->md5sig_info->alloced4 = 0;
-}
-
 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
 				  int optlen)
 {
 	struct tcp_md5sig cmd;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
-	u8 *newkey;
 
 	if (optlen < sizeof(cmd))
 		return -EINVAL;
@@ -704,33 +574,21 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
 		if (!tcp_sk(sk)->md5sig_info)
 			return -ENOENT;
 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
-			return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
-		return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
+			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+					      AF_INET);
+		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+				      AF_INET6);
 	}
 
 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 		return -EINVAL;
 
-	if (!tcp_sk(sk)->md5sig_info) {
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_md5sig_info *p;
-
-		p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
-		if (!p)
-			return -ENOMEM;
-
-		tp->md5sig_info = p;
-		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
-	}
+	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 
-	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
-	if (!newkey)
-		return -ENOMEM;
-	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
-		return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
-					 newkey, cmd.tcpm_keylen);
-	}
-	return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
+	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
+			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 }
 
 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
@@ -1503,10 +1361,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 * memory, then we end up not copying the key
 		 * across. Shucks.
 		 */
-		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
-		if (newkey != NULL)
-			tcp_v6_md5_do_add(newsk, &newnp->daddr,
-					  newkey, key->keylen);
+		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+			       AF_INET6, key->key, key->keylen, GFP_ATOMIC);
 	}
 #endif
 
@@ -1995,11 +1851,6 @@ static int tcp_v6_init_sock(struct sock *sk)
 
 static void tcp_v6_destroy_sock(struct sock *sk)
 {
-#ifdef CONFIG_TCP_MD5SIG
-	/* Clean up the MD5 key list */
-	if (tcp_sk(sk)->md5sig_info)
-		tcp_v6_clear_md5_list(sk);
-#endif
 	tcp_v4_destroy_sock(sk);
 	inet6_destroy_sock(sk);
 }

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next] tcp: md5: rcu conversion
  2012-01-31 15:18         ` [PATCH net-next] tcp: md5: rcu conversion Eric Dumazet
@ 2012-01-31 15:38           ` Eric Dumazet
  2012-01-31 17:15             ` David Miller
  2012-01-31 17:14           ` [PATCH net-next] tcp: md5: rcu conversion David Miller
  1 sibling, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-01-31 15:38 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mardi 31 janvier 2012 à 16:18 +0100, Eric Dumazet a écrit :
> In order to be able to support proper RST messages for TCP MD5 flows, we
> need to allow access to MD5 keys without locking listener socket.
> 
> This conversion is a nice cleanup, and shrinks size of timewait sockets
> by 80 bytes.
> 
> IPv6 code reuses generic code found in IPv4 instead of duplicating it.
> 
> Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Shawn Lu <shawn.lu@ericsson.com>

By the way, we probably need to add a limit on the number of keys a
socket can store. It seems previous code relied on kmalloc(big_array)
limits  (~4MB on x86) only.

sock_kmalloc() might be the right thing to use instead of kmalloc()

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next] tcp: md5: rcu conversion
  2012-01-31 15:18         ` [PATCH net-next] tcp: md5: rcu conversion Eric Dumazet
  2012-01-31 15:38           ` Eric Dumazet
@ 2012-01-31 17:14           ` David Miller
  1 sibling, 0 replies; 21+ messages in thread
From: David Miller @ 2012-01-31 17:14 UTC (permalink / raw)
  To: eric.dumazet; +Cc: shawn.lu, netdev, xiaoclu

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 16:18:33 +0100

> In order to be able to support proper RST messages for TCP MD5 flows, we
> need to allow access to MD5 keys without locking listener socket.
> 
> This conversion is a nice cleanup, and shrinks size of timewait sockets
> by 80 bytes.
> 
> IPv6 code reuses generic code found in IPv4 instead of duplicating it.
> 
> Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Beautiful :-)  Applied, thanks for doing this work Eric.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next] tcp: md5: rcu conversion
  2012-01-31 15:38           ` Eric Dumazet
@ 2012-01-31 17:15             ` David Miller
  2012-01-31 20:56               ` [PATCH net-next] tcp: md5: use sock_kmalloc() to limit md5 keys Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: David Miller @ 2012-01-31 17:15 UTC (permalink / raw)
  To: eric.dumazet; +Cc: shawn.lu, netdev, xiaoclu

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 16:38:37 +0100

> sock_kmalloc() might be the right thing to use instead of kmalloc()

This is almost certainly the case, it's a per-socket memory
resource rather than one shared with other entities in the
stack.  Therefore sock_kmalloc() is most appropriate.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31 13:33       ` Eric Dumazet
  2012-01-31 15:18         ` [PATCH net-next] tcp: md5: rcu conversion Eric Dumazet
@ 2012-01-31 18:15         ` Shawn Lu
  1 sibling, 0 replies; 21+ messages in thread
From: Shawn Lu @ 2012-01-31 18:15 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiaoclu

Thanks! Eric. Good catch.  Will using RCU to pretect the md5 key. 

-----Original Message-----
From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
Sent: Tuesday, January 31, 2012 5:34 AM
To: Shawn Lu
Cc: davem@davemloft.net; netdev@vger.kernel.org; xiaoclu@gmail.com
Subject: RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener

Le mardi 31 janvier 2012 à 10:05 +0100, Eric Dumazet a écrit :

> Hmm... The second problem is that its not safe to call
> tcp_v4_md5_do_lookup() on an unlocked socket.
> 
> And locking a listener is way too expensive, since a listener socket 
> is already a contention point.
> 
> An attacker could send forged tcp md5 packets to slow down a server.
> 
> A proper patch needs RCU conversion first.

I am working on this RCU conversion and big md5 cleanup, using a single list of keys.

You can then add your fix on top of this work.
[shawnlu] Good idea. I will resumit using rcu.

union tcp_md5_addr {
        struct in_addr  a4;
#if IS_ENABLED(CONFIG_IPV6)
        struct in6_addr a6;
#endif
};

/* - key database */
struct tcp_md5sig_key {
        struct hlist_node       node;
        u8                      keylen;
        u8                      family; /* AF_INET or AF_INET6 */
        union tcp_md5_addr      addr;
        u8                      key[TCP_MD5SIG_MAXKEYLEN];
        struct rcu_head         rcu;
};

/* - sock block */
struct tcp_md5sig_info {
        struct hlist_head       head;
};

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH net-next] tcp: md5: use sock_kmalloc() to limit md5 keys
  2012-01-31 17:15             ` David Miller
@ 2012-01-31 20:56               ` Eric Dumazet
  2012-01-31 21:13                 ` David Miller
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-01-31 20:56 UTC (permalink / raw)
  To: David Miller; +Cc: shawn.lu, netdev, xiaoclu

There is no limit on number of MD5 keys an application can attach to a
tcp socket.

This patch adds a per tcp socket limit based
on /proc/sys/net/core/optmem_max

With current default optmem_max values, this allows about 150 keys on
64bit arches, and 88 keys on 32bit arches.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/tcp_ipv4.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1d5fd82..da5d322 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -943,11 +943,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 		tp->md5sig_info = md5sig;
 	}
 
-	key = kmalloc(sizeof(*key), gfp);
+	key = sock_kmalloc(sk, sizeof(*key), gfp);
 	if (!key)
 		return -ENOMEM;
 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
-		kfree(key);
+		sock_kfree_s(sk, key, sizeof(*key));
 		return -ENOMEM;
 	}
 
@@ -971,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 	if (!key)
 		return -ENOENT;
 	hlist_del_rcu(&key->node);
+	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 	kfree_rcu(key, rcu);
 	if (hlist_empty(&tp->md5sig_info->head))
 		tcp_free_md5sig_pool();
@@ -988,6 +989,7 @@ void tcp_clear_md5_list(struct sock *sk)
 		tcp_free_md5sig_pool();
 	hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) {
 		hlist_del_rcu(&key->node);
+		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 		kfree_rcu(key, rcu);
 	}
 }

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next] tcp: md5: use sock_kmalloc() to limit md5 keys
  2012-01-31 20:56               ` [PATCH net-next] tcp: md5: use sock_kmalloc() to limit md5 keys Eric Dumazet
@ 2012-01-31 21:13                 ` David Miller
  0 siblings, 0 replies; 21+ messages in thread
From: David Miller @ 2012-01-31 21:13 UTC (permalink / raw)
  To: eric.dumazet; +Cc: shawn.lu, netdev, xiaoclu

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 21:56:48 +0100

> There is no limit on number of MD5 keys an application can attach to a
> tcp socket.
> 
> This patch adds a per tcp socket limit based
> on /proc/sys/net/core/optmem_max
> 
> With current default optmem_max values, this allows about 150 keys on
> 64bit arches, and 88 keys on 32bit arches.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied, thanks a lot Eric.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-02-01  8:11         ` Shawn Lu
@ 2012-02-01  9:25           ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-02-01  9:25 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mercredi 01 février 2012 à 03:11 -0500, Shawn Lu a écrit :
> Why? Do you mean packet is droped by netfilter before getting to tcp and No RST is generated. 

Hmm, net/ipv4/netfilter/ipt_REJECT.c / net/ipv6/netfilter/ip6t_REJECT.c
are able to sent RST packets. I presume this is not md5 aware.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-02-01  7:53       ` Eric Dumazet
@ 2012-02-01  8:11         ` Shawn Lu
  2012-02-01  9:25           ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Shawn Lu @ 2012-02-01  8:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiaoclu

Why? Do you mean packet is droped by netfilter before getting to tcp and No RST is generated. 

-----Original Message-----
From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
Sent: Tuesday, January 31, 2012 11:54 PM
To: Shawn Lu
Cc: davem@davemloft.net; netdev@vger.kernel.org; xiaoclu@gmail.com
Subject: RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener

Le mercredi 01 février 2012 à 02:48 -0500, Shawn Lu a écrit :
> Hi, Eric:
> 
> How about change the title and log to following:
> 
>   tcp: md5: RST: getting md5 key from listener
> 
>     TCP RST mechanism is broken in TCP md5(RFC2385). When
>     connection is gone, md5 key is lost, sending RST
>     without md5 hash is deem to ignored by peer. This can
>     be a problem since RST help protocal like bgp to fast
>     recove from peer crash.
> 
>     In most case, users of tcp md5, such as bgp and ldp,
>     have listener on both side to accept connection from peer.
>     md5 keys for peers are saved in listening socket.
> 
>     There are two cases in finding md5 key when connection is
>     lost:
>     1.Passive receive RST: The message is send to well known port,
>     tcp will associate packet with listener. md5 key can be gotten
>     from listener.
> 
>     2.Active receive RST (no sock): The message is send to ative
>     side, there is no socket associated with message. In this case,
>     finding listener from source port, then find md5 key from
>     listener.
> 
>     we are not loosing sercuriy here:
>     packet is checked with md5 hash. No RST is generated
>     if md5 hash doesn't match or no md5 key can be found.
> 
> Note:
> Will send out a new version that is on top of your new patch
> -- "tcp: md5: protects md5sig_info with RCU"
> 

Seems good to me !

By the way, is the patch going to work if netfilter conntrack is enabled ?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-02-01  7:48     ` Shawn Lu
@ 2012-02-01  7:53       ` Eric Dumazet
  2012-02-01  8:11         ` Shawn Lu
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-02-01  7:53 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mercredi 01 février 2012 à 02:48 -0500, Shawn Lu a écrit :
> Hi, Eric:
> 
> How about change the title and log to following:
> 
>   tcp: md5: RST: getting md5 key from listener
> 
>     TCP RST mechanism is broken in TCP md5(RFC2385). When
>     connection is gone, md5 key is lost, sending RST
>     without md5 hash is deem to ignored by peer. This can
>     be a problem since RST help protocal like bgp to fast
>     recove from peer crash.
> 
>     In most case, users of tcp md5, such as bgp and ldp,
>     have listener on both side to accept connection from peer.
>     md5 keys for peers are saved in listening socket.
> 
>     There are two cases in finding md5 key when connection is
>     lost:
>     1.Passive receive RST: The message is send to well known port,
>     tcp will associate packet with listener. md5 key can be gotten
>     from listener.
> 
>     2.Active receive RST (no sock): The message is send to ative
>     side, there is no socket associated with message. In this case,
>     finding listener from source port, then find md5 key from
>     listener.
> 
>     we are not loosing sercuriy here:
>     packet is checked with md5 hash. No RST is generated
>     if md5 hash doesn't match or no md5 key can be found.
> 
> Note:
> Will send out a new version that is on top of your new patch
> -- "tcp: md5: protects md5sig_info with RCU"
> 

Seems good to me !

By the way, is the patch going to work if netfilter conntrack is
enabled ?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-02-01  5:09   ` Eric Dumazet
@ 2012-02-01  7:48     ` Shawn Lu
  2012-02-01  7:53       ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Shawn Lu @ 2012-02-01  7:48 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiaoclu

Hi, Eric:

How about change the title and log to following:

  tcp: md5: RST: getting md5 key from listener

    TCP RST mechanism is broken in TCP md5(RFC2385). When
    connection is gone, md5 key is lost, sending RST
    without md5 hash is deem to ignored by peer. This can
    be a problem since RST help protocal like bgp to fast
    recove from peer crash.

    In most case, users of tcp md5, such as bgp and ldp,
    have listener on both side to accept connection from peer.
    md5 keys for peers are saved in listening socket.

    There are two cases in finding md5 key when connection is
    lost:
    1.Passive receive RST: The message is send to well known port,
    tcp will associate packet with listener. md5 key can be gotten
    from listener.

    2.Active receive RST (no sock): The message is send to ative
    side, there is no socket associated with message. In this case,
    finding listener from source port, then find md5 key from
    listener.

    we are not loosing sercuriy here:
    packet is checked with md5 hash. No RST is generated
    if md5 hash doesn't match or no md5 key can be found.

Note:
Will send out a new version that is on top of your new patch
-- "tcp: md5: protects md5sig_info with RCU"


-----Original Message-----
From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
Sent: Tuesday, January 31, 2012 9:09 PM
To: Shawn Lu
Cc: davem@davemloft.net; netdev@vger.kernel.org; xiaoclu@gmail.com
Subject: Re: [PATCH] tcp: md5: fix md5 RST when both sides have listener

Le mardi 31 janvier 2012 à 15:53 -0800, Shawn Lu a écrit :
> TCP RST mechanism is broken in TCP md5(RFC2385). When connection is 
> gone, md5 key is lost, sending RST without md5 hash is deem to ignored 
> by peer. This can be a problem since RST help protocal like bgp to 
> fast recove from peer crash.
> 
> In most case, users of tcp md5, such as bgp and ldp, have listeners on 
> both sides. md5 keys for peers are saved in listening socket. When 
> passive side connection is gone, we can still get md5 key from 
> listening socket.
> When active side of connection is gone, we can try to find listening 
> socket through source port, and then md5 key.
> we are not loosing sercuriy here:
> packet is valified checked with md5 hash. No RST is generated if md5 
> hash doesn't match or no md5 key can be found.
> 
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---

Small notes : 

1) Always add [PATCH Vx] on your submission if it was a new version of a previous patch. (V2, V3, V4, ...)

If possible, add after the "---" separator an explanation of what changed in your new submission :

V3: added some rcu_read_lock()/rcu_read_unlock() sections

2) Also patch title and changelog are a bit confusing.

Only the machine receiving the TCP message and answering by RST message needs a listener, in case tcp frame cannot be associated to an existing tcp socket (ESTABLISHED or TIMEWAIT).

The other side doesnt need a listener, only a client using TCP MD5 option in its socket.

Other than that, your patch seems fine to me.

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks !

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-01-31 23:53 ` Shawn Lu
@ 2012-02-01  5:09   ` Eric Dumazet
  2012-02-01  7:48     ` Shawn Lu
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-02-01  5:09 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mardi 31 janvier 2012 à 15:53 -0800, Shawn Lu a écrit :
> TCP RST mechanism is broken in TCP md5(RFC2385). When
> connection is gone, md5 key is lost, sending RST
> without md5 hash is deem to ignored by peer. This can
> be a problem since RST help protocal like bgp to fast
> recove from peer crash.
> 
> In most case, users of tcp md5, such as bgp and ldp,
> have listeners on both sides. md5 keys for peers are
> saved in listening socket. When passive side connection
> is gone, we can still get md5 key from listening socket.
> When active side of connection is gone, we can try to
> find listening socket through source port, and then md5
> key.
> we are not loosing sercuriy here:
> packet is valified checked with md5 hash. No RST is generated
> if md5 hash doesn't match or no md5 key can be found.
> 
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---

Small notes : 

1) Always add [PATCH Vx] on your submission if it was a new version of a
previous patch. (V2, V3, V4, ...)

If possible, add after the "---" separator an explanation of what
changed in your new submission :

V3: added some rcu_read_lock()/rcu_read_unlock() sections

2) Also patch title and changelog are a bit confusing.

Only the machine receiving the TCP message and answering by RST message
needs a listener, in case tcp frame cannot be associated to an existing
tcp socket (ESTABLISHED or TIMEWAIT).

The other side doesnt need a listener, only a client using TCP MD5
option in its socket.

Other than that, your patch seems fine to me.

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks !

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-02-01  0:50 ` [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
@ 2012-02-01  4:08   ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-02-01  4:08 UTC (permalink / raw)
  To: Shawn Lu; +Cc: davem, netdev, xiaoclu

Le mardi 31 janvier 2012 à 16:50 -0800, Shawn Lu a écrit :
> TCP RST mechanism is broken in TCP md5(RFC2385). When
> connection is gone, md5 key is lost, sending RST
> without md5 hash is deem to ignored by peer. This can
> be a problem since RST help protocal like bgp to fast
> recove from peer crash.
> 
> In most case, users of tcp md5, such as bgp and ldp,
> have listeners on both sides. md5 keys for peers are
> saved in listening socket. When passive side connection
> is gone, we can still get md5 key from listening socket.
> When active side of connection is gone, we can try to
> find listening socket through source port, and then md5
> key.
> we are not loosing sercuriy here:
> packet is valified checked with md5 hash. No RST is generated
> if md5 hash doesn't match or no md5 key can be found.
> 
> Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
> ---
>  net/ipv4/tcp_ipv4.c |   45 ++++++++++++++++++++++++++++++++++++++++++---
>  net/ipv6/tcp_ipv6.c |   43 +++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 83 insertions(+), 5 deletions(-)
> 

Hmm, ok but using RCU for the md5 lookup (instead of mere sock lock as
done today) also means we need to protect struct tcp_md5sig_info itself

I send a patch for this in a separate patch first, then David can apply
your patch when I add my "Signed-off-by: ..."

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] tcp: md5: fix md5 RST when both sides have listener
  2012-02-01  0:50 (unknown), Shawn Lu
@ 2012-02-01  0:50 ` Shawn Lu
  2012-02-01  4:08   ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Shawn Lu @ 2012-02-01  0:50 UTC (permalink / raw)
  To: eric.dumazet; +Cc: davem, netdev, xiaoclu

TCP RST mechanism is broken in TCP md5(RFC2385). When
connection is gone, md5 key is lost, sending RST
without md5 hash is deem to ignored by peer. This can
be a problem since RST help protocal like bgp to fast
recove from peer crash.

In most case, users of tcp md5, such as bgp and ldp,
have listeners on both sides. md5 keys for peers are
saved in listening socket. When passive side connection
is gone, we can still get md5 key from listening socket.
When active side of connection is gone, we can try to
find listening socket through source port, and then md5
key.
we are not loosing sercuriy here:
packet is valified checked with md5 hash. No RST is generated
if md5 hash doesn't match or no md5 key can be found.

Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
---
 net/ipv4/tcp_ipv4.c |   45 ++++++++++++++++++++++++++++++++++++++++++---
 net/ipv6/tcp_ipv6.c |   43 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da5d322..1a761c8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -593,6 +593,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
+	const __u8 *hash_location = NULL;
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
 #endif
 	struct net *net;
 
@@ -623,9 +627,36 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_md5_do_lookup(sk,
-				     (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
-				     AF_INET) : NULL;
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+					     &tcp_hashinfo, ip_hdr(skb)->daddr,
+					     ntohs(th->source), inet_iif(skb));
+		/* don't send rst if it can't find key */
+		if (!sk1)
+			return;
+		rcu_read_lock();
+		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
+					&ip_hdr(skb)->saddr, AF_INET);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
+					     &ip_hdr(skb)->saddr,
+					     AF_INET) : NULL;
+	}
+
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -653,6 +684,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1) {
+		rcu_read_unlock();
+		sock_put(sk1);
+	}
+#endif
 }
 
 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bec41f9..3b139e2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -925,6 +925,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+	const __u8 *hash_location = NULL;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
+#endif
 
 	if (th->rst)
 		return;
@@ -933,8 +940,32 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		return;
 
 #ifdef CONFIG_TCP_MD5SIG
-	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+					   &tcp_hashinfo, &ipv6h->daddr,
+					   ntohs(th->source), inet6_iif(skb));
+		if (!sk1)
+			return;
+
+		rcu_read_lock();
+		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+	}
 #endif
 
 	if (th->ack)
@@ -944,6 +975,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			  (th->doff << 2);
 
 	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1) {
+		rcu_read_unlock();
+		sock_put(sk1);
+	}
+#endif
 }
 
 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] tcp: md5: fix md5 RST when both sides have listener
       [not found] <RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener>
@ 2012-01-31 23:53 ` Shawn Lu
  2012-02-01  5:09   ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Shawn Lu @ 2012-01-31 23:53 UTC (permalink / raw)
  To: eric.dumazet; +Cc: davem, netdev, xiaoclu

TCP RST mechanism is broken in TCP md5(RFC2385). When
connection is gone, md5 key is lost, sending RST
without md5 hash is deem to ignored by peer. This can
be a problem since RST help protocal like bgp to fast
recove from peer crash.

In most case, users of tcp md5, such as bgp and ldp,
have listeners on both sides. md5 keys for peers are
saved in listening socket. When passive side connection
is gone, we can still get md5 key from listening socket.
When active side of connection is gone, we can try to
find listening socket through source port, and then md5
key.
we are not loosing sercuriy here:
packet is valified checked with md5 hash. No RST is generated
if md5 hash doesn't match or no md5 key can be found.

Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
---
 net/ipv4/tcp_ipv4.c |   43 ++++++++++++++++++++++++++++++++++++++++---
 net/ipv6/tcp_ipv6.c |   40 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da5d322..adafee8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -593,6 +593,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
+	const __u8 *hash_location = NULL;
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
 #endif
 	struct net *net;
 
@@ -623,9 +627,36 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_md5_do_lookup(sk,
-				     (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
-				     AF_INET) : NULL;
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+					     &tcp_hashinfo, ip_hdr(skb)->daddr,
+					     ntohs(th->source), inet_iif(skb));
+		/* don't send rst if it can't find key */
+		if (!sk1)
+			return;
+
+		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
+					&ip_hdr(skb)->saddr, AF_INET);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
+					     &ip_hdr(skb)->saddr,
+					     AF_INET) : NULL;
+	}
+
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -653,6 +684,12 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1)
+		sock_put(sk1);
+#endif
 }
 
 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bec41f9..00da65c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -925,6 +925,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
+#ifdef CONFIG_TCP_MD5SIG
+	const __u8 *hash_location = NULL;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
+#endif
 
 	if (th->rst)
 		return;
@@ -933,8 +940,31 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		return;
 
 #ifdef CONFIG_TCP_MD5SIG
-	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
+					   &tcp_hashinfo, &ipv6h->daddr,
+					   ntohs(th->source), inet6_iif(skb));
+		if (!sk1)
+			return;
+
+		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
+	}
 #endif
 
 	if (th->ack)
@@ -944,6 +974,12 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			  (th->doff << 2);
 
 	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1)
+		sock_put(sk1);
+#endif
 }
 
 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2012-02-01  9:25 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31  2:07 [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
2012-01-31  2:16 ` Eric Dumazet
2012-01-31  2:37   ` Shawn Lu
2012-01-31  8:39   ` Shawn Lu
2012-01-31  9:05     ` Eric Dumazet
2012-01-31 13:33       ` Eric Dumazet
2012-01-31 15:18         ` [PATCH net-next] tcp: md5: rcu conversion Eric Dumazet
2012-01-31 15:38           ` Eric Dumazet
2012-01-31 17:15             ` David Miller
2012-01-31 20:56               ` [PATCH net-next] tcp: md5: use sock_kmalloc() to limit md5 keys Eric Dumazet
2012-01-31 21:13                 ` David Miller
2012-01-31 17:14           ` [PATCH net-next] tcp: md5: rcu conversion David Miller
2012-01-31 18:15         ` [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
     [not found] <RE: [PATCH] tcp: md5: fix md5 RST when both sides have listener>
2012-01-31 23:53 ` Shawn Lu
2012-02-01  5:09   ` Eric Dumazet
2012-02-01  7:48     ` Shawn Lu
2012-02-01  7:53       ` Eric Dumazet
2012-02-01  8:11         ` Shawn Lu
2012-02-01  9:25           ` Eric Dumazet
2012-02-01  0:50 (unknown), Shawn Lu
2012-02-01  0:50 ` [PATCH] tcp: md5: fix md5 RST when both sides have listener Shawn Lu
2012-02-01  4:08   ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.