All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] net: Toeplitz library functions
@ 2013-09-23 22:41 Tom Herbert
  2013-09-24  0:03 ` Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 50+ messages in thread
From: Tom Herbert @ 2013-09-23 22:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, jesse.brandeburg

Introduce Toeplitz hash functions. Toeplitz is a hash used primarily in
NICs to performan RSS flow steering.  This is a software implemenation
of that. In order to make the hash calculation efficient, we precompute
the possible hash values for each inidividual byte of input. The input
length is up to 40 bytes, so we make an array of cache[40][256].

The implemenation was verified against MSDN "Verify RSS hash" sample
values.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/netdevice.h |  3 +++
 include/linux/toeplitz.h  | 27 +++++++++++++++++++
 lib/Kconfig               |  3 +++
 lib/Makefile              |  2 ++
 lib/toeplitz.c            | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig               |  5 ++++
 net/core/dev.c            | 11 ++++++++
 7 files changed, 117 insertions(+)
 create mode 100644 include/linux/toeplitz.h
 create mode 100644 lib/toeplitz.c

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3de49ac..546caf2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -38,6 +38,7 @@
 #include <linux/dmaengine.h>
 #include <linux/workqueue.h>
 #include <linux/dynamic_queue_limits.h>
+#include <linux/toeplitz.h>
 
 #include <linux/ethtool.h>
 #include <net/net_namespace.h>
@@ -195,6 +196,8 @@ struct net_device_stats {
 extern struct static_key rps_needed;
 #endif
 
+extern struct toeplitz *toeplitz_net;
+
 struct neighbour;
 struct neigh_parms;
 struct sk_buff;
diff --git a/include/linux/toeplitz.h b/include/linux/toeplitz.h
new file mode 100644
index 0000000..bc0b8e8
--- /dev/null
+++ b/include/linux/toeplitz.h
@@ -0,0 +1,27 @@
+#ifndef __LINUX_TOEPLITZ_H
+#define __LINUX_TOEPLITZ_H
+
+#define TOEPLITZ_KEY_LEN 40
+
+struct toeplitz {
+	u8 key_vals[TOEPLITZ_KEY_LEN];
+	u32 key_cache[TOEPLITZ_KEY_LEN][256];
+};
+
+static inline unsigned int
+toeplitz_hash(const unsigned char *bytes,
+	      struct toeplitz *toeplitz, int n)
+{
+	int i;
+	unsigned int result = 0;
+
+	for (i = 0; i < n; i++)
+		result ^= toeplitz->key_cache[i][bytes[i]];
+
+        return result;
+};
+
+extern struct toeplitz *toeplitz_alloc(void);
+extern void toeplitz_init(struct toeplitz *toeplitz, u8 *key_vals);
+
+#endif /* __LINUX_TOEPLITZ_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index b3c8be0..463b2b1 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -359,6 +359,9 @@ config CPU_RMAP
 config DQL
 	bool
 
+config TOEPLITZ
+	bool
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index f3bb2cb..a28349b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -133,6 +133,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
+obj-$(CONFIG_TOEPLITZ) += toeplitz.o
+
 obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_SIGNATURE) += digsig.o
 
diff --git a/lib/toeplitz.c b/lib/toeplitz.c
new file mode 100644
index 0000000..0951dd9
--- /dev/null
+++ b/lib/toeplitz.c
@@ -0,0 +1,66 @@
+/*
+ * Toeplitz hash implemenation. See include/linux/toeplitz.h
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/toeplitz.h>
+
+struct toeplitz *toeplitz_alloc(void)
+{
+	return kmalloc(sizeof(struct toeplitz), GFP_KERNEL);
+}
+
+static u32 toeplitz_get_kval(unsigned char *key, int idx)
+{
+	u32 v, r;
+	int off, rem;
+
+	off = idx / 8;
+	rem = idx % 8;
+
+	v = (((unsigned int)key[off]) << 24) +
+	    (((unsigned int)key[off + 1]) << 16) +
+	    (((unsigned int)key[off + 2]) << 8) +
+	    (((unsigned int)key[off + 3]));
+
+	r = v << rem | (unsigned int)key[off + 4] >> (8 - rem);
+	return r;
+}
+
+static inline int idx8(int idx)
+{
+#ifdef __LITTLE_ENDIAN
+        idx = (idx / 8) * 8 + (8 - (idx % 8 + 1));
+#endif
+        return idx;
+}
+
+void toeplitz_init(struct toeplitz *toeplitz, u8 *key_vals)
+{
+	int i;
+	unsigned long a, j;
+	unsigned int result = 0;
+
+	/* Set up key val table */
+	if (key_vals)
+		for (i = 0; i < TOEPLITZ_KEY_LEN; i++)
+			toeplitz->key_vals[i] = key_vals[i];
+	else
+		prandom_bytes(toeplitz->key_vals, TOEPLITZ_KEY_LEN);
+
+	/* Set up key cache table */
+	for (i = 0; i < TOEPLITZ_KEY_LEN; i++) {
+		for (j = 0; j < 256; j++) {
+			result = 0;
+			for (a = find_first_bit(&j, 8); a < 8;
+			    a = find_next_bit(&j, 8, a + 1))
+				result ^= toeplitz_get_kval(
+				   toeplitz->key_vals, idx8(a + (i * 8)));
+			toeplitz->key_cache[i][j] = result;
+		}
+	}
+}
diff --git a/net/Kconfig b/net/Kconfig
index b50dacc..860c9fa 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -254,6 +254,11 @@ config BQL
 	select DQL
 	default y
 
+config NET_TOEPLITZ
+	boolean
+	select TOEPLITZ
+	default n
+
 config BPF_JIT
 	bool "enable BPF Just In Time compiler"
 	depends on HAVE_BPF_JIT
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2..074f530 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6633,6 +6633,9 @@ static struct pernet_operations __net_initdata default_device_ops = {
 	.exit_batch = default_device_exit_batch,
 };
 
+struct toeplitz *toeplitz_net;
+EXPORT_SYMBOL(toeplitz_net);
+
 /*
  *	Initialize the DEV module. At boot time this walks the device list and
  *	unhooks any devices that fail to initialise (normally hardware not
@@ -6656,6 +6659,14 @@ static int __init net_dev_init(void)
 	if (netdev_kobject_init())
 		goto out;
 
+#ifdef CONFIG_NET_TOEPLITZ
+	toeplitz_net = toeplitz_alloc();
+	if (!toeplitz_net)
+		goto out;
+
+	toeplitz_init(toeplitz_net, NULL);
+#endif
+
 	INIT_LIST_HEAD(&ptype_all);
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
-- 
1.8.4

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-23 22:41 [PATCH 1/2] net: Toeplitz library functions Tom Herbert
@ 2013-09-24  0:03 ` Eric Dumazet
  2013-09-24  1:39   ` David Miller
  2013-09-24  2:30   ` Hannes Frederic Sowa
  2013-09-24  8:32 ` David Laight
  2013-09-24 16:32 ` Ben Hutchings
  2 siblings, 2 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24  0:03 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, jesse.brandeburg

On Mon, 2013-09-23 at 15:41 -0700, Tom Herbert wrote:

> +#ifdef CONFIG_NET_TOEPLITZ
> +	toeplitz_net = toeplitz_alloc();
> +	if (!toeplitz_net)
> +		goto out;
> +
> +	toeplitz_init(toeplitz_net, NULL);
> +#endif
> +

Hmm

1) Security alert here.

Many devices (lets say Android phones) have no entropy at this point,
all devices will have same toeplitz key.

Check build_ehash_secret() for a possible point for the feeding of the
key. (and commit 08dcdbf6a7b9d14c2302c5bd0c5390ddf122f664 )

If hardware toeplitz is ever used, we want to make sure every host uses
a private and hidden Toeplitz key.

2) Also it seems a given tuple would hash the same on different
namespaces. Could be a problem if one particular TCP hash bucket is
holding thousand of sockets.

3) jhash() is fast, there is no possible cache line misses

  With your implementation, toeplitz hashing 36 bytes could have a cost
of 36 additional cache line misses.

You do not see that on TCP_RR test because cpu caches are preloaded, but
it will show on latency sensitive workload.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  0:03 ` Eric Dumazet
@ 2013-09-24  1:39   ` David Miller
  2013-09-24  2:30   ` Hannes Frederic Sowa
  1 sibling, 0 replies; 50+ messages in thread
From: David Miller @ 2013-09-24  1:39 UTC (permalink / raw)
  To: eric.dumazet; +Cc: therbert, netdev, jesse.brandeburg

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 23 Sep 2013 17:03:11 -0700

> 1) Security alert here.
> 
> Many devices (lets say Android phones) have no entropy at this point,
> all devices will have same toeplitz key.
> 
> Check build_ehash_secret() for a possible point for the feeding of the
> key. (and commit 08dcdbf6a7b9d14c2302c5bd0c5390ddf122f664 )
> 
> If hardware toeplitz is ever used, we want to make sure every host uses
> a private and hidden Toeplitz key.
> 
> 2) Also it seems a given tuple would hash the same on different
> namespaces. Could be a problem if one particular TCP hash bucket is
> holding thousand of sockets.
> 
> 3) jhash() is fast, there is no possible cache line misses

4) Random input to the hash is now not used at all, instant exploit
   because now any attacker can open up connections over and over that
   will all hash to the same hash bucket making our lookups linear.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  0:03 ` Eric Dumazet
  2013-09-24  1:39   ` David Miller
@ 2013-09-24  2:30   ` Hannes Frederic Sowa
  2013-09-24  3:35     ` Hannes Frederic Sowa
  1 sibling, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24  2:30 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Mon, Sep 23, 2013 at 05:03:11PM -0700, Eric Dumazet wrote:
> On Mon, 2013-09-23 at 15:41 -0700, Tom Herbert wrote:
> 
> > +#ifdef CONFIG_NET_TOEPLITZ
> > +	toeplitz_net = toeplitz_alloc();
> > +	if (!toeplitz_net)
> > +		goto out;
> > +
> > +	toeplitz_init(toeplitz_net, NULL);
> > +#endif
> > +
> 
> Hmm
> 
> 1) Security alert here.
> 
> Many devices (lets say Android phones) have no entropy at this point,
> all devices will have same toeplitz key.
> 
> Check build_ehash_secret() for a possible point for the feeding of the
> key. (and commit 08dcdbf6a7b9d14c2302c5bd0c5390ddf122f664 )
> 
> If hardware toeplitz is ever used, we want to make sure every host uses
> a private and hidden Toeplitz key.

I just had a look at it myself and have one question:

ipv6/af_inet6.c:
112         if (sock->type != SOCK_RAW &&
113             sock->type != SOCK_DGRAM &&
114             !inet_ehash_secret)
115                 build_ehash_secret();

ipv4/af_inet.c:
289         if (unlikely(!inet_ehash_secret))
290                 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
291                         build_ehash_secret();


Why do we care about the sock->type?

build_ehash_secret builds up the data which seeds fragmentation ids, ephermal
port randomization etc. Could we drop the check of sock->type? I guess the
idea was that in-kernel sockets of type raw/udp do not seed the keys when no
entropy is available?

Thanks,

  Hannes

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  2:30   ` Hannes Frederic Sowa
@ 2013-09-24  3:35     ` Hannes Frederic Sowa
  2013-09-24  5:38       ` Eric Dumazet
  0 siblings, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24  3:35 UTC (permalink / raw)
  To: Eric Dumazet, Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, Sep 24, 2013 at 04:30:38AM +0200, Hannes Frederic Sowa wrote:
> On Mon, Sep 23, 2013 at 05:03:11PM -0700, Eric Dumazet wrote:
> > On Mon, 2013-09-23 at 15:41 -0700, Tom Herbert wrote:
> > 
> > > +#ifdef CONFIG_NET_TOEPLITZ
> > > +	toeplitz_net = toeplitz_alloc();
> > > +	if (!toeplitz_net)
> > > +		goto out;
> > > +
> > > +	toeplitz_init(toeplitz_net, NULL);
> > > +#endif
> > > +
> > 
> > Hmm
> > 
> > 1) Security alert here.
> > 
> > Many devices (lets say Android phones) have no entropy at this point,
> > all devices will have same toeplitz key.
> > 
> > Check build_ehash_secret() for a possible point for the feeding of the
> > key. (and commit 08dcdbf6a7b9d14c2302c5bd0c5390ddf122f664 )
> > 
> > If hardware toeplitz is ever used, we want to make sure every host uses
> > a private and hidden Toeplitz key.
> build_ehash_secret builds up the data which seeds fragmentation ids, ephermal
> port randomization etc. Could we drop the check of sock->type? I guess the
> idea was that in-kernel sockets of type raw/udp do not seed the keys when no
> entropy is available?

Would this be better (I checked inet_ehash_secret, ipv6_hash_secret
and net_secret to actual get initialized)?

[PATCH] inet: initialize hash secret values on first non-kernel socket creation

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 net/ipv4/af_inet.c  | 5 ++---
 net/ipv6/af_inet6.c | 4 +---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b..489834a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -286,9 +286,8 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
-	if (unlikely(!inet_ehash_secret))
-		if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
-			build_ehash_secret();
+	if (unlikely(!inet_ehash_secret && !kern))
+		build_ehash_secret();
 
 	sock->state = SS_UNCONNECTED;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7c96100..dbf8c35 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -110,9 +110,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
-	if (sock->type != SOCK_RAW &&
-	    sock->type != SOCK_DGRAM &&
-	    !inet_ehash_secret)
+	if (unlikely(!inet_ehash_secret && !kern))
 		build_ehash_secret();
 
 	/* Look for the requested type/protocol pair. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  3:35     ` Hannes Frederic Sowa
@ 2013-09-24  5:38       ` Eric Dumazet
  2013-09-24  5:45         ` Hannes Frederic Sowa
  2013-09-24 16:01         ` [PATCH 1/2] net: Toeplitz library functions Hannes Frederic Sowa
  0 siblings, 2 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24  5:38 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 05:35 +0200, Hannes Frederic Sowa wrote:

> > build_ehash_secret builds up the data which seeds fragmentation ids, ephermal
> > port randomization etc. Could we drop the check of sock->type? I guess the
> > idea was that in-kernel sockets of type raw/udp do not seed the keys when no
> > entropy is available?
> 
> Would this be better (I checked inet_ehash_secret, ipv6_hash_secret
> and net_secret to actual get initialized)?
> 

inet_ehash_secret is used only to make jhash() for tcp ehash, not for
fragmentation ids or other uses (port randomization).


> [PATCH] inet: initialize hash secret values on first non-kernel socket creation
> 
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---

Why ? This looks buggy to me.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  5:38       ` Eric Dumazet
@ 2013-09-24  5:45         ` Hannes Frederic Sowa
  2013-09-24 13:19           ` [PATCH] net: net_secret should not depend on TCP Eric Dumazet
  2013-09-24 16:01         ` [PATCH 1/2] net: Toeplitz library functions Hannes Frederic Sowa
  1 sibling, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24  5:45 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Mon, Sep 23, 2013 at 10:38:38PM -0700, Eric Dumazet wrote:
> On Tue, 2013-09-24 at 05:35 +0200, Hannes Frederic Sowa wrote:
> 
> > > build_ehash_secret builds up the data which seeds fragmentation ids, ephermal
> > > port randomization etc. Could we drop the check of sock->type? I guess the
> > > idea was that in-kernel sockets of type raw/udp do not seed the keys when no
> > > entropy is available?
> > 
> > Would this be better (I checked inet_ehash_secret, ipv6_hash_secret
> > and net_secret to actual get initialized)?
> > 
> 
> inet_ehash_secret is used only to make jhash() for tcp ehash, not for
> fragmentation ids or other uses (port randomization).
> 
> 
> > [PATCH] inet: initialize hash secret values on first non-kernel socket creation
> > 
> > Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> > ---
> 
> Why ? This looks buggy to me.

It does initialize the rest of the key values (net_secret_init), too:

258 void build_ehash_secret(void)
259 {
260         u32 rnd;
261 
262         do {
263                 get_random_bytes(&rnd, sizeof(rnd));
264         } while (rnd == 0);
265 
266         if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
267                 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
268                 net_secret_init();
269         }
270 }

Maybe I overlooked something?

Thanks,

  Hannes

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH 1/2] net: Toeplitz library functions
  2013-09-23 22:41 [PATCH 1/2] net: Toeplitz library functions Tom Herbert
  2013-09-24  0:03 ` Eric Dumazet
@ 2013-09-24  8:32 ` David Laight
  2013-09-24 12:24   ` Eric Dumazet
                     ` (2 more replies)
  2013-09-24 16:32 ` Ben Hutchings
  2 siblings, 3 replies; 50+ messages in thread
From: David Laight @ 2013-09-24  8:32 UTC (permalink / raw)
  To: Tom Herbert, davem; +Cc: netdev, jesse.brandeburg

> +static inline unsigned int
> +toeplitz_hash(const unsigned char *bytes,
> +	      struct toeplitz *toeplitz, int n)
> +{
> +	int i;
> +	unsigned int result = 0;
> +
> +	for (i = 0; i < n; i++)
> +		result ^= toeplitz->key_cache[i][bytes[i]];
> +
> +        return result;
> +};

That is a horrid hash function to be calculating in software.

The code looks very much like a simple 32bit CRC.
It isn't entirely clears exactly where the 'key' gets included,
but I suspect it is just xored with the data bytes.

Using in it hardware is probably fine - the hardware can do
it cheaply (in dedicated logic) as the frame arrives.
The CRC polynomial probably collapses to a few XOR operations
when done byte by byte (the hdlc crc16 collapses to 3 levels
of xor).

IIRC jhash() works on 32bit quantities - so has far fewer
maths operations and well as not having all the random data
accesses (cache misses and displacing other parts of the
working set from the cache).

I also thought the hash was arranged so that tx and rx packets
for a single connection hash to the same value?

	David

^ permalink raw reply	[flat|nested] 50+ messages in thread

* RE: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  8:32 ` David Laight
@ 2013-09-24 12:24   ` Eric Dumazet
  2013-09-24 15:22   ` Tom Herbert
  2013-09-24 16:38   ` Ben Hutchings
  2 siblings, 0 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 12:24 UTC (permalink / raw)
  To: David Laight; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 09:32 +0100, David Laight wrote:
> > +static inline unsigned int
> > +toeplitz_hash(const unsigned char *bytes,
> > +	      struct toeplitz *toeplitz, int n)
> > +{
> > +	int i;
> > +	unsigned int result = 0;
> > +
> > +	for (i = 0; i < n; i++)
> > +		result ^= toeplitz->key_cache[i][bytes[i]];
> > +
> > +        return result;
> > +};
> 
> That is a horrid hash function to be calculating in software.
> 
> The code looks very much like a simple 32bit CRC.
> It isn't entirely clears exactly where the 'key' gets included,
> but I suspect it is just xored with the data bytes.
> 
> Using in it hardware is probably fine - the hardware can do
> it cheaply (in dedicated logic) as the frame arrives.
> The CRC polynomial probably collapses to a few XOR operations
> when done byte by byte (the hdlc crc16 collapses to 3 levels
> of xor).
> 
> IIRC jhash() works on 32bit quantities - so has far fewer
> maths operations and well as not having all the random data
> accesses (cache misses and displacing other parts of the
> working set from the cache).

Anyway, I really hope Toeplitz hash is not restricted to 0-255 range

Tom implementation is not Toeplitz, but a degenerated hash.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH] net: net_secret should not depend on TCP
  2013-09-24  5:45         ` Hannes Frederic Sowa
@ 2013-09-24 13:19           ` Eric Dumazet
  2013-09-24 15:13             ` Hannes Frederic Sowa
                               ` (2 more replies)
  0 siblings, 3 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 13:19 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

From: Eric Dumazet <edumazet@google.com>

A host might need net_secret[] and never open a single socket. 

Problem added in commit aebda156a570782
("net: defer net_secret[] initialization")

Based on prior patch from Hannes Frederic Sowa.

Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/secure_seq.h |    1 -
 net/core/secure_seq.c    |   27 ++++++++++++++++++++++++---
 net/ipv4/af_inet.c       |    4 +---
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index 6ca975b..c2e542b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 
-extern void net_secret_init(void);
 extern __u32 secure_ip_id(__be32 daddr);
 extern __u32 secure_ipv6_id(const __be32 daddr[4]);
 extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13c..3f1ec15 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
 
 #include <net/secure_seq.h>
 
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
 {
-	get_random_bytes(net_secret, sizeof(net_secret));
+	u32 tmp;
+	int i;
+
+	if (likely(net_secret[0]))
+		return;
+
+	for (i = NET_SECRET_SIZE; i > 0;) {
+		do {
+			get_random_bytes(&tmp, sizeof(tmp));
+		} while (!tmp);
+		cmpxchg(&net_secret[--i], 0, tmp);
+	}
 }
 
 #ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force __u32) daddr;
 	hash[1] = net_secret[13];
 	hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
 {
 	__u32 hash[4];
 
+	net_secret_init();
 	memcpy(hash, daddr, 16);
 	md5_transform(hash, net_secret);
 
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u64 seq;
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	u64 seq;
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + daddr[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b..cfeb85c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
 		get_random_bytes(&rnd, sizeof(rnd));
 	} while (rnd == 0);
 
-	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
 		get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-		net_secret_init();
-	}
 }
 EXPORT_SYMBOL(build_ehash_secret);
 

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH] net: net_secret should not depend on TCP
  2013-09-24 13:19           ` [PATCH] net: net_secret should not depend on TCP Eric Dumazet
@ 2013-09-24 15:13             ` Hannes Frederic Sowa
  2013-09-24 15:22               ` Eric Dumazet
  2013-09-24 23:51             ` Hannes Frederic Sowa
  2013-09-25  9:00             ` [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized Hannes Frederic Sowa
  2 siblings, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24 15:13 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, Sep 24, 2013 at 06:19:57AM -0700, Eric Dumazet wrote:
> -void net_secret_init(void)
> +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
> +
> +static void net_secret_init(void)
>  {
> -	get_random_bytes(net_secret, sizeof(net_secret));
> +	u32 tmp;
> +	int i;
> +
> +	if (likely(net_secret[0]))
> +		return;
> +
> +	for (i = NET_SECRET_SIZE; i > 0;) {
> +		do {
> +			get_random_bytes(&tmp, sizeof(tmp));
> +		} while (!tmp);

I am afraid we can block here on embedded systems in an atomic section? Is
this actually an issue? It does get called in a spin_lock_h.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH] net: net_secret should not depend on TCP
  2013-09-24 15:13             ` Hannes Frederic Sowa
@ 2013-09-24 15:22               ` Eric Dumazet
  2013-09-24 15:28                 ` Hannes Frederic Sowa
  0 siblings, 1 reply; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 15:22 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 17:13 +0200, Hannes Frederic Sowa wrote:
> On Tue, Sep 24, 2013 at 06:19:57AM -0700, Eric Dumazet wrote:
> > -void net_secret_init(void)
> > +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
> > +
> > +static void net_secret_init(void)
> >  {
> > -	get_random_bytes(net_secret, sizeof(net_secret));
> > +	u32 tmp;
> > +	int i;
> > +
> > +	if (likely(net_secret[0]))
> > +		return;
> > +
> > +	for (i = NET_SECRET_SIZE; i > 0;) {
> > +		do {
> > +			get_random_bytes(&tmp, sizeof(tmp));
> > +		} while (!tmp);
> 
> I am afraid we can block here on embedded systems in an atomic section? Is
> this actually an issue? It does get called in a spin_lock_h.

I do not see issues : get_random_bytes() is irq safe.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  8:32 ` David Laight
  2013-09-24 12:24   ` Eric Dumazet
@ 2013-09-24 15:22   ` Tom Herbert
  2013-09-24 15:29     ` Eric Dumazet
  2013-09-24 15:39     ` David Miller
  2013-09-24 16:38   ` Ben Hutchings
  2 siblings, 2 replies; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 15:22 UTC (permalink / raw)
  To: David Laight; +Cc: David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 1:32 AM, David Laight <David.Laight@aculab.com> wrote:
>> +static inline unsigned int
>> +toeplitz_hash(const unsigned char *bytes,
>> +           struct toeplitz *toeplitz, int n)
>> +{
>> +     int i;
>> +     unsigned int result = 0;
>> +
>> +     for (i = 0; i < n; i++)
>> +             result ^= toeplitz->key_cache[i][bytes[i]];
>> +
>> +        return result;
>> +};
>
> That is a horrid hash function to be calculating in software.
>
> The code looks very much like a simple 32bit CRC.
> It isn't entirely clears exactly where the 'key' gets included,
> but I suspect it is just xored with the data bytes.
>
Please google Toeplitz hash function to learn about the algorithm.
http://msdn.microsoft.com/en-us/library/windows/hardware/ff570725(v=vs.85).aspx

> Using in it hardware is probably fine - the hardware can do
> it cheaply (in dedicated logic) as the frame arrives.
> The CRC polynomial probably collapses to a few XOR operations
> when done byte by byte (the hdlc crc16 collapses to 3 levels
> of xor).
>
> IIRC jhash() works on 32bit quantities - so has far fewer
> maths operations and well as not having all the random data
> accesses (cache misses and displacing other parts of the
> working set from the cache).
>
Yes, but pretty much every NIC vendor implements Toeplitz hash and
provides it in their receive descriptor.  We use this value for
steering, and could use it for other uses like connection lookup.

> I also thought the hash was arranged so that tx and rx packets
> for a single connection hash to the same value?
>
ehashfn hashes consistently based on local and remote sides.
skb_get_rxhash orders the addresses and ports to make a consistent
hash in both directions. Presumably, this allows skb_get_rxhash to be
called from TX side to get same value of RX side, but when we get
rxhash from device (e.g. Toeplitz) this property is broken anyway.
Instead of jumping through this hoop, it might be better to have a
separate function from calculating RX hash for reverse path on TX.

Assuming skb_rx_hash does symmetric calculation is currently
incorrect.  For instance, looks like tun.c is trying to implement a
sort of 'flow director' logic to pair TX queues and RX queues using
skb_get_rxhash an expecting that the value is calculated
symmetrically.  If HW is providing RX hash, this is broken and we'll
never match the flows.  We could either recompute the hash in SW or
try to match HW hash.

Tom

>         David
>
>
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH] net: net_secret should not depend on TCP
  2013-09-24 15:22               ` Eric Dumazet
@ 2013-09-24 15:28                 ` Hannes Frederic Sowa
  2013-09-24 15:46                   ` Eric Dumazet
  0 siblings, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24 15:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, Sep 24, 2013 at 08:22:27AM -0700, Eric Dumazet wrote:
> On Tue, 2013-09-24 at 17:13 +0200, Hannes Frederic Sowa wrote:
> > On Tue, Sep 24, 2013 at 06:19:57AM -0700, Eric Dumazet wrote:
> > > -void net_secret_init(void)
> > > +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
> > > +
> > > +static void net_secret_init(void)
> > >  {
> > > -	get_random_bytes(net_secret, sizeof(net_secret));
> > > +	u32 tmp;
> > > +	int i;
> > > +
> > > +	if (likely(net_secret[0]))
> > > +		return;
> > > +
> > > +	for (i = NET_SECRET_SIZE; i > 0;) {
> > > +		do {
> > > +			get_random_bytes(&tmp, sizeof(tmp));
> > > +		} while (!tmp);
> > 
> > I am afraid we can block here on embedded systems in an atomic section? Is
> > this actually an issue? It does get called in a spin_lock_h.
> 
> I do not see issues : get_random_bytes() is irq safe.

But couldn't it be that get_random_bytes always returns 0 and we won't make
any progress here. Does the reseed happen from irq context or from softirqs? I
always thought it would be from a softirq (which could be blocked).

Thanks,

  Hannes

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 15:22   ` Tom Herbert
@ 2013-09-24 15:29     ` Eric Dumazet
  2013-09-24 15:39     ` David Miller
  1 sibling, 0 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 15:29 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Laight, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, 2013-09-24 at 08:22 -0700, Tom Herbert wrote:

> Assuming skb_rx_hash does symmetric calculation is currently
> incorrect.  For instance, looks like tun.c is trying to implement a
> sort of 'flow director' logic to pair TX queues and RX queues using
> skb_get_rxhash an expecting that the value is calculated
> symmetrically.  If HW is providing RX hash, this is broken and we'll
> never match the flows.  We could either recompute the hash in SW or
> try to match HW hash.

Its not incorrect, its an implementation choice.

Its software in linux, we do not have to care of how its done in
hardware.

This is done to reduce conntracking cost, in case RPS is used on a
router : Same cpu will process frames in both ways.

But conntracking does not 'rely' on rxhash being symmetric, thats an
optimization to have better data locality.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 15:22   ` Tom Herbert
  2013-09-24 15:29     ` Eric Dumazet
@ 2013-09-24 15:39     ` David Miller
  2013-09-24 15:54       ` Tom Herbert
  1 sibling, 1 reply; 50+ messages in thread
From: David Miller @ 2013-09-24 15:39 UTC (permalink / raw)
  To: therbert; +Cc: David.Laight, netdev, jesse.brandeburg

From: Tom Herbert <therbert@google.com>
Date: Tue, 24 Sep 2013 08:22:55 -0700

> We use this value for steering, and could use it for other uses like
> connection lookup.

For security reasons we absolutely cannot use it for that purpose,
please stop claiming this.

Any hash function which an attacker can reproduce is attackable.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH] net: net_secret should not depend on TCP
  2013-09-24 15:28                 ` Hannes Frederic Sowa
@ 2013-09-24 15:46                   ` Eric Dumazet
  0 siblings, 0 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 15:46 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 17:28 +0200, Hannes Frederic Sowa wrote:

> But couldn't it be that get_random_bytes always returns 0 and we won't make
> any progress here. Does the reseed happen from irq context or from softirqs? I
> always thought it would be from a softirq (which could be blocked).

Really if get_random_bytes() could return 0 forever I think we would
have a big problem with its implementation and documentation :

/*
 * This function is the exported kernel interface.  It returns some
 * number of good random numbers, suitable for key generation, seeding
 * TCP sequence numbers, etc.  It does not use the hw random number
 * generator, if available; use get_random_bytes_arch() for that.
 */
void get_random_bytes(void *buf, int nbytes)
{
        extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 15:39     ` David Miller
@ 2013-09-24 15:54       ` Tom Herbert
  2013-09-24 16:00         ` Hannes Frederic Sowa
                           ` (2 more replies)
  0 siblings, 3 replies; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 15:54 UTC (permalink / raw)
  To: David Miller; +Cc: David Laight, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Tue, 24 Sep 2013 08:22:55 -0700
>
>> We use this value for steering, and could use it for other uses like
>> connection lookup.
>
> For security reasons we absolutely cannot use it for that purpose,
> please stop claiming this.
>
> Any hash function which an attacker can reproduce is attackable.

The Toeplitz function uses a secret key whose length is based on the
input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
attacker can reproduce this if the key is random.  If the problem is
that devices are not being configured with a sufficiently random key
(some actually are using a fixed key :-( ), that's a separate issue
that should be addressed.  It is possible to DoS attack through the
steering mechanism.

Tom

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 15:54       ` Tom Herbert
@ 2013-09-24 16:00         ` Hannes Frederic Sowa
  2013-09-24 16:10         ` Eric Dumazet
  2013-09-24 18:03         ` David Miller
  2 siblings, 0 replies; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24 16:00 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, David Laight, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 08:54:24AM -0700, Tom Herbert wrote:
> On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
> > From: Tom Herbert <therbert@google.com>
> > Date: Tue, 24 Sep 2013 08:22:55 -0700
> >
> >> We use this value for steering, and could use it for other uses like
> >> connection lookup.
> >
> > For security reasons we absolutely cannot use it for that purpose,
> > please stop claiming this.
> >
> > Any hash function which an attacker can reproduce is attackable.
> 
> The Toeplitz function uses a secret key whose length is based on the
> input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
> attacker can reproduce this if the key is random.  If the problem is
> that devices are not being configured with a sufficiently random key
> (some actually are using a fixed key :-( ), that's a separate issue
> that should be addressed.  It is possible to DoS attack through the
> steering mechanism.

I agree, my first comment on the second patch was wrong. I did not assume that
the hashing function does seed itself. We also do not rehash the connection
tables. So if Eric's comments would be addressed its use could be fine.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  5:38       ` Eric Dumazet
  2013-09-24  5:45         ` Hannes Frederic Sowa
@ 2013-09-24 16:01         ` Hannes Frederic Sowa
  2013-09-24 16:14           ` Eric Dumazet
  1 sibling, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24 16:01 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Mon, Sep 23, 2013 at 10:38:38PM -0700, Eric Dumazet wrote:
> On Tue, 2013-09-24 at 05:35 +0200, Hannes Frederic Sowa wrote:
> 
> > > build_ehash_secret builds up the data which seeds fragmentation ids, ephermal
> > > port randomization etc. Could we drop the check of sock->type? I guess the
> > > idea was that in-kernel sockets of type raw/udp do not seed the keys when no
> > > entropy is available?
> > 
> > Would this be better (I checked inet_ehash_secret, ipv6_hash_secret
> > and net_secret to actual get initialized)?
> > 
> 
> inet_ehash_secret is used only to make jhash() for tcp ehash, not for
> fragmentation ids or other uses (port randomization).

inet(6)_ehashfn does get used by udp btw. But I do not understand the code,
yet. ;)

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 15:54       ` Tom Herbert
  2013-09-24 16:00         ` Hannes Frederic Sowa
@ 2013-09-24 16:10         ` Eric Dumazet
  2013-09-24 18:03         ` David Miller
  2 siblings, 0 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 16:10 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, David Laight, Linux Netdev List, Brandeburg, Jesse

On Tue, 2013-09-24 at 08:54 -0700, Tom Herbert wrote:

> The Toeplitz function uses a secret key whose length is based on the
> input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
> attacker can reproduce this if the key is random.  If the problem is
> that devices are not being configured with a sufficiently random key
> (some actually are using a fixed key :-( ), that's a separate issue
> that should be addressed.  It is possible to DoS attack through the
> steering mechanism.

Well, your patch would make sense [1] only if we could use hardware
assistance, but right now we have no idea of how safe are the existing
assistances.

[1] Computing Toeplitz in software is way more expensive than jhash.

Dos attack is quite simple right now, even without knowing if the target
uses or not steering.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 16:01         ` [PATCH 1/2] net: Toeplitz library functions Hannes Frederic Sowa
@ 2013-09-24 16:14           ` Eric Dumazet
  2013-09-24 16:35             ` Tom Herbert
  0 siblings, 1 reply; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 16:14 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 18:01 +0200, Hannes Frederic Sowa wrote:

> inet(6)_ehashfn does get used by udp btw. But I do not understand the code,
> yet. ;)

Oh well, thats the SO_REUSEPORT thing. We do not really care, thats only
used to try to steer udp flows on a given socket, but its a hint.

Presumably it should use a separate secret key.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-23 22:41 [PATCH 1/2] net: Toeplitz library functions Tom Herbert
  2013-09-24  0:03 ` Eric Dumazet
  2013-09-24  8:32 ` David Laight
@ 2013-09-24 16:32 ` Ben Hutchings
  2 siblings, 0 replies; 50+ messages in thread
From: Ben Hutchings @ 2013-09-24 16:32 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, jesse.brandeburg

On Mon, 2013-09-23 at 15:41 -0700, Tom Herbert wrote:
> Introduce Toeplitz hash functions. Toeplitz is a hash used primarily in
> NICs to performan RSS flow steering.  This is a software implemenation
> of that. In order to make the hash calculation efficient, we precompute
> the possible hash values for each inidividual byte of input. The input
> length is up to 40 bytes, so we make an array of cache[40][256].

The input length is up to 36 bytes, but we need an extra 31 bits in the
key so that each bit of input is multiplied by 32 bits of the key.  The
first dimension should therefore be 36.

But this is still pretty big. :-/

> The implemenation was verified against MSDN "Verify RSS hash" sample
> values.
[...]
> --- /dev/null
> +++ b/lib/toeplitz.c

I'm not convinced that this is going to be useful outside of net.

> @@ -0,0 +1,66 @@
> +/*
> + * Toeplitz hash implemenation. See include/linux/toeplitz.h
> + *
> + * Copyright (c) 2011, Tom Herbert <therbert@google.com>
> + */
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/random.h>
> +#include <linux/toeplitz.h>
> +
> +struct toeplitz *toeplitz_alloc(void)
> +{
> +	return kmalloc(sizeof(struct toeplitz), GFP_KERNEL);
> +}
> +
> +static u32 toeplitz_get_kval(unsigned char *key, int idx)

unsigned int idx

> +{
> +	u32 v, r;
> +	int off, rem;
> +
> +	off = idx / 8;
> +	rem = idx % 8;

Otherwise this requires a 'real' division if the compiler doesn't work
out that idx >= 0.

> +	v = (((unsigned int)key[off]) << 24) +
> +	    (((unsigned int)key[off + 1]) << 16) +
> +	    (((unsigned int)key[off + 2]) << 8) +
> +	    (((unsigned int)key[off + 3]));
> +
> +	r = v << rem | (unsigned int)key[off + 4] >> (8 - rem);
> +	return r;
> +}
> +
> +static inline int idx8(int idx)
> +{
> +#ifdef __LITTLE_ENDIAN
> +        idx = (idx / 8) * 8 + (8 - (idx % 8 + 1));

Or in short: idx ^= 7

> +#endif
> +        return idx;
> +}
> +
> +void toeplitz_init(struct toeplitz *toeplitz, u8 *key_vals)
> +{
> +	int i;
> +	unsigned long a, j;
> +	unsigned int result = 0;
> +
> +	/* Set up key val table */
> +	if (key_vals)
> +		for (i = 0; i < TOEPLITZ_KEY_LEN; i++)
> +			toeplitz->key_vals[i] = key_vals[i];
> +	else
> +		prandom_bytes(toeplitz->key_vals, TOEPLITZ_KEY_LEN);

Should have braces around the if and else bodies.

Is it worth sacrificing a little randomness here by avoiding long runs
of zeroes in the key?

> +       /* Set up key cache table */
> +       for (i = 0; i < TOEPLITZ_KEY_LEN; i++) {
[...]

As I said above, the outer dimension of key_cache should be 36.  The
current loop causes toeplitz_get_kval() to over-run the bounds of the
key_vals array.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 16:14           ` Eric Dumazet
@ 2013-09-24 16:35             ` Tom Herbert
  2013-09-24 16:46               ` Eric Dumazet
  0 siblings, 1 reply; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 16:35 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Hannes Frederic Sowa, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 9:14 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2013-09-24 at 18:01 +0200, Hannes Frederic Sowa wrote:
>
>> inet(6)_ehashfn does get used by udp btw. But I do not understand the code,
>> yet. ;)
>
> Oh well, thats the SO_REUSEPORT thing. We do not really care, thats only
> used to try to steer udp flows on a given socket, but its a hint.
>
> Presumably it should use a separate secret key.
>
>
We should really be using rxhash for that anyway, eliminate this
ehashfn.  This would entail adding rxhash argument in the various
udp_lookup functions.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24  8:32 ` David Laight
  2013-09-24 12:24   ` Eric Dumazet
  2013-09-24 15:22   ` Tom Herbert
@ 2013-09-24 16:38   ` Ben Hutchings
  2 siblings, 0 replies; 50+ messages in thread
From: Ben Hutchings @ 2013-09-24 16:38 UTC (permalink / raw)
  To: David Laight; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 09:32 +0100, David Laight wrote:
> > +static inline unsigned int
> > +toeplitz_hash(const unsigned char *bytes,
> > +	      struct toeplitz *toeplitz, int n)
> > +{
> > +	int i;
> > +	unsigned int result = 0;
> > +
> > +	for (i = 0; i < n; i++)
> > +		result ^= toeplitz->key_cache[i][bytes[i]];
> > +
> > +        return result;
> > +};
> 
> That is a horrid hash function to be calculating in software.
> 
> The code looks very much like a simple 32bit CRC.
> It isn't entirely clears exactly where the 'key' gets included,
> but I suspect it is just xored with the data bytes.

Each input bit is multiplied by a 32-bit slice of the key and the
products are then bitwise-summed (i.e. xored).

[...]
> I also thought the hash was arranged so that tx and rx packets
> for a single connection hash to the same value?

Not in general.  I think I saw that one of the BSDs (DragonflyBSD?)
generates Toeplitz keys with this property and has an efficient software
implementation that works for those keys.  But that significantly
weakens the hash.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 16:35             ` Tom Herbert
@ 2013-09-24 16:46               ` Eric Dumazet
  2013-09-24 17:02                 ` Ben Hutchings
  2013-09-24 17:03                 ` Tom Herbert
  0 siblings, 2 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 16:46 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Hannes Frederic Sowa, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, 2013-09-24 at 09:35 -0700, Tom Herbert wrote:

> We should really be using rxhash for that anyway, eliminate this
> ehashfn.  This would entail adding rxhash argument in the various
> udp_lookup functions.

Nope : Some NICs provide UDP rxhash only using L3  (source IP,
destination IP), not L4 (adding source & destination ports)

This is again a case where we want our own hashing.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 16:46               ` Eric Dumazet
@ 2013-09-24 17:02                 ` Ben Hutchings
  2013-09-24 17:03                 ` Tom Herbert
  1 sibling, 0 replies; 50+ messages in thread
From: Ben Hutchings @ 2013-09-24 17:02 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, Hannes Frederic Sowa, David Miller,
	Linux Netdev List, Brandeburg, Jesse

On Tue, 2013-09-24 at 09:46 -0700, Eric Dumazet wrote:
> On Tue, 2013-09-24 at 09:35 -0700, Tom Herbert wrote:
> 
> > We should really be using rxhash for that anyway, eliminate this
> > ehashfn.  This would entail adding rxhash argument in the various
> > udp_lookup functions.
> 
> Nope : Some NICs provide UDP rxhash only using L3  (source IP,
> destination IP), not L4 (adding source & destination ports)

Indeed the Microsoft RSS spec doesn't define an L4 hash for UDP/IP,
since it can't work with fragmented datagrams (which TCP avoids).  Using
the TCP/IP hash function for UDP is a non-standard option - presumably
useful where no fragmentation is expected.

Ben.

> This is again a case where we want our own hashing.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 16:46               ` Eric Dumazet
  2013-09-24 17:02                 ` Ben Hutchings
@ 2013-09-24 17:03                 ` Tom Herbert
  2013-09-24 17:34                   ` Eric Dumazet
  1 sibling, 1 reply; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 17:03 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Hannes Frederic Sowa, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 9:46 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2013-09-24 at 09:35 -0700, Tom Herbert wrote:
>
>> We should really be using rxhash for that anyway, eliminate this
>> ehashfn.  This would entail adding rxhash argument in the various
>> udp_lookup functions.
>
> Nope : Some NICs provide UDP rxhash only using L3  (source IP,
> destination IP), not L4 (adding source & destination ports)
>
Then the NIC won't set l4_rxhash and we'll rehash over 4-tuple when
skb_get_rxhash is called.

> This is again a case where we want our own hashing.
>
>
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 17:03                 ` Tom Herbert
@ 2013-09-24 17:34                   ` Eric Dumazet
  2013-09-24 17:37                     ` Rick Jones
  2013-09-24 18:02                     ` Tom Herbert
  0 siblings, 2 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 17:34 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Hannes Frederic Sowa, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, 2013-09-24 at 10:03 -0700, Tom Herbert wrote:
> On Tue, Sep 24, 2013 at 9:46 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Tue, 2013-09-24 at 09:35 -0700, Tom Herbert wrote:
> >
> >> We should really be using rxhash for that anyway, eliminate this
> >> ehashfn.  This would entail adding rxhash argument in the various
> >> udp_lookup functions.
> >
> > Nope : Some NICs provide UDP rxhash only using L3  (source IP,
> > destination IP), not L4 (adding source & destination ports)
> >
> Then the NIC won't set l4_rxhash and we'll rehash over 4-tuple when
> skb_get_rxhash is called.

Yes, but then in this case you add cpu cycles for no reason.

If you have multiqueue NIC, you do not use RPS/RFS, so skb->rxhash might
be 0

hash = inet_ehashfn(net, daddr, hnum, saddr, sport);

is faster than the whole flow dissection game.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 17:34                   ` Eric Dumazet
@ 2013-09-24 17:37                     ` Rick Jones
  2013-09-24 17:44                       ` Eric Dumazet
  2013-09-24 18:02                     ` Tom Herbert
  1 sibling, 1 reply; 50+ messages in thread
From: Rick Jones @ 2013-09-24 17:37 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, Hannes Frederic Sowa, David Miller,
	Linux Netdev List, Brandeburg, Jesse

On 09/24/2013 10:34 AM, Eric Dumazet wrote:
> If you have multiqueue NIC, you do not use RPS/RFS, so skb->rxhash might
> be 0

If one has a CPU count >> the NIC queue count, mightn't one still want 
to use RPS/RFS?

rick jones

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 17:37                     ` Rick Jones
@ 2013-09-24 17:44                       ` Eric Dumazet
  0 siblings, 0 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 17:44 UTC (permalink / raw)
  To: Rick Jones
  Cc: Tom Herbert, Hannes Frederic Sowa, David Miller,
	Linux Netdev List, Brandeburg, Jesse

On Tue, 2013-09-24 at 10:37 -0700, Rick Jones wrote:
> On 09/24/2013 10:34 AM, Eric Dumazet wrote:
> > If you have multiqueue NIC, you do not use RPS/RFS, so skb->rxhash might
> > be 0
> 
> If one has a CPU count >> the NIC queue count, mightn't one still want 
> to use RPS/RFS?

Maybe, but UDP stack should not assume anything.

By the way, there is a problem with RFS under stress, I am cooking a
patch to address this.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 17:34                   ` Eric Dumazet
  2013-09-24 17:37                     ` Rick Jones
@ 2013-09-24 18:02                     ` Tom Herbert
  2013-09-24 18:48                       ` David Miller
  2013-09-24 19:42                       ` Hannes Frederic Sowa
  1 sibling, 2 replies; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 18:02 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Hannes Frederic Sowa, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 10:34 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2013-09-24 at 10:03 -0700, Tom Herbert wrote:
>> On Tue, Sep 24, 2013 at 9:46 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> > On Tue, 2013-09-24 at 09:35 -0700, Tom Herbert wrote:
>> >
>> >> We should really be using rxhash for that anyway, eliminate this
>> >> ehashfn.  This would entail adding rxhash argument in the various
>> >> udp_lookup functions.
>> >
>> > Nope : Some NICs provide UDP rxhash only using L3  (source IP,
>> > destination IP), not L4 (adding source & destination ports)
>> >
>> Then the NIC won't set l4_rxhash and we'll rehash over 4-tuple when
>> skb_get_rxhash is called.
>
> Yes, but then in this case you add cpu cycles for no reason.
>
> If you have multiqueue NIC, you do not use RPS/RFS, so skb->rxhash might
> be 0
>
> hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
>
> is faster than the whole flow dissection game.
>
But if we already have valid l4_rxhash we're just wasting time
recomputing it (very like the same value).  So just do:

if (skb->l4_hash)
    hash = skb->rxhash
else
    hash = inet_ehashfn(net, daddr, hnum, saddr, sport);

Even if we go the inet_ehashfn route in the packet, it's makes sense
to store this in skb->rxhash so that subsequent functions don't need
to compute the hash

By the way, I believe there is an insidious in using the same hash
value or even related hash values in multiple places for steering.
Since we typically do something like hash % numqueues, we introduce
bias if multiple steering levels look at the same bits.  With
SO_REUSEPORT this might actually be beneficial since we could get port
selection to match RSS locality, but this is not by design!

>
>
>
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 15:54       ` Tom Herbert
  2013-09-24 16:00         ` Hannes Frederic Sowa
  2013-09-24 16:10         ` Eric Dumazet
@ 2013-09-24 18:03         ` David Miller
  2013-09-24 18:06           ` Tom Herbert
                             ` (2 more replies)
  2 siblings, 3 replies; 50+ messages in thread
From: David Miller @ 2013-09-24 18:03 UTC (permalink / raw)
  To: therbert; +Cc: David.Laight, netdev, jesse.brandeburg

From: Tom Herbert <therbert@google.com>
Date: Tue, 24 Sep 2013 08:54:24 -0700

> On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
>> From: Tom Herbert <therbert@google.com>
>> Date: Tue, 24 Sep 2013 08:22:55 -0700
>>
>>> We use this value for steering, and could use it for other uses like
>>> connection lookup.
>>
>> For security reasons we absolutely cannot use it for that purpose,
>> please stop claiming this.
>>
>> Any hash function which an attacker can reproduce is attackable.
> 
> The Toeplitz function uses a secret key whose length is based on the
> input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
> attacker can reproduce this if the key is random.  If the problem is
> that devices are not being configured with a sufficiently random key
> (some actually are using a fixed key :-( ), that's a separate issue
> that should be addressed.  It is possible to DoS attack through the
> steering mechanism.

All of them are using a fixed, defined, key.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:03         ` David Miller
@ 2013-09-24 18:06           ` Tom Herbert
  2013-09-24 18:10           ` Ben Hutchings
  2013-09-24 18:48           ` Jesse Brandeburg
  2 siblings, 0 replies; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 18:06 UTC (permalink / raw)
  To: David Miller; +Cc: David Laight, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 11:03 AM, David Miller <davem@redhat.com> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Tue, 24 Sep 2013 08:54:24 -0700
>
>> On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
>>> From: Tom Herbert <therbert@google.com>
>>> Date: Tue, 24 Sep 2013 08:22:55 -0700
>>>
>>>> We use this value for steering, and could use it for other uses like
>>>> connection lookup.
>>>
>>> For security reasons we absolutely cannot use it for that purpose,
>>> please stop claiming this.
>>>
>>> Any hash function which an attacker can reproduce is attackable.
>>
>> The Toeplitz function uses a secret key whose length is based on the
>> input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
>> attacker can reproduce this if the key is random.  If the problem is
>> that devices are not being configured with a sufficiently random key
>> (some actually are using a fixed key :-( ), that's a separate issue
>> that should be addressed.  It is possible to DoS attack through the
>> steering mechanism.
>
> All of them are using a fixed, defined, key.

bnx2x, at least, seems to being trying to set a random key:

prandom_bytes(params.rss_key, T_ETH_RSS_KEY * 4);

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:03         ` David Miller
  2013-09-24 18:06           ` Tom Herbert
@ 2013-09-24 18:10           ` Ben Hutchings
  2013-09-24 18:24             ` Tom Herbert
  2013-09-24 18:49             ` David Miller
  2013-09-24 18:48           ` Jesse Brandeburg
  2 siblings, 2 replies; 50+ messages in thread
From: Ben Hutchings @ 2013-09-24 18:10 UTC (permalink / raw)
  To: David Miller; +Cc: therbert, David.Laight, netdev, jesse.brandeburg

On Tue, 2013-09-24 at 14:03 -0400, David Miller wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Tue, 24 Sep 2013 08:54:24 -0700
> 
> > On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
> >> From: Tom Herbert <therbert@google.com>
> >> Date: Tue, 24 Sep 2013 08:22:55 -0700
> >>
> >>> We use this value for steering, and could use it for other uses like
> >>> connection lookup.
> >>
> >> For security reasons we absolutely cannot use it for that purpose,
> >> please stop claiming this.
> >>
> >> Any hash function which an attacker can reproduce is attackable.
> > 
> > The Toeplitz function uses a secret key whose length is based on the
> > input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
> > attacker can reproduce this if the key is random.  If the problem is
> > that devices are not being configured with a sufficiently random key
> > (some actually are using a fixed key :-( ), that's a separate issue
> > that should be addressed.  It is possible to DoS attack through the
> > steering mechanism.
> 
> All of them are using a fixed, defined, key.

This is certainly false, as I know sfc randomises the key.  And the
Microsoft RSS spec appears to require that the key is programmable.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:10           ` Ben Hutchings
@ 2013-09-24 18:24             ` Tom Herbert
  2013-09-24 19:14               ` Eric Dumazet
  2013-09-24 18:49             ` David Miller
  1 sibling, 1 reply; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 18:24 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: David Miller, David Laight, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 11:10 AM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Tue, 2013-09-24 at 14:03 -0400, David Miller wrote:
>> From: Tom Herbert <therbert@google.com>
>> Date: Tue, 24 Sep 2013 08:54:24 -0700
>>
>> > On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
>> >> From: Tom Herbert <therbert@google.com>
>> >> Date: Tue, 24 Sep 2013 08:22:55 -0700
>> >>
>> >>> We use this value for steering, and could use it for other uses like
>> >>> connection lookup.
>> >>
>> >> For security reasons we absolutely cannot use it for that purpose,
>> >> please stop claiming this.
>> >>
>> >> Any hash function which an attacker can reproduce is attackable.
>> >
>> > The Toeplitz function uses a secret key whose length is based on the
>> > input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
>> > attacker can reproduce this if the key is random.  If the problem is
>> > that devices are not being configured with a sufficiently random key
>> > (some actually are using a fixed key :-( ), that's a separate issue
>> > that should be addressed.  It is possible to DoS attack through the
>> > steering mechanism.
>>
>> All of them are using a fixed, defined, key.
>
> This is certainly false, as I know sfc randomises the key.  And the
> Microsoft RSS spec appears to require that the key is programmable.
>
There are some drivers are using fixed keys.  This is not good.  I'll
take a look at those.  It should be sufficient to randomly set the key
once at initialization.


> Ben.
>
> --
> Ben Hutchings, Staff Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:02                     ` Tom Herbert
@ 2013-09-24 18:48                       ` David Miller
  2013-09-24 19:42                       ` Hannes Frederic Sowa
  1 sibling, 0 replies; 50+ messages in thread
From: David Miller @ 2013-09-24 18:48 UTC (permalink / raw)
  To: therbert; +Cc: eric.dumazet, hannes, netdev, jesse.brandeburg

From: Tom Herbert <therbert@google.com>
Date: Tue, 24 Sep 2013 11:02:11 -0700

> But if we already have valid l4_rxhash we're just wasting time
> recomputing it (very like the same value).  So just do:

Honestly, you're arguing over 12 or so cycles.  That's about how much
jhash costs.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:03         ` David Miller
  2013-09-24 18:06           ` Tom Herbert
  2013-09-24 18:10           ` Ben Hutchings
@ 2013-09-24 18:48           ` Jesse Brandeburg
  2013-09-24 19:04             ` Tom Herbert
  2 siblings, 1 reply; 50+ messages in thread
From: Jesse Brandeburg @ 2013-09-24 18:48 UTC (permalink / raw)
  To: David Miller; +Cc: therbert, David.Laight, netdev

On Tue, 24 Sep 2013 14:03:12 -0400 David Miller <davem@redhat.com> wrote:
...
> >> For security reasons we absolutely cannot use it for that purpose,
> >> please stop claiming this.
> >>
> >> Any hash function which an attacker can reproduce is attackable.
> > 
...
> > that should be addressed.  It is possible to DoS attack through the
> > steering mechanism.
> 
> All of them are using a fixed, defined, key.

We selected the fixed key on purpose.  The existing mechanisms built
into the stack for preventing the impact of DOS attacks like NAPI
polling will prevent any actual damage even if someone sends lots of
packets on a single flow.  If someone overflows a receive queue that
CPU runs until it can't keep up and then hardware drops further
packets.  In this case even with a randomized seed key any single flow
can still be targeted at a queue, which is no different than a single
queue adapter.

I'm not convinced there is an actual impact in practice.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:10           ` Ben Hutchings
  2013-09-24 18:24             ` Tom Herbert
@ 2013-09-24 18:49             ` David Miller
  1 sibling, 0 replies; 50+ messages in thread
From: David Miller @ 2013-09-24 18:49 UTC (permalink / raw)
  To: bhutchings; +Cc: therbert, David.Laight, netdev, jesse.brandeburg

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 24 Sep 2013 19:10:45 +0100

> On Tue, 2013-09-24 at 14:03 -0400, David Miller wrote:
>> From: Tom Herbert <therbert@google.com>
>> Date: Tue, 24 Sep 2013 08:54:24 -0700
>> 
>> > On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
>> >> From: Tom Herbert <therbert@google.com>
>> >> Date: Tue, 24 Sep 2013 08:22:55 -0700
>> >>
>> >>> We use this value for steering, and could use it for other uses like
>> >>> connection lookup.
>> >>
>> >> For security reasons we absolutely cannot use it for that purpose,
>> >> please stop claiming this.
>> >>
>> >> Any hash function which an attacker can reproduce is attackable.
>> > 
>> > The Toeplitz function uses a secret key whose length is based on the
>> > input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
>> > attacker can reproduce this if the key is random.  If the problem is
>> > that devices are not being configured with a sufficiently random key
>> > (some actually are using a fixed key :-( ), that's a separate issue
>> > that should be addressed.  It is possible to DoS attack through the
>> > steering mechanism.
>> 
>> All of them are using a fixed, defined, key.
> 
> This is certainly false, as I know sfc randomises the key.  And the
> Microsoft RSS spec appears to require that the key is programmable.

Then I stand corrected.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:48           ` Jesse Brandeburg
@ 2013-09-24 19:04             ` Tom Herbert
  0 siblings, 0 replies; 50+ messages in thread
From: Tom Herbert @ 2013-09-24 19:04 UTC (permalink / raw)
  To: Jesse Brandeburg; +Cc: David Miller, David Laight, Linux Netdev List

On Tue, Sep 24, 2013 at 11:48 AM, Jesse Brandeburg
<jesse.brandeburg@intel.com> wrote:
> On Tue, 24 Sep 2013 14:03:12 -0400 David Miller <davem@redhat.com> wrote:
> ...
>> >> For security reasons we absolutely cannot use it for that purpose,
>> >> please stop claiming this.
>> >>
>> >> Any hash function which an attacker can reproduce is attackable.
>> >
> ...
>> > that should be addressed.  It is possible to DoS attack through the
>> > steering mechanism.
>>
>> All of them are using a fixed, defined, key.
>
> We selected the fixed key on purpose.  The existing mechanisms built
> into the stack for preventing the impact of DOS attacks like NAPI
> polling will prevent any actual damage even if someone sends lots of
> packets on a single flow.  If someone overflows a receive queue that
> CPU runs until it can't keep up and then hardware drops further
> packets.  In this case even with a randomized seed key any single flow
> can still be targeted at a queue, which is no different than a single
> queue adapter.
>
> I'm not convinced there is an actual impact in practice.
>
But what is the purpose of using a fixed key, what are the benefits?
A clever attacker could attack a specific queue with different
4-tuples.  While this might be similar to a single flow attack, it
will be much harder to detect and be used to attack multiple servers
simultaneously in the say way (since they all share the same key).  I
don't think we could deploy a NIC with a static RSS key.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:24             ` Tom Herbert
@ 2013-09-24 19:14               ` Eric Dumazet
  0 siblings, 0 replies; 50+ messages in thread
From: Eric Dumazet @ 2013-09-24 19:14 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Ben Hutchings, David Miller, David Laight, Linux Netdev List,
	Brandeburg, Jesse

On Tue, 2013-09-24 at 11:24 -0700, Tom Herbert wrote:
> >
> There are some drivers are using fixed keys.  This is not good.  I'll
> take a look at those.  It should be sufficient to randomly set the key
> once at initialization.
> 

If the software fallback (we need this for dumb NIC, or encapsulated
traffic) is more expensive than jhash, I do not think we will ever use
this Toeplitz thing.

We can not use jhash or Toeplitz.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2] net: Toeplitz library functions
  2013-09-24 18:02                     ` Tom Herbert
  2013-09-24 18:48                       ` David Miller
@ 2013-09-24 19:42                       ` Hannes Frederic Sowa
  1 sibling, 0 replies; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24 19:42 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Eric Dumazet, David Miller, Linux Netdev List, Brandeburg, Jesse

On Tue, Sep 24, 2013 at 11:02:11AM -0700, Tom Herbert wrote:
> On Tue, Sep 24, 2013 at 10:34 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Tue, 2013-09-24 at 10:03 -0700, Tom Herbert wrote:
> >> On Tue, Sep 24, 2013 at 9:46 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >> > On Tue, 2013-09-24 at 09:35 -0700, Tom Herbert wrote:
> >> >
> >> >> We should really be using rxhash for that anyway, eliminate this
> >> >> ehashfn.  This would entail adding rxhash argument in the various
> >> >> udp_lookup functions.
> >> >
> >> > Nope : Some NICs provide UDP rxhash only using L3  (source IP,
> >> > destination IP), not L4 (adding source & destination ports)
> >> >
> >> Then the NIC won't set l4_rxhash and we'll rehash over 4-tuple when
> >> skb_get_rxhash is called.
> >
> > Yes, but then in this case you add cpu cycles for no reason.
> >
> > If you have multiqueue NIC, you do not use RPS/RFS, so skb->rxhash might
> > be 0
> >
> > hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
> >
> > is faster than the whole flow dissection game.
> >
> But if we already have valid l4_rxhash we're just wasting time
> recomputing it (very like the same value).  So just do:
> 
> if (skb->l4_hash)
>     hash = skb->rxhash
> else
>     hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
> 
> Even if we go the inet_ehashfn route in the packet, it's makes sense
> to store this in skb->rxhash so that subsequent functions don't need
> to compute the hash
> 
> By the way, I believe there is an insidious in using the same hash
> value or even related hash values in multiple places for steering.
> Since we typically do something like hash % numqueues, we introduce
> bias if multiple steering levels look at the same bits.  With
> SO_REUSEPORT this might actually be beneficial since we could get port
> selection to match RSS locality, but this is not by design!

Wouldn't this also need infrastructure to sync the keys over multiple network
cards to have best benefits?

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH] net: net_secret should not depend on TCP
  2013-09-24 13:19           ` [PATCH] net: net_secret should not depend on TCP Eric Dumazet
  2013-09-24 15:13             ` Hannes Frederic Sowa
@ 2013-09-24 23:51             ` Hannes Frederic Sowa
  2013-09-28 22:20               ` David Miller
  2013-09-25  9:00             ` [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized Hannes Frederic Sowa
  2 siblings, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-24 23:51 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg

On Tue, Sep 24, 2013 at 06:19:57AM -0700, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> A host might need net_secret[] and never open a single socket. 
> 
> Problem added in commit aebda156a570782
> ("net: defer net_secret[] initialization")
> 
> Based on prior patch from Hannes Frederic Sowa.
> 
> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

FWIW, I reviewed that the loop indeed cannot block and gave the patch a
quick test drive. Also, the ipv6_hash_secret is only used by the ehash
functions. So this is a superior patch than mine:

Acked-by: Hannes Frederic Sowa <hannes@strressinduktion.org>

Thanks, Eric!

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
  2013-09-24 13:19           ` [PATCH] net: net_secret should not depend on TCP Eric Dumazet
  2013-09-24 15:13             ` Hannes Frederic Sowa
  2013-09-24 23:51             ` Hannes Frederic Sowa
@ 2013-09-25  9:00             ` Hannes Frederic Sowa
  2013-09-25 12:06               ` Eric Dumazet
  2013-10-02 15:10               ` Theodore Ts'o
  2 siblings, 2 replies; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-25  9:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, davem, netdev, jesse.brandeburg, tytso, linux-kernel

On Tue, Sep 24, 2013 at 06:19:57AM -0700, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> A host might need net_secret[] and never open a single socket. 
> 
> Problem added in commit aebda156a570782
> ("net: defer net_secret[] initialization")
> 
> Based on prior patch from Hannes Frederic Sowa.
> 
> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Perhaps we can even do a bit better? This patch is a RFC and I could split the
random and network parts if needed.

[PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized

We want to use good entropy for initializing the secret keys used for
hashing in the core network stack. So busy wait before extracting random
data until the nonblocking_pool is initialized.

Further entropy is also gathered by interrupts, so we are guaranteed to
make progress here.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 drivers/char/random.c  | 18 ++++++++++++++++++
 include/linux/random.h |  1 +
 net/core/secure_seq.c  |  3 ++-
 net/ipv4/af_inet.c     |  2 +-
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7737b5b..50e8030 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1058,6 +1058,24 @@ void get_random_bytes(void *buf, int nbytes)
 EXPORT_SYMBOL(get_random_bytes);
 
 /*
+ * Busy loop until the nonblocking_pool is intialized and return
+ * random data in buf of size nbytes.
+ *
+ * This is used by the network stack to defer the extraction of
+ * entropy from the nonblocking_pool until the pool is initialized.
+ *
+ * We need to busy loop here, because we could be called from an
+ * atomic section.
+ */
+void get_random_bytes_busy_wait_initialized(void *buf, int nbytes)
+{
+	while (!nonblocking_pool.initialized)
+		cpu_relax();
+	get_random_bytes(buf, nbytes);
+}
+EXPORT_SYMBOL(get_random_bytes_busy_wait_initialized);
+
+/*
  * This function will use the architecture-specific hardware random
  * number generator if it is available.  The arch-specific hw RNG will
  * almost certainly be faster than what we can do in software, but it
diff --git a/include/linux/random.h b/include/linux/random.h
index 3b9377d..0b7e7dd 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -15,6 +15,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
+void get_random_bytes_busy_wait_initialized(void *buf, int nbbytes);
 extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
 
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 3f1ec15..ac55cb7 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -24,7 +24,8 @@ static void net_secret_init(void)
 
 	for (i = NET_SECRET_SIZE; i > 0;) {
 		do {
-			get_random_bytes(&tmp, sizeof(tmp));
+			get_random_bytes_busy_wait_initialized(&tmp,
+							       sizeof(tmp));
 		} while (!tmp);
 		cmpxchg(&net_secret[--i], 0, tmp);
 	}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cfeb85c..3edd277 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -260,7 +260,7 @@ void build_ehash_secret(void)
 	u32 rnd;
 
 	do {
-		get_random_bytes(&rnd, sizeof(rnd));
+		get_random_bytes_busy_wait_initialized(&rnd, sizeof(rnd));
 	} while (rnd == 0);
 
 	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
  2013-09-25  9:00             ` [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized Hannes Frederic Sowa
@ 2013-09-25 12:06               ` Eric Dumazet
  2013-09-25 13:35                 ` Hannes Frederic Sowa
  2013-10-02 15:10               ` Theodore Ts'o
  1 sibling, 1 reply; 50+ messages in thread
From: Eric Dumazet @ 2013-09-25 12:06 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, davem, netdev, jesse.brandeburg, tytso, linux-kernel

On Wed, 2013-09-25 at 11:00 +0200, Hannes Frederic Sowa wrote:

>  /*
> + * Busy loop until the nonblocking_pool is intialized and return
> + * random data in buf of size nbytes.
> + *
> + * This is used by the network stack to defer the extraction of
> + * entropy from the nonblocking_pool until the pool is initialized.
> + *
> + * We need to busy loop here, because we could be called from an
> + * atomic section.
> + */
> +void get_random_bytes_busy_wait_initialized(void *buf, int nbytes)
> +{
> +	while (!nonblocking_pool.initialized)
> +		cpu_relax();
> +	get_random_bytes(buf, nbytes);
> +}

No idea if this can work if called from IRQ context.

How is nonblocking_poll initialized if host has a single cpu ?




^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
  2013-09-25 12:06               ` Eric Dumazet
@ 2013-09-25 13:35                 ` Hannes Frederic Sowa
  0 siblings, 0 replies; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-09-25 13:35 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, davem, netdev, jesse.brandeburg, tytso, linux-kernel

On Wed, Sep 25, 2013 at 05:06:50AM -0700, Eric Dumazet wrote:
> On Wed, 2013-09-25 at 11:00 +0200, Hannes Frederic Sowa wrote:
> 
> >  /*
> > + * Busy loop until the nonblocking_pool is intialized and return
> > + * random data in buf of size nbytes.
> > + *
> > + * This is used by the network stack to defer the extraction of
> > + * entropy from the nonblocking_pool until the pool is initialized.
> > + *
> > + * We need to busy loop here, because we could be called from an
> > + * atomic section.
> > + */
> > +void get_random_bytes_busy_wait_initialized(void *buf, int nbytes)
> > +{
> > +	while (!nonblocking_pool.initialized)
> > +		cpu_relax();
> > +	get_random_bytes(buf, nbytes);
> > +}
> 
> No idea if this can work if called from IRQ context.
> 
> How is nonblocking_poll initialized if host has a single cpu ?

We increase the entropy_count and can initialize the random pool from
handle_irq_event.

We can document that it should not get called from irq context and
maybe add a WARN_ON(irqs_disabled())?


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH] net: net_secret should not depend on TCP
  2013-09-24 23:51             ` Hannes Frederic Sowa
@ 2013-09-28 22:20               ` David Miller
  0 siblings, 0 replies; 50+ messages in thread
From: David Miller @ 2013-09-28 22:20 UTC (permalink / raw)
  To: hannes; +Cc: eric.dumazet, therbert, netdev, jesse.brandeburg

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 25 Sep 2013 01:51:47 +0200

> On Tue, Sep 24, 2013 at 06:19:57AM -0700, Eric Dumazet wrote:
>> From: Eric Dumazet <edumazet@google.com>
>> 
>> A host might need net_secret[] and never open a single socket. 
>> 
>> Problem added in commit aebda156a570782
>> ("net: defer net_secret[] initialization")
>> 
>> Based on prior patch from Hannes Frederic Sowa.
>> 
>> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
> 
> FWIW, I reviewed that the loop indeed cannot block and gave the patch a
> quick test drive. Also, the ipv6_hash_secret is only used by the ehash
> functions. So this is a superior patch than mine:
> 
> Acked-by: Hannes Frederic Sowa <hannes@strressinduktion.org>

Applied and queued up for -stable, thanks.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
  2013-09-25  9:00             ` [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized Hannes Frederic Sowa
  2013-09-25 12:06               ` Eric Dumazet
@ 2013-10-02 15:10               ` Theodore Ts'o
  2013-10-02 17:18                 ` Hannes Frederic Sowa
  1 sibling, 1 reply; 50+ messages in thread
From: Theodore Ts'o @ 2013-10-02 15:10 UTC (permalink / raw)
  To: Eric Dumazet, Tom Herbert, davem, netdev, jesse.brandeburg, linux-kernel

On Wed, Sep 25, 2013 at 11:00:34AM +0200, Hannes Frederic Sowa wrote:
> [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
> 
> We want to use good entropy for initializing the secret keys used for
> hashing in the core network stack. So busy wait before extracting random
> data until the nonblocking_pool is initialized.
> 
> Further entropy is also gathered by interrupts, so we are guaranteed to
> make progress here.

One thing that makes me a bit worried is that on certain
architectures, it may take quite a while before we get enough entropy
such that the non-blocking pool gets initialized.

Speaking more generally, there are many different reasons why
different parts of the kernel needs randomness.  I've found a number
of places (mostly in various file systems so far because I know that
subsystem better) because we are trying to use a random number
generator with a higher level of guarantees than what was really
required.

What's not completely clear to me is what's the potential danger if
build_ehash_secret() is initialized with a value that might be known
to an adversary.  I'll note that inet_ehash_secret() is a 32-bit uint.
A 32-bit number isn't all that hard for an adversary to brute force.
If the answer is there's now oracle that can be used so an adversary
can tell whether or not they have correctly figured out the ehash
secret, then it's not that clear that it's worth blocking until the
urandom pool has 128 bits of entropy, when ehash_secret is only a
32-bit value.

Speaking even more generally, any time you have subsystems grabbing
random entropy very shortly after boot, especially if it's less than
64 bits, it's really good idea of the secret gets periodically
rekeyed.  I understand why that may be hard in this case, since it
would require rehashing all of the currently open sockets, and maybe
in this case the security requirements are such that it's not really
necessary.  But it's something we should definitely keep in mind for
situations where we are generating random session keys for CIFS,
ipsec, etc.

Regards,

						- Ted

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
  2013-10-02 15:10               ` Theodore Ts'o
@ 2013-10-02 17:18                 ` Hannes Frederic Sowa
  2013-10-02 19:40                   ` Theodore Ts'o
  0 siblings, 1 reply; 50+ messages in thread
From: Hannes Frederic Sowa @ 2013-10-02 17:18 UTC (permalink / raw)
  To: Theodore Ts'o, Eric Dumazet, Tom Herbert, davem, netdev,
	jesse.brandeburg, linux-kernel

Hi!

Thanks for looking into this!

On Wed, Oct 02, 2013 at 11:10:18AM -0400, Theodore Ts'o wrote:
> On Wed, Sep 25, 2013 at 11:00:34AM +0200, Hannes Frederic Sowa wrote:
> > [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
> > 
> > We want to use good entropy for initializing the secret keys used for
> > hashing in the core network stack. So busy wait before extracting random
> > data until the nonblocking_pool is initialized.
> > 
> > Further entropy is also gathered by interrupts, so we are guaranteed to
> > make progress here.
> 
> One thing that makes me a bit worried is that on certain
> architectures, it may take quite a while before we get enough entropy
> such that the non-blocking pool gets initialized.

Yes, I understand. My main concern is that we would feed instruction
addresses of limited randomness into the entropy pool while busy looping
until the pool is initialized on uni-processor machines. It would only be
helpful on multiprocessor machines I guess.

So, I am currently a bit skeptic if this change does improve things and
have to think about this again.

> Speaking more generally, there are many different reasons why
> different parts of the kernel needs randomness.  I've found a number
> of places (mostly in various file systems so far because I know that
> subsystem better) because we are trying to use a random number
> generator with a higher level of guarantees than what was really
> required.
> 
> What's not completely clear to me is what's the potential danger if
> build_ehash_secret() is initialized with a value that might be known
> to an adversary.  I'll note that inet_ehash_secret() is a 32-bit uint.
> A 32-bit number isn't all that hard for an adversary to brute force.
> If the answer is there's now oracle that can be used so an adversary
> can tell whether or not they have correctly figured out the ehash
> secret, then it's not that clear that it's worth blocking until the
> urandom pool has 128 bits of entropy, when ehash_secret is only a
> 32-bit value.

It may be possible to find multicollisions if the hash is known which could
lead to a DoS against the hash table if the lookups become linear (and someone
finds a fast way to do this for jash in this case, also depending if one
hashes variable or static size data). So this is minor issue currently.

But this is not my main concern:

We currently initialize the syncookie secrets pretty early on boot-up:
http://lxr.free-electrons.com/source/net/ipv4/syncookies.c#L31

Because of this problem I came up with another solution. My plan is to
introduce a macro 'net_get_random_once' which could be used to initialize
secret keys used for hashing as late as possible:
http://patchwork.ozlabs.org/patch/278308/

David Miller suggested that I should use static_keys to reduce overhead in the
fast-path and I am currently testing this change. I'll send it for review
maybe tomorrow. Otherwise I have to come up with another solution, maybe only
specific for syncookies in the beginning.

> Speaking even more generally, any time you have subsystems grabbing
> random entropy very shortly after boot, especially if it's less than
> 64 bits, it's really good idea of the secret gets periodically
> rekeyed.  I understand why that may be hard in this case, since it
> would require rehashing all of the currently open sockets, and maybe
> in this case the security requirements are such that it's not really
> necessary.  But it's something we should definitely keep in mind for
> situations where we are generating random session keys for CIFS,
> ipsec, etc.

I agree. I will look if this is easily possible for secure_seq and
syncookies but depending on the data structure and its size it is a much
harder thing to do. I wanted to try the low-hanging fruits first. ;)

Thanks,

  Hannes


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized
  2013-10-02 17:18                 ` Hannes Frederic Sowa
@ 2013-10-02 19:40                   ` Theodore Ts'o
  0 siblings, 0 replies; 50+ messages in thread
From: Theodore Ts'o @ 2013-10-02 19:40 UTC (permalink / raw)
  To: Eric Dumazet, Tom Herbert, davem, netdev, jesse.brandeburg, linux-kernel

On Wed, Oct 02, 2013 at 07:18:40PM +0200, Hannes Frederic Sowa wrote:
> 
> I agree. I will look if this is easily possible for secure_seq and
> syncookies but depending on the data structure and its size it is a much
> harder thing to do. I wanted to try the low-hanging fruits first. ;)

To use syncookies as an example, you shouldn't need to store all of
the old syncookies.  Instead, if every 10 minutes or so, you rekey,
and you keep both the old and the new secrets around, you could just
simply check an incoming TCP packet using first the new key, and then
the old key.  During the transition window it would take a wee bit
more CPU time, but most of the time, it wouldn't cost anything extra
in CPU time, and the only extra cost is the space for the old key.

>From a security perspective it would be much better if we tried to
make all of the places where we draw randomness and somehow try to
rekey on a periodic basis.  That way, even if the initial value isn't
super-secure, that situation will heal itself fairly rapidly.  And it
also means that even if an adversary can brute-force break a 32-bit
secret, they would have to do so within 5 or 10 minutes in order for
it to be useful, and even if they could, it would only be useful for a
short window of time.

I know it won't always be possible, but to the extent that we can do
this, it would be a big improvement from a security perspective.

Cheers,

							- Ted

^ permalink raw reply	[flat|nested] 50+ messages in thread

end of thread, other threads:[~2013-10-02 19:40 UTC | newest]

Thread overview: 50+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-23 22:41 [PATCH 1/2] net: Toeplitz library functions Tom Herbert
2013-09-24  0:03 ` Eric Dumazet
2013-09-24  1:39   ` David Miller
2013-09-24  2:30   ` Hannes Frederic Sowa
2013-09-24  3:35     ` Hannes Frederic Sowa
2013-09-24  5:38       ` Eric Dumazet
2013-09-24  5:45         ` Hannes Frederic Sowa
2013-09-24 13:19           ` [PATCH] net: net_secret should not depend on TCP Eric Dumazet
2013-09-24 15:13             ` Hannes Frederic Sowa
2013-09-24 15:22               ` Eric Dumazet
2013-09-24 15:28                 ` Hannes Frederic Sowa
2013-09-24 15:46                   ` Eric Dumazet
2013-09-24 23:51             ` Hannes Frederic Sowa
2013-09-28 22:20               ` David Miller
2013-09-25  9:00             ` [PATCH RFC] random: introduce get_random_bytes_busy_wait_initialized Hannes Frederic Sowa
2013-09-25 12:06               ` Eric Dumazet
2013-09-25 13:35                 ` Hannes Frederic Sowa
2013-10-02 15:10               ` Theodore Ts'o
2013-10-02 17:18                 ` Hannes Frederic Sowa
2013-10-02 19:40                   ` Theodore Ts'o
2013-09-24 16:01         ` [PATCH 1/2] net: Toeplitz library functions Hannes Frederic Sowa
2013-09-24 16:14           ` Eric Dumazet
2013-09-24 16:35             ` Tom Herbert
2013-09-24 16:46               ` Eric Dumazet
2013-09-24 17:02                 ` Ben Hutchings
2013-09-24 17:03                 ` Tom Herbert
2013-09-24 17:34                   ` Eric Dumazet
2013-09-24 17:37                     ` Rick Jones
2013-09-24 17:44                       ` Eric Dumazet
2013-09-24 18:02                     ` Tom Herbert
2013-09-24 18:48                       ` David Miller
2013-09-24 19:42                       ` Hannes Frederic Sowa
2013-09-24  8:32 ` David Laight
2013-09-24 12:24   ` Eric Dumazet
2013-09-24 15:22   ` Tom Herbert
2013-09-24 15:29     ` Eric Dumazet
2013-09-24 15:39     ` David Miller
2013-09-24 15:54       ` Tom Herbert
2013-09-24 16:00         ` Hannes Frederic Sowa
2013-09-24 16:10         ` Eric Dumazet
2013-09-24 18:03         ` David Miller
2013-09-24 18:06           ` Tom Herbert
2013-09-24 18:10           ` Ben Hutchings
2013-09-24 18:24             ` Tom Herbert
2013-09-24 19:14               ` Eric Dumazet
2013-09-24 18:49             ` David Miller
2013-09-24 18:48           ` Jesse Brandeburg
2013-09-24 19:04             ` Tom Herbert
2013-09-24 16:38   ` Ben Hutchings
2013-09-24 16:32 ` Ben Hutchings

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.