netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched
@ 2015-03-01 22:09 Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 1/6] net: Add skb_get_hash_perturb Tom Herbert
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

Replace instance of calling flow_dissect and jhash with call to
skb_get_hash. This also eliminates use of flow_keys in skb->cb[].

Tom Herbert (6):
  net: Add skb_get_hash_perturb
  sched: Eliminate use of flow_keys in sch_choke
  sched: Eliminate use of flow_keys in sch_fq_codel
  sched: Eliminate use of flow_keys in sch_hhf
  sched: Eliminate use of flow_keys in sch_sfb
  sched: Eliminate use of flow_keys in sch_sfq

 include/linux/skbuff.h   | 15 +++++++++++++++
 net/sched/sch_choke.c    | 32 ++------------------------------
 net/sched/sch_fq_codel.c | 11 ++---------
 net/sched/sch_hhf.c      | 19 +------------------
 net/sched/sch_sfb.c      | 24 ++++++++----------------
 net/sched/sch_sfq.c      | 29 +++--------------------------
 6 files changed, 31 insertions(+), 99 deletions(-)

-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next 1/6] net: Add skb_get_hash_perturb
  2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
@ 2015-03-01 22:09 ` Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 2/6] sched: Eliminate use of flow_keys in sch_choke Tom Herbert
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

This is used to get the skb->hash and then perturb it for a local use.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/skbuff.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d898b32..48c1978 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/kmemcheck.h>
 #include <linux/compiler.h>
+#include <linux/jhash.h>
 #include <linux/time.h>
 #include <linux/bug.h>
 #include <linux/cache.h>
@@ -922,6 +923,20 @@ static inline __u32 skb_get_hash(struct sk_buff *skb)
 	return skb->hash;
 }
 
+static inline __u32 skb_get_hash_perturb(struct sk_buff *skb,
+					 u32 perturb)
+{
+	u32 hash = skb_get_hash(skb);
+
+	if (likely(hash)) {
+		hash = jhash_1word((__force __u32) hash, perturb);
+		if (unlikely(!hash))
+			hash = 1;
+	}
+
+	return hash;
+}
+
 static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
 {
 	return skb->hash;
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 2/6] sched: Eliminate use of flow_keys in sch_choke
  2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 1/6] net: Add skb_get_hash_perturb Tom Herbert
@ 2015-03-01 22:09 ` Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel Tom Herbert
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

In choke_match_flow compare skb_get_hash values for the skbuffs
instead of explicitly matching keys after calling flow_dissector.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 net/sched/sch_choke.c | 32 ++------------------------------
 1 file changed, 2 insertions(+), 30 deletions(-)

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9..8faa375 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,6 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
-#include <net/flow_keys.h>
 
 /*
    CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +132,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
 	--sch->q.qlen;
 }
 
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
 struct choke_skb_cb {
 	u16			classid;
-	u8			keys_valid;
-	u8			keys[QDISC_CB_PRIV_LEN - 3];
 };
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -169,26 +160,8 @@ static u16 choke_get_classid(const struct sk_buff *skb)
 static bool choke_match_flow(struct sk_buff *skb1,
 			     struct sk_buff *skb2)
 {
-	struct flow_keys temp;
-
-	if (skb1->protocol != skb2->protocol)
-		return false;
-
-	if (!choke_skb_cb(skb1)->keys_valid) {
-		choke_skb_cb(skb1)->keys_valid = 1;
-		skb_flow_dissect(skb1, &temp);
-		memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
-	}
-
-	if (!choke_skb_cb(skb2)->keys_valid) {
-		choke_skb_cb(skb2)->keys_valid = 1;
-		skb_flow_dissect(skb2, &temp);
-		memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
-	}
-
-	return !memcmp(&choke_skb_cb(skb1)->keys,
-		       &choke_skb_cb(skb2)->keys,
-		       CHOKE_K_LEN);
+	return (skb1->protocol == skb2->protocol &&
+		skb_get_hash(skb1) == skb_get_hash(skb2));
 }
 
 /*
@@ -279,7 +252,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			goto other_drop;	/* Packet was eaten by filter */
 	}
 
-	choke_skb_cb(skb)->keys_valid = 0;
 	/* Compute average queue usage (see RED) */
 	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
 	if (red_is_idling(&q->vars))
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel
  2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 1/6] net: Add skb_get_hash_perturb Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 2/6] sched: Eliminate use of flow_keys in sch_choke Tom Herbert
@ 2015-03-01 22:09 ` Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 4/6] sched: Eliminate use of flow_keys in sch_hhf Tom Herbert
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
jhash by hand.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 net/sched/sch_fq_codel.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1e52dec..a6fc53d 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/flow_keys.h>
 #include <net/codel.h>
 
 /*	Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
 };
 
 static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
-				  const struct sk_buff *skb)
+				  struct sk_buff *skb)
 {
-	struct flow_keys keys;
-	unsigned int hash;
-
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
+	u32 hash = skb_get_hash_perturb(skb, q->perturbation);
 
 	return reciprocal_scale(hash, q->flows_cnt);
 }
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 4/6] sched: Eliminate use of flow_keys in sch_hhf
  2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
                   ` (2 preceding siblings ...)
  2015-03-01 22:09 ` [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel Tom Herbert
@ 2015-03-01 22:09 ` Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 5/6] sched: Eliminate use of flow_keys in sch_sfb Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq Tom Herbert
  5 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
jhash by hand.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 net/sched/sch_hhf.c | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aab..9d15cb6 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
-#include <net/flow_keys.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
 	return jiffies;
 }
 
-static unsigned int skb_hash(const struct hhf_sched_data *q,
-			     const struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	unsigned int hash;
-
-	if (skb->sk && skb->sk->sk_hash)
-		return skb->sk->sk_hash;
-
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
-	return hash;
-}
-
 /* Looks up a heavy-hitter flow in a chaining list of table T. */
 static struct hh_flow_state *seek_list(const u32 hash,
 				       struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	/* Get hashed flow-id of the skb. */
-	hash = skb_hash(q, skb);
+	hash = skb_get_hash_perturb(skb, q->perturbation);
 
 	/* Check if this packet belongs to an already established HH flow. */
 	flow_pos = hash & HHF_BIT_MASK;
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 5/6] sched: Eliminate use of flow_keys in sch_sfb
  2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
                   ` (3 preceding siblings ...)
  2015-03-01 22:09 ` [PATCH net-next 4/6] sched: Eliminate use of flow_keys in sch_hhf Tom Herbert
@ 2015-03-01 22:09 ` Tom Herbert
  2015-03-01 22:09 ` [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq Tom Herbert
  5 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
jhash by hand.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 net/sched/sch_sfb.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd8..402d051 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
 #include <net/ip.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
-#include <net/flow_keys.h>
 
 /*
  * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	int i;
 	u32 p_min = ~0;
 	u32 minqlen = ~0;
-	u32 r, slot, salt, sfbhash;
+	u32 r, sfbhash;
+	u32 slot = q->slot;
 	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	struct flow_keys keys;
 
 	if (unlikely(sch->q.qlen >= q->limit)) {
 		qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	fl = rcu_dereference_bh(q->filter_list);
 	if (fl) {
+		u32 salt;
+
 		/* If using external classifiers, get result and record it. */
 		if (!sfb_classify(skb, fl, &ret, &salt))
 			goto other_drop;
-		keys.src = salt;
-		keys.dst = 0;
-		keys.ports = 0;
+		sfbhash = jhash_1word(salt, q->bins[q->slot].perturbation);
 	} else {
-		skb_flow_dissect(skb, &keys);
+		sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
 	}
 
-	slot = q->slot;
 
-	sfbhash = jhash_3words((__force u32)keys.dst,
-			       (__force u32)keys.src,
-			       (__force u32)keys.ports,
-			       q->bins[slot].perturbation);
 	if (!sfbhash)
 		sfbhash = 1;
 	sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (unlikely(p_min >= SFB_MAX_PROB)) {
 		/* Inelastic flow */
 		if (q->double_buffering) {
-			sfbhash = jhash_3words((__force u32)keys.dst,
-					       (__force u32)keys.src,
-					       (__force u32)keys.ports,
-					       q->bins[slot].perturbation);
+			sfbhash = skb_get_hash_perturb(skb,
+			    q->bins[slot].perturbation);
 			if (!sfbhash)
 				sfbhash = 1;
 			sfb_skb_cb(skb)->hashes[slot] = sfbhash;
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
                   ` (4 preceding siblings ...)
  2015-03-01 22:09 ` [PATCH net-next 5/6] sched: Eliminate use of flow_keys in sch_sfb Tom Herbert
@ 2015-03-01 22:09 ` Tom Herbert
  2015-03-02  0:58   ` Eric Dumazet
  5 siblings, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2015-03-01 22:09 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
jhash by hand.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 net/sched/sch_sfq.c | 29 +++--------------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140..e7ed8a5 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/flow_keys.h>
 #include <net/red.h>
 
 
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
 	return &q->dep[val - SFQ_MAX_FLOWS];
 }
 
-/*
- * In order to be able to quickly rehash our queue when timer changes
- * q->perturbation, we store flow_keys in skb->cb[]
- */
-struct sfq_skb_cb {
-       struct flow_keys        keys;
-};
-
-static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
-{
-	qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
-	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
-			     const struct sk_buff *skb)
+			     struct sk_buff *skb)
 {
-	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
-	unsigned int hash;
-
-	hash = jhash_3words((__force u32)keys->dst,
-			    (__force u32)keys->src ^ keys->ip_proto,
-			    (__force u32)keys->ports, q->perturbation);
-	return hash & (q->divisor - 1);
+	return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
 }
 
 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return TC_H_MIN(skb->priority);
 
 	fl = rcu_dereference_bh(q->filter_list);
-	if (!fl) {
-		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
+	if (!fl)
 		return sfq_hash(q, skb) + 1;
-	}
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	result = tc_classify(skb, fl, &res);
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-01 22:09 ` [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq Tom Herbert
@ 2015-03-02  0:58   ` Eric Dumazet
  2015-03-02  4:06     ` David Miller
  2015-03-02 15:31     ` Tom Herbert
  0 siblings, 2 replies; 19+ messages in thread
From: Eric Dumazet @ 2015-03-02  0:58 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, fw

On Sun, 2015-03-01 at 14:09 -0800, Tom Herbert wrote:
> Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
> jhash by hand.

This defeats one of the perturbation goal :

If two flows hashes into same hash, then skb_get_hash_perturb(skb,
q->perturbation) will also give same hash forever and map to same hash
bucket.

Ideally, we could add a 'u32 perturbation' salt to
__skb_get_hash()/__flow_hash_from_keys()/__flow_hash_3words instead of
using a net_get_random_once(&hashrnd, sizeof(hashrnd)); 

but I guess all these functions are becoming very fat.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02  0:58   ` Eric Dumazet
@ 2015-03-02  4:06     ` David Miller
  2015-03-02 15:31     ` Tom Herbert
  1 sibling, 0 replies; 19+ messages in thread
From: David Miller @ 2015-03-02  4:06 UTC (permalink / raw)
  To: eric.dumazet; +Cc: therbert, netdev, fw

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 01 Mar 2015 16:58:52 -0800

> On Sun, 2015-03-01 at 14:09 -0800, Tom Herbert wrote:
>> Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
>> jhash by hand.
> 
> This defeats one of the perturbation goal :
> 
> If two flows hashes into same hash, then skb_get_hash_perturb(skb,
> q->perturbation) will also give same hash forever and map to same hash
> bucket.

Yes, this needs to be resolved.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02  0:58   ` Eric Dumazet
  2015-03-02  4:06     ` David Miller
@ 2015-03-02 15:31     ` Tom Herbert
  2015-03-02 16:25       ` Tom Herbert
  2015-03-02 19:24       ` Eric Dumazet
  1 sibling, 2 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-02 15:31 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Sun, Mar 1, 2015 at 4:58 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sun, 2015-03-01 at 14:09 -0800, Tom Herbert wrote:
>> Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
>> jhash by hand.
>
> This defeats one of the perturbation goal :
>
> If two flows hashes into same hash, then skb_get_hash_perturb(skb,
> q->perturbation) will also give same hash forever and map to same hash
> bucket.
>
As I already mentioned, the probability of skb_get_hash returning the
same value for two flows is 1/2^32. I suspect it's greater probability
to get a hash collision between two IPv6 flows with subtlety different
addresses or simply match 4-tuples in different address spaces (like
VLAN or with VNID). If hash collision is really a problem, we can
periodically rekey skb->hash (either SW or HW) or maybe move to 64 bit
hashes somehow. Anyway, we potentially have the same problem in RSS
and other uses of the hash, if hashes collide then two flows share the
same bucket forever unless there is an action to break that.

> Ideally, we could add a 'u32 perturbation' salt to
> __skb_get_hash()/__flow_hash_from_keys()/__flow_hash_3words instead of
> using a net_get_random_once(&hashrnd, sizeof(hashrnd));
>
> but I guess all these functions are becoming very fat.
>
Same thing as rekeying.

>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 15:31     ` Tom Herbert
@ 2015-03-02 16:25       ` Tom Herbert
  2015-03-02 19:28         ` Eric Dumazet
  2015-03-02 19:24       ` Eric Dumazet
  1 sibling, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2015-03-02 16:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Mon, Mar 2, 2015 at 7:31 AM, Tom Herbert <therbert@google.com> wrote:
>
> On Sun, Mar 1, 2015 at 4:58 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Sun, 2015-03-01 at 14:09 -0800, Tom Herbert wrote:
> >> Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
> >> jhash by hand.
> >
> > This defeats one of the perturbation goal :
> >
> > If two flows hashes into same hash, then skb_get_hash_perturb(skb,
> > q->perturbation) will also give same hash forever and map to same hash
> > bucket.
> >
> As I already mentioned, the probability of skb_get_hash returning the
> same value for two flows is 1/2^32. I suspect it's greater probability
> to get a hash collision between two IPv6 flows with subtlety different
> addresses or simply match 4-tuples in different address spaces (like
> VLAN or with VNID). If hash collision is really a problem, we can
> periodically rekey skb->hash (either SW or HW) or maybe move to 64 bit
> hashes somehow. Anyway, we potentially have the same problem in RSS
> and other uses of the hash, if hashes collide then two flows share the
> same bucket forever unless there is an action to break that.
>
One other possibility is that with the txhash changes we can rehash
individual connections that are sourced from the host. I was planning
to do this on too many TCP retransmits to try to get a different ECMP
path (type of source routing), but we could also do this periodically
or based on request from qdisc. ooo_okay could be taken into account
to avoid OOO packets.

>
> > Ideally, we could add a 'u32 perturbation' salt to
> > __skb_get_hash()/__flow_hash_from_keys()/__flow_hash_3words instead of
> > using a net_get_random_once(&hashrnd, sizeof(hashrnd));
> >
> > but I guess all these functions are becoming very fat.
> >
> Same thing as rekeying.
>
> >
> >

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 15:31     ` Tom Herbert
  2015-03-02 16:25       ` Tom Herbert
@ 2015-03-02 19:24       ` Eric Dumazet
  2015-03-03 21:00         ` Tom Herbert
  1 sibling, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2015-03-02 19:24 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Mon, 2015-03-02 at 07:31 -0800, Tom Herbert wrote:

> As I already mentioned, the probability of skb_get_hash returning the
> same value for two flows is 1/2^32. I suspect it's greater probability
> to get a hash collision between two IPv6 flows with subtlety different
> addresses or simply match 4-tuples in different address spaces (like
> VLAN or with VNID). If hash collision is really a problem, we can
> periodically rekey skb->hash (either SW or HW) or maybe move to 64 bit
> hashes somehow. Anyway, we potentially have the same problem in RSS
> and other uses of the hash, if hashes collide then two flows share the
> same bucket forever unless there is an action to break that.

Main historical use of SFQ is for routers.

We do not control skb->hash for forwarding workload, unless you
reprogram RSS keys on the NIC.

If you remember, we used to have a problem on some NIC using a well
known RSS key. It is fortunate SFQ/fq_codel did not rely on skb->hash
too much.

If to target one flow, attackers need to attack your big boxes to flood
one particular RX queue at NIC level, this has a huge cost for them.

While targeting one hash bucket on a qdisc might be much easier, if they
know even a hash perturbation has no effect against their attack.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 16:25       ` Tom Herbert
@ 2015-03-02 19:28         ` Eric Dumazet
  2015-03-02 19:34           ` Tom Herbert
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2015-03-02 19:28 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List, Florian Westphal


On Mon, 2015-03-02 at 08:25 -0800, Tom Herbert wrote:

> One other possibility is that with the txhash changes we can rehash
> individual connections that are sourced from the host. I was planning
> to do this on too many TCP retransmits to try to get a different ECMP
> path (type of source routing), but we could also do this periodically
> or based on request from qdisc. ooo_okay could be taken into account
> to avoid OOO packets.

About locally generated traffic, skb->txhash potential uses would be

1) Somewhat spray txhash on the wire (Hmmm...)
   Hard to do that with regular IPv4/tcp traffic, or quite hacky.
   (like VLAN spraying...)

2.1) Slave selection on bonding driver
2.2) TX queue selection on multiqueue NIC.

    In this model (way different than XPS), then skb->ooo_okay is of
    no use :

    In both cases, if you change skb->txhash after congestion event,
    you do not care of reorders anyway. They are going to be generated
    most probably because of different paths.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 19:28         ` Eric Dumazet
@ 2015-03-02 19:34           ` Tom Herbert
  2015-03-02 19:43             ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2015-03-02 19:34 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Mon, Mar 2, 2015 at 11:28 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> On Mon, 2015-03-02 at 08:25 -0800, Tom Herbert wrote:
>
>> One other possibility is that with the txhash changes we can rehash
>> individual connections that are sourced from the host. I was planning
>> to do this on too many TCP retransmits to try to get a different ECMP
>> path (type of source routing), but we could also do this periodically
>> or based on request from qdisc. ooo_okay could be taken into account
>> to avoid OOO packets.
>
> About locally generated traffic, skb->txhash potential uses would be
>
> 1) Somewhat spray txhash on the wire (Hmmm...)
>    Hard to do that with regular IPv4/tcp traffic, or quite hacky.
>    (like VLAN spraying...)
>
Spraying would be quite easy by modulating txhash from TCP and using
either  UDP encapsulation like GUE or IPv6 flow labels. Only problem
is that this would constantly be changing rxhash at the receiver for
the connection, although flow labels might 'just work' with IPv6
Toeplitz in NICs.

> 2.1) Slave selection on bonding driver
> 2.2) TX queue selection on multiqueue NIC.
>
>     In this model (way different than XPS), then skb->ooo_okay is of
>     no use :
>
>     In both cases, if you change skb->txhash after congestion event,
>     you do not care of reorders anyway. They are going to be generated
>     most probably because of different paths.
>
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 19:34           ` Tom Herbert
@ 2015-03-02 19:43             ` Eric Dumazet
  2015-03-02 19:51               ` Tom Herbert
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2015-03-02 19:43 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Mon, 2015-03-02 at 11:34 -0800, Tom Herbert wrote:

> Spraying would be quite easy by modulating txhash from TCP and using
> either  UDP encapsulation like GUE or IPv6 flow labels. Only problem
> is that this would constantly be changing rxhash at the receiver for
> the connection, although flow labels might 'just work' with IPv6
> Toeplitz in NICs.

When I said 'regular IPv4/tcp traffic' this did not include any added
encapsulation.

Most NIC are not able to properly offload TCP (TSO) with such
encapsulations.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 19:43             ` Eric Dumazet
@ 2015-03-02 19:51               ` Tom Herbert
  2015-03-02 20:02                 ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2015-03-02 19:51 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Mon, Mar 2, 2015 at 11:43 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Mon, 2015-03-02 at 11:34 -0800, Tom Herbert wrote:
>
>> Spraying would be quite easy by modulating txhash from TCP and using
>> either  UDP encapsulation like GUE or IPv6 flow labels. Only problem
>> is that this would constantly be changing rxhash at the receiver for
>> the connection, although flow labels might 'just work' with IPv6
>> Toeplitz in NICs.
>
> When I said 'regular IPv4/tcp traffic' this did not include any added
> encapsulation.
>
> Most NIC are not able to properly offload TCP (TSO) with such
> encapsulations.
>
IPv6 flow labels should work through TSO, they would just be
replicated for each segment. You would need switches that can hash
based on flow label though, and of course IPv6 deployed in the network
:-).

>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 19:51               ` Tom Herbert
@ 2015-03-02 20:02                 ` Eric Dumazet
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Dumazet @ 2015-03-02 20:02 UTC (permalink / raw)
  To: Tom Herbert; +Cc: netdev

On Mon, 2015-03-02 at 11:51 -0800, Tom Herbert wrote:
> On Mon, Mar 2, 2015 at 11:43 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Mon, 2015-03-02 at 11:34 -0800, Tom Herbert wrote:
> >
> >> Spraying would be quite easy by modulating txhash from TCP and using
> >> either  UDP encapsulation like GUE or IPv6 flow labels. Only problem
> >> is that this would constantly be changing rxhash at the receiver for
> >> the connection, although flow labels might 'just work' with IPv6
> >> Toeplitz in NICs.
> >
> > When I said 'regular IPv4/tcp traffic' this did not include any added
> > encapsulation.
> >
> > Most NIC are not able to properly offload TCP (TSO) with such
> > encapsulations.
> >
> IPv6 flow labels should work through TSO, they would just be
> replicated for each segment. You would need switches that can hash
> based on flow label though, and of course IPv6 deployed in the network
> :-).


Yeah, but I explicitly talked about native IPv4 + TCP ;)

not IPv6 + TCP, or IPv6 + IPV4 + TCP or some encapsulation...

Trend is to generalize random packet spraying, even within a flow.

https://www.cs.purdue.edu/homes/kompella/publications/infocom13.pdf

This forces us to build a resilient TCP stack.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
  2015-03-02 19:24       ` Eric Dumazet
@ 2015-03-03 21:00         ` Tom Herbert
  0 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-03 21:00 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Linux Netdev List, Florian Westphal

On Mon, Mar 2, 2015 at 11:24 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Mon, 2015-03-02 at 07:31 -0800, Tom Herbert wrote:
>
>> As I already mentioned, the probability of skb_get_hash returning the
>> same value for two flows is 1/2^32. I suspect it's greater probability
>> to get a hash collision between two IPv6 flows with subtlety different
>> addresses or simply match 4-tuples in different address spaces (like
>> VLAN or with VNID). If hash collision is really a problem, we can
>> periodically rekey skb->hash (either SW or HW) or maybe move to 64 bit
>> hashes somehow. Anyway, we potentially have the same problem in RSS
>> and other uses of the hash, if hashes collide then two flows share the
>> same bucket forever unless there is an action to break that.
>
> Main historical use of SFQ is for routers.
>
> We do not control skb->hash for forwarding workload, unless you
> reprogram RSS keys on the NIC.
>
> If you remember, we used to have a problem on some NIC using a well
> known RSS key. It is fortunate SFQ/fq_codel did not rely on skb->hash
> too much.
>
Okay, I'll audit the drivers returning skb->hash fro HW to make sure
they are properly randomizing the initial hash key.

Unless, there's other material objections, I'll formally post these
patches. Consolidating the packet hash for different use cases should
be more efficient and we'll be able focus efforts on strengthening it
over time in one place. As for SW vs. HW hash, it still seems like
getting a Toeplitz hash from the hardware is going to be stronger at
least for TCP/IPv6 than what we can currently do with jhash as the
input is the full 40 bytes of 4-tuple. I must admit I wish Microsoft
would have continued to make advancements with Toeplitz hash, it would
be nice if there was a real definition for use with UDP and maybe we
had instructions for this in x86!

Tom

> If to target one flow, attackers need to attack your big boxes to flood
> one particular RX queue at NIC level, this has a huge cost for them.
>
> While targeting one hash bucket on a qdisc might be much easier, if they
> know even a hash perturbation has no effect against their attack.
>
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel
  2015-03-04 18:39 [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs Tom Herbert
@ 2015-03-04 18:40 ` Tom Herbert
  0 siblings, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2015-03-04 18:40 UTC (permalink / raw)
  To: davem, netdev, eric.dumazet, fw

Call qdisc_skb_get_hash instead of doing skb_flow_dissect and then
jhash by hand.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 net/sched/sch_fq_codel.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1e52dec..a6fc53d 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/flow_keys.h>
 #include <net/codel.h>
 
 /*	Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
 };
 
 static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
-				  const struct sk_buff *skb)
+				  struct sk_buff *skb)
 {
-	struct flow_keys keys;
-	unsigned int hash;
-
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
+	u32 hash = skb_get_hash_perturb(skb, q->perturbation);
 
 	return reciprocal_scale(hash, q->flows_cnt);
 }
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-03-04 18:40 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-01 22:09 [PATCH RFC net-next 0/6] net: Call skb_get_hash in net/sched Tom Herbert
2015-03-01 22:09 ` [PATCH net-next 1/6] net: Add skb_get_hash_perturb Tom Herbert
2015-03-01 22:09 ` [PATCH net-next 2/6] sched: Eliminate use of flow_keys in sch_choke Tom Herbert
2015-03-01 22:09 ` [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel Tom Herbert
2015-03-01 22:09 ` [PATCH net-next 4/6] sched: Eliminate use of flow_keys in sch_hhf Tom Herbert
2015-03-01 22:09 ` [PATCH net-next 5/6] sched: Eliminate use of flow_keys in sch_sfb Tom Herbert
2015-03-01 22:09 ` [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq Tom Herbert
2015-03-02  0:58   ` Eric Dumazet
2015-03-02  4:06     ` David Miller
2015-03-02 15:31     ` Tom Herbert
2015-03-02 16:25       ` Tom Herbert
2015-03-02 19:28         ` Eric Dumazet
2015-03-02 19:34           ` Tom Herbert
2015-03-02 19:43             ` Eric Dumazet
2015-03-02 19:51               ` Tom Herbert
2015-03-02 20:02                 ` Eric Dumazet
2015-03-02 19:24       ` Eric Dumazet
2015-03-03 21:00         ` Tom Herbert
2015-03-04 18:39 [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs Tom Herbert
2015-03-04 18:40 ` [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel Tom Herbert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).