* [PATCH net-next 1/9] net: openvswitch: add flow-mask cache for performance
From: xiangxia.m.yue @ 2019-09-29 17:09 UTC
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
The idea for this optimization comes from a patch committed to the
Open vSwitch project in 2014, authored by Pravin B Shelar. To get
high performance, I have reimplemented it for the upstream kernel
datapath; later patches build on it.
Pravin B Shelar says:
| On every packet OVS needs to lookup flow-table with every
| mask until it finds a match. The packet flow-key is first
| masked with mask in the list and then the masked key is
| looked up in flow-table. Therefore number of masks can
| affect packet processing performance.
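For illustration, a minimal stand-alone sketch of the segmented
per-CPU cache probe this patch introduces (user-space C with
hypothetical toy_* names; the kernel code keeps the entries in
per-CPU storage and falls back to the full mask-list walk on a miss):

#include <stdint.h>
#include <stdio.h>

#define MC_HASH_SHIFT   8
#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
#define MC_HASH_SEGS    ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)

struct toy_cache_entry {
	uint32_t skb_hash;	/* 0 means the slot is empty */
	uint32_t mask_index;	/* index of the probable mask */
};

static struct toy_cache_entry cache[MC_HASH_ENTRIES];

/* Each 8-bit segment of the 32-bit skb hash selects one candidate
 * slot, so a flow has up to four possible homes before we give up
 * and do the full scan. Returns the cached mask index or -1.
 */
static int toy_cache_lookup(uint32_t skb_hash)
{
	uint32_t hash = skb_hash;
	unsigned int seg;

	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		struct toy_cache_entry *ce;

		ce = &cache[hash & (MC_HASH_ENTRIES - 1)];
		if (ce->skb_hash == skb_hash)
			return (int)ce->mask_index;
		hash >>= MC_HASH_SHIFT;
	}
	return -1;
}

int main(void)
{
	cache[0x12345678u & (MC_HASH_ENTRIES - 1)] =
		(struct toy_cache_entry){ 0x12345678u, 3 };
	printf("%d\n", toy_cache_lookup(0x12345678u));	/* prints 3 */
	return 0;
}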
Link: https://github.com/openvswitch/ovs/commit/5604935e4e1cbc16611d2d97f50b717aa31e8ec5
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/datapath.c | 3 +-
net/openvswitch/flow_table.c | 109 +++++++++++++++++++++++++++++++++++++------
net/openvswitch/flow_table.h | 11 ++++-
3 files changed, 107 insertions(+), 16 deletions(-)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index dde9d76..3d7b1c4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+ &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf3582c..3d515c0 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -36,6 +36,10 @@
#define TBL_MIN_BUCKETS 1024
#define REHASH_INTERVAL (10 * 60 * HZ)
+#define MC_HASH_SHIFT 8
+#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
@@ -168,10 +172,15 @@ int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
- ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+ MC_HASH_ENTRIES,
+ __alignof__(struct mask_cache_entry));
+ if (!table->mask_cache)
+ return -ENOMEM;
+ ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
- return -ENOMEM;
+ goto free_mask_cache;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -187,6 +196,8 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti:
__table_instance_destroy(ti);
+free_mask_cache:
+ free_percpu(table->mask_cache);
return -ENOMEM;
}
@@ -243,6 +254,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
+ free_percpu(table->mask_cache);
table_instance_destroy(ti, ufid_ti, false);
}
@@ -425,7 +437,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ u32 *n_mask_hit)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -435,6 +448,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
+ (*n_mask_hit)++;
+
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,30 +458,97 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
return NULL;
}
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
- const struct sw_flow_key *key,
- u32 *n_mask_hit)
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+ struct table_instance *ti,
+ const struct sw_flow_key *key,
+ u32 *n_mask_hit)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
struct sw_flow *flow;
- *n_mask_hit = 0;
list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- (*n_mask_hit)++;
- flow = masked_flow_lookup(ti, key, mask);
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) /* Found */
return flow;
}
return NULL;
}
+/*
+ * mask_cache maps a flow to its probable mask. The cache is not
+ * tightly coupled: updates to the mask list can leave stale
+ * entries in the mask cache.
+ * This is a per-CPU cache divided into MC_HASH_SEGS segments.
+ * In case of a hash collision, the entry is rehashed into the next segment.
+ */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+ const struct sw_flow_key *key,
+ u32 skb_hash,
+ u32 *n_mask_hit)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_cache_entry *entries, *ce, *del;
+ struct sw_flow *flow;
+ u32 hash = skb_hash;
+ int seg;
+
+ *n_mask_hit = 0;
+ if (unlikely(!skb_hash))
+ return flow_lookup(tbl, ti, key, n_mask_hit);
+
+ del = NULL;
+ entries = this_cpu_ptr(tbl->mask_cache);
+
+ for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+ int index;
+
+ index = hash & (MC_HASH_ENTRIES - 1);
+ ce = &entries[index];
+
+ if (ce->skb_hash == skb_hash) {
+ struct sw_flow_mask *mask;
+ int i;
+
+ i = 0;
+ list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
+ if (ce->mask_index == i++) {
+ flow = masked_flow_lookup(ti, key, mask,
+ n_mask_hit);
+ if (flow) /* Found */
+ return flow;
+
+ break;
+ }
+ }
+
+ del = ce;
+ break;
+ }
+
+ if (!del || (del->skb_hash && !ce->skb_hash)) {
+ del = ce;
+ }
+
+ hash >>= MC_HASH_SHIFT;
+ }
+
+ flow = flow_lookup(tbl, ti, key, n_mask_hit);
+
+ if (flow) {
+ del->skb_hash = skb_hash;
+ del->mask_index = (*n_mask_hit - 1);
+ }
+
+ return flow;
+}
+
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
u32 __always_unused n_mask_hit;
- return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+ return flow_lookup(tbl, ti, key, &n_mask_hit);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
@@ -475,10 +557,11 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
struct sw_flow *flow;
+ u32 __always_unused n_mask_hit;
/* Always called under ovs-mutex. */
list_for_each_entry(mask, &tbl->mask_list, list) {
- flow = masked_flow_lookup(ti, match->key, mask);
+ flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) &&
ovs_flow_cmp_unmasked_key(flow, match))
return flow;
@@ -631,7 +714,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
- list_add_rcu(&mask->list, &tbl->mask_list);
+ list_add_tail_rcu(&mask->list, &tbl->mask_list);
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index bc52045..04b6b1c 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -22,6 +22,11 @@
#include "flow.h"
+struct mask_cache_entry {
+ u32 skb_hash;
+ u32 mask_index;
+};
+
struct table_instance {
struct hlist_head *buckets;
unsigned int n_buckets;
@@ -34,6 +39,7 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
+ struct mask_cache_entry __percpu *mask_cache;
struct list_head mask_list;
unsigned long last_rehash;
unsigned int count;
@@ -60,8 +66,9 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
- const struct sw_flow_key *,
- u32 *n_mask_hit);
+ const struct sw_flow_key *,
+ u32 skb_hash,
+ u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
--
1.8.3.1
* [PATCH net-next 2/9] net: openvswitch: convert mask list in mask array
From: xiangxia.m.yue @ 2019-09-29 17:09 UTC
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Port the code to upstream Linux with minor changes.
Pravin B Shelar says:
| mask caches index of mask in mask_list. On packet recv OVS
| need to traverse mask-list to get cached mask. Therefore array
| is better for retrieving cached mask. This also allows better
| cache replacement algorithm by directly checking mask's existence.
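To show why an array beats a list here, a hypothetical user-space
sketch of the index-based retrieval (toy_* names are illustrative;
in the kernel the slots are RCU-protected pointers):

#include <stdint.h>

struct toy_mask { int refs; };

struct toy_mask_array {
	int count, max;
	struct toy_mask *masks[];	/* flexible array of slots */
};

/* With a linked list, resolving a cached index costs a walk of up
 * to 'index' nodes; with the array it is one bounds-checked load.
 * A stale index (e.g. left over after a realloc) is caught by the
 * bounds check or simply yields NULL, and the caller then falls
 * back to a full scan.
 */
static struct toy_mask *toy_cached_mask(const struct toy_mask_array *ma,
					uint32_t index)
{
	if (index >= (uint32_t)ma->max)
		return NULL;
	return ma->masks[index];
}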
Link: https://github.com/openvswitch/ovs/commit/d49fc3ff53c65e4eca9cabd52ac63396746a7ef5
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow.h | 1 -
net/openvswitch/flow_table.c | 218 +++++++++++++++++++++++++++++++++----------
net/openvswitch/flow_table.h | 8 +-
3 files changed, 175 insertions(+), 52 deletions(-)
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index b830d5f..8080518 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -166,7 +166,6 @@ struct sw_flow_key_range {
struct sw_flow_mask {
int ref_count;
struct rcu_head rcu;
- struct list_head list;
struct sw_flow_key_range range;
struct sw_flow_key key;
};
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3d515c0..99954fa 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -34,6 +34,7 @@
#include <net/ndisc.h>
#define TBL_MIN_BUCKETS 1024
+#define MASK_ARRAY_SIZE_MIN 16
#define REHASH_INTERVAL (10 * 60 * HZ)
#define MC_HASH_SHIFT 8
@@ -168,9 +169,59 @@ static struct table_instance *table_instance_alloc(int new_size)
return ti;
}
+static void mask_array_rcu_cb(struct rcu_head *rcu)
+{
+ struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
+
+ kfree(ma);
+}
+
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+ struct mask_array *new;
+
+ size = max(MASK_ARRAY_SIZE_MIN, size);
+ new = kzalloc(sizeof(struct mask_array) +
+ sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->count = 0;
+ new->max = size;
+
+ return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+ struct mask_array *old;
+ struct mask_array *new;
+
+ new = tbl_mask_array_alloc(size);
+ if (!new)
+ return -ENOMEM;
+
+ old = ovsl_dereference(tbl->mask_array);
+ if (old) {
+ int i;
+
+ for (i = 0; i < old->max; i++) {
+ if (ovsl_dereference(old->masks[i]))
+ new->masks[new->count++] = old->masks[i];
+ }
+ }
+ rcu_assign_pointer(tbl->mask_array, new);
+
+ if (old)
+ call_rcu(&old->rcu, mask_array_rcu_cb);
+
+ return 0;
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
+ struct mask_array *ma;
table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
MC_HASH_ENTRIES,
@@ -178,9 +229,13 @@ int ovs_flow_tbl_init(struct flow_table *table)
if (!table->mask_cache)
return -ENOMEM;
+ ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+ if (!ma)
+ goto free_mask_cache;
+
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
- goto free_mask_cache;
+ goto free_mask_array;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -188,7 +243,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
- INIT_LIST_HEAD(&table->mask_list);
+ rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
@@ -196,6 +251,8 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti:
__table_instance_destroy(ti);
+free_mask_array:
+ kfree(ma);
free_mask_cache:
free_percpu(table->mask_cache);
return -ENOMEM;
@@ -255,6 +312,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
free_percpu(table->mask_cache);
+ kfree(rcu_dereference_raw(table->mask_array));
table_instance_destroy(ti, ufid_ti, false);
}
@@ -460,17 +518,27 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct table_instance *ti,
+ struct mask_array *ma,
const struct sw_flow_key *key,
- u32 *n_mask_hit)
+ u32 *n_mask_hit,
+ u32 *index)
{
- struct sw_flow_mask *mask;
struct sw_flow *flow;
+ int i;
- list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
- if (flow) /* Found */
- return flow;
+ for (i = 0; i < ma->max; i++) {
+ struct sw_flow_mask *mask;
+
+ mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (mask) {
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow) { /* Found */
+ *index = i;
+ return flow;
+ }
+ }
}
+
return NULL;
}
@@ -486,6 +554,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
u32 skb_hash,
u32 *n_mask_hit)
{
+ struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct mask_cache_entry *entries, *ce, *del;
struct sw_flow *flow;
@@ -493,8 +562,11 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
int seg;
*n_mask_hit = 0;
- if (unlikely(!skb_hash))
- return flow_lookup(tbl, ti, key, n_mask_hit);
+ if (unlikely(!skb_hash)) {
+ u32 __always_unused mask_index;
+
+ return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+ }
del = NULL;
entries = this_cpu_ptr(tbl->mask_cache);
@@ -507,37 +579,33 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
if (ce->skb_hash == skb_hash) {
struct sw_flow_mask *mask;
- int i;
-
- i = 0;
- list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- if (ce->mask_index == i++) {
- flow = masked_flow_lookup(ti, key, mask,
- n_mask_hit);
- if (flow) /* Found */
- return flow;
-
- break;
- }
+ struct sw_flow *flow;
+
+ mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
+ if (mask) {
+ flow = masked_flow_lookup(ti, key, mask,
+ n_mask_hit);
+ if (flow) /* Found */
+ return flow;
}
del = ce;
break;
}
- if (!del || (del->skb_hash && !ce->skb_hash)) {
+ if (!del || (del->skb_hash && !ce->skb_hash) ||
+ (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
+ !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
del = ce;
}
hash >>= MC_HASH_SHIFT;
}
- flow = flow_lookup(tbl, ti, key, n_mask_hit);
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);
- if (flow) {
+ if (flow)
del->skb_hash = skb_hash;
- del->mask_index = (*n_mask_hit - 1);
- }
return flow;
}
@@ -546,26 +614,38 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
+
u32 __always_unused n_mask_hit;
+ u32 __always_unused index;
- return flow_lookup(tbl, ti, key, &n_mask_hit);
+ return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct sw_flow_mask *mask;
- struct sw_flow *flow;
- u32 __always_unused n_mask_hit;
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i;
/* Always called under ovs-mutex. */
- list_for_each_entry(mask, &tbl->mask_list, list) {
+ for (i = 0; i < ma->max; i++) {
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ u32 __always_unused n_mask_hit;
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ mask = ovsl_dereference(ma->masks[i]);
+ if (!mask)
+ continue;
+
flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) &&
- ovs_flow_cmp_unmasked_key(flow, match))
+ ovs_flow_cmp_unmasked_key(flow, match)) {
return flow;
+ }
}
+
return NULL;
}
@@ -611,13 +691,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
- struct sw_flow_mask *mask;
- int num = 0;
-
- list_for_each_entry(mask, &table->mask_list, list)
- num++;
-
- return num;
+ struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+ return ma->count;
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -638,8 +713,19 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
mask->ref_count--;
if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- kfree_rcu(mask, rcu);
+ struct mask_array *ma;
+ int i;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ for (i = 0; i < ma->max; i++) {
+ if (mask == ovsl_dereference(ma->masks[i])) {
+ RCU_INIT_POINTER(ma->masks[i], NULL);
+ ma->count--;
+ kfree_rcu(mask, rcu);
+ return;
+ }
+ }
+ BUG();
}
}
}
@@ -689,13 +775,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
const struct sw_flow_mask *mask)
{
- struct list_head *ml;
+ struct mask_array *ma;
+ int i;
- list_for_each(ml, &tbl->mask_list) {
- struct sw_flow_mask *m;
- m = container_of(ml, struct sw_flow_mask, list);
- if (mask_equal(mask, m))
- return m;
+ ma = ovsl_dereference(tbl->mask_array);
+ for (i = 0; i < ma->max; i++) {
+ struct sw_flow_mask *t;
+ t = ovsl_dereference(ma->masks[i]);
+
+ if (t && mask_equal(mask, t))
+ return t;
}
return NULL;
@@ -706,15 +795,44 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
+
mask = flow_mask_find(tbl, new);
if (!mask) {
+ struct mask_array *ma;
+ int i;
+
/* Allocate a new mask if none exists. */
mask = mask_alloc();
if (!mask)
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
- list_add_tail_rcu(&mask->list, &tbl->mask_list);
+
+ /* Add mask to mask array. */
+ ma = ovsl_dereference(tbl->mask_array);
+ if (ma->count >= ma->max) {
+ int err;
+
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err) {
+ kfree(mask);
+ return err;
+ }
+
+ ma = ovsl_dereference(tbl->mask_array);
+ }
+
+ for (i = 0; i < ma->max; i++) {
+ const struct sw_flow_mask *t;
+
+ t = ovsl_dereference(ma->masks[i]);
+ if (!t) {
+ rcu_assign_pointer(ma->masks[i], mask);
+ ma->count++;
+ break;
+ }
+ }
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 04b6b1c..8a5cea6 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -27,6 +27,12 @@ struct mask_cache_entry {
u32 mask_index;
};
+struct mask_array {
+ struct rcu_head rcu;
+ int count, max;
+ struct sw_flow_mask __rcu *masks[];
+};
+
struct table_instance {
struct hlist_head *buckets;
unsigned int n_buckets;
@@ -40,7 +46,7 @@ struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
struct mask_cache_entry __percpu *mask_cache;
- struct list_head mask_list;
+ struct mask_array __rcu *mask_array;
unsigned long last_rehash;
unsigned int count;
unsigned int ufid_count;
--
1.8.3.1
* Re: [PATCH net-next 2/9] net: openvswitch: convert mask list in mask array
From: Pravin Shelar @ 2019-10-02 2:06 UTC
To: Tonghao Zhang; +Cc: Greg Rose, Linux Kernel Network Developers
On Sun, Sep 29, 2019 at 7:09 PM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Port the codes to linux upstream and with little changes.
>
> Pravin B Shelar, says:
> | mask caches index of mask in mask_list. On packet recv OVS
> | need to traverse mask-list to get cached mask. Therefore array
> | is better for retrieving cached mask. This also allows better
> | cache replacement algorithm by directly checking mask's existence.
>
> Link: https://github.com/openvswitch/ovs/commit/d49fc3ff53c65e4eca9cabd52ac63396746a7ef5
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
> net/openvswitch/flow.h | 1 -
> net/openvswitch/flow_table.c | 218 +++++++++++++++++++++++++++++++++----------
> net/openvswitch/flow_table.h | 8 +-
> 3 files changed, 175 insertions(+), 52 deletions(-)
>
> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> index b830d5f..8080518 100644
> --- a/net/openvswitch/flow.h
> +++ b/net/openvswitch/flow.h
> @@ -166,7 +166,6 @@ struct sw_flow_key_range {
> struct sw_flow_mask {
> int ref_count;
> struct rcu_head rcu;
> - struct list_head list;
> struct sw_flow_key_range range;
> struct sw_flow_key key;
> };
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index 3d515c0..99954fa 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -34,6 +34,7 @@
> #include <net/ndisc.h>
>
> #define TBL_MIN_BUCKETS 1024
> +#define MASK_ARRAY_SIZE_MIN 16
> #define REHASH_INTERVAL (10 * 60 * HZ)
>
> #define MC_HASH_SHIFT 8
> @@ -168,9 +169,59 @@ static struct table_instance *table_instance_alloc(int new_size)
> return ti;
> }
>
> +static void mask_array_rcu_cb(struct rcu_head *rcu)
> +{
> + struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
> +
> + kfree(ma);
> +}
> +
> +static struct mask_array *tbl_mask_array_alloc(int size)
> +{
> + struct mask_array *new;
> +
> + size = max(MASK_ARRAY_SIZE_MIN, size);
> + new = kzalloc(sizeof(struct mask_array) +
> + sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
> + if (!new)
> + return NULL;
> +
> + new->count = 0;
> + new->max = size;
> +
> + return new;
> +}
> +
> +static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
> +{
> + struct mask_array *old;
> + struct mask_array *new;
> +
> + new = tbl_mask_array_alloc(size);
> + if (!new)
> + return -ENOMEM;
> +
> + old = ovsl_dereference(tbl->mask_array);
> + if (old) {
> + int i;
> +
> + for (i = 0; i < old->max; i++) {
> + if (ovsl_dereference(old->masks[i]))
> + new->masks[new->count++] = old->masks[i];
> + }
> + }
> + rcu_assign_pointer(tbl->mask_array, new);
> +
> + if (old)
> + call_rcu(&old->rcu, mask_array_rcu_cb);
> +
kfree_rcu can be used instead of call_rcu.
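Something like this (sketch, not compile-tested against this tree):

	if (old)
		kfree_rcu(old, rcu);

That also lets mask_array_rcu_cb() be dropped entirely, since the
callback does nothing but kfree().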
> + return 0;
> +}
[...]
* Re: [PATCH net-next 2/9] net: openvswitch: convert mask list in mask array
From: Tonghao Zhang @ 2019-10-08 1:44 UTC
To: Pravin Shelar; +Cc: Greg Rose, Linux Kernel Network Developers
On Wed, Oct 2, 2019 at 10:02 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Sun, Sep 29, 2019 at 7:09 PM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > Port the codes to linux upstream and with little changes.
> >
> > Pravin B Shelar, says:
> > | mask caches index of mask in mask_list. On packet recv OVS
> > | need to traverse mask-list to get cached mask. Therefore array
> > | is better for retrieving cached mask. This also allows better
> > | cache replacement algorithm by directly checking mask's existence.
> >
> > Link: https://github.com/openvswitch/ovs/commit/d49fc3ff53c65e4eca9cabd52ac63396746a7ef5
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> > net/openvswitch/flow.h | 1 -
> > net/openvswitch/flow_table.c | 218 +++++++++++++++++++++++++++++++++----------
> > net/openvswitch/flow_table.h | 8 +-
> > 3 files changed, 175 insertions(+), 52 deletions(-)
> >
> > diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> > index b830d5f..8080518 100644
> > --- a/net/openvswitch/flow.h
> > +++ b/net/openvswitch/flow.h
> > @@ -166,7 +166,6 @@ struct sw_flow_key_range {
> > struct sw_flow_mask {
> > int ref_count;
> > struct rcu_head rcu;
> > - struct list_head list;
> > struct sw_flow_key_range range;
> > struct sw_flow_key key;
> > };
> > diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> > index 3d515c0..99954fa 100644
> > --- a/net/openvswitch/flow_table.c
> > +++ b/net/openvswitch/flow_table.c
> > @@ -34,6 +34,7 @@
> > #include <net/ndisc.h>
> >
> > #define TBL_MIN_BUCKETS 1024
> > +#define MASK_ARRAY_SIZE_MIN 16
> > #define REHASH_INTERVAL (10 * 60 * HZ)
> >
> > #define MC_HASH_SHIFT 8
> > @@ -168,9 +169,59 @@ static struct table_instance *table_instance_alloc(int new_size)
> > return ti;
> > }
> >
> > +static void mask_array_rcu_cb(struct rcu_head *rcu)
> > +{
> > + struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
> > +
> > + kfree(ma);
> > +}
> > +
> > +static struct mask_array *tbl_mask_array_alloc(int size)
> > +{
> > + struct mask_array *new;
> > +
> > + size = max(MASK_ARRAY_SIZE_MIN, size);
> > + new = kzalloc(sizeof(struct mask_array) +
> > + sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
> > + if (!new)
> > + return NULL;
> > +
> > + new->count = 0;
> > + new->max = size;
> > +
> > + return new;
> > +}
> > +
> > +static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
> > +{
> > + struct mask_array *old;
> > + struct mask_array *new;
> > +
> > + new = tbl_mask_array_alloc(size);
> > + if (!new)
> > + return -ENOMEM;
> > +
> > + old = ovsl_dereference(tbl->mask_array);
> > + if (old) {
> > + int i;
> > +
> > + for (i = 0; i < old->max; i++) {
> > + if (ovsl_dereference(old->masks[i]))
> > + new->masks[new->count++] = old->masks[i];
> > + }
> > + }
> > + rcu_assign_pointer(tbl->mask_array, new);
> > +
> > + if (old)
> > + call_rcu(&old->rcu, mask_array_rcu_cb);
> > +
> kfree_rcu can be used instead of call_rcu.
will be changed in v2. thanks.
[...]
* [PATCH net-next 3/9] net: openvswitch: shrink the mask array if necessary
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
When creating and inserting a flow mask, if there is no free slot,
we reallocate (grow) the mask array. When removing a flow mask, we
shrink the mask array if it has become sparse.
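A hypothetical helper mirroring the resize thresholds in this patch
(illustration only; the real code calls tbl_mask_array_realloc()
directly):

#define MASK_ARRAY_SIZE_MIN 16

/* Grow in steps of MASK_ARRAY_SIZE_MIN when the array is full;
 * shrink by half only once usage drops to a third and the array
 * is at least twice the minimum, so the size does not thrash
 * when the mask count oscillates around a single boundary.
 */
static int toy_resized_max(int count, int max)
{
	if (count >= max)
		return max + MASK_ARRAY_SIZE_MIN;	/* grow */
	if (max >= MASK_ARRAY_SIZE_MIN * 2 && count <= max / 3)
		return max / 2;				/* shrink */
	return max;					/* unchanged */
}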
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow_table.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 99954fa..9c72aab 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -701,6 +701,23 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
+static void tbl_mask_array_delete_mask(struct mask_array *ma,
+ struct sw_flow_mask *mask)
+{
+ int i;
+
+ /* Remove the deleted mask pointers from the array */
+ for (i = 0; i < ma->max; i++) {
+ if (mask == ovsl_dereference(ma->masks[i])) {
+ RCU_INIT_POINTER(ma->masks[i], NULL);
+ ma->count--;
+ kfree_rcu(mask, rcu);
+ return;
+ }
+ }
+ BUG();
+}
+
/* Remove 'mask' from the mask list, if it is not needed any more. */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
@@ -714,18 +731,14 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
if (!mask->ref_count) {
struct mask_array *ma;
- int i;
ma = ovsl_dereference(tbl->mask_array);
- for (i = 0; i < ma->max; i++) {
- if (mask == ovsl_dereference(ma->masks[i])) {
- RCU_INIT_POINTER(ma->masks[i], NULL);
- ma->count--;
- kfree_rcu(mask, rcu);
- return;
- }
- }
- BUG();
+ tbl_mask_array_delete_mask(ma, mask);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma->count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
}
}
}
--
1.8.3.1
* [PATCH net-next 4/9] net: openvswitch: optimize flow mask cache hash collision
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Port the code to upstream Linux with minor changes.
Pravin B Shelar says:
| In case hash collision on mask cache, OVS does extra flow
| lookup. Following patch avoid it.
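The other half of the change mixes 'recirc_id' into the cache hash,
since pre- and post-recirculation packets carry the same skb_hash.
A stand-alone sketch (toy_mix() is a hypothetical stand-in for the
kernel's jhash_1word()):

#include <stdint.h>

static uint32_t toy_mix(uint32_t a, uint32_t b)
{
	a ^= b * 0x9e3779b9u;	/* any decent 32-bit mixer works here */
	a ^= a >> 16;
	return a * 0x85ebca6bu;
}

/* Without the mix, a flow before and after recirculation would
 * always contend for the same cache slots.
 */
static uint32_t toy_cache_key(uint32_t skb_hash, uint32_t recirc_id)
{
	return recirc_id ? toy_mix(skb_hash, recirc_id) : skb_hash;
}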
Link: https://github.com/openvswitch/ovs/commit/0e6efbe2712da03522532dc5e84806a96f6a0dd1
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow_table.c | 95 ++++++++++++++++++++++++--------------------
1 file changed, 53 insertions(+), 42 deletions(-)
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 9c72aab..e59fac5 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -516,6 +516,9 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
return NULL;
}
+/* Flow lookup does a full lookup on the flow table. It starts with
+ * the mask at the index passed in via '*index'.
+ */
static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct table_instance *ti,
struct mask_array *ma,
@@ -524,18 +527,31 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
u32 *index)
{
struct sw_flow *flow;
+ struct sw_flow_mask *mask;
int i;
- for (i = 0; i < ma->max; i++) {
- struct sw_flow_mask *mask;
-
- mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (*index < ma->max) {
+ mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
- if (flow) { /* Found */
- *index = i;
+ if (flow)
return flow;
- }
+ }
+ }
+
+ for (i = 0; i < ma->max; i++) {
+
+ if (i == *index)
+ continue;
+
+ mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (!mask)
+ continue;
+
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow) { /* Found */
+ *index = i;
+ return flow;
}
}
@@ -554,58 +570,54 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
u32 skb_hash,
u32 *n_mask_hit)
{
- struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct mask_cache_entry *entries, *ce, *del;
+ struct mask_array *ma = rcu_dereference(tbl->mask_array);
+ struct table_instance *ti = rcu_dereference(tbl->ti);
+ struct mask_cache_entry *entries, *ce;
struct sw_flow *flow;
- u32 hash = skb_hash;
+ u32 hash;
int seg;
*n_mask_hit = 0;
if (unlikely(!skb_hash)) {
- u32 __always_unused mask_index;
+ u32 mask_index = 0;
return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
}
- del = NULL;
+ /* Pre- and post-recirculation flows usually have the same skb_hash
+ * value. To avoid hash collisions, rehash the 'skb_hash' with
+ * 'recirc_id'. */
+ if (key->recirc_id)
+ skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+ ce = NULL;
+ hash = skb_hash;
entries = this_cpu_ptr(tbl->mask_cache);
+ /* Find the cache entry 'ce' to operate on. */
for (seg = 0; seg < MC_HASH_SEGS; seg++) {
- int index;
-
- index = hash & (MC_HASH_ENTRIES - 1);
- ce = &entries[index];
-
- if (ce->skb_hash == skb_hash) {
- struct sw_flow_mask *mask;
- struct sw_flow *flow;
-
- mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
- if (mask) {
- flow = masked_flow_lookup(ti, key, mask,
- n_mask_hit);
- if (flow) /* Found */
- return flow;
- }
-
- del = ce;
- break;
+ int index = hash & (MC_HASH_ENTRIES - 1);
+ struct mask_cache_entry *e;
+
+ e = &entries[index];
+ if (e->skb_hash == skb_hash) {
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+ &e->mask_index);
+ if (!flow)
+ e->skb_hash = 0;
+ return flow;
}
- if (!del || (del->skb_hash && !ce->skb_hash) ||
- (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
- !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
- del = ce;
- }
+ if (!ce || e->skb_hash < ce->skb_hash)
+ ce = e; /* A better replacement cache candidate. */
hash >>= MC_HASH_SHIFT;
}
- flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);
-
+ /* Cache miss, do full lookup. */
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
if (flow)
- del->skb_hash = skb_hash;
+ ce->skb_hash = skb_hash;
return flow;
}
@@ -615,9 +627,8 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
-
u32 __always_unused n_mask_hit;
- u32 __always_unused index;
+ u32 index = 0;
return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
--
1.8.3.1
* [PATCH net-next 5/9] net: openvswitch: optimize flow-mask looking up
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
A full lookup on the flow table traverses the whole mask array.
If the mask array is too large, the number of empty (invalid) mask
slots grows and performance drops.
This patch optimizes the mask-array operations (a sketch of the
swap-with-last removal follows the function list below):
* Insert: place the new mask directly at the tail of the array.
* Remove: move the last mask pointer into the removed slot, then free the removed mask.
* Lookup: the full scan stops at the first NULL mask, since the array is kept dense.
Functions that change or read the *count* of struct mask_array are
protected by ovs_lock, but flow_lookup() (which runs without the
lock) must iterate up to *max* of struct mask_array instead.
Functions protected by ovs_lock:
* tbl_mask_array_del_mask
* tbl_mask_array_add_mask
* flow_mask_find
* ovs_flow_tbl_lookup_exact
* ovs_flow_tbl_num_masks
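A toy version of the swap-with-last delete that keeps the array
dense (toy_* names are illustrative; the kernel code performs the
same stores through rcu_assign_pointer()/RCU_INIT_POINTER()):

struct toy_mask { int refs; };

static void toy_mask_array_del(struct toy_mask **masks, int *count,
			       int i)
{
	/* Move the last entry into the hole left by slot i, then
	 * clear the old tail. Lookups may now stop at the first
	 * NULL slot, and inserts always go to masks[count].
	 */
	masks[i] = masks[*count - 1];
	masks[*count - 1] = NULL;
	(*count)--;
}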
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow_table.c | 101 +++++++++++++++++++++----------------------
1 file changed, 49 insertions(+), 52 deletions(-)
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index e59fac5..5257e4a 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -546,7 +546,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
mask = rcu_dereference_ovsl(ma->masks[i]);
if (!mask)
- continue;
+ break;
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
@@ -640,15 +640,13 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
int i;
/* Always called under ovs-mutex. */
- for (i = 0; i < ma->max; i++) {
+ for (i = 0; i < ma->count; i++) {
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
u32 __always_unused n_mask_hit;
struct sw_flow_mask *mask;
struct sw_flow *flow;
mask = ovsl_dereference(ma->masks[i]);
- if (!mask)
- continue;
flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) &&
@@ -712,21 +710,31 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
-static void tbl_mask_array_delete_mask(struct mask_array *ma,
- struct sw_flow_mask *mask)
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+ struct sw_flow_mask *mask)
{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
int i;
/* Remove the deleted mask pointers from the array */
- for (i = 0; i < ma->max; i++) {
- if (mask == ovsl_dereference(ma->masks[i])) {
- RCU_INIT_POINTER(ma->masks[i], NULL);
- ma->count--;
- kfree_rcu(mask, rcu);
- return;
- }
+ for (i = 0; i < ma->count; i++) {
+ if (mask == ovsl_dereference(ma->masks[i]))
+ goto found;
}
+
BUG();
+ return;
+
+found:
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma->count - 1]);
+ RCU_INIT_POINTER(ma->masks[ma->count - 1], NULL);
+ ma->count--;
+ kfree_rcu(mask, rcu);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma->count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
}
/* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -740,17 +748,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
BUG_ON(!mask->ref_count);
mask->ref_count--;
- if (!mask->ref_count) {
- struct mask_array *ma;
-
- ma = ovsl_dereference(tbl->mask_array);
- tbl_mask_array_delete_mask(ma, mask);
-
- /* Shrink the mask array if necessary. */
- if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
- ma->count <= (ma->max / 3))
- tbl_mask_array_realloc(tbl, ma->max / 2);
- }
+ if (!mask->ref_count)
+ tbl_mask_array_del_mask(tbl, mask);
}
}
@@ -803,17 +802,38 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
int i;
ma = ovsl_dereference(tbl->mask_array);
- for (i = 0; i < ma->max; i++) {
+ for (i = 0; i < ma->count; i++) {
struct sw_flow_mask *t;
t = ovsl_dereference(ma->masks[i]);
- if (t && mask_equal(mask, t))
+ if (mask_equal(mask, t))
return t;
}
return NULL;
}
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+ struct sw_flow_mask *new)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int err;
+
+ if (ma->count >= ma->max) {
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err)
+ return err;
+ }
+
+ BUG_ON(ovsl_dereference(ma->masks[ma->count]));
+
+ rcu_assign_pointer(ma->masks[ma->count], new);
+ ma->count++;
+
+ return 0;
+}
+
/* Add 'mask' into the mask list, if it is not already there. */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new)
@@ -822,9 +842,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask = flow_mask_find(tbl, new);
if (!mask) {
- struct mask_array *ma;
- int i;
-
/* Allocate a new mask if none exsits. */
mask = mask_alloc();
if (!mask)
@@ -833,29 +850,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask->range = new->range;
/* Add mask to mask-list. */
- ma = ovsl_dereference(tbl->mask_array);
- if (ma->count >= ma->max) {
- int err;
-
- err = tbl_mask_array_realloc(tbl, ma->max +
- MASK_ARRAY_SIZE_MIN);
- if (err) {
- kfree(mask);
- return err;
- }
-
- ma = ovsl_dereference(tbl->mask_array);
- }
-
- for (i = 0; i < ma->max; i++) {
- const struct sw_flow_mask *t;
-
- t = ovsl_dereference(ma->masks[i]);
- if (!t) {
- rcu_assign_pointer(ma->masks[i], mask);
- ma->count++;
- break;
- }
+ if (tbl_mask_array_add_mask(tbl, mask)) {
+ kfree(mask);
+ return -ENOMEM;
}
} else {
BUG_ON(!mask->ref_count);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH net-next 5/9] net: openvswitch: optimize flow-mask looking up
2019-09-29 17:10 ` [PATCH net-next 5/9] net: openvswitch: optimize flow-mask looking up xiangxia.m.yue
@ 2019-10-02 2:06 ` Pravin Shelar
2019-10-08 3:15 ` Tonghao Zhang
0 siblings, 1 reply; 20+ messages in thread
From: Pravin Shelar @ 2019-10-02 2:06 UTC (permalink / raw)
To: Tonghao Zhang; +Cc: Greg Rose, Linux Kernel Network Developers
On Sun, Sep 29, 2019 at 7:09 PM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> A full lookup on the flow table traverses the whole mask array.
> If the mask array is too large, the number of invalid flow masks
> grows and performance drops.
>
> This patch optimizes the mask-array operations:
>
> * Inserting: place the new mask directly at the tail, ma->masks[ma->count].
> * Removing: swap the last mask pointer into the removed slot, clear the last slot, and free the removed mask.
> * Looking up: a full lookup breaks at the first NULL mask, since all valid masks are packed at the front.
>
> Functions that change or read *count* of struct mask_array are
> protected by ovs_lock; flow_lookup(), which runs without that lock,
> should use *max* of struct mask_array instead.
>
> Functions protected by ovs_lock:
> * tbl_mask_array_del_mask
> * tbl_mask_array_add_mask
> * flow_mask_find
> * ovs_flow_tbl_lookup_exact
> * ovs_flow_tbl_num_masks
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
> net/openvswitch/flow_table.c | 101 +++++++++++++++++++++----------------------
> 1 file changed, 49 insertions(+), 52 deletions(-)
>
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index e59fac5..5257e4a 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -546,7 +546,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>
> mask = rcu_dereference_ovsl(ma->masks[i]);
> if (!mask)
> - continue;
> + break;
>
> flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> if (flow) { /* Found */
> @@ -640,15 +640,13 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
> int i;
>
> /* Always called under ovs-mutex. */
> - for (i = 0; i < ma->max; i++) {
> + for (i = 0; i < ma->count; i++) {
> struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> u32 __always_unused n_mask_hit;
> struct sw_flow_mask *mask;
> struct sw_flow *flow;
>
> mask = ovsl_dereference(ma->masks[i]);
> - if (!mask)
> - continue;
>
> flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
> if (flow && ovs_identifier_is_key(&flow->id) &&
> @@ -712,21 +710,31 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
> return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> }
>
> -static void tbl_mask_array_delete_mask(struct mask_array *ma,
> - struct sw_flow_mask *mask)
> +static void tbl_mask_array_del_mask(struct flow_table *tbl,
> + struct sw_flow_mask *mask)
> {
> + struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> int i;
>
> /* Remove the deleted mask pointers from the array */
> - for (i = 0; i < ma->max; i++) {
> - if (mask == ovsl_dereference(ma->masks[i])) {
> - RCU_INIT_POINTER(ma->masks[i], NULL);
> - ma->count--;
> - kfree_rcu(mask, rcu);
> - return;
> - }
> + for (i = 0; i < ma->count; i++) {
> + if (mask == ovsl_dereference(ma->masks[i]))
> + goto found;
> }
> +
> BUG();
> + return;
> +
> +found:
> + rcu_assign_pointer(ma->masks[i], ma->masks[ma->count -1]);
> + RCU_INIT_POINTER(ma->masks[ma->count -1], NULL);
> + ma->count--;
> + kfree_rcu(mask, rcu);
> +
> + /* Shrink the mask array if necessary. */
> + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
> + ma->count <= (ma->max / 3))
> + tbl_mask_array_realloc(tbl, ma->max / 2);
> }
>
> /* Remove 'mask' from the mask list, if it is not needed any more. */
> @@ -740,17 +748,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
> BUG_ON(!mask->ref_count);
> mask->ref_count--;
>
> - if (!mask->ref_count) {
> - struct mask_array *ma;
> -
> - ma = ovsl_dereference(tbl->mask_array);
> - tbl_mask_array_delete_mask(ma, mask);
> -
> - /* Shrink the mask array if necessary. */
> - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
> - ma->count <= (ma->max / 3))
> - tbl_mask_array_realloc(tbl, ma->max / 2);
> - }
> + if (!mask->ref_count)
> + tbl_mask_array_del_mask(tbl, mask);
> }
> }
>
> @@ -803,17 +802,38 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
> int i;
>
> ma = ovsl_dereference(tbl->mask_array);
> - for (i = 0; i < ma->max; i++) {
> + for (i = 0; i < ma->count; i++) {
> struct sw_flow_mask *t;
> t = ovsl_dereference(ma->masks[i]);
>
> - if (t && mask_equal(mask, t))
> + if (mask_equal(mask, t))
> return t;
> }
>
> return NULL;
> }
>
> +static int tbl_mask_array_add_mask(struct flow_table *tbl,
> + struct sw_flow_mask *new)
> +{
> + struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> + int err;
> +
> + if (ma->count >= ma->max) {
> + err = tbl_mask_array_realloc(tbl, ma->max +
> + MASK_ARRAY_SIZE_MIN);
> + if (err)
> + return err;
> + }
> +
> + BUG_ON(ovsl_dereference(ma->masks[ma->count]));
> +
> + rcu_assign_pointer(ma->masks[ma->count], new);
> + ma->count++;
There is no barrier for ma->count. The fast path accesses ma->count
without synchronization. The mask delete operation has the same issue.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH net-next 5/9] net: openvswitch: optimize flow-mask looking up
2019-10-02 2:06 ` Pravin Shelar
@ 2019-10-08 3:15 ` Tonghao Zhang
0 siblings, 0 replies; 20+ messages in thread
From: Tonghao Zhang @ 2019-10-08 3:15 UTC (permalink / raw)
To: Pravin Shelar; +Cc: Greg Rose, Linux Kernel Network Developers
On Wed, Oct 2, 2019 at 10:03 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Sun, Sep 29, 2019 at 7:09 PM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > A full lookup on the flow table traverses the whole mask array.
> > If the mask array is too large, the number of invalid flow masks
> > grows and performance drops.
> >
> > This patch optimizes the mask-array operations:
> >
> > * Inserting: place the new mask directly at the tail, ma->masks[ma->count].
> > * Removing: swap the last mask pointer into the removed slot, clear the last slot, and free the removed mask.
> > * Looking up: a full lookup breaks at the first NULL mask, since all valid masks are packed at the front.
> >
> > Functions that change or read *count* of struct mask_array are
> > protected by ovs_lock; flow_lookup(), which runs without that lock,
> > should use *max* of struct mask_array instead.
> >
> > Functions protected by ovs_lock:
> > * tbl_mask_array_del_mask
> > * tbl_mask_array_add_mask
> > * flow_mask_find
> > * ovs_flow_tbl_lookup_exact
> > * ovs_flow_tbl_num_masks
> >
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> > net/openvswitch/flow_table.c | 101 +++++++++++++++++++++----------------------
> > 1 file changed, 49 insertions(+), 52 deletions(-)
> >
> > diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> > index e59fac5..5257e4a 100644
> > --- a/net/openvswitch/flow_table.c
> > +++ b/net/openvswitch/flow_table.c
> > @@ -546,7 +546,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> >
> > mask = rcu_dereference_ovsl(ma->masks[i]);
> > if (!mask)
> > - continue;
> > + break;
> >
> > flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> > if (flow) { /* Found */
> > @@ -640,15 +640,13 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
> > int i;
> >
> > /* Always called under ovs-mutex. */
> > - for (i = 0; i < ma->max; i++) {
> > + for (i = 0; i < ma->count; i++) {
> > struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > u32 __always_unused n_mask_hit;
> > struct sw_flow_mask *mask;
> > struct sw_flow *flow;
> >
> > mask = ovsl_dereference(ma->masks[i]);
> > - if (!mask)
> > - continue;
> >
> > flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
> > if (flow && ovs_identifier_is_key(&flow->id) &&
> > @@ -712,21 +710,31 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
> > return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> > }
> >
> > -static void tbl_mask_array_delete_mask(struct mask_array *ma,
> > - struct sw_flow_mask *mask)
> > +static void tbl_mask_array_del_mask(struct flow_table *tbl,
> > + struct sw_flow_mask *mask)
> > {
> > + struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> > int i;
> >
> > /* Remove the deleted mask pointers from the array */
> > - for (i = 0; i < ma->max; i++) {
> > - if (mask == ovsl_dereference(ma->masks[i])) {
> > - RCU_INIT_POINTER(ma->masks[i], NULL);
> > - ma->count--;
> > - kfree_rcu(mask, rcu);
> > - return;
> > - }
> > + for (i = 0; i < ma->count; i++) {
> > + if (mask == ovsl_dereference(ma->masks[i]))
> > + goto found;
> > }
> > +
> > BUG();
> > + return;
> > +
> > +found:
> > + rcu_assign_pointer(ma->masks[i], ma->masks[ma->count -1]);
> > + RCU_INIT_POINTER(ma->masks[ma->count -1], NULL);
> > + ma->count--;
> > + kfree_rcu(mask, rcu);
> > +
> > + /* Shrink the mask array if necessary. */
> > + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
> > + ma->count <= (ma->max / 3))
> > + tbl_mask_array_realloc(tbl, ma->max / 2);
> > }
> >
> > /* Remove 'mask' from the mask list, if it is not needed any more. */
> > @@ -740,17 +748,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
> > BUG_ON(!mask->ref_count);
> > mask->ref_count--;
> >
> > - if (!mask->ref_count) {
> > - struct mask_array *ma;
> > -
> > - ma = ovsl_dereference(tbl->mask_array);
> > - tbl_mask_array_delete_mask(ma, mask);
> > -
> > - /* Shrink the mask array if necessary. */
> > - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
> > - ma->count <= (ma->max / 3))
> > - tbl_mask_array_realloc(tbl, ma->max / 2);
> > - }
> > + if (!mask->ref_count)
> > + tbl_mask_array_del_mask(tbl, mask);
> > }
> > }
> >
> > @@ -803,17 +802,38 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
> > int i;
> >
> > ma = ovsl_dereference(tbl->mask_array);
> > - for (i = 0; i < ma->max; i++) {
> > + for (i = 0; i < ma->count; i++) {
> > struct sw_flow_mask *t;
> > t = ovsl_dereference(ma->masks[i]);
> >
> > - if (t && mask_equal(mask, t))
> > + if (mask_equal(mask, t))
> > return t;
> > }
> >
> > return NULL;
> > }
> >
> > +static int tbl_mask_array_add_mask(struct flow_table *tbl,
> > + struct sw_flow_mask *new)
> > +{
> > + struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> > + int err;
> > +
> > + if (ma->count >= ma->max) {
> > + err = tbl_mask_array_realloc(tbl, ma->max +
> > + MASK_ARRAY_SIZE_MIN);
> > + if (err)
> > + return err;
> > + }
> > +
> > + BUG_ON(ovsl_dereference(ma->masks[ma->count]));
> > +
> > + rcu_assign_pointer(ma->masks[ma->count], new);
> > + ma->count++;
> There is no barrier for ma->count. The fast path accesses ma->count
> without synchronization. The mask delete operation has the same issue.
A barrier will be added in v2.
In the fast path, we use ma->count and check that ma->masks[*index] is
valid; if it is NULL we break. So we can access ma->count without
synchronization in the fast path.
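As a hedged sketch (not necessarily what v2 will do), the usual pairing
would publish the pointer before the count on the write side and read the
count once on the read side, with the NULL check absorbing any staleness:

	/* Writer, under ovs_lock (sketch only): */
	rcu_assign_pointer(ma->masks[ma->count], new);
	smp_wmb();	/* order the pointer store before the count store */
	WRITE_ONCE(ma->count, ma->count + 1);

	/* Reader, fast path (sketch only): */
	count = READ_ONCE(ma->count);
	for (i = 0; i < count; i++) {
		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (!mask)	/* a concurrent delete repacked the array */
			break;
		/* ... masked lookup ... */
	}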
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH net-next 6/9] net: openvswitch: simplify the flow_hash
2019-09-29 17:09 [PATCH net-next 0/9] optimize openvswitch flow looking up xiangxia.m.yue
` (4 preceding siblings ...)
2019-09-29 17:10 ` [PATCH net-next 5/9] net: openvswitch: optimize flow-mask looking up xiangxia.m.yue
@ 2019-09-29 17:10 ` xiangxia.m.yue
2019-09-29 17:10 ` [PATCH net-next 7/9] net: openvswitch: add likely in flow_lookup xiangxia.m.yue
` (4 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC (permalink / raw)
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Simplify the code and remove the unnecessary BUILD_BUG_ON.
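For context, assuming range_n_bytes() is the usual (range->end -
range->start) helper, the simplified flow_hash() is just jhash2() over the
u32 words covered by the key range; a hypothetical standalone equivalent:

	/* Sketch only: hash the u32 words in [start, end) of the key. */
	static u32 hash_key_range(const struct sw_flow_key *key,
				  int start, int end)
	{
		const u32 *words = (const u32 *)((const u8 *)key + start);

		return jhash2(words, (end - start) >> 2, 0);
	}

The removed BUILD_BUG_ON only asserted that sizeof(long) is a multiple of
sizeof(u32), which is unrelated to this computation.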
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow_table.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5257e4a..c8e79c1 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -440,13 +440,9 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
- int key_start = range->start;
- int key_end = range->end;
- const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
-
+ const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
/* Make sure number of hash bytes are multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+ int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0);
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH net-next 7/9] net: openvswitch: add likely in flow_lookup
2019-09-29 17:09 [PATCH net-next 0/9] optimize openvswitch flow looking up xiangxia.m.yue
` (5 preceding siblings ...)
2019-09-29 17:10 ` [PATCH net-next 6/9] net: openvswitch: simplify the flow_hash xiangxia.m.yue
@ 2019-09-29 17:10 ` xiangxia.m.yue
2019-10-02 2:11 ` Pravin Shelar
2019-09-29 17:10 ` [PATCH net-next 8/9] net: openvswitch: fix possible memleak on destroy flow table xiangxia.m.yue
` (3 subsequent siblings)
10 siblings, 1 reply; 20+ messages in thread
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC (permalink / raw)
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
In most cases *index < ma->max, so add likely() for performance.
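For reference, likely() is the kernel's branch-prediction hint; ignoring
the profiling variants it boils down to:

	#define likely(x)	__builtin_expect(!!(x), 1)
	#define unlikely(x)	__builtin_expect(!!(x), 0)

so the compiler lays out the predicted-true path as the straight-line
fall-through.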
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow_table.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c8e79c1..c21fd52 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -526,7 +526,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct sw_flow_mask *mask;
int i;
- if (*index < ma->max) {
+ if (likely(*index < ma->max)) {
mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH net-next 7/9] net: openvswitch: add likely in flow_lookup
2019-09-29 17:10 ` [PATCH net-next 7/9] net: openvswitch: add likely in flow_lookup xiangxia.m.yue
@ 2019-10-02 2:11 ` Pravin Shelar
2019-10-08 3:21 ` Tonghao Zhang
0 siblings, 1 reply; 20+ messages in thread
From: Pravin Shelar @ 2019-10-02 2:11 UTC (permalink / raw)
To: Tonghao Zhang; +Cc: Greg Rose, Linux Kernel Network Developers
On Sun, Sep 29, 2019 at 7:09 PM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> In most cases *index < ma->max, so add likely() for performance.
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
> net/openvswitch/flow_table.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index c8e79c1..c21fd52 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -526,7 +526,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> struct sw_flow_mask *mask;
> int i;
>
> - if (*index < ma->max) {
> + if (likely(*index < ma->max)) {
After changes from patch 5, ma->count is the limit for mask array. so
why not use ma->count here.
> mask = rcu_dereference_ovsl(ma->masks[*index]);
> if (mask) {
> flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> --
> 1.8.3.1
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH net-next 7/9] net: openvswitch: add likely in flow_lookup
2019-10-02 2:11 ` Pravin Shelar
@ 2019-10-08 3:21 ` Tonghao Zhang
0 siblings, 0 replies; 20+ messages in thread
From: Tonghao Zhang @ 2019-10-08 3:21 UTC (permalink / raw)
To: Pravin Shelar; +Cc: Greg Rose, Linux Kernel Network Developers
On Wed, Oct 2, 2019 at 10:07 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Sun, Sep 29, 2019 at 7:09 PM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > In most cases *index < ma->max, so add likely() for performance.
> >
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> > net/openvswitch/flow_table.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> > index c8e79c1..c21fd52 100644
> > --- a/net/openvswitch/flow_table.c
> > +++ b/net/openvswitch/flow_table.c
> > @@ -526,7 +526,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> > struct sw_flow_mask *mask;
> > int i;
> >
> > - if (*index < ma->max) {
> > + if (likely(*index < ma->max)) {
>
> After changes from patch 5, ma->count is the limit for mask array. so
> why not use ma->count here.
Because we also check that the mask is valid, using either ma->count or
ma->max is OK here,
but I will use ma->count in v2.
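A hedged sketch of why either bound is safe for the cached-index path:
the slot is validated before use, so a stale index at worst costs one
wasted check (simplified from flow_lookup(), not a verbatim copy):

	if (likely(*index < ma->count)) {	/* ma->max also works */
		mask = rcu_dereference_ovsl(ma->masks[*index]);
		if (mask) {	/* NULL means the slot has been repacked */
			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
			if (flow)
				return flow;
		}
	}
	/* Otherwise fall through to scanning the whole array. */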
>
> > mask = rcu_dereference_ovsl(ma->masks[*index]);
> > if (mask) {
> > flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> > --
> > 1.8.3.1
> >
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH net-next 8/9] net: openvswitch: fix possible memleak on destroy flow table
2019-09-29 17:09 [PATCH net-next 0/9] optimize openvswitch flow looking up xiangxia.m.yue
` (6 preceding siblings ...)
2019-09-29 17:10 ` [PATCH net-next 7/9] net: openvswitch: add likely in flow_lookup xiangxia.m.yue
@ 2019-09-29 17:10 ` xiangxia.m.yue
2019-09-29 17:10 ` [PATCH net-next 9/9] net: openvswitch: simplify the ovs_dp_cmd_new xiangxia.m.yue
` (2 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC (permalink / raw)
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
When we destroy a flow table it may still contain flow masks,
so release the flow-mask structs as well.
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/flow_table.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c21fd52..a2e0cb1 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -218,6 +218,18 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
return 0;
}
+static void tbl_mask_array_destroy(struct flow_table *tbl)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i;
+
+ /* Free the flow-mask and kfree_rcu the NULL is allowed. */
+ for (i = 0; i < ma->count; i++)
+ kfree_rcu(ma->masks[i], rcu);
+
+ kfree_rcu(tbl->mask_array, rcu);
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
@@ -312,7 +324,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
free_percpu(table->mask_cache);
- kfree(rcu_dereference_raw(table->mask_array));
+ tbl_mask_array_destroy(table);
table_instance_destroy(ti, ufid_ti, false);
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH net-next 9/9] net: openvswitch: simplify the ovs_dp_cmd_new
2019-09-29 17:09 [PATCH net-next 0/9] optimize openvswitch flow looking up xiangxia.m.yue
` (7 preceding siblings ...)
2019-09-29 17:10 ` [PATCH net-next 8/9] net: openvswitch: fix possible memleak on destroy flow table xiangxia.m.yue
@ 2019-09-29 17:10 ` xiangxia.m.yue
2019-10-01 8:53 ` [PATCH net-next 0/9] optimize openvswitch flow looking up Eelco Chaudron
2019-10-01 21:58 ` David Miller
10 siblings, 0 replies; 20+ messages in thread
From: xiangxia.m.yue @ 2019-09-29 17:10 UTC (permalink / raw)
To: gvrose8192, pshelar; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Use dedicated helper functions to initialize each resource.
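The refactor keeps the kernel's usual goto-based unwind in
ovs_dp_cmd_new(); as a generic sketch of that pattern (hypothetical
names):

	struct thing;
	int init_a(struct thing *t);
	void destroy_a(struct thing *t);
	int init_b(struct thing *t);

	static int setup(struct thing *t)
	{
		int err;

		err = init_a(t);
		if (err)
			return err;

		err = init_b(t);
		if (err)
			goto undo_a;	/* unwind in reverse order */

		return 0;

	undo_a:
		destroy_a(t);
		return err;
	}

Each helper owns exactly one resource, so every error label undoes only
what was already set up.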
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
net/openvswitch/datapath.c | 60 +++++++++++++++++++++++++++++-----------------
1 file changed, 38 insertions(+), 22 deletions(-)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 3d7b1c4..78b442e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1576,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return 0;
}
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
+}
+
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1584,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1597,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1675,15 +1691,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err_destroy_meters:
ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH net-next 0/9] optimize openvswitch flow looking up
2019-09-29 17:09 [PATCH net-next 0/9] optimize openvswitch flow looking up xiangxia.m.yue
` (8 preceding siblings ...)
2019-09-29 17:10 ` [PATCH net-next 9/9] net: openvswitch: simplify the ovs_dp_cmd_new xiangxia.m.yue
@ 2019-10-01 8:53 ` Eelco Chaudron
2019-10-03 17:08 ` [ovs-dev] " William Tu
2019-10-01 21:58 ` David Miller
10 siblings, 1 reply; 20+ messages in thread
From: Eelco Chaudron @ 2019-10-01 8:53 UTC (permalink / raw)
To: xiangxia.m.yue, ovs dev; +Cc: gvrose8192, pshelar, netdev
Interesting performance gain, copied in the OVS development mailing
list.
//Eelco
On 29 Sep 2019, at 19:09, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patch series optimizes openvswitch.
>
> Patches 1, 2, 4: port Pravin B Shelar's patches to
> upstream Linux with small changes.
>
> Patches 5, 6, 7: optimize the flow lookup and
> simplify the flow hash.
>
> Patch 8 is a bugfix.
>
> The performance test is on Intel Xeon E5-2630 v4.
> The test topology is shown below:
>
> +-----------------------------------+
> | +---------------------------+ |
> | | eth0 ovs-switch eth1 | | Host0
> | +---------------------------+ |
> +-----------------------------------+
> ^ |
> | |
> | |
> | |
> | v
> +-----+----+ +----+-----+
> | netperf | Host1 | netserver| Host2
> +----------+ +----------+
>
> We use netperf to send 64B frames, and insert 255+ flow masks:
> $ ovs-dpctl add-flow ovs-switch
> "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)"
> 2
> ...
> $ ovs-dpctl add-flow ovs-switch
> "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
> 2
> $ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18
>
> * Without series patch, throughput 8.28Mbps
> * With series patch, throughput 46.05Mbps
>
> Tonghao Zhang (9):
> net: openvswitch: add flow-mask cache for performance
> net: openvswitch: convert mask list in mask array
> net: openvswitch: shrink the mask array if necessary
> net: openvswitch: optimize flow mask cache hash collision
> net: openvswitch: optimize flow-mask looking up
> net: openvswitch: simplify the flow_hash
> net: openvswitch: add likely in flow_lookup
> net: openvswitch: fix possible memleak on destroy flow table
> net: openvswitch: simplify the ovs_dp_cmd_new
>
> net/openvswitch/datapath.c | 63 +++++----
> net/openvswitch/flow.h | 1 -
> net/openvswitch/flow_table.c | 318
> +++++++++++++++++++++++++++++++++++++------
> net/openvswitch/flow_table.h | 19 ++-
> 4 files changed, 330 insertions(+), 71 deletions(-)
>
> --
> 1.8.3.1
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [ovs-dev] [PATCH net-next 0/9] optimize openvswitch flow looking up
2019-10-01 8:53 ` [PATCH net-next 0/9] optimize openvswitch flow looking up Eelco Chaudron
@ 2019-10-03 17:08 ` William Tu
2019-10-08 1:41 ` Tonghao Zhang
0 siblings, 1 reply; 20+ messages in thread
From: William Tu @ 2019-10-03 17:08 UTC (permalink / raw)
To: xiangxia.m.yue
Cc: ovs dev, Linux Kernel Network Developers, David Miller,
Greg Rose, Eelco Chaudron, pravin shelar
Hi Tonghao,
Thanks for the patch.
> On 29 Sep 2019, at 19:09, xiangxia.m.yue@gmail.com wrote:
>
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > This patch series optimizes openvswitch.
> >
> > Patches 1, 2, 4: port Pravin B Shelar's patches to
> > upstream Linux with small changes.
> >
I thought the idea of adding another cache before the flow mask
was rejected before, due to all the potential issues with caches, e.g.:
the cache is exploitable, and performance still suffers when your cache
is full. See David's slides below:
[1] http://vger.kernel.org/~davem/columbia2012.pdf
Do you have a rough number about how many flows this flow mask
cache can handle?
> > Patches 5, 6, 7: optimize the flow lookup and
> > simplify the flow hash.
I think this is great.
I wonder what the performance improvement is when the flow-mask
cache is full?
Thanks
William
> >
> > Patch 8 is a bugfix.
> >
> > The performance test is on Intel Xeon E5-2630 v4.
> > The test topology is shown below:
> >
> > +-----------------------------------+
> > | +---------------------------+ |
> > | | eth0 ovs-switch eth1 | | Host0
> > | +---------------------------+ |
> > +-----------------------------------+
> > ^ |
> > | |
> > | |
> > | |
> > | v
> > +-----+----+ +----+-----+
> > | netperf | Host1 | netserver| Host2
> > +----------+ +----------+
> >
> > We use netperf to send 64B frames, and insert 255+ flow masks:
> > $ ovs-dpctl add-flow ovs-switch
> > "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)"
> > 2
> > ...
> > $ ovs-dpctl add-flow ovs-switch
> > "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
> > 2
> > $ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18
> >
> > * Without series patch, throughput 8.28Mbps
> > * With series patch, throughput 46.05Mbps
> >
> > Tonghao Zhang (9):
> > net: openvswitch: add flow-mask cache for performance
> > net: openvswitch: convert mask list in mask array
> > net: openvswitch: shrink the mask array if necessary
> > net: openvswitch: optimize flow mask cache hash collision
> > net: openvswitch: optimize flow-mask looking up
> > net: openvswitch: simplify the flow_hash
> > net: openvswitch: add likely in flow_lookup
> > net: openvswitch: fix possible memleak on destroy flow table
> > net: openvswitch: simplify the ovs_dp_cmd_new
> >
> > net/openvswitch/datapath.c | 63 +++++----
> > net/openvswitch/flow.h | 1 -
> > net/openvswitch/flow_table.c | 318
> > +++++++++++++++++++++++++++++++++++++------
> > net/openvswitch/flow_table.h | 19 ++-
> > 4 files changed, 330 insertions(+), 71 deletions(-)
> >
> > --
> > 1.8.3.1
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [ovs-dev] [PATCH net-next 0/9] optimize openvswitch flow looking up
2019-10-03 17:08 ` [ovs-dev] " William Tu
@ 2019-10-08 1:41 ` Tonghao Zhang
0 siblings, 0 replies; 20+ messages in thread
From: Tonghao Zhang @ 2019-10-08 1:41 UTC (permalink / raw)
To: William Tu
Cc: ovs dev, Linux Kernel Network Developers, David Miller,
Greg Rose, Eelco Chaudron, pravin shelar
On Fri, Oct 4, 2019 at 1:09 AM William Tu <u9012063@gmail.com> wrote:
>
> Hi Tonghao,
>
> Thanks for the patch.
>
> > On 29 Sep 2019, at 19:09, xiangxia.m.yue@gmail.com wrote:
> >
> > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >
> > > This patch series optimizes openvswitch.
> > >
> > > Patches 1, 2, 4: port Pravin B Shelar's patches to
> > > upstream Linux with small changes.
> > >
>
> I thought the idea of adding another cache before the flow mask
> was rejected before, due to all the potential issues with caches, e.g.:
> the cache is exploitable, and performance still suffers when your cache
> is full. See David's slides below:
> [1] http://vger.kernel.org/~davem/columbia2012.pdf
>
> Do you have a rough number about how many flows this flow mask
> cache can handle?
Now we can cache 256 flows per CPU, so with 40 CPUs, 256 * 40 (10240)
flows will be cached.
The number of cache entries is defined by the MC_HASH_ENTRIES macro;
we can change that value for different use cases and CPU L1d cache
sizes.
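As a rough footprint check (assuming each cache entry is two u32s, the
skb hash and a mask index, as in patch 1), a hypothetical userspace
back-of-the-envelope:

	#include <stdint.h>
	#include <stdio.h>

	struct mask_cache_entry {	/* assumed 8-byte layout */
		uint32_t skb_hash;
		uint32_t mask_index;
	};

	int main(void)
	{
		unsigned int entries = 1u << 8;	/* MC_HASH_ENTRIES */

		printf("%zu bytes per CPU\n",
		       entries * sizeof(struct mask_cache_entry));
		return 0;
	}

That is 2 KiB per CPU, which fits comfortably in L1d.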
> > > Patches 5, 6, 7: optimize the flow lookup and
> > > simplify the flow hash.
>
> I think this is great.
> I wonder what the performance improvement is when the flow-mask
> cache is full?
I will test that case; I think this feature should work well with RSS
and IRQ affinity.
> Thanks
> William
>
> > >
> > > Patch 8 is a bugfix.
> > >
> > > The performance test is on Intel Xeon E5-2630 v4.
> > > The test topology is shown below:
> > >
> > > +-----------------------------------+
> > > | +---------------------------+ |
> > > | | eth0 ovs-switch eth1 | | Host0
> > > | +---------------------------+ |
> > > +-----------------------------------+
> > > ^ |
> > > | |
> > > | |
> > > | |
> > > | v
> > > +-----+----+ +----+-----+
> > > | netperf | Host1 | netserver| Host2
> > > +----------+ +----------+
> > >
> > > We use netperf to send 64B frames, and insert 255+ flow masks:
> > > $ ovs-dpctl add-flow ovs-switch
> > > "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)"
> > > 2
> > > ...
> > > $ ovs-dpctl add-flow ovs-switch
> > > "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
> > > 2
> > > $ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18
> > >
> > > * Without series patch, throughput 8.28Mbps
> > > * With series patch, throughput 46.05Mbps
> > >
> > > Tonghao Zhang (9):
> > > net: openvswitch: add flow-mask cache for performance
> > > net: openvswitch: convert mask list in mask array
> > > net: openvswitch: shrink the mask array if necessary
> > > net: openvswitch: optimize flow mask cache hash collision
> > > net: openvswitch: optimize flow-mask looking up
> > > net: openvswitch: simplify the flow_hash
> > > net: openvswitch: add likely in flow_lookup
> > > net: openvswitch: fix possible memleak on destroy flow table
> > > net: openvswitch: simplify the ovs_dp_cmd_new
> > >
> > > net/openvswitch/datapath.c | 63 +++++----
> > > net/openvswitch/flow.h | 1 -
> > > net/openvswitch/flow_table.c | 318
> > > +++++++++++++++++++++++++++++++++++++------
> > > net/openvswitch/flow_table.h | 19 ++-
> > > 4 files changed, 330 insertions(+), 71 deletions(-)
> > >
> > > --
> > > 1.8.3.1
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH net-next 0/9] optimize openvswitch flow looking up
2019-09-29 17:09 [PATCH net-next 0/9] optimize openvswitch flow looking up xiangxia.m.yue
` (9 preceding siblings ...)
2019-10-01 8:53 ` [PATCH net-next 0/9] optimize openvswitch flow looking up Eelco Chaudron
@ 2019-10-01 21:58 ` David Miller
10 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2019-10-01 21:58 UTC (permalink / raw)
To: xiangxia.m.yue; +Cc: gvrose8192, pshelar, netdev
From: xiangxia.m.yue@gmail.com
Date: Mon, 30 Sep 2019 01:09:57 +0800
> This patch series optimizes openvswitch.
Someone please review this series.
Thank you.
^ permalink raw reply [flat|nested] 20+ messages in thread