All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v3 0/2] net: openvswitch: masks cache enhancements
@ 2020-07-29 13:25 Eelco Chaudron
  2020-07-29 13:26 ` [PATCH net-next v3 1/2] net: openvswitch: add masks cache hit counter Eelco Chaudron
  2020-07-29 13:27 ` [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable Eelco Chaudron
  0 siblings, 2 replies; 9+ messages in thread
From: Eelco Chaudron @ 2020-07-29 13:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, dev, kuba, pabeni, pshelar, fw, xiangxia.m.yue

This patchset adds two enhancements to the Open vSwitch masks cache.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

Changes in v3 [patch 2/2 only]:
 - Use is_power_of_2() function
 - Use array_size() function
 - Fix remaining sparse errors

Changes in v2 [patch 2/2 only]:
 - Fix sparse warnings
 - Fix netlink policy items reported by Florian Westphal

Eelco Chaudron (2):
      net: openvswitch: add masks cache hit counter
      net: openvswitch: make masks cache size configurable


 include/uapi/linux/openvswitch.h |    3 +
 net/openvswitch/datapath.c       |   19 ++++++
 net/openvswitch/datapath.h       |    3 +
 net/openvswitch/flow_table.c     |  116 ++++++++++++++++++++++++++++++++------
 net/openvswitch/flow_table.h     |   13 ++++
 5 files changed, 132 insertions(+), 22 deletions(-)


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net-next v3 1/2] net: openvswitch: add masks cache hit counter
  2020-07-29 13:25 [PATCH net-next v3 0/2] net: openvswitch: masks cache enhancements Eelco Chaudron
@ 2020-07-29 13:26 ` Eelco Chaudron
  2020-07-29 13:27 ` [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable Eelco Chaudron
  1 sibling, 0 replies; 9+ messages in thread
From: Eelco Chaudron @ 2020-07-29 13:26 UTC (permalink / raw)
  To: netdev; +Cc: davem, dev, kuba, pabeni, pshelar, fw, xiangxia.m.yue

Add a counter that counts the number of masks cache hits, and
export it through the megaflow netlink statistics.

Reviewed-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 include/uapi/linux/openvswitch.h |    2 +-
 net/openvswitch/datapath.c       |    5 ++++-
 net/openvswitch/datapath.h       |    3 +++
 net/openvswitch/flow_table.c     |   19 ++++++++++++++-----
 net/openvswitch/flow_table.h     |    3 ++-
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 9b14519e74d9..7cb76e5ca7cf 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -102,8 +102,8 @@ struct ovs_dp_megaflow_stats {
 	__u64 n_mask_hit;	 /* Number of masks used for flow lookups. */
 	__u32 n_masks;		 /* Number of masks for the datapath. */
 	__u32 pad0;		 /* Pad for future expension. */
+	__u64 n_cache_hit;       /* Number of cache matches for flow lookups. */
 	__u64 pad1;		 /* Pad for future expension. */
-	__u64 pad2;		 /* Pad for future expension. */
 };
 
 struct ovs_vport_stats {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 95805f0e27bd..a54df1fe3ec4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -225,13 +225,14 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 	struct dp_stats_percpu *stats;
 	u64 *stats_counter;
 	u32 n_mask_hit;
+	u32 n_cache_hit;
 	int error;
 
 	stats = this_cpu_ptr(dp->stats_percpu);
 
 	/* Look up flow. */
 	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
-					 &n_mask_hit);
+					 &n_mask_hit, &n_cache_hit);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
 
@@ -262,6 +263,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 	u64_stats_update_begin(&stats->syncp);
 	(*stats_counter)++;
 	stats->n_mask_hit += n_mask_hit;
+	stats->n_cache_hit += n_cache_hit;
 	u64_stats_update_end(&stats->syncp);
 }
 
@@ -699,6 +701,7 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
 		stats->n_missed += local_stats.n_missed;
 		stats->n_lost += local_stats.n_lost;
 		mega_stats->n_mask_hit += local_stats.n_mask_hit;
+		mega_stats->n_cache_hit += local_stats.n_cache_hit;
 	}
 }
 
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 697a2354194b..86d78613edb4 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -38,12 +38,15 @@
  * @n_mask_hit: Number of masks looked up for flow match.
  *   @n_mask_hit / (@n_hit + @n_missed)  will be the average masks looked
  *   up per packet.
+ * @n_cache_hit: The number of received packets that had their mask found using
+ * the mask cache.
  */
 struct dp_stats_percpu {
 	u64 n_hit;
 	u64 n_missed;
 	u64 n_lost;
 	u64 n_mask_hit;
+	u64 n_cache_hit;
 	struct u64_stats_sync syncp;
 };
 
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index af22c9ee28dd..a5912ea05352 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -667,6 +667,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 				   struct mask_array *ma,
 				   const struct sw_flow_key *key,
 				   u32 *n_mask_hit,
+				   u32 *n_cache_hit,
 				   u32 *index)
 {
 	u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
@@ -682,6 +683,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 				u64_stats_update_begin(&ma->syncp);
 				usage_counters[*index]++;
 				u64_stats_update_end(&ma->syncp);
+				(*n_cache_hit)++;
 				return flow;
 			}
 		}
@@ -719,7 +721,8 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 					  const struct sw_flow_key *key,
 					  u32 skb_hash,
-					  u32 *n_mask_hit)
+					  u32 *n_mask_hit,
+					  u32 *n_cache_hit)
 {
 	struct mask_array *ma = rcu_dereference(tbl->mask_array);
 	struct table_instance *ti = rcu_dereference(tbl->ti);
@@ -729,10 +732,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 	int seg;
 
 	*n_mask_hit = 0;
+	*n_cache_hit = 0;
 	if (unlikely(!skb_hash)) {
 		u32 mask_index = 0;
+		u32 cache = 0;
 
-		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache,
+				   &mask_index);
 	}
 
 	/* Pre and post recirulation flows usually have the same skb_hash
@@ -753,7 +759,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 		e = &entries[index];
 		if (e->skb_hash == skb_hash) {
 			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
-					   &e->mask_index);
+					   n_cache_hit, &e->mask_index);
 			if (!flow)
 				e->skb_hash = 0;
 			return flow;
@@ -766,10 +772,12 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 	}
 
 	/* Cache miss, do full lookup. */
-	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit,
+			   &ce->mask_index);
 	if (flow)
 		ce->skb_hash = skb_hash;
 
+	*n_cache_hit = 0;
 	return flow;
 }
 
@@ -779,9 +787,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
 	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
 	u32 __always_unused n_mask_hit;
+	u32 __always_unused n_cache_hit;
 	u32 index = 0;
 
-	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
+	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
 }
 
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 1f664b050e3b..325e939371d8 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -82,7 +82,8 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
 struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
 					  const struct sw_flow_key *,
 					  u32 skb_hash,
-					  u32 *n_mask_hit);
+					  u32 *n_mask_hit,
+					  u32 *n_cache_hit);
 struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
 				    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-29 13:25 [PATCH net-next v3 0/2] net: openvswitch: masks cache enhancements Eelco Chaudron
  2020-07-29 13:26 ` [PATCH net-next v3 1/2] net: openvswitch: add masks cache hit counter Eelco Chaudron
@ 2020-07-29 13:27 ` Eelco Chaudron
  2020-07-30  5:07   ` Tonghao Zhang
  1 sibling, 1 reply; 9+ messages in thread
From: Eelco Chaudron @ 2020-07-29 13:27 UTC (permalink / raw)
  To: netdev; +Cc: davem, dev, kuba, pabeni, pshelar, fw, xiangxia.m.yue

This patch makes the masks cache size configurable, or with
a size of 0, disable it.

Reviewed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
Changes in v3:
 - Use is_power_of_2() function
 - Use array_size() function
 - Fix remaining sparse errors

Changes in v2:
 - Fix sparse warnings
 - Fix netlink policy items reported by Florian Westphal

 include/uapi/linux/openvswitch.h |    1 
 net/openvswitch/datapath.c       |   14 +++++
 net/openvswitch/flow_table.c     |   97 +++++++++++++++++++++++++++++++++-----
 net/openvswitch/flow_table.h     |   10 ++++
 4 files changed, 108 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 7cb76e5ca7cf..8300cc29dec8 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -86,6 +86,7 @@ enum ovs_datapath_attr {
 	OVS_DP_ATTR_MEGAFLOW_STATS,	/* struct ovs_dp_megaflow_stats */
 	OVS_DP_ATTR_USER_FEATURES,	/* OVS_DP_F_*  */
 	OVS_DP_ATTR_PAD,
+	OVS_DP_ATTR_MASKS_CACHE_SIZE,
 	__OVS_DP_ATTR_MAX
 };
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a54df1fe3ec4..114b2ddb8037 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
 
 	return msgsize;
 }
@@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
+			ovs_flow_tbl_masks_cache_size(&dp->table)))
+		goto nla_put_failure;
+
 	genlmsg_end(skb, ovs_header);
 	return 0;
 
@@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 #endif
 	}
 
+	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
+		u32 cache_size;
+
+		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
+		ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
+	}
+
 	dp->user_features = user_features;
 
 	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
@@ -1894,6 +1906,8 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
 	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
 	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
+		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
 };
 
 static const struct genl_ops dp_datapath_genl_ops[] = {
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index a5912ea05352..5280aeeef628 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -38,8 +38,8 @@
 #define MASK_ARRAY_SIZE_MIN	16
 #define REHASH_INTERVAL		(10 * 60 * HZ)
 
+#define MC_DEFAULT_HASH_ENTRIES	256
 #define MC_HASH_SHIFT		8
-#define MC_HASH_ENTRIES		(1u << MC_HASH_SHIFT)
 #define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
 
 static struct kmem_cache *flow_cache;
@@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 	}
 }
 
+static void __mask_cache_destroy(struct mask_cache *mc)
+{
+	if (mc->mask_cache)
+		free_percpu(mc->mask_cache);
+	kfree(mc);
+}
+
+static void mask_cache_rcu_cb(struct rcu_head *rcu)
+{
+	struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
+
+	__mask_cache_destroy(mc);
+}
+
+static struct mask_cache *tbl_mask_cache_alloc(u32 size)
+{
+	struct mask_cache_entry __percpu *cache = NULL;
+	struct mask_cache *new;
+
+	/* Only allow size to be 0, or a power of 2, and does not exceed
+	 * percpu allocation size.
+	 */
+	if ((!is_power_of_2(size) && size != 0) ||
+	    (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
+		return NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	new->cache_size = size;
+	if (new->cache_size > 0) {
+		cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
+						  new->cache_size),
+				       __alignof__(struct mask_cache_entry));
+		if (!cache) {
+			kfree(new);
+			return NULL;
+		}
+	}
+
+	new->mask_cache = cache;
+	return new;
+}
+
+void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
+{
+	struct mask_cache *mc = rcu_dereference(table->mask_cache);
+	struct mask_cache *new;
+
+	if (size == mc->cache_size)
+		return;
+
+	new = tbl_mask_cache_alloc(size);
+	if (!new)
+		return;
+
+	rcu_assign_pointer(table->mask_cache, new);
+	call_rcu(&mc->rcu, mask_cache_rcu_cb);
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
 	struct table_instance *ti, *ufid_ti;
+	struct mask_cache *mc;
 	struct mask_array *ma;
 
-	table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
-					   MC_HASH_ENTRIES,
-					   __alignof__(struct mask_cache_entry));
-	if (!table->mask_cache)
+	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
+	if (!mc)
 		return -ENOMEM;
 
 	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
@@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
 	rcu_assign_pointer(table->ti, ti);
 	rcu_assign_pointer(table->ufid_ti, ufid_ti);
 	rcu_assign_pointer(table->mask_array, ma);
+	rcu_assign_pointer(table->mask_cache, mc);
 	table->last_rehash = jiffies;
 	table->count = 0;
 	table->ufid_count = 0;
@@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
 free_mask_array:
 	__mask_array_destroy(ma);
 free_mask_cache:
-	free_percpu(table->mask_cache);
+	__mask_cache_destroy(mc);
 	return -ENOMEM;
 }
 
@@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 {
 	struct table_instance *ti = rcu_dereference_raw(table->ti);
 	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
+	struct mask_cache *mc = rcu_dereference(table->mask_cache);
+	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
 
-	free_percpu(table->mask_cache);
-	call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
+	call_rcu(&mc->rcu, mask_cache_rcu_cb);
+	call_rcu(&ma->rcu, mask_array_rcu_cb);
 	table_instance_destroy(table, ti, ufid_ti, false);
 }
 
@@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 					  u32 *n_mask_hit,
 					  u32 *n_cache_hit)
 {
+	struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
 	struct mask_array *ma = rcu_dereference(tbl->mask_array);
 	struct table_instance *ti = rcu_dereference(tbl->ti);
 	struct mask_cache_entry *entries, *ce;
@@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 
 	*n_mask_hit = 0;
 	*n_cache_hit = 0;
-	if (unlikely(!skb_hash)) {
+	if (unlikely(!skb_hash || mc->cache_size == 0)) {
 		u32 mask_index = 0;
 		u32 cache = 0;
 
@@ -749,11 +813,11 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
 
 	ce = NULL;
 	hash = skb_hash;
-	entries = this_cpu_ptr(tbl->mask_cache);
+	entries = this_cpu_ptr(mc->mask_cache);
 
 	/* Find the cache entry 'ce' to operate on. */
 	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
-		int index = hash & (MC_HASH_ENTRIES - 1);
+		int index = hash & (mc->cache_size - 1);
 		struct mask_cache_entry *e;
 
 		e = &entries[index];
@@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
 	return READ_ONCE(ma->count);
 }
 
+u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
+{
+	struct mask_cache *mc = rcu_dereference(table->mask_cache);
+
+	return READ_ONCE(mc->cache_size);
+}
+
 static struct table_instance *table_instance_expand(struct table_instance *ti,
 						    bool ufid)
 {
@@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
 	for (i = 0; i < masks_entries; i++) {
 		int index = masks_and_count[i].index;
 
-		new->masks[new->count++] =
-			rcu_dereference_ovsl(ma->masks[index]);
+		if (ovsl_dereference(ma->masks[index]))
+			new->masks[new->count++] = ma->masks[index];
 	}
 
 	rcu_assign_pointer(table->mask_array, new);
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 325e939371d8..f2dba952db2f 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -27,6 +27,12 @@ struct mask_cache_entry {
 	u32 mask_index;
 };
 
+struct mask_cache {
+	struct rcu_head rcu;
+	u32 cache_size;  /* Must be ^2 value. */
+	struct mask_cache_entry __percpu *mask_cache;
+};
+
 struct mask_count {
 	int index;
 	u64 counter;
@@ -53,7 +59,7 @@ struct table_instance {
 struct flow_table {
 	struct table_instance __rcu *ti;
 	struct table_instance __rcu *ufid_ti;
-	struct mask_cache_entry __percpu *mask_cache;
+	struct mask_cache __rcu *mask_cache;
 	struct mask_array __rcu *mask_array;
 	unsigned long last_rehash;
 	unsigned int count;
@@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			const struct sw_flow_mask *mask);
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
 int  ovs_flow_tbl_num_masks(const struct flow_table *table);
+u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
+void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size);
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
 				       u32 *bucket, u32 *idx);
 struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-29 13:27 ` [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable Eelco Chaudron
@ 2020-07-30  5:07   ` Tonghao Zhang
  2020-07-30 12:33     ` Eelco Chaudron
  0 siblings, 1 reply; 9+ messages in thread
From: Tonghao Zhang @ 2020-07-30  5:07 UTC (permalink / raw)
  To: Eelco Chaudron
  Cc: Linux Kernel Network Developers, David Miller, ovs dev,
	Jakub Kicinski, Paolo Abeni, Pravin Shelar, Florian Westphal

On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com> wrote:
>
> This patch makes the masks cache size configurable, or with
> a size of 0, disable it.
>
> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
> Changes in v3:
>  - Use is_power_of_2() function
>  - Use array_size() function
>  - Fix remaining sparse errors
>
> Changes in v2:
>  - Fix sparse warnings
>  - Fix netlink policy items reported by Florian Westphal
>
>  include/uapi/linux/openvswitch.h |    1
>  net/openvswitch/datapath.c       |   14 +++++
>  net/openvswitch/flow_table.c     |   97 +++++++++++++++++++++++++++++++++-----
>  net/openvswitch/flow_table.h     |   10 ++++
>  4 files changed, 108 insertions(+), 14 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 7cb76e5ca7cf..8300cc29dec8 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct ovs_dp_megaflow_stats */
>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>         OVS_DP_ATTR_PAD,
> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>         __OVS_DP_ATTR_MAX
>  };
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index a54df1fe3ec4..114b2ddb8037 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
>         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
> +       msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
>
>         return msgsize;
>  }
> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
>                 goto nla_put_failure;
>
> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
> +               goto nla_put_failure;
> +
>         genlmsg_end(skb, ovs_header);
>         return 0;
>
> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  #endif
>         }
>
> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
> +               u32 cache_size;
> +
> +               cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> +               ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
Do we should return error code, if we can't change the "mask_cache"
size ? for example, -EINVAL, -ENOMEM
> +       }
> +
>         dp->user_features = user_features;
>
>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> @@ -1894,6 +1906,8 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
> +               PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
>  };
>
>  static const struct genl_ops dp_datapath_genl_ops[] = {
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index a5912ea05352..5280aeeef628 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -38,8 +38,8 @@
>  #define MASK_ARRAY_SIZE_MIN    16
>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>
> +#define MC_DEFAULT_HASH_ENTRIES        256
>  #define MC_HASH_SHIFT          8
> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
>
>  static struct kmem_cache *flow_cache;
> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>         }
>  }
>
> +static void __mask_cache_destroy(struct mask_cache *mc)
> +{
> +       if (mc->mask_cache)
> +               free_percpu(mc->mask_cache);
free_percpu the NULL is safe. we can remove the "if".
> +       kfree(mc);
> +}
> +
> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
> +{
> +       struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
> +
> +       __mask_cache_destroy(mc);
> +}
> +
> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
> +{
> +       struct mask_cache_entry __percpu *cache = NULL;
> +       struct mask_cache *new;
> +
> +       /* Only allow size to be 0, or a power of 2, and does not exceed
> +        * percpu allocation size.
> +        */
> +       if ((!is_power_of_2(size) && size != 0) ||
> +           (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
> +               return NULL;
> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
> +       if (!new)
> +               return NULL;
> +
> +       new->cache_size = size;
> +       if (new->cache_size > 0) {
> +               cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
> +                                                 new->cache_size),
> +                                      __alignof__(struct mask_cache_entry));
> +               if (!cache) {
> +                       kfree(new);
> +                       return NULL;
> +               }
> +       }
> +
> +       new->mask_cache = cache;
> +       return new;
> +}
> +
> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
> +{
> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> +       struct mask_cache *new;
> +
> +       if (size == mc->cache_size)
> +               return;
> +
> +       new = tbl_mask_cache_alloc(size);
> +       if (!new)
> +               return;
> +
> +       rcu_assign_pointer(table->mask_cache, new);
> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> +}
> +
>  int ovs_flow_tbl_init(struct flow_table *table)
>  {
>         struct table_instance *ti, *ufid_ti;
> +       struct mask_cache *mc;
>         struct mask_array *ma;
>
> -       table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
> -                                          MC_HASH_ENTRIES,
> -                                          __alignof__(struct mask_cache_entry));
> -       if (!table->mask_cache)
> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
> +       if (!mc)
>                 return -ENOMEM;
>
>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>         rcu_assign_pointer(table->ti, ti);
>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>         rcu_assign_pointer(table->mask_array, ma);
> +       rcu_assign_pointer(table->mask_cache, mc);
>         table->last_rehash = jiffies;
>         table->count = 0;
>         table->ufid_count = 0;
> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  free_mask_array:
>         __mask_array_destroy(ma);
>  free_mask_cache:
> -       free_percpu(table->mask_cache);
> +       __mask_cache_destroy(mc);
>         return -ENOMEM;
>  }
>
> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  {
>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>         struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> +       struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
>
> -       free_percpu(table->mask_cache);
> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>         table_instance_destroy(table, ti, ufid_ti, false);
>  }
>
> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>                                           u32 *n_mask_hit,
>                                           u32 *n_cache_hit)
>  {
> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>         struct table_instance *ti = rcu_dereference(tbl->ti);
>         struct mask_cache_entry *entries, *ce;
> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>
>         *n_mask_hit = 0;
>         *n_cache_hit = 0;
> -       if (unlikely(!skb_hash)) {
> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>                 u32 mask_index = 0;
>                 u32 cache = 0;
>
> @@ -749,11 +813,11 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>
>         ce = NULL;
>         hash = skb_hash;
> -       entries = this_cpu_ptr(tbl->mask_cache);
> +       entries = this_cpu_ptr(mc->mask_cache);
>
>         /* Find the cache entry 'ce' to operate on. */
>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> -               int index = hash & (MC_HASH_ENTRIES - 1);
> +               int index = hash & (mc->cache_size - 1);
>                 struct mask_cache_entry *e;
>
>                 e = &entries[index];
> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
>         return READ_ONCE(ma->count);
>  }
>
> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
> +{
> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> +
> +       return READ_ONCE(mc->cache_size);
> +}
> +
>  static struct table_instance *table_instance_expand(struct table_instance *ti,
>                                                     bool ufid)
>  {
> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>         for (i = 0; i < masks_entries; i++) {
>                 int index = masks_and_count[i].index;
>
> -               new->masks[new->count++] =
> -                       rcu_dereference_ovsl(ma->masks[index]);
> +               if (ovsl_dereference(ma->masks[index]))
> +                       new->masks[new->count++] = ma->masks[index];
>         }
>
>         rcu_assign_pointer(table->mask_array, new);
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index 325e939371d8..f2dba952db2f 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>         u32 mask_index;
>  };
>
> +struct mask_cache {
> +       struct rcu_head rcu;
> +       u32 cache_size;  /* Must be ^2 value. */
> +       struct mask_cache_entry __percpu *mask_cache;
> +};
> +
>  struct mask_count {
>         int index;
>         u64 counter;
> @@ -53,7 +59,7 @@ struct table_instance {
>  struct flow_table {
>         struct table_instance __rcu *ti;
>         struct table_instance __rcu *ufid_ti;
> -       struct mask_cache_entry __percpu *mask_cache;
> +       struct mask_cache __rcu *mask_cache;
>         struct mask_array __rcu *mask_array;
>         unsigned long last_rehash;
>         unsigned int count;
> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
>                         const struct sw_flow_mask *mask);
>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size);
>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
>                                        u32 *bucket, u32 *idx);
>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>


-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-30  5:07   ` Tonghao Zhang
@ 2020-07-30 12:33     ` Eelco Chaudron
  2020-07-30 13:25       ` Tonghao Zhang
  0 siblings, 1 reply; 9+ messages in thread
From: Eelco Chaudron @ 2020-07-30 12:33 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Linux Kernel Network Developers, David Miller, ovs dev,
	Jakub Kicinski, Paolo Abeni, Pravin Shelar, Florian Westphal

Thanks Tonghao for the review, see comments inline below!

If I get no more reviews by the end of the week I’ll send out a v4.

//Eelco


On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:

> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com> 
> wrote:
>>
>> This patch makes the masks cache size configurable, or with
>> a size of 0, disable it.
>>
>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>> Changes in v3:
>>  - Use is_power_of_2() function
>>  - Use array_size() function
>>  - Fix remaining sparse errors
>>
>> Changes in v2:
>>  - Fix sparse warnings
>>  - Fix netlink policy items reported by Florian Westphal
>>
>>  include/uapi/linux/openvswitch.h |    1
>>  net/openvswitch/datapath.c       |   14 +++++
>>  net/openvswitch/flow_table.c     |   97 
>> +++++++++++++++++++++++++++++++++-----
>>  net/openvswitch/flow_table.h     |   10 ++++
>>  4 files changed, 108 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/uapi/linux/openvswitch.h 
>> b/include/uapi/linux/openvswitch.h
>> index 7cb76e5ca7cf..8300cc29dec8 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct 
>> ovs_dp_megaflow_stats */
>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>>         OVS_DP_ATTR_PAD,
>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>         __OVS_DP_ATTR_MAX
>>  };
>>
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index a54df1fe3ec4..114b2ddb8037 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
>>         msgsize += nla_total_size_64bit(sizeof(struct 
>> ovs_dp_megaflow_stats));
>>         msgsize += nla_total_size(sizeof(u32)); /* 
>> OVS_DP_ATTR_USER_FEATURES */
>> +       msgsize += nla_total_size(sizeof(u32)); /* 
>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>
>>         return msgsize;
>>  }
>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct 
>> datapath *dp, struct sk_buff *skb,
>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, 
>> dp->user_features))
>>                 goto nla_put_failure;
>>
>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
>> +               goto nla_put_failure;
>> +
>>         genlmsg_end(skb, ovs_header);
>>         return 0;
>>
>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp, 
>> struct nlattr *a[])
>>  #endif
>>         }
>>
>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>> +               u32 cache_size;
>> +
>> +               cache_size = 
>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>> +               ovs_flow_tbl_masks_cache_resize(&dp->table, 
>> cache_size);
> Do we should return error code, if we can't change the "mask_cache"
> size ? for example, -EINVAL, -ENOMEM

Initially, I did not do this due to the fact the new value is reported, 
and on failure, the old value is shown.
However thinking about it again, it makes more sense to return an error. 
Will sent a v4 with the following to return:

-
-void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 
size)
+int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
  {
         struct mask_cache *mc = rcu_dereference(table->mask_cache);
         struct mask_cache *new;

         if (size == mc->cache_size)
-               return;
+               return 0;
+
+       if (!is_power_of_2(size) && size != 0)
+               return -EINVAL;

         new = tbl_mask_cache_alloc(size);
         if (!new)
-               return;
+               return -ENOMEM;

         rcu_assign_pointer(table->mask_cache, new);
         call_rcu(&mc->rcu, mask_cache_rcu_cb);
+
+       return 0;
  }

>> +       }
>> +
>>         dp->user_features = user_features;
>>
>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>> @@ -1894,6 +1906,8 @@ static const struct nla_policy 
>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 
>> IFNAMSIZ - 1 },
>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 
>> 0,
>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct 
>> mask_cache_entry)),
>>  };
>>
>>  static const struct genl_ops dp_datapath_genl_ops[] = {
>> diff --git a/net/openvswitch/flow_table.c 
>> b/net/openvswitch/flow_table.c
>> index a5912ea05352..5280aeeef628 100644
>> --- a/net/openvswitch/flow_table.c
>> +++ b/net/openvswitch/flow_table.c
>> @@ -38,8 +38,8 @@
>>  #define MASK_ARRAY_SIZE_MIN    16
>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>>
>> +#define MC_DEFAULT_HASH_ENTRIES        256
>>  #define MC_HASH_SHIFT          8
>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) / 
>> MC_HASH_SHIFT)
>>
>>  static struct kmem_cache *flow_cache;
>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table 
>> *tbl, struct sw_flow_mask *mask)
>>         }
>>  }
>>
>> +static void __mask_cache_destroy(struct mask_cache *mc)
>> +{
>> +       if (mc->mask_cache)
>> +               free_percpu(mc->mask_cache);
> free_percpu the NULL is safe. we can remove the "if".

Makes sense, will remove the if() check.

>> +       kfree(mc);
>> +}
>> +
>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
>> +{
>> +       struct mask_cache *mc = container_of(rcu, struct mask_cache, 
>> rcu);
>> +
>> +       __mask_cache_destroy(mc);
>> +}
>> +
>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>> +{
>> +       struct mask_cache_entry __percpu *cache = NULL;
>> +       struct mask_cache *new;
>> +
>> +       /* Only allow size to be 0, or a power of 2, and does not 
>> exceed
>> +        * percpu allocation size.
>> +        */
>> +       if ((!is_power_of_2(size) && size != 0) ||
>> +           (size * sizeof(struct mask_cache_entry)) > 
>> PCPU_MIN_UNIT_SIZE)
>> +               return NULL;
>> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
>> +       if (!new)
>> +               return NULL;
>> +
>> +       new->cache_size = size;
>> +       if (new->cache_size > 0) {
>> +               cache = __alloc_percpu(array_size(sizeof(struct 
>> mask_cache_entry),
>> +                                                 new->cache_size),
>> +                                      __alignof__(struct 
>> mask_cache_entry));
>> +               if (!cache) {
>> +                       kfree(new);
>> +                       return NULL;
>> +               }
>> +       }
>> +
>> +       new->mask_cache = cache;
>> +       return new;
>> +}
>> +
>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 
>> size)
>> +{
>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +       struct mask_cache *new;
>> +
>> +       if (size == mc->cache_size)
>> +               return;
>> +
>> +       new = tbl_mask_cache_alloc(size);
>> +       if (!new)
>> +               return;
>> +
>> +       rcu_assign_pointer(table->mask_cache, new);
>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +}
>> +
>>  int ovs_flow_tbl_init(struct flow_table *table)
>>  {
>>         struct table_instance *ti, *ufid_ti;
>> +       struct mask_cache *mc;
>>         struct mask_array *ma;
>>
>> -       table->mask_cache = __alloc_percpu(sizeof(struct 
>> mask_cache_entry) *
>> -                                          MC_HASH_ENTRIES,
>> -                                          __alignof__(struct 
>> mask_cache_entry));
>> -       if (!table->mask_cache)
>> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>> +       if (!mc)
>>                 return -ENOMEM;
>>
>>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>         rcu_assign_pointer(table->ti, ti);
>>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>>         rcu_assign_pointer(table->mask_array, ma);
>> +       rcu_assign_pointer(table->mask_cache, mc);
>>         table->last_rehash = jiffies;
>>         table->count = 0;
>>         table->ufid_count = 0;
>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>  free_mask_array:
>>         __mask_array_destroy(ma);
>>  free_mask_cache:
>> -       free_percpu(table->mask_cache);
>> +       __mask_cache_destroy(mc);
>>         return -ENOMEM;
>>  }
>>
>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table 
>> *table)
>>  {
>>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>>         struct table_instance *ufid_ti = 
>> rcu_dereference_raw(table->ufid_ti);
>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +       struct mask_array *ma = 
>> rcu_dereference_ovsl(table->mask_array);
>>
>> -       free_percpu(table->mask_cache);
>> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>>         table_instance_destroy(table, ti, ufid_ti, false);
>>  }
>>
>> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct 
>> flow_table *tbl,
>>                                           u32 *n_mask_hit,
>>                                           u32 *n_cache_hit)
>>  {
>> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>>         struct table_instance *ti = rcu_dereference(tbl->ti);
>>         struct mask_cache_entry *entries, *ce;
>> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct 
>> flow_table *tbl,
>>
>>         *n_mask_hit = 0;
>>         *n_cache_hit = 0;
>> -       if (unlikely(!skb_hash)) {
>> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>>                 u32 mask_index = 0;
>>                 u32 cache = 0;
>>
>> @@ -749,11 +813,11 @@ struct sw_flow 
>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>>         ce = NULL;
>>         hash = skb_hash;
>> -       entries = this_cpu_ptr(tbl->mask_cache);
>> +       entries = this_cpu_ptr(mc->mask_cache);
>>
>>         /* Find the cache entry 'ce' to operate on. */
>>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
>> -               int index = hash & (MC_HASH_ENTRIES - 1);
>> +               int index = hash & (mc->cache_size - 1);
>>                 struct mask_cache_entry *e;
>>
>>                 e = &entries[index];
>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct 
>> flow_table *table)
>>         return READ_ONCE(ma->count);
>>  }
>>
>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>> +{
>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +
>> +       return READ_ONCE(mc->cache_size);
>> +}
>> +
>>  static struct table_instance *table_instance_expand(struct 
>> table_instance *ti,
>>                                                     bool ufid)
>>  {
>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table 
>> *table)
>>         for (i = 0; i < masks_entries; i++) {
>>                 int index = masks_and_count[i].index;
>>
>> -               new->masks[new->count++] =
>> -                       rcu_dereference_ovsl(ma->masks[index]);
>> +               if (ovsl_dereference(ma->masks[index]))
>> +                       new->masks[new->count++] = ma->masks[index];
>>         }
>>
>>         rcu_assign_pointer(table->mask_array, new);
>> diff --git a/net/openvswitch/flow_table.h 
>> b/net/openvswitch/flow_table.h
>> index 325e939371d8..f2dba952db2f 100644
>> --- a/net/openvswitch/flow_table.h
>> +++ b/net/openvswitch/flow_table.h
>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>>         u32 mask_index;
>>  };
>>
>> +struct mask_cache {
>> +       struct rcu_head rcu;
>> +       u32 cache_size;  /* Must be ^2 value. */
>> +       struct mask_cache_entry __percpu *mask_cache;
>> +};
>> +
>>  struct mask_count {
>>         int index;
>>         u64 counter;
>> @@ -53,7 +59,7 @@ struct table_instance {
>>  struct flow_table {
>>         struct table_instance __rcu *ti;
>>         struct table_instance __rcu *ufid_ti;
>> -       struct mask_cache_entry __percpu *mask_cache;
>> +       struct mask_cache __rcu *mask_cache;
>>         struct mask_array __rcu *mask_array;
>>         unsigned long last_rehash;
>>         unsigned int count;
>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, 
>> struct sw_flow *flow,
>>                         const struct sw_flow_mask *mask);
>>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow 
>> *flow);
>>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
>> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 
>> size);
>>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
>>                                        u32 *bucket, u32 *idx);
>>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>>
>
>
> -- 
> Best regards, Tonghao


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-30 12:33     ` Eelco Chaudron
@ 2020-07-30 13:25       ` Tonghao Zhang
  2020-07-30 13:51         ` Eelco Chaudron
  0 siblings, 1 reply; 9+ messages in thread
From: Tonghao Zhang @ 2020-07-30 13:25 UTC (permalink / raw)
  To: Eelco Chaudron
  Cc: Linux Kernel Network Developers, David Miller, ovs dev,
	Jakub Kicinski, Paolo Abeni, Pravin Shelar, Florian Westphal

On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com> wrote:
>
> Thanks Tonghao for the review, see comments inline below!
>
> If I get no more reviews by the end of the week I’ll send out a v4.
>
> //Eelco
>
>
> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
>
> > On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com>
> > wrote:
> >>
> >> This patch makes the masks cache size configurable, or with
> >> a size of 0, disable it.
> >>
> >> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> >> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >> ---
> >> Changes in v3:
> >>  - Use is_power_of_2() function
> >>  - Use array_size() function
> >>  - Fix remaining sparse errors
> >>
> >> Changes in v2:
> >>  - Fix sparse warnings
> >>  - Fix netlink policy items reported by Florian Westphal
> >>
> >>  include/uapi/linux/openvswitch.h |    1
> >>  net/openvswitch/datapath.c       |   14 +++++
> >>  net/openvswitch/flow_table.c     |   97
> >> +++++++++++++++++++++++++++++++++-----
> >>  net/openvswitch/flow_table.h     |   10 ++++
> >>  4 files changed, 108 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/include/uapi/linux/openvswitch.h
> >> b/include/uapi/linux/openvswitch.h
> >> index 7cb76e5ca7cf..8300cc29dec8 100644
> >> --- a/include/uapi/linux/openvswitch.h
> >> +++ b/include/uapi/linux/openvswitch.h
> >> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
> >>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
> >> ovs_dp_megaflow_stats */
> >>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
> >>         OVS_DP_ATTR_PAD,
> >> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >>         __OVS_DP_ATTR_MAX
> >>  };
> >>
> >> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> >> index a54df1fe3ec4..114b2ddb8037 100644
> >> --- a/net/openvswitch/datapath.c
> >> +++ b/net/openvswitch/datapath.c
> >> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
> >>         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
> >>         msgsize += nla_total_size_64bit(sizeof(struct
> >> ovs_dp_megaflow_stats));
> >>         msgsize += nla_total_size(sizeof(u32)); /*
> >> OVS_DP_ATTR_USER_FEATURES */
> >> +       msgsize += nla_total_size(sizeof(u32)); /*
> >> OVS_DP_ATTR_MASKS_CACHE_SIZE */
> >>
> >>         return msgsize;
> >>  }
> >> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
> >> datapath *dp, struct sk_buff *skb,
> >>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
> >> dp->user_features))
> >>                 goto nla_put_failure;
> >>
> >> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
> >> +               goto nla_put_failure;
> >> +
> >>         genlmsg_end(skb, ovs_header);
> >>         return 0;
> >>
> >> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp,
> >> struct nlattr *a[])
> >>  #endif
> >>         }
> >>
> >> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
> >> +               u32 cache_size;
> >> +
> >> +               cache_size =
> >> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> >> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
> >> cache_size);
> > Do we should return error code, if we can't change the "mask_cache"
> > size ? for example, -EINVAL, -ENOMEM
>
> Initially, I did not do this due to the fact the new value is reported,
> and on failure, the old value is shown.
> However thinking about it again, it makes more sense to return an error.
> Will sent a v4 with the following to return:
>
> -
> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> size)
> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>   {
>          struct mask_cache *mc = rcu_dereference(table->mask_cache);
>          struct mask_cache *new;
>
>          if (size == mc->cache_size)
> -               return;
> +               return 0;
> +
> +       if (!is_power_of_2(size) && size != 0)
> +               return -EINVAL;
add check as below, and we can remove the check in
tbl_mask_cache_alloc function. That
function will only return NULL, if there is no memory. And we can
comment for tbl_mask_cache_alloc, to indicate that size should be a
power of 2.
 if ((!is_power_of_2(size) && size != 0) ||
           (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)

Thanks Eelco.
Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>

>          new = tbl_mask_cache_alloc(size);
>          if (!new)
> -               return;
> +               return -ENOMEM;
>
>          rcu_assign_pointer(table->mask_cache, new);
>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
> +
> +       return 0;
>   }
>
> >> +       }
> >> +
> >>         dp->user_features = user_features;
> >>
> >>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> >> @@ -1894,6 +1906,8 @@ static const struct nla_policy
> >> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
> >>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
> >> IFNAMSIZ - 1 },
> >>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
> >>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
> >> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32,
> >> 0,
> >> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
> >> mask_cache_entry)),
> >>  };
> >>
> >>  static const struct genl_ops dp_datapath_genl_ops[] = {
> >> diff --git a/net/openvswitch/flow_table.c
> >> b/net/openvswitch/flow_table.c
> >> index a5912ea05352..5280aeeef628 100644
> >> --- a/net/openvswitch/flow_table.c
> >> +++ b/net/openvswitch/flow_table.c
> >> @@ -38,8 +38,8 @@
> >>  #define MASK_ARRAY_SIZE_MIN    16
> >>  #define REHASH_INTERVAL                (10 * 60 * HZ)
> >>
> >> +#define MC_DEFAULT_HASH_ENTRIES        256
> >>  #define MC_HASH_SHIFT          8
> >> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
> >>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
> >> MC_HASH_SHIFT)
> >>
> >>  static struct kmem_cache *flow_cache;
> >> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table
> >> *tbl, struct sw_flow_mask *mask)
> >>         }
> >>  }
> >>
> >> +static void __mask_cache_destroy(struct mask_cache *mc)
> >> +{
> >> +       if (mc->mask_cache)
> >> +               free_percpu(mc->mask_cache);
> > free_percpu the NULL is safe. we can remove the "if".
>
> Makes sense, will remove the if() check.
>
> >> +       kfree(mc);
> >> +}
> >> +
> >> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
> >> +{
> >> +       struct mask_cache *mc = container_of(rcu, struct mask_cache,
> >> rcu);
> >> +
> >> +       __mask_cache_destroy(mc);
> >> +}
> >> +
> >> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
> >> +{
> >> +       struct mask_cache_entry __percpu *cache = NULL;
> >> +       struct mask_cache *new;
> >> +
> >> +       /* Only allow size to be 0, or a power of 2, and does not
> >> exceed
> >> +        * percpu allocation size.
> >> +        */
> >> +       if ((!is_power_of_2(size) && size != 0) ||
> >> +           (size * sizeof(struct mask_cache_entry)) >
> >> PCPU_MIN_UNIT_SIZE)
> >> +               return NULL;
> >> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
> >> +       if (!new)
> >> +               return NULL;
> >> +
> >> +       new->cache_size = size;
> >> +       if (new->cache_size > 0) {
> >> +               cache = __alloc_percpu(array_size(sizeof(struct
> >> mask_cache_entry),
> >> +                                                 new->cache_size),
> >> +                                      __alignof__(struct
> >> mask_cache_entry));
> >> +               if (!cache) {
> >> +                       kfree(new);
> >> +                       return NULL;
> >> +               }
> >> +       }
> >> +
> >> +       new->mask_cache = cache;
> >> +       return new;
> >> +}
> >> +
> >> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size)
> >> +{
> >> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >> +       struct mask_cache *new;
> >> +
> >> +       if (size == mc->cache_size)
> >> +               return;
> >> +
> >> +       new = tbl_mask_cache_alloc(size);
> >> +       if (!new)
> >> +               return;
> >> +
> >> +       rcu_assign_pointer(table->mask_cache, new);
> >> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >> +}
> >> +
> >>  int ovs_flow_tbl_init(struct flow_table *table)
> >>  {
> >>         struct table_instance *ti, *ufid_ti;
> >> +       struct mask_cache *mc;
> >>         struct mask_array *ma;
> >>
> >> -       table->mask_cache = __alloc_percpu(sizeof(struct
> >> mask_cache_entry) *
> >> -                                          MC_HASH_ENTRIES,
> >> -                                          __alignof__(struct
> >> mask_cache_entry));
> >> -       if (!table->mask_cache)
> >> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
> >> +       if (!mc)
> >>                 return -ENOMEM;
> >>
> >>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
> >> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
> >>         rcu_assign_pointer(table->ti, ti);
> >>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
> >>         rcu_assign_pointer(table->mask_array, ma);
> >> +       rcu_assign_pointer(table->mask_cache, mc);
> >>         table->last_rehash = jiffies;
> >>         table->count = 0;
> >>         table->ufid_count = 0;
> >> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
> >>  free_mask_array:
> >>         __mask_array_destroy(ma);
> >>  free_mask_cache:
> >> -       free_percpu(table->mask_cache);
> >> +       __mask_cache_destroy(mc);
> >>         return -ENOMEM;
> >>  }
> >>
> >> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
> >> *table)
> >>  {
> >>         struct table_instance *ti = rcu_dereference_raw(table->ti);
> >>         struct table_instance *ufid_ti =
> >> rcu_dereference_raw(table->ufid_ti);
> >> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >> +       struct mask_array *ma =
> >> rcu_dereference_ovsl(table->mask_array);
> >>
> >> -       free_percpu(table->mask_cache);
> >> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
> >> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
> >>         table_instance_destroy(table, ti, ufid_ti, false);
> >>  }
> >>
> >> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct
> >> flow_table *tbl,
> >>                                           u32 *n_mask_hit,
> >>                                           u32 *n_cache_hit)
> >>  {
> >> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
> >>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
> >>         struct table_instance *ti = rcu_dereference(tbl->ti);
> >>         struct mask_cache_entry *entries, *ce;
> >> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct
> >> flow_table *tbl,
> >>
> >>         *n_mask_hit = 0;
> >>         *n_cache_hit = 0;
> >> -       if (unlikely(!skb_hash)) {
> >> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
> >>                 u32 mask_index = 0;
> >>                 u32 cache = 0;
> >>
> >> @@ -749,11 +813,11 @@ struct sw_flow
> >> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
> >>
> >>         ce = NULL;
> >>         hash = skb_hash;
> >> -       entries = this_cpu_ptr(tbl->mask_cache);
> >> +       entries = this_cpu_ptr(mc->mask_cache);
> >>
> >>         /* Find the cache entry 'ce' to operate on. */
> >>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> >> -               int index = hash & (MC_HASH_ENTRIES - 1);
> >> +               int index = hash & (mc->cache_size - 1);
> >>                 struct mask_cache_entry *e;
> >>
> >>                 e = &entries[index];
> >> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
> >> flow_table *table)
> >>         return READ_ONCE(ma->count);
> >>  }
> >>
> >> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
> >> +{
> >> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >> +
> >> +       return READ_ONCE(mc->cache_size);
> >> +}
> >> +
> >>  static struct table_instance *table_instance_expand(struct
> >> table_instance *ti,
> >>                                                     bool ufid)
> >>  {
> >> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table
> >> *table)
> >>         for (i = 0; i < masks_entries; i++) {
> >>                 int index = masks_and_count[i].index;
> >>
> >> -               new->masks[new->count++] =
> >> -                       rcu_dereference_ovsl(ma->masks[index]);
> >> +               if (ovsl_dereference(ma->masks[index]))
> >> +                       new->masks[new->count++] = ma->masks[index];
> >>         }
> >>
> >>         rcu_assign_pointer(table->mask_array, new);
> >> diff --git a/net/openvswitch/flow_table.h
> >> b/net/openvswitch/flow_table.h
> >> index 325e939371d8..f2dba952db2f 100644
> >> --- a/net/openvswitch/flow_table.h
> >> +++ b/net/openvswitch/flow_table.h
> >> @@ -27,6 +27,12 @@ struct mask_cache_entry {
> >>         u32 mask_index;
> >>  };
> >>
> >> +struct mask_cache {
> >> +       struct rcu_head rcu;
> >> +       u32 cache_size;  /* Must be ^2 value. */
> >> +       struct mask_cache_entry __percpu *mask_cache;
> >> +};
> >> +
> >>  struct mask_count {
> >>         int index;
> >>         u64 counter;
> >> @@ -53,7 +59,7 @@ struct table_instance {
> >>  struct flow_table {
> >>         struct table_instance __rcu *ti;
> >>         struct table_instance __rcu *ufid_ti;
> >> -       struct mask_cache_entry __percpu *mask_cache;
> >> +       struct mask_cache __rcu *mask_cache;
> >>         struct mask_array __rcu *mask_array;
> >>         unsigned long last_rehash;
> >>         unsigned int count;
> >> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table,
> >> struct sw_flow *flow,
> >>                         const struct sw_flow_mask *mask);
> >>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow
> >> *flow);
> >>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
> >> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
> >> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size);
> >>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
> >>                                        u32 *bucket, u32 *idx);
> >>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
> >>
> >
> >
> > --
> > Best regards, Tonghao
>


-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-30 13:25       ` Tonghao Zhang
@ 2020-07-30 13:51         ` Eelco Chaudron
  2020-07-30 14:23           ` Tonghao Zhang
  0 siblings, 1 reply; 9+ messages in thread
From: Eelco Chaudron @ 2020-07-30 13:51 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Linux Kernel Network Developers, David Miller, ovs dev,
	Jakub Kicinski, Paolo Abeni, Pravin Shelar, Florian Westphal



On 30 Jul 2020, at 15:25, Tonghao Zhang wrote:

> On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com> 
> wrote:
>>
>> Thanks Tonghao for the review, see comments inline below!
>>
>> If I get no more reviews by the end of the week I’ll send out a v4.
>>
>> //Eelco
>>
>>
>> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
>>
>>> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com>
>>> wrote:
>>>>
>>>> This patch makes the masks cache size configurable, or with
>>>> a size of 0, disable it.
>>>>
>>>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>> ---
>>>> Changes in v3:
>>>>  - Use is_power_of_2() function
>>>>  - Use array_size() function
>>>>  - Fix remaining sparse errors
>>>>
>>>> Changes in v2:
>>>>  - Fix sparse warnings
>>>>  - Fix netlink policy items reported by Florian Westphal
>>>>
>>>>  include/uapi/linux/openvswitch.h |    1
>>>>  net/openvswitch/datapath.c       |   14 +++++
>>>>  net/openvswitch/flow_table.c     |   97
>>>> +++++++++++++++++++++++++++++++++-----
>>>>  net/openvswitch/flow_table.h     |   10 ++++
>>>>  4 files changed, 108 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/include/uapi/linux/openvswitch.h
>>>> b/include/uapi/linux/openvswitch.h
>>>> index 7cb76e5ca7cf..8300cc29dec8 100644
>>>> --- a/include/uapi/linux/openvswitch.h
>>>> +++ b/include/uapi/linux/openvswitch.h
>>>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>>>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
>>>> ovs_dp_megaflow_stats */
>>>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>>>>         OVS_DP_ATTR_PAD,
>>>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>>         __OVS_DP_ATTR_MAX
>>>>  };
>>>>
>>>> diff --git a/net/openvswitch/datapath.c 
>>>> b/net/openvswitch/datapath.c
>>>> index a54df1fe3ec4..114b2ddb8037 100644
>>>> --- a/net/openvswitch/datapath.c
>>>> +++ b/net/openvswitch/datapath.c
>>>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>>>>         msgsize += nla_total_size_64bit(sizeof(struct 
>>>> ovs_dp_stats));
>>>>         msgsize += nla_total_size_64bit(sizeof(struct
>>>> ovs_dp_megaflow_stats));
>>>>         msgsize += nla_total_size(sizeof(u32)); /*
>>>> OVS_DP_ATTR_USER_FEATURES */
>>>> +       msgsize += nla_total_size(sizeof(u32)); /*
>>>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>>>
>>>>         return msgsize;
>>>>  }
>>>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
>>>> datapath *dp, struct sk_buff *skb,
>>>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
>>>> dp->user_features))
>>>>                 goto nla_put_failure;
>>>>
>>>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
>>>> +               goto nla_put_failure;
>>>> +
>>>>         genlmsg_end(skb, ovs_header);
>>>>         return 0;
>>>>
>>>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath 
>>>> *dp,
>>>> struct nlattr *a[])
>>>>  #endif
>>>>         }
>>>>
>>>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>>>> +               u32 cache_size;
>>>> +
>>>> +               cache_size =
>>>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>>>> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
>>>> cache_size);
>>> Do we should return error code, if we can't change the "mask_cache"
>>> size ? for example, -EINVAL, -ENOMEM
>>
>> Initially, I did not do this due to the fact the new value is 
>> reported,
>> and on failure, the old value is shown.
>> However thinking about it again, it makes more sense to return an 
>> error.
>> Will sent a v4 with the following to return:
>>
>> -
>> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>> size)
>> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 
>> size)
>>   {
>>          struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>          struct mask_cache *new;
>>
>>          if (size == mc->cache_size)
>> -               return;
>> +               return 0;
>> +
>> +       if (!is_power_of_2(size) && size != 0)
>> +               return -EINVAL;
> add check as below, and we can remove the check in
> tbl_mask_cache_alloc function. That
> function will only return NULL, if there is no memory. And we can
> comment for tbl_mask_cache_alloc, to indicate that size should be a
> power of 2.
>  if ((!is_power_of_2(size) && size != 0) ||
>            (size * sizeof(struct mask_cache_entry)) > 
> PCPU_MIN_UNIT_SIZE)

I do not think the extra check hurts as it’s not in any fast path.
The comment would easily be missed, especially if someone changes the 
default value, so I would prefer to keep it as is.

> Thanks Eelco.
> Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
>>          new = tbl_mask_cache_alloc(size);
>>          if (!new)
>> -               return;
>> +               return -ENOMEM;
>>
>>          rcu_assign_pointer(table->mask_cache, new);
>>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +
>> +       return 0;
>>   }
>>
>>>> +       }
>>>> +
>>>>         dp->user_features = user_features;
>>>>
>>>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>>>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
>>>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>>>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
>>>> IFNAMSIZ - 1 },
>>>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>>>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>>>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32,
>>>> 0,
>>>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
>>>> mask_cache_entry)),
>>>>  };
>>>>
>>>>  static const struct genl_ops dp_datapath_genl_ops[] = {
>>>> diff --git a/net/openvswitch/flow_table.c
>>>> b/net/openvswitch/flow_table.c
>>>> index a5912ea05352..5280aeeef628 100644
>>>> --- a/net/openvswitch/flow_table.c
>>>> +++ b/net/openvswitch/flow_table.c
>>>> @@ -38,8 +38,8 @@
>>>>  #define MASK_ARRAY_SIZE_MIN    16
>>>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>>>>
>>>> +#define MC_DEFAULT_HASH_ENTRIES        256
>>>>  #define MC_HASH_SHIFT          8
>>>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>>>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
>>>> MC_HASH_SHIFT)
>>>>
>>>>  static struct kmem_cache *flow_cache;
>>>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct 
>>>> flow_table
>>>> *tbl, struct sw_flow_mask *mask)
>>>>         }
>>>>  }
>>>>
>>>> +static void __mask_cache_destroy(struct mask_cache *mc)
>>>> +{
>>>> +       if (mc->mask_cache)
>>>> +               free_percpu(mc->mask_cache);
>>> free_percpu the NULL is safe. we can remove the "if".
>>
>> Makes sense, will remove the if() check.
>>
>>>> +       kfree(mc);
>>>> +}
>>>> +
>>>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
>>>> +{
>>>> +       struct mask_cache *mc = container_of(rcu, struct 
>>>> mask_cache,
>>>> rcu);
>>>> +
>>>> +       __mask_cache_destroy(mc);
>>>> +}
>>>> +
>>>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>>>> +{
>>>> +       struct mask_cache_entry __percpu *cache = NULL;
>>>> +       struct mask_cache *new;
>>>> +
>>>> +       /* Only allow size to be 0, or a power of 2, and does not
>>>> exceed
>>>> +        * percpu allocation size.
>>>> +        */
>>>> +       if ((!is_power_of_2(size) && size != 0) ||
>>>> +           (size * sizeof(struct mask_cache_entry)) >
>>>> PCPU_MIN_UNIT_SIZE)
>>>> +               return NULL;
>>>> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
>>>> +       if (!new)
>>>> +               return NULL;
>>>> +
>>>> +       new->cache_size = size;
>>>> +       if (new->cache_size > 0) {
>>>> +               cache = __alloc_percpu(array_size(sizeof(struct
>>>> mask_cache_entry),
>>>> +                                                 new->cache_size),
>>>> +                                      __alignof__(struct
>>>> mask_cache_entry));
>>>> +               if (!cache) {
>>>> +                       kfree(new);
>>>> +                       return NULL;
>>>> +               }
>>>> +       }
>>>> +
>>>> +       new->mask_cache = cache;
>>>> +       return new;
>>>> +}
>>>> +
>>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size)
>>>> +{
>>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>>> +       struct mask_cache *new;
>>>> +
>>>> +       if (size == mc->cache_size)
>>>> +               return;
>>>> +
>>>> +       new = tbl_mask_cache_alloc(size);
>>>> +       if (!new)
>>>> +               return;
>>>> +
>>>> +       rcu_assign_pointer(table->mask_cache, new);
>>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>> +}
>>>> +
>>>>  int ovs_flow_tbl_init(struct flow_table *table)
>>>>  {
>>>>         struct table_instance *ti, *ufid_ti;
>>>> +       struct mask_cache *mc;
>>>>         struct mask_array *ma;
>>>>
>>>> -       table->mask_cache = __alloc_percpu(sizeof(struct
>>>> mask_cache_entry) *
>>>> -                                          MC_HASH_ENTRIES,
>>>> -                                          __alignof__(struct
>>>> mask_cache_entry));
>>>> -       if (!table->mask_cache)
>>>> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>>>> +       if (!mc)
>>>>                 return -ENOMEM;
>>>>
>>>>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>>>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>>>         rcu_assign_pointer(table->ti, ti);
>>>>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>>>>         rcu_assign_pointer(table->mask_array, ma);
>>>> +       rcu_assign_pointer(table->mask_cache, mc);
>>>>         table->last_rehash = jiffies;
>>>>         table->count = 0;
>>>>         table->ufid_count = 0;
>>>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>>>>  free_mask_array:
>>>>         __mask_array_destroy(ma);
>>>>  free_mask_cache:
>>>> -       free_percpu(table->mask_cache);
>>>> +       __mask_cache_destroy(mc);
>>>>         return -ENOMEM;
>>>>  }
>>>>
>>>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
>>>> *table)
>>>>  {
>>>>         struct table_instance *ti = rcu_dereference_raw(table->ti);
>>>>         struct table_instance *ufid_ti =
>>>> rcu_dereference_raw(table->ufid_ti);
>>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>>> +       struct mask_array *ma =
>>>> rcu_dereference_ovsl(table->mask_array);
>>>>
>>>> -       free_percpu(table->mask_cache);
>>>> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
>>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>>>>         table_instance_destroy(table, ti, ufid_ti, false);
>>>>  }
>>>>
>>>> @@ -724,6 +787,7 @@ struct sw_flow 
>>>> *ovs_flow_tbl_lookup_stats(struct
>>>> flow_table *tbl,
>>>>                                           u32 *n_mask_hit,
>>>>                                           u32 *n_cache_hit)
>>>>  {
>>>> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>>>>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>>>>         struct table_instance *ti = rcu_dereference(tbl->ti);
>>>>         struct mask_cache_entry *entries, *ce;
>>>> @@ -733,7 +797,7 @@ struct sw_flow 
>>>> *ovs_flow_tbl_lookup_stats(struct
>>>> flow_table *tbl,
>>>>
>>>>         *n_mask_hit = 0;
>>>>         *n_cache_hit = 0;
>>>> -       if (unlikely(!skb_hash)) {
>>>> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>>>>                 u32 mask_index = 0;
>>>>                 u32 cache = 0;
>>>>
>>>> @@ -749,11 +813,11 @@ struct sw_flow
>>>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>>>
>>>>         ce = NULL;
>>>>         hash = skb_hash;
>>>> -       entries = this_cpu_ptr(tbl->mask_cache);
>>>> +       entries = this_cpu_ptr(mc->mask_cache);
>>>>
>>>>         /* Find the cache entry 'ce' to operate on. */
>>>>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
>>>> -               int index = hash & (MC_HASH_ENTRIES - 1);
>>>> +               int index = hash & (mc->cache_size - 1);
>>>>                 struct mask_cache_entry *e;
>>>>
>>>>                 e = &entries[index];
>>>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
>>>> flow_table *table)
>>>>         return READ_ONCE(ma->count);
>>>>  }
>>>>
>>>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>>>> +{
>>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
>>>> +
>>>> +       return READ_ONCE(mc->cache_size);
>>>> +}
>>>> +
>>>>  static struct table_instance *table_instance_expand(struct
>>>> table_instance *ti,
>>>>                                                     bool ufid)
>>>>  {
>>>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct 
>>>> flow_table
>>>> *table)
>>>>         for (i = 0; i < masks_entries; i++) {
>>>>                 int index = masks_and_count[i].index;
>>>>
>>>> -               new->masks[new->count++] =
>>>> -                       rcu_dereference_ovsl(ma->masks[index]);
>>>> +               if (ovsl_dereference(ma->masks[index]))
>>>> +                       new->masks[new->count++] = 
>>>> ma->masks[index];
>>>>         }
>>>>
>>>>         rcu_assign_pointer(table->mask_array, new);
>>>> diff --git a/net/openvswitch/flow_table.h
>>>> b/net/openvswitch/flow_table.h
>>>> index 325e939371d8..f2dba952db2f 100644
>>>> --- a/net/openvswitch/flow_table.h
>>>> +++ b/net/openvswitch/flow_table.h
>>>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>>>>         u32 mask_index;
>>>>  };
>>>>
>>>> +struct mask_cache {
>>>> +       struct rcu_head rcu;
>>>> +       u32 cache_size;  /* Must be ^2 value. */
>>>> +       struct mask_cache_entry __percpu *mask_cache;
>>>> +};
>>>> +
>>>>  struct mask_count {
>>>>         int index;
>>>>         u64 counter;
>>>> @@ -53,7 +59,7 @@ struct table_instance {
>>>>  struct flow_table {
>>>>         struct table_instance __rcu *ti;
>>>>         struct table_instance __rcu *ufid_ti;
>>>> -       struct mask_cache_entry __percpu *mask_cache;
>>>> +       struct mask_cache __rcu *mask_cache;
>>>>         struct mask_array __rcu *mask_array;
>>>>         unsigned long last_rehash;
>>>>         unsigned int count;
>>>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table,
>>>> struct sw_flow *flow,
>>>>                         const struct sw_flow_mask *mask);
>>>>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow
>>>> *flow);
>>>>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
>>>> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table 
>>>> *table);
>>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size);
>>>>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance 
>>>> *table,
>>>>                                        u32 *bucket, u32 *idx);
>>>>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>>>>
>>>
>>>
>>> --
>>> Best regards, Tonghao
>>
>
>
> -- 
> Best regards, Tonghao


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-30 13:51         ` Eelco Chaudron
@ 2020-07-30 14:23           ` Tonghao Zhang
  2020-07-30 14:28             ` Eelco Chaudron
  0 siblings, 1 reply; 9+ messages in thread
From: Tonghao Zhang @ 2020-07-30 14:23 UTC (permalink / raw)
  To: Eelco Chaudron
  Cc: Linux Kernel Network Developers, David Miller, ovs dev,
	Jakub Kicinski, Paolo Abeni, Pravin Shelar, Florian Westphal

On Thu, Jul 30, 2020 at 9:52 PM Eelco Chaudron <echaudro@redhat.com> wrote:
>
>
>
> On 30 Jul 2020, at 15:25, Tonghao Zhang wrote:
>
> > On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com>
> > wrote:
> >>
> >> Thanks Tonghao for the review, see comments inline below!
> >>
> >> If I get no more reviews by the end of the week I’ll send out a v4.
> >>
> >> //Eelco
> >>
> >>
> >> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
> >>
> >>> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@redhat.com>
> >>> wrote:
> >>>>
> >>>> This patch makes the masks cache size configurable, or with
> >>>> a size of 0, disable it.
> >>>>
> >>>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> >>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >>>> ---
> >>>> Changes in v3:
> >>>>  - Use is_power_of_2() function
> >>>>  - Use array_size() function
> >>>>  - Fix remaining sparse errors
> >>>>
> >>>> Changes in v2:
> >>>>  - Fix sparse warnings
> >>>>  - Fix netlink policy items reported by Florian Westphal
> >>>>
> >>>>  include/uapi/linux/openvswitch.h |    1
> >>>>  net/openvswitch/datapath.c       |   14 +++++
> >>>>  net/openvswitch/flow_table.c     |   97
> >>>> +++++++++++++++++++++++++++++++++-----
> >>>>  net/openvswitch/flow_table.h     |   10 ++++
> >>>>  4 files changed, 108 insertions(+), 14 deletions(-)
> >>>>
> >>>> diff --git a/include/uapi/linux/openvswitch.h
> >>>> b/include/uapi/linux/openvswitch.h
> >>>> index 7cb76e5ca7cf..8300cc29dec8 100644
> >>>> --- a/include/uapi/linux/openvswitch.h
> >>>> +++ b/include/uapi/linux/openvswitch.h
> >>>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
> >>>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
> >>>> ovs_dp_megaflow_stats */
> >>>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
> >>>>         OVS_DP_ATTR_PAD,
> >>>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >>>>         __OVS_DP_ATTR_MAX
> >>>>  };
> >>>>
> >>>> diff --git a/net/openvswitch/datapath.c
> >>>> b/net/openvswitch/datapath.c
> >>>> index a54df1fe3ec4..114b2ddb8037 100644
> >>>> --- a/net/openvswitch/datapath.c
> >>>> +++ b/net/openvswitch/datapath.c
> >>>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
> >>>>         msgsize += nla_total_size_64bit(sizeof(struct
> >>>> ovs_dp_stats));
> >>>>         msgsize += nla_total_size_64bit(sizeof(struct
> >>>> ovs_dp_megaflow_stats));
> >>>>         msgsize += nla_total_size(sizeof(u32)); /*
> >>>> OVS_DP_ATTR_USER_FEATURES */
> >>>> +       msgsize += nla_total_size(sizeof(u32)); /*
> >>>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
> >>>>
> >>>>         return msgsize;
> >>>>  }
> >>>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
> >>>> datapath *dp, struct sk_buff *skb,
> >>>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
> >>>> dp->user_features))
> >>>>                 goto nla_put_failure;
> >>>>
> >>>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> >>>> +                       ovs_flow_tbl_masks_cache_size(&dp->table)))
> >>>> +               goto nla_put_failure;
> >>>> +
> >>>>         genlmsg_end(skb, ovs_header);
> >>>>         return 0;
> >>>>
> >>>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath
> >>>> *dp,
> >>>> struct nlattr *a[])
> >>>>  #endif
> >>>>         }
> >>>>
> >>>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
> >>>> +               u32 cache_size;
> >>>> +
> >>>> +               cache_size =
> >>>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> >>>> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
> >>>> cache_size);
> >>> Do we should return error code, if we can't change the "mask_cache"
> >>> size ? for example, -EINVAL, -ENOMEM
> >>
> >> Initially, I did not do this due to the fact the new value is
> >> reported,
> >> and on failure, the old value is shown.
> >> However thinking about it again, it makes more sense to return an
> >> error.
> >> Will sent a v4 with the following to return:
> >>
> >> -
> >> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size)
> >> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >> size)
> >>   {
> >>          struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >>          struct mask_cache *new;
> >>
> >>          if (size == mc->cache_size)
> >> -               return;
> >> +               return 0;
> >> +
> >> +       if (!is_power_of_2(size) && size != 0)
> >> +               return -EINVAL;
> > add check as below, and we can remove the check in
> > tbl_mask_cache_alloc function. That
> > function will only return NULL, if there is no memory. And we can
> > comment for tbl_mask_cache_alloc, to indicate that size should be a
> > power of 2.
> >  if ((!is_power_of_2(size) && size != 0) ||
> >            (size * sizeof(struct mask_cache_entry)) >
> > PCPU_MIN_UNIT_SIZE)
>
> I do not think the extra check hurts as it’s not in any fast path.
Hi Eelco.
Yes, I agree. Sorry for not being clear. I mean that:
if we don't check the "(size * sizeof(struct mask_cache_entry))  >
PCPU_MIN_UNIT_SIZE)" in the ovs_flow_tbl_masks_cache_resize.
tbl_mask_cache_alloc (ovs_flow_tbl_masks_cache_resize will invoke this
function)will return the NULL,
ovs_flow_tbl_masks_cache_resize will return -ENOMEM. I guess we should
return -EINVAL in that case.

> The comment would easily be missed, especially if someone changes the
> default value, so I would prefer to keep it as is.
>
> > Thanks Eelco.
> > Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> >>          new = tbl_mask_cache_alloc(size);
> >>          if (!new)
> >> -               return;
> >> +               return -ENOMEM;
> >>
> >>          rcu_assign_pointer(table->mask_cache, new);
> >>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >> +
> >> +       return 0;
> >>   }
> >>
> >>>> +       }
> >>>> +
> >>>>         dp->user_features = user_features;
> >>>>
> >>>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> >>>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
> >>>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
> >>>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
> >>>> IFNAMSIZ - 1 },
> >>>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
> >>>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
> >>>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32,
> >>>> 0,
> >>>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
> >>>> mask_cache_entry)),
> >>>>  };
> >>>>
> >>>>  static const struct genl_ops dp_datapath_genl_ops[] = {
> >>>> diff --git a/net/openvswitch/flow_table.c
> >>>> b/net/openvswitch/flow_table.c
> >>>> index a5912ea05352..5280aeeef628 100644
> >>>> --- a/net/openvswitch/flow_table.c
> >>>> +++ b/net/openvswitch/flow_table.c
> >>>> @@ -38,8 +38,8 @@
> >>>>  #define MASK_ARRAY_SIZE_MIN    16
> >>>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
> >>>>
> >>>> +#define MC_DEFAULT_HASH_ENTRIES        256
> >>>>  #define MC_HASH_SHIFT          8
> >>>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
> >>>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
> >>>> MC_HASH_SHIFT)
> >>>>
> >>>>  static struct kmem_cache *flow_cache;
> >>>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct
> >>>> flow_table
> >>>> *tbl, struct sw_flow_mask *mask)
> >>>>         }
> >>>>  }
> >>>>
> >>>> +static void __mask_cache_destroy(struct mask_cache *mc)
> >>>> +{
> >>>> +       if (mc->mask_cache)
> >>>> +               free_percpu(mc->mask_cache);
> >>> free_percpu the NULL is safe. we can remove the "if".
> >>
> >> Makes sense, will remove the if() check.
> >>
> >>>> +       kfree(mc);
> >>>> +}
> >>>> +
> >>>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
> >>>> +{
> >>>> +       struct mask_cache *mc = container_of(rcu, struct
> >>>> mask_cache,
> >>>> rcu);
> >>>> +
> >>>> +       __mask_cache_destroy(mc);
> >>>> +}
> >>>> +
> >>>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
> >>>> +{
> >>>> +       struct mask_cache_entry __percpu *cache = NULL;
> >>>> +       struct mask_cache *new;
> >>>> +
> >>>> +       /* Only allow size to be 0, or a power of 2, and does not
> >>>> exceed
> >>>> +        * percpu allocation size.
> >>>> +        */
> >>>> +       if ((!is_power_of_2(size) && size != 0) ||
> >>>> +           (size * sizeof(struct mask_cache_entry)) >
> >>>> PCPU_MIN_UNIT_SIZE)
> >>>> +               return NULL;
> >>>> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
> >>>> +       if (!new)
> >>>> +               return NULL;
> >>>> +
> >>>> +       new->cache_size = size;
> >>>> +       if (new->cache_size > 0) {
> >>>> +               cache = __alloc_percpu(array_size(sizeof(struct
> >>>> mask_cache_entry),
> >>>> +                                                 new->cache_size),
> >>>> +                                      __alignof__(struct
> >>>> mask_cache_entry));
> >>>> +               if (!cache) {
> >>>> +                       kfree(new);
> >>>> +                       return NULL;
> >>>> +               }
> >>>> +       }
> >>>> +
> >>>> +       new->mask_cache = cache;
> >>>> +       return new;
> >>>> +}
> >>>> +
> >>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >>>> size)
> >>>> +{
> >>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >>>> +       struct mask_cache *new;
> >>>> +
> >>>> +       if (size == mc->cache_size)
> >>>> +               return;
> >>>> +
> >>>> +       new = tbl_mask_cache_alloc(size);
> >>>> +       if (!new)
> >>>> +               return;
> >>>> +
> >>>> +       rcu_assign_pointer(table->mask_cache, new);
> >>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >>>> +}
> >>>> +
> >>>>  int ovs_flow_tbl_init(struct flow_table *table)
> >>>>  {
> >>>>         struct table_instance *ti, *ufid_ti;
> >>>> +       struct mask_cache *mc;
> >>>>         struct mask_array *ma;
> >>>>
> >>>> -       table->mask_cache = __alloc_percpu(sizeof(struct
> >>>> mask_cache_entry) *
> >>>> -                                          MC_HASH_ENTRIES,
> >>>> -                                          __alignof__(struct
> >>>> mask_cache_entry));
> >>>> -       if (!table->mask_cache)
> >>>> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
> >>>> +       if (!mc)
> >>>>                 return -ENOMEM;
> >>>>
> >>>>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
> >>>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
> >>>>         rcu_assign_pointer(table->ti, ti);
> >>>>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
> >>>>         rcu_assign_pointer(table->mask_array, ma);
> >>>> +       rcu_assign_pointer(table->mask_cache, mc);
> >>>>         table->last_rehash = jiffies;
> >>>>         table->count = 0;
> >>>>         table->ufid_count = 0;
> >>>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
> >>>>  free_mask_array:
> >>>>         __mask_array_destroy(ma);
> >>>>  free_mask_cache:
> >>>> -       free_percpu(table->mask_cache);
> >>>> +       __mask_cache_destroy(mc);
> >>>>         return -ENOMEM;
> >>>>  }
> >>>>
> >>>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
> >>>> *table)
> >>>>  {
> >>>>         struct table_instance *ti = rcu_dereference_raw(table->ti);
> >>>>         struct table_instance *ufid_ti =
> >>>> rcu_dereference_raw(table->ufid_ti);
> >>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >>>> +       struct mask_array *ma =
> >>>> rcu_dereference_ovsl(table->mask_array);
> >>>>
> >>>> -       free_percpu(table->mask_cache);
> >>>> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
> >>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
> >>>> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
> >>>>         table_instance_destroy(table, ti, ufid_ti, false);
> >>>>  }
> >>>>
> >>>> @@ -724,6 +787,7 @@ struct sw_flow
> >>>> *ovs_flow_tbl_lookup_stats(struct
> >>>> flow_table *tbl,
> >>>>                                           u32 *n_mask_hit,
> >>>>                                           u32 *n_cache_hit)
> >>>>  {
> >>>> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
> >>>>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
> >>>>         struct table_instance *ti = rcu_dereference(tbl->ti);
> >>>>         struct mask_cache_entry *entries, *ce;
> >>>> @@ -733,7 +797,7 @@ struct sw_flow
> >>>> *ovs_flow_tbl_lookup_stats(struct
> >>>> flow_table *tbl,
> >>>>
> >>>>         *n_mask_hit = 0;
> >>>>         *n_cache_hit = 0;
> >>>> -       if (unlikely(!skb_hash)) {
> >>>> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
> >>>>                 u32 mask_index = 0;
> >>>>                 u32 cache = 0;
> >>>>
> >>>> @@ -749,11 +813,11 @@ struct sw_flow
> >>>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
> >>>>
> >>>>         ce = NULL;
> >>>>         hash = skb_hash;
> >>>> -       entries = this_cpu_ptr(tbl->mask_cache);
> >>>> +       entries = this_cpu_ptr(mc->mask_cache);
> >>>>
> >>>>         /* Find the cache entry 'ce' to operate on. */
> >>>>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> >>>> -               int index = hash & (MC_HASH_ENTRIES - 1);
> >>>> +               int index = hash & (mc->cache_size - 1);
> >>>>                 struct mask_cache_entry *e;
> >>>>
> >>>>                 e = &entries[index];
> >>>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
> >>>> flow_table *table)
> >>>>         return READ_ONCE(ma->count);
> >>>>  }
> >>>>
> >>>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
> >>>> +{
> >>>> +       struct mask_cache *mc = rcu_dereference(table->mask_cache);
> >>>> +
> >>>> +       return READ_ONCE(mc->cache_size);
> >>>> +}
> >>>> +
> >>>>  static struct table_instance *table_instance_expand(struct
> >>>> table_instance *ti,
> >>>>                                                     bool ufid)
> >>>>  {
> >>>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct
> >>>> flow_table
> >>>> *table)
> >>>>         for (i = 0; i < masks_entries; i++) {
> >>>>                 int index = masks_and_count[i].index;
> >>>>
> >>>> -               new->masks[new->count++] =
> >>>> -                       rcu_dereference_ovsl(ma->masks[index]);
> >>>> +               if (ovsl_dereference(ma->masks[index]))
> >>>> +                       new->masks[new->count++] =
> >>>> ma->masks[index];
> >>>>         }
> >>>>
> >>>>         rcu_assign_pointer(table->mask_array, new);
> >>>> diff --git a/net/openvswitch/flow_table.h
> >>>> b/net/openvswitch/flow_table.h
> >>>> index 325e939371d8..f2dba952db2f 100644
> >>>> --- a/net/openvswitch/flow_table.h
> >>>> +++ b/net/openvswitch/flow_table.h
> >>>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
> >>>>         u32 mask_index;
> >>>>  };
> >>>>
> >>>> +struct mask_cache {
> >>>> +       struct rcu_head rcu;
> >>>> +       u32 cache_size;  /* Must be ^2 value. */
> >>>> +       struct mask_cache_entry __percpu *mask_cache;
> >>>> +};
> >>>> +
> >>>>  struct mask_count {
> >>>>         int index;
> >>>>         u64 counter;
> >>>> @@ -53,7 +59,7 @@ struct table_instance {
> >>>>  struct flow_table {
> >>>>         struct table_instance __rcu *ti;
> >>>>         struct table_instance __rcu *ufid_ti;
> >>>> -       struct mask_cache_entry __percpu *mask_cache;
> >>>> +       struct mask_cache __rcu *mask_cache;
> >>>>         struct mask_array __rcu *mask_array;
> >>>>         unsigned long last_rehash;
> >>>>         unsigned int count;
> >>>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table,
> >>>> struct sw_flow *flow,
> >>>>                         const struct sw_flow_mask *mask);
> >>>>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow
> >>>> *flow);
> >>>>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
> >>>> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table
> >>>> *table);
> >>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
> >>>> size);
> >>>>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance
> >>>> *table,
> >>>>                                        u32 *bucket, u32 *idx);
> >>>>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
> >>>>
> >>>
> >>>
> >>> --
> >>> Best regards, Tonghao
> >>
> >
> >
> > --
> > Best regards, Tonghao
>


--
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable
  2020-07-30 14:23           ` Tonghao Zhang
@ 2020-07-30 14:28             ` Eelco Chaudron
  0 siblings, 0 replies; 9+ messages in thread
From: Eelco Chaudron @ 2020-07-30 14:28 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Linux Kernel Network Developers, David Miller, ovs dev,
	Jakub Kicinski, Paolo Abeni, Pravin Shelar, Florian Westphal



On 30 Jul 2020, at 16:23, Tonghao Zhang wrote:

> On Thu, Jul 30, 2020 at 9:52 PM Eelco Chaudron <echaudro@redhat.com> 
> wrote:
>>
>>
>>
>> On 30 Jul 2020, at 15:25, Tonghao Zhang wrote:
>>
>>> On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echaudro@redhat.com>
>>> wrote:
>>>>
>>>> Thanks Tonghao for the review, see comments inline below!
>>>>
>>>> If I get no more reviews by the end of the week I’ll send out a 
>>>> v4.
>>>>
>>>> //Eelco
>>>>
>>>>
>>>> On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
>>>>
>>>>> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron 
>>>>> <echaudro@redhat.com>
>>>>> wrote:
>>>>>>
>>>>>> This patch makes the masks cache size configurable, or with
>>>>>> a size of 0, disable it.
>>>>>>
>>>>>> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
>>>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>>>> ---
>>>>>> Changes in v3:
>>>>>>  - Use is_power_of_2() function
>>>>>>  - Use array_size() function
>>>>>>  - Fix remaining sparse errors
>>>>>>
>>>>>> Changes in v2:
>>>>>>  - Fix sparse warnings
>>>>>>  - Fix netlink policy items reported by Florian Westphal
>>>>>>
>>>>>>  include/uapi/linux/openvswitch.h |    1
>>>>>>  net/openvswitch/datapath.c       |   14 +++++
>>>>>>  net/openvswitch/flow_table.c     |   97
>>>>>> +++++++++++++++++++++++++++++++++-----
>>>>>>  net/openvswitch/flow_table.h     |   10 ++++
>>>>>>  4 files changed, 108 insertions(+), 14 deletions(-)
>>>>>>
>>>>>> diff --git a/include/uapi/linux/openvswitch.h
>>>>>> b/include/uapi/linux/openvswitch.h
>>>>>> index 7cb76e5ca7cf..8300cc29dec8 100644
>>>>>> --- a/include/uapi/linux/openvswitch.h
>>>>>> +++ b/include/uapi/linux/openvswitch.h
>>>>>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>>>>>>         OVS_DP_ATTR_MEGAFLOW_STATS,     /* struct
>>>>>> ovs_dp_megaflow_stats */
>>>>>>         OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
>>>>>>         OVS_DP_ATTR_PAD,
>>>>>> +       OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>>>>         __OVS_DP_ATTR_MAX
>>>>>>  };
>>>>>>
>>>>>> diff --git a/net/openvswitch/datapath.c
>>>>>> b/net/openvswitch/datapath.c
>>>>>> index a54df1fe3ec4..114b2ddb8037 100644
>>>>>> --- a/net/openvswitch/datapath.c
>>>>>> +++ b/net/openvswitch/datapath.c
>>>>>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>>>>>>         msgsize += nla_total_size_64bit(sizeof(struct
>>>>>> ovs_dp_stats));
>>>>>>         msgsize += nla_total_size_64bit(sizeof(struct
>>>>>> ovs_dp_megaflow_stats));
>>>>>>         msgsize += nla_total_size(sizeof(u32)); /*
>>>>>> OVS_DP_ATTR_USER_FEATURES */
>>>>>> +       msgsize += nla_total_size(sizeof(u32)); /*
>>>>>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>>>>>
>>>>>>         return msgsize;
>>>>>>  }
>>>>>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
>>>>>> datapath *dp, struct sk_buff *skb,
>>>>>>         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
>>>>>> dp->user_features))
>>>>>>                 goto nla_put_failure;
>>>>>>
>>>>>> +       if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>>>>>> +                       
>>>>>> ovs_flow_tbl_masks_cache_size(&dp->table)))
>>>>>> +               goto nla_put_failure;
>>>>>> +
>>>>>>         genlmsg_end(skb, ovs_header);
>>>>>>         return 0;
>>>>>>
>>>>>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath
>>>>>> *dp,
>>>>>> struct nlattr *a[])
>>>>>>  #endif
>>>>>>         }
>>>>>>
>>>>>> +       if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>>>>>> +               u32 cache_size;
>>>>>> +
>>>>>> +               cache_size =
>>>>>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>>>>>> +               ovs_flow_tbl_masks_cache_resize(&dp->table,
>>>>>> cache_size);
>>>>> Do we should return error code, if we can't change the 
>>>>> "mask_cache"
>>>>> size ? for example, -EINVAL, -ENOMEM
>>>>
>>>> Initially, I did not do this due to the fact the new value is
>>>> reported,
>>>> and on failure, the old value is shown.
>>>> However thinking about it again, it makes more sense to return an
>>>> error.
>>>> Will sent a v4 with the following to return:
>>>>
>>>> -
>>>> -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size)
>>>> +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>>>> size)
>>>>   {
>>>>          struct mask_cache *mc = 
>>>> rcu_dereference(table->mask_cache);
>>>>          struct mask_cache *new;
>>>>
>>>>          if (size == mc->cache_size)
>>>> -               return;
>>>> +               return 0;
>>>> +
>>>> +       if (!is_power_of_2(size) && size != 0)
>>>> +               return -EINVAL;
>>> add check as below, and we can remove the check in
>>> tbl_mask_cache_alloc function. That
>>> function will only return NULL, if there is no memory. And we can
>>> comment for tbl_mask_cache_alloc, to indicate that size should be a
>>> power of 2.
>>>  if ((!is_power_of_2(size) && size != 0) ||
>>>            (size * sizeof(struct mask_cache_entry)) >
>>> PCPU_MIN_UNIT_SIZE)
>>
>> I do not think the extra check hurts as it’s not in any fast path.
> Hi Eelco.
> Yes, I agree. Sorry for not being clear. I mean that:
> if we don't check the "(size * sizeof(struct mask_cache_entry))  >
> PCPU_MIN_UNIT_SIZE)" in the ovs_flow_tbl_masks_cache_resize.
> tbl_mask_cache_alloc (ovs_flow_tbl_masks_cache_resize will invoke this
> function)will return the NULL,
> ovs_flow_tbl_masks_cache_resize will return -ENOMEM. I guess we should
> return -EINVAL in that case.

Thanks got it now, will add the additional check, and not remove it from 
tbl_mask_cache_alloc().

>> The comment would easily be missed, especially if someone changes the
>> default value, so I would prefer to keep it as is.
>>
>>> Thanks Eelco.
>>> Reviewed-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>
>>>>          new = tbl_mask_cache_alloc(size);
>>>>          if (!new)
>>>> -               return;
>>>> +               return -ENOMEM;
>>>>
>>>>          rcu_assign_pointer(table->mask_cache, new);
>>>>          call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>> +
>>>> +       return 0;
>>>>   }
>>>>
>>>>>> +       }
>>>>>> +
>>>>>>         dp->user_features = user_features;
>>>>>>
>>>>>>         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>>>>>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
>>>>>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>>>>>>         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
>>>>>> IFNAMSIZ - 1 },
>>>>>>         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>>>>>>         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>>>>>> +       [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  
>>>>>> NLA_POLICY_RANGE(NLA_U32,
>>>>>> 0,
>>>>>> +               PCPU_MIN_UNIT_SIZE / sizeof(struct
>>>>>> mask_cache_entry)),
>>>>>>  };
>>>>>>
>>>>>>  static const struct genl_ops dp_datapath_genl_ops[] = {
>>>>>> diff --git a/net/openvswitch/flow_table.c
>>>>>> b/net/openvswitch/flow_table.c
>>>>>> index a5912ea05352..5280aeeef628 100644
>>>>>> --- a/net/openvswitch/flow_table.c
>>>>>> +++ b/net/openvswitch/flow_table.c
>>>>>> @@ -38,8 +38,8 @@
>>>>>>  #define MASK_ARRAY_SIZE_MIN    16
>>>>>>  #define REHASH_INTERVAL                (10 * 60 * HZ)
>>>>>>
>>>>>> +#define MC_DEFAULT_HASH_ENTRIES        256
>>>>>>  #define MC_HASH_SHIFT          8
>>>>>> -#define MC_HASH_ENTRIES                (1u << MC_HASH_SHIFT)
>>>>>>  #define MC_HASH_SEGS           ((sizeof(uint32_t) * 8) /
>>>>>> MC_HASH_SHIFT)
>>>>>>
>>>>>>  static struct kmem_cache *flow_cache;
>>>>>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct
>>>>>> flow_table
>>>>>> *tbl, struct sw_flow_mask *mask)
>>>>>>         }
>>>>>>  }
>>>>>>
>>>>>> +static void __mask_cache_destroy(struct mask_cache *mc)
>>>>>> +{
>>>>>> +       if (mc->mask_cache)
>>>>>> +               free_percpu(mc->mask_cache);
>>>>> free_percpu the NULL is safe. we can remove the "if".
>>>>
>>>> Makes sense, will remove the if() check.
>>>>
>>>>>> +       kfree(mc);
>>>>>> +}
>>>>>> +
>>>>>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
>>>>>> +{
>>>>>> +       struct mask_cache *mc = container_of(rcu, struct
>>>>>> mask_cache,
>>>>>> rcu);
>>>>>> +
>>>>>> +       __mask_cache_destroy(mc);
>>>>>> +}
>>>>>> +
>>>>>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>>>>>> +{
>>>>>> +       struct mask_cache_entry __percpu *cache = NULL;
>>>>>> +       struct mask_cache *new;
>>>>>> +
>>>>>> +       /* Only allow size to be 0, or a power of 2, and does not
>>>>>> exceed
>>>>>> +        * percpu allocation size.
>>>>>> +        */
>>>>>> +       if ((!is_power_of_2(size) && size != 0) ||
>>>>>> +           (size * sizeof(struct mask_cache_entry)) >
>>>>>> PCPU_MIN_UNIT_SIZE)
>>>>>> +               return NULL;
>>>>>> +       new = kzalloc(sizeof(*new), GFP_KERNEL);
>>>>>> +       if (!new)
>>>>>> +               return NULL;
>>>>>> +
>>>>>> +       new->cache_size = size;
>>>>>> +       if (new->cache_size > 0) {
>>>>>> +               cache = __alloc_percpu(array_size(sizeof(struct
>>>>>> mask_cache_entry),
>>>>>> +                                                 
>>>>>> new->cache_size),
>>>>>> +                                      __alignof__(struct
>>>>>> mask_cache_entry));
>>>>>> +               if (!cache) {
>>>>>> +                       kfree(new);
>>>>>> +                       return NULL;
>>>>>> +               }
>>>>>> +       }
>>>>>> +
>>>>>> +       new->mask_cache = cache;
>>>>>> +       return new;
>>>>>> +}
>>>>>> +
>>>>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, 
>>>>>> u32
>>>>>> size)
>>>>>> +{
>>>>>> +       struct mask_cache *mc = 
>>>>>> rcu_dereference(table->mask_cache);
>>>>>> +       struct mask_cache *new;
>>>>>> +
>>>>>> +       if (size == mc->cache_size)
>>>>>> +               return;
>>>>>> +
>>>>>> +       new = tbl_mask_cache_alloc(size);
>>>>>> +       if (!new)
>>>>>> +               return;
>>>>>> +
>>>>>> +       rcu_assign_pointer(table->mask_cache, new);
>>>>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>>>> +}
>>>>>> +
>>>>>>  int ovs_flow_tbl_init(struct flow_table *table)
>>>>>>  {
>>>>>>         struct table_instance *ti, *ufid_ti;
>>>>>> +       struct mask_cache *mc;
>>>>>>         struct mask_array *ma;
>>>>>>
>>>>>> -       table->mask_cache = __alloc_percpu(sizeof(struct
>>>>>> mask_cache_entry) *
>>>>>> -                                          MC_HASH_ENTRIES,
>>>>>> -                                          __alignof__(struct
>>>>>> mask_cache_entry));
>>>>>> -       if (!table->mask_cache)
>>>>>> +       mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>>>>>> +       if (!mc)
>>>>>>                 return -ENOMEM;
>>>>>>
>>>>>>         ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>>>>>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table 
>>>>>> *table)
>>>>>>         rcu_assign_pointer(table->ti, ti);
>>>>>>         rcu_assign_pointer(table->ufid_ti, ufid_ti);
>>>>>>         rcu_assign_pointer(table->mask_array, ma);
>>>>>> +       rcu_assign_pointer(table->mask_cache, mc);
>>>>>>         table->last_rehash = jiffies;
>>>>>>         table->count = 0;
>>>>>>         table->ufid_count = 0;
>>>>>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table 
>>>>>> *table)
>>>>>>  free_mask_array:
>>>>>>         __mask_array_destroy(ma);
>>>>>>  free_mask_cache:
>>>>>> -       free_percpu(table->mask_cache);
>>>>>> +       __mask_cache_destroy(mc);
>>>>>>         return -ENOMEM;
>>>>>>  }
>>>>>>
>>>>>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
>>>>>> *table)
>>>>>>  {
>>>>>>         struct table_instance *ti = 
>>>>>> rcu_dereference_raw(table->ti);
>>>>>>         struct table_instance *ufid_ti =
>>>>>> rcu_dereference_raw(table->ufid_ti);
>>>>>> +       struct mask_cache *mc = 
>>>>>> rcu_dereference(table->mask_cache);
>>>>>> +       struct mask_array *ma =
>>>>>> rcu_dereference_ovsl(table->mask_array);
>>>>>>
>>>>>> -       free_percpu(table->mask_cache);
>>>>>> -       call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
>>>>>> +       call_rcu(&mc->rcu, mask_cache_rcu_cb);
>>>>>> +       call_rcu(&ma->rcu, mask_array_rcu_cb);
>>>>>>         table_instance_destroy(table, ti, ufid_ti, false);
>>>>>>  }
>>>>>>
>>>>>> @@ -724,6 +787,7 @@ struct sw_flow
>>>>>> *ovs_flow_tbl_lookup_stats(struct
>>>>>> flow_table *tbl,
>>>>>>                                           u32 *n_mask_hit,
>>>>>>                                           u32 *n_cache_hit)
>>>>>>  {
>>>>>> +       struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>>>>>>         struct mask_array *ma = rcu_dereference(tbl->mask_array);
>>>>>>         struct table_instance *ti = rcu_dereference(tbl->ti);
>>>>>>         struct mask_cache_entry *entries, *ce;
>>>>>> @@ -733,7 +797,7 @@ struct sw_flow
>>>>>> *ovs_flow_tbl_lookup_stats(struct
>>>>>> flow_table *tbl,
>>>>>>
>>>>>>         *n_mask_hit = 0;
>>>>>>         *n_cache_hit = 0;
>>>>>> -       if (unlikely(!skb_hash)) {
>>>>>> +       if (unlikely(!skb_hash || mc->cache_size == 0)) {
>>>>>>                 u32 mask_index = 0;
>>>>>>                 u32 cache = 0;
>>>>>>
>>>>>> @@ -749,11 +813,11 @@ struct sw_flow
>>>>>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>>>>>
>>>>>>         ce = NULL;
>>>>>>         hash = skb_hash;
>>>>>> -       entries = this_cpu_ptr(tbl->mask_cache);
>>>>>> +       entries = this_cpu_ptr(mc->mask_cache);
>>>>>>
>>>>>>         /* Find the cache entry 'ce' to operate on. */
>>>>>>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
>>>>>> -               int index = hash & (MC_HASH_ENTRIES - 1);
>>>>>> +               int index = hash & (mc->cache_size - 1);
>>>>>>                 struct mask_cache_entry *e;
>>>>>>
>>>>>>                 e = &entries[index];
>>>>>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
>>>>>> flow_table *table)
>>>>>>         return READ_ONCE(ma->count);
>>>>>>  }
>>>>>>
>>>>>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table 
>>>>>> *table)
>>>>>> +{
>>>>>> +       struct mask_cache *mc = 
>>>>>> rcu_dereference(table->mask_cache);
>>>>>> +
>>>>>> +       return READ_ONCE(mc->cache_size);
>>>>>> +}
>>>>>> +
>>>>>>  static struct table_instance *table_instance_expand(struct
>>>>>> table_instance *ti,
>>>>>>                                                     bool ufid)
>>>>>>  {
>>>>>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct
>>>>>> flow_table
>>>>>> *table)
>>>>>>         for (i = 0; i < masks_entries; i++) {
>>>>>>                 int index = masks_and_count[i].index;
>>>>>>
>>>>>> -               new->masks[new->count++] =
>>>>>> -                       rcu_dereference_ovsl(ma->masks[index]);
>>>>>> +               if (ovsl_dereference(ma->masks[index]))
>>>>>> +                       new->masks[new->count++] =
>>>>>> ma->masks[index];
>>>>>>         }
>>>>>>
>>>>>>         rcu_assign_pointer(table->mask_array, new);
>>>>>> diff --git a/net/openvswitch/flow_table.h
>>>>>> b/net/openvswitch/flow_table.h
>>>>>> index 325e939371d8..f2dba952db2f 100644
>>>>>> --- a/net/openvswitch/flow_table.h
>>>>>> +++ b/net/openvswitch/flow_table.h
>>>>>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>>>>>>         u32 mask_index;
>>>>>>  };
>>>>>>
>>>>>> +struct mask_cache {
>>>>>> +       struct rcu_head rcu;
>>>>>> +       u32 cache_size;  /* Must be ^2 value. */
>>>>>> +       struct mask_cache_entry __percpu *mask_cache;
>>>>>> +};
>>>>>> +
>>>>>>  struct mask_count {
>>>>>>         int index;
>>>>>>         u64 counter;
>>>>>> @@ -53,7 +59,7 @@ struct table_instance {
>>>>>>  struct flow_table {
>>>>>>         struct table_instance __rcu *ti;
>>>>>>         struct table_instance __rcu *ufid_ti;
>>>>>> -       struct mask_cache_entry __percpu *mask_cache;
>>>>>> +       struct mask_cache __rcu *mask_cache;
>>>>>>         struct mask_array __rcu *mask_array;
>>>>>>         unsigned long last_rehash;
>>>>>>         unsigned int count;
>>>>>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table 
>>>>>> *table,
>>>>>> struct sw_flow *flow,
>>>>>>                         const struct sw_flow_mask *mask);
>>>>>>  void ovs_flow_tbl_remove(struct flow_table *table, struct 
>>>>>> sw_flow
>>>>>> *flow);
>>>>>>  int  ovs_flow_tbl_num_masks(const struct flow_table *table);
>>>>>> +u32  ovs_flow_tbl_masks_cache_size(const struct flow_table
>>>>>> *table);
>>>>>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, 
>>>>>> u32
>>>>>> size);
>>>>>>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance
>>>>>> *table,
>>>>>>                                        u32 *bucket, u32 *idx);
>>>>>>  struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>>>>>>
>>>>>
>>>>>
>>>>> --
>>>>> Best regards, Tonghao
>>>>
>>>
>>>
>>> --
>>> Best regards, Tonghao
>>
>
>
> --
> Best regards, Tonghao


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-07-30 14:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-29 13:25 [PATCH net-next v3 0/2] net: openvswitch: masks cache enhancements Eelco Chaudron
2020-07-29 13:26 ` [PATCH net-next v3 1/2] net: openvswitch: add masks cache hit counter Eelco Chaudron
2020-07-29 13:27 ` [PATCH net-next v3 2/2] net: openvswitch: make masks cache size configurable Eelco Chaudron
2020-07-30  5:07   ` Tonghao Zhang
2020-07-30 12:33     ` Eelco Chaudron
2020-07-30 13:25       ` Tonghao Zhang
2020-07-30 13:51         ` Eelco Chaudron
2020-07-30 14:23           ` Tonghao Zhang
2020-07-30 14:28             ` Eelco Chaudron

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.