All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
@ 2020-03-23 13:10 xiangxia.m.yue
  2020-03-23 13:10 ` [PATCH net-next v1 2/3] net: openvswitch: set max limitation to meters xiangxia.m.yue
                   ` (5 more replies)
  0 siblings, 6 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-03-23 13:10 UTC (permalink / raw)
  To: pshelar; +Cc: netdev, dev, Tonghao Zhang, Andy Zhou

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

In the kernel datapath of Open vSwitch, there are only 1024
meter buckets in one dp. Installing more than 1024
(e.g. 8192) meters may lead to a performance drop.
But in some cases, for example when Open vSwitch is used as an
edge gateway, at least 200,000 meters are needed for
IP address bandwidth limitation.

[Open vSwitch userspace datapath has this issue too.]

To make meters more scalable, this patch expands the buckets
when necessary, so we can install more meters in the datapath.

* Introduce the struct *dp_meter_instance*, which makes it easy to
  expand the meter table by changing the *ti* pointer in the struct
  *dp_meter_table*.
* Use kvmalloc_array instead of kmalloc_array.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
 net/openvswitch/meter.h    |  17 +++-
 3 files changed, 153 insertions(+), 34 deletions(-)

diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index e239a46c2f94..785105578448 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -82,7 +82,7 @@ struct datapath {
 	u32 max_headroom;
 
 	/* Switch meters. */
-	struct hlist_head *meters;
+	struct dp_meter_table *meters;
 };
 
 /**
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5010d1ddd4bd..98003b201b45 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
 	kfree_rcu(meter, rcu);
 }
 
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
+static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
 					    u32 meter_id)
 {
-	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
+	u32 hash = jhash_1word(meter_id, ti->hash_seed);
+
+	return &ti->buckets[hash & (ti->n_buckets - 1)];
 }
 
 /* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
 				     u32 meter_id)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	struct dp_meter *meter;
 	struct hlist_head *head;
 
-	head = meter_hash_bucket(dp, meter_id);
-	hlist_for_each_entry_rcu(meter, head, dp_hash_node,
-				lockdep_ovsl_is_held()) {
+	head = meter_hash_bucket(ti, meter_id);
+	hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
+				 lockdep_ovsl_is_held()) {
 		if (meter->id == meter_id)
 			return meter;
 	}
+
 	return NULL;
 }
 
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
+{
+	struct dp_meter_instance *ti;
+	int i;
+
+	ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	if (!ti)
+		return NULL;
+
+	ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
+				     GFP_KERNEL);
+	if (!ti->buckets) {
+		kfree(ti);
+		return NULL;
+	}
+
+	for (i = 0; i < size; i++)
+		INIT_HLIST_HEAD(&ti->buckets[i]);
+
+	ti->n_buckets = size;
+	ti->node_ver = 0;
+	get_random_bytes(&ti->hash_seed, sizeof(u32));
+
+	return ti;
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
 {
-	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+	struct dp_meter_instance *ti;
 
-	hlist_add_head_rcu(&meter->dp_hash_node, head);
+	ti = container_of(rcu, struct dp_meter_instance, rcu);
+	kvfree(ti->buckets);
+	kfree(ti);
 }
 
-static void detach_meter(struct dp_meter *meter)
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	struct hlist_head *head = meter_hash_bucket(ti, meter->id);
+
+	hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
 {
+	hlist_del_rcu(&meter->hash_node[ti->node_ver]);
+}
+
+static struct dp_meter_instance *
+dp_meter_instance_expand(struct dp_meter_instance *ti)
+{
+	struct dp_meter_instance *new_ti;
+	int i;
+
+	new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
+	if (!new_ti)
+		return NULL;
+
+	new_ti->node_ver = !ti->node_ver;
+
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct hlist_head *head = &ti->buckets[i];
+		struct dp_meter *meter;
+
+		hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
+					 lockdep_ovsl_is_held())
+			dp_meter_instance_insert(new_ti, meter);
+	}
+
+	return new_ti;
+}
+
+static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *new_ti;
+	struct dp_meter_instance *ti;
+
+	ti = rcu_dereference_ovsl(tbl->ti);
+	dp_meter_instance_insert(ti, meter);
+
+	/* operate the counter safely, because called with ovs_lock. */
+	tbl->count++;
+
+	if (tbl->count > ti->n_buckets) {
+		new_ti = dp_meter_instance_expand(ti);
+
+		if (new_ti) {
+			rcu_assign_pointer(tbl->ti, new_ti);
+			call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+		}
+	}
+}
+
+static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+
 	ASSERT_OVSL();
-	if (meter)
-		hlist_del_rcu(&meter->dp_hash_node);
+	if (meter) {
+		/* operate the counter safely, because called with ovs_lock. */
+		tbl->count--;
+		dp_meter_instance_remove(ti, meter);
+	}
 }
 
 static struct sk_buff *
@@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
 	/* Cannot fail after this. */
-	old_meter = lookup_meter(dp, meter_id);
-	detach_meter(old_meter);
-	attach_meter(dp, meter);
+	old_meter = lookup_meter(dp->meters, meter_id);
+	detach_meter(dp->meters, old_meter);
+	attach_meter(dp->meters, meter);
 	ovs_unlock();
 
 	/* Build response with the meter_id and stats from
@@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Locate meter, copy stats. */
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(dp->meters, meter_id);
 	if (!meter) {
 		err = -ENOENT;
 		goto exit_unlock;
@@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	old_meter = lookup_meter(dp, meter_id);
+	old_meter = lookup_meter(dp->meters, meter_id);
 	if (old_meter) {
 		spin_lock_bh(&old_meter->lock);
 		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 		WARN_ON(err);
 		spin_unlock_bh(&old_meter->lock);
-		detach_meter(old_meter);
+		detach_meter(dp->meters, old_meter);
 	}
 	ovs_unlock();
 	ovs_meter_free(old_meter);
@@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 	int i, band_exceeded_max = -1;
 	u32 band_exceeded_rate = 0;
 
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(dp->meters, meter_id);
 	/* Do not drop the packet when there is no meter. */
 	if (!meter)
 		return false;
@@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
 
 int ovs_meters_init(struct datapath *dp)
 {
-	int i;
+	struct dp_meter_instance *ti;
+	struct dp_meter_table *tbl;
 
-	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
-				   sizeof(struct hlist_head), GFP_KERNEL);
+	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
+	if (!tbl)
+		return -ENOMEM;
 
-	if (!dp->meters)
+	tbl->count = 0;
+
+	ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
+	if (!ti) {
+		kfree(tbl);
 		return -ENOMEM;
+	}
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->meters[i]);
+	rcu_assign_pointer(tbl->ti, ti);
+	dp->meters = tbl;
 
 	return 0;
 }
 
 void ovs_meters_exit(struct datapath *dp)
 {
+	struct dp_meter_table *tbl = dp->meters;
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	int i;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++) {
-		struct hlist_head *head = &dp->meters[i];
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct hlist_head *head = &ti->buckets[i];
 		struct dp_meter *meter;
 		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
-			kfree(meter);
+		hlist_for_each_entry_safe(meter, n, head,
+					  hash_node[ti->node_ver])
+			ovs_meter_free(meter);
 	}
 
-	kfree(dp->meters);
+	kvfree(ti->buckets);
+	kfree(ti);
+	kfree(tbl);
 }
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f645913870bd..bc84796d7d4d 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -30,9 +30,7 @@ struct dp_meter_band {
 struct dp_meter {
 	spinlock_t lock;    /* Per meter lock */
 	struct rcu_head rcu;
-	struct hlist_node dp_hash_node; /*Element in datapath->meters
-					 * hash table.
-					 */
+	struct hlist_node hash_node[2];
 	u32 id;
 	u16 kbps:1, keep_stats:1;
 	u16 n_bands;
@@ -42,6 +40,19 @@ struct dp_meter {
 	struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+	struct hlist_head *buckets;
+	struct rcu_head rcu;
+	u32 n_buckets;
+	u32 hash_seed;
+	u8 node_ver;
+};
+
+struct dp_meter_table {
+	struct dp_meter_instance __rcu *ti;
+	u32 count;
+};
+
 extern struct genl_family dp_meter_genl_family;
 int ovs_meters_init(struct datapath *dp);
 void ovs_meters_exit(struct datapath *dp);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v1 2/3] net: openvswitch: set max limitation to meters
  2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
@ 2020-03-23 13:10 ` xiangxia.m.yue
  2020-03-23 13:10 ` [PATCH net-next v1 3/3] net: openvswitch: remove the unnecessary check xiangxia.m.yue
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-03-23 13:10 UTC (permalink / raw)
  To: pshelar; +Cc: netdev, dev, Tonghao Zhang, Andy Zhou

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Don't allow users to create an unlimited number of meters, which
may consume a large amount of kernel memory.
A limit of 200,000 meters should be fine in the general case.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 14 +++++++++-----
 net/openvswitch/meter.h |  3 ++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 98003b201b45..5efd48e024f0 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -256,7 +256,7 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
 
-	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, DP_MAX_METERS) ||
 	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 		goto nla_put_failure;
 
@@ -284,13 +284,17 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 
 static struct dp_meter *dp_meter_create(struct nlattr **a)
 {
+	u32 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	struct dp_meter_band *band;
+	struct dp_meter *meter;
 	struct nlattr *nla;
-	int rem;
 	u16 n_bands = 0;
-	struct dp_meter *meter;
-	struct dp_meter_band *band;
+	int rem;
 	int err;
 
+	if (meter_id > DP_MAX_METERS)
+		return ERR_PTR(-EFBIG);
+
 	/* Validate attributes, count the bands. */
 	if (!a[OVS_METER_ATTR_BANDS])
 		return ERR_PTR(-EINVAL);
@@ -304,7 +308,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 	if (!meter)
 		return ERR_PTR(-ENOMEM);
 
-	meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	meter->id = meter_id;
 	meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
 	meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
 	meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index bc84796d7d4d..9ff7a9200d0d 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -17,7 +17,8 @@
 #include "flow.h"
 struct datapath;
 
-#define DP_MAX_BANDS		1
+#define DP_MAX_METERS	(200000ULL)
+#define DP_MAX_BANDS	1
 
 struct dp_meter_band {
 	u32 type;
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v1 3/3] net: openvswitch: remove the unnecessary check
  2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
  2020-03-23 13:10 ` [PATCH net-next v1 2/3] net: openvswitch: set max limitation to meters xiangxia.m.yue
@ 2020-03-23 13:10 ` xiangxia.m.yue
  2020-03-29 16:46 ` [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported Pravin Shelar
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-03-23 13:10 UTC (permalink / raw)
  To: pshelar; +Cc: netdev, dev, Tonghao Zhang, Andy Zhou

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Before calling ovs_meter_cmd_reply_stats, "meter"
is already checked, so don't check it again in that function.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5efd48e024f0..03b39b0eb4ea 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -212,12 +212,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 		goto error;
 
-	if (!meter)
-		return 0;
-
 	if (nla_put(reply, OVS_METER_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &meter->stats) ||
-	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+		    sizeof(struct ovs_flow_stats), &meter->stats))
+		goto error;
+
+	if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 			      OVS_METER_ATTR_PAD))
 		goto error;
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
  2020-03-23 13:10 ` [PATCH net-next v1 2/3] net: openvswitch: set max limitation to meters xiangxia.m.yue
  2020-03-23 13:10 ` [PATCH net-next v1 3/3] net: openvswitch: remove the unnecessary check xiangxia.m.yue
@ 2020-03-29 16:46 ` Pravin Shelar
  2020-03-30  0:34   ` Tonghao Zhang
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: Pravin Shelar @ 2020-03-29 16:46 UTC (permalink / raw)
  To: Tonghao Zhang; +Cc: Linux Kernel Network Developers, ovs dev, Andy Zhou

On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> In kernel datapath of Open vSwitch, there are only 1024
> buckets of meter in one dp. If installing more than 1024
> (e.g. 8192) meters, it may lead to the performance drop.
> But in some case, for example, Open vSwitch used as edge
> gateway, there should be 200,000+ at least, meters used for
> IP address bandwidth limitation.
>
> [Open vSwitch userspace datapath has this issue too.]
>
> For more scalable meter, this patch expands the buckets
> when necessary, so we can install more meters in the datapath.
>
> * Introducing the struct *dp_meter_instance*, it's easy to
>   expand meter though change the *ti* point in the struct
>   *dp_meter_table*.
> * Using kvmalloc_array instead of kmalloc_array.
>
Thanks for working on this, I have a couple of comments.

> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/datapath.h |   2 +-
>  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
>  net/openvswitch/meter.h    |  17 +++-
>  3 files changed, 153 insertions(+), 34 deletions(-)
>
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index e239a46c2f94..785105578448 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -82,7 +82,7 @@ struct datapath {
>         u32 max_headroom;
>
>         /* Switch meters. */
> -       struct hlist_head *meters;
> +       struct dp_meter_table *meters;
>  };
>
>  /**
> diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> index 5010d1ddd4bd..98003b201b45 100644
> --- a/net/openvswitch/meter.c
> +++ b/net/openvswitch/meter.c
> @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
>         kfree_rcu(meter, rcu);
>  }
>
> -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
>                                             u32 meter_id)
>  {
> -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> +
I do not see any need to hash the meter-id; can you explain it?


> +       return &ti->buckets[hash & (ti->n_buckets - 1)];
>  }
>
>  /* Call with ovs_mutex or RCU read lock. */
> -static struct dp_meter *lookup_meter(const struct datapath *dp,
> +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
>                                      u32 meter_id)
>  {
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
>         struct dp_meter *meter;
>         struct hlist_head *head;
>
> -       head = meter_hash_bucket(dp, meter_id);
> -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> -                               lockdep_ovsl_is_held()) {
> +       head = meter_hash_bucket(ti, meter_id);
> +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> +                                lockdep_ovsl_is_held()) {
>                 if (meter->id == meter_id)
>                         return meter;
>         }
> +
This patch expands the meter table linearly with the number of meters
added to the datapath, so I do not see the need for a hash table. It can
be a simple array, which would also improve lookup efficiency.
For hash collisions we could find the next free slot in the array. Let me
know what you think about this approach.


>         return NULL;
>  }
>
> -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> +{
> +       struct dp_meter_instance *ti;
> +       int i;
> +
> +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> +       if (!ti)
> +               return NULL;
> +
> +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> +                                    GFP_KERNEL);
> +       if (!ti->buckets) {
> +               kfree(ti);
> +               return NULL;
> +       }
> +
> +       for (i = 0; i < size; i++)
> +               INIT_HLIST_HEAD(&ti->buckets[i]);
> +
> +       ti->n_buckets = size;
> +       ti->node_ver = 0;
> +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> +
> +       return ti;
> +}
> +
> +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
>  {
> -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> +       struct dp_meter_instance *ti;
>
> -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> +       kvfree(ti->buckets);
> +       kfree(ti);
>  }
>
> -static void detach_meter(struct dp_meter *meter)
> +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> +                                    struct dp_meter *meter)
> +{
> +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> +
> +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> +}
> +
> +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> +                                    struct dp_meter *meter)
>  {
> +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> +}
> +
> +static struct dp_meter_instance *
> +dp_meter_instance_expand(struct dp_meter_instance *ti)
> +{
> +       struct dp_meter_instance *new_ti;
> +       int i;
> +
> +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> +       if (!new_ti)
> +               return NULL;
> +
> +       new_ti->node_ver = !ti->node_ver;
> +
> +       for (i = 0; i < ti->n_buckets; i++) {
> +               struct hlist_head *head = &ti->buckets[i];
> +               struct dp_meter *meter;
> +
> +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> +                                        lockdep_ovsl_is_held())
> +                       dp_meter_instance_insert(new_ti, meter);
> +       }
> +
> +       return new_ti;
> +}
> +
> +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> +{
> +       struct dp_meter_instance *new_ti;
> +       struct dp_meter_instance *ti;
> +
> +       ti = rcu_dereference_ovsl(tbl->ti);
> +       dp_meter_instance_insert(ti, meter);
> +
> +       /* operate the counter safely, because called with ovs_lock. */
> +       tbl->count++;
> +
> +       if (tbl->count > ti->n_buckets) {
> +               new_ti = dp_meter_instance_expand(ti);
> +


> +               if (new_ti) {
> +                       rcu_assign_pointer(tbl->ti, new_ti);
> +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> +               }
> +       }
> +}
> +
> +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> +{
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +
>         ASSERT_OVSL();
> -       if (meter)
> -               hlist_del_rcu(&meter->dp_hash_node);
> +       if (meter) {
> +               /* operate the counter safely, because called with ovs_lock. */
> +               tbl->count--;
> +               dp_meter_instance_remove(ti, meter);
> +       }
>  }
>
>  static struct sk_buff *
> @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
>         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
>
>         /* Cannot fail after this. */
> -       old_meter = lookup_meter(dp, meter_id);
> -       detach_meter(old_meter);
> -       attach_meter(dp, meter);
> +       old_meter = lookup_meter(dp->meters, meter_id);
> +       detach_meter(dp->meters, old_meter);
> +       attach_meter(dp->meters, meter);
>         ovs_unlock();
>
>         /* Build response with the meter_id and stats from
> @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
>         }
>
>         /* Locate meter, copy stats. */
> -       meter = lookup_meter(dp, meter_id);
> +       meter = lookup_meter(dp->meters, meter_id);
>         if (!meter) {
>                 err = -ENOENT;
>                 goto exit_unlock;
> @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
>                 goto exit_unlock;
>         }
>
> -       old_meter = lookup_meter(dp, meter_id);
> +       old_meter = lookup_meter(dp->meters, meter_id);
>         if (old_meter) {
>                 spin_lock_bh(&old_meter->lock);
>                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
>                 WARN_ON(err);
>                 spin_unlock_bh(&old_meter->lock);
> -               detach_meter(old_meter);
> +               detach_meter(dp->meters, old_meter);
>         }
>         ovs_unlock();
>         ovs_meter_free(old_meter);
> @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
>         int i, band_exceeded_max = -1;
>         u32 band_exceeded_rate = 0;
>
> -       meter = lookup_meter(dp, meter_id);
> +       meter = lookup_meter(dp->meters, meter_id);
>         /* Do not drop the packet when there is no meter. */
>         if (!meter)
>                 return false;
> @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
>
>  int ovs_meters_init(struct datapath *dp)
>  {
> -       int i;
> +       struct dp_meter_instance *ti;
> +       struct dp_meter_table *tbl;
>
> -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> -                                  sizeof(struct hlist_head), GFP_KERNEL);
> +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> +       if (!tbl)
> +               return -ENOMEM;
>
> -       if (!dp->meters)
> +       tbl->count = 0;
> +
> +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> +       if (!ti) {
> +               kfree(tbl);
>                 return -ENOMEM;
> +       }
>
> -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> -               INIT_HLIST_HEAD(&dp->meters[i]);
> +       rcu_assign_pointer(tbl->ti, ti);
> +       dp->meters = tbl;
>
>         return 0;
>  }
>
>  void ovs_meters_exit(struct datapath *dp)
>  {
> +       struct dp_meter_table *tbl = dp->meters;
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
>         int i;
>
> -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> -               struct hlist_head *head = &dp->meters[i];
> +       for (i = 0; i < ti->n_buckets; i++) {
> +               struct hlist_head *head = &ti->buckets[i];
>                 struct dp_meter *meter;
>                 struct hlist_node *n;
>
> -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> -                       kfree(meter);
> +               hlist_for_each_entry_safe(meter, n, head,
> +                                         hash_node[ti->node_ver])
> +                       ovs_meter_free(meter);
>         }
>
> -       kfree(dp->meters);
> +       kvfree(ti->buckets);
> +       kfree(ti);
> +       kfree(tbl);
>  }
> diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> index f645913870bd..bc84796d7d4d 100644
> --- a/net/openvswitch/meter.h
> +++ b/net/openvswitch/meter.h
> @@ -30,9 +30,7 @@ struct dp_meter_band {
>  struct dp_meter {
>         spinlock_t lock;    /* Per meter lock */
>         struct rcu_head rcu;
> -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> -                                        * hash table.
> -                                        */
> +       struct hlist_node hash_node[2];
>         u32 id;
>         u16 kbps:1, keep_stats:1;
>         u16 n_bands;
> @@ -42,6 +40,19 @@ struct dp_meter {
>         struct dp_meter_band bands[];
>  };
>
> +struct dp_meter_instance {
> +       struct hlist_head *buckets;
> +       struct rcu_head rcu;
> +       u32 n_buckets;
> +       u32 hash_seed;
> +       u8 node_ver;
> +};
> +
> +struct dp_meter_table {
> +       struct dp_meter_instance __rcu *ti;
> +       u32 count;
> +};
> +
>  extern struct genl_family dp_meter_genl_family;
>  int ovs_meters_init(struct datapath *dp);
>  void ovs_meters_exit(struct datapath *dp);
> --
> 2.23.0
>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-03-29 16:46 ` [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported Pravin Shelar
@ 2020-03-30  0:34   ` Tonghao Zhang
  2020-03-31  3:57     ` Pravin Shelar
  0 siblings, 1 reply; 48+ messages in thread
From: Tonghao Zhang @ 2020-03-30  0:34 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: Linux Kernel Network Developers, ovs dev, Andy Zhou

On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > In kernel datapath of Open vSwitch, there are only 1024
> > buckets of meter in one dp. If installing more than 1024
> > (e.g. 8192) meters, it may lead to the performance drop.
> > But in some case, for example, Open vSwitch used as edge
> > gateway, there should be 200,000+ at least, meters used for
> > IP address bandwidth limitation.
> >
> > [Open vSwitch userspace datapath has this issue too.]
> >
> > For more scalable meter, this patch expands the buckets
> > when necessary, so we can install more meters in the datapath.
> >
> > * Introducing the struct *dp_meter_instance*, it's easy to
> >   expand meter though change the *ti* point in the struct
> >   *dp_meter_table*.
> > * Using kvmalloc_array instead of kmalloc_array.
> >
> Thanks for working on this, I have couple of comments.
>
> > Cc: Pravin B Shelar <pshelar@ovn.org>
> > Cc: Andy Zhou <azhou@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> >  net/openvswitch/datapath.h |   2 +-
> >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> >  net/openvswitch/meter.h    |  17 +++-
> >  3 files changed, 153 insertions(+), 34 deletions(-)
> >
> > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > index e239a46c2f94..785105578448 100644
> > --- a/net/openvswitch/datapath.h
> > +++ b/net/openvswitch/datapath.h
> > @@ -82,7 +82,7 @@ struct datapath {
> >         u32 max_headroom;
> >
> >         /* Switch meters. */
> > -       struct hlist_head *meters;
> > +       struct dp_meter_table *meters;
> >  };
> >
> >  /**
> > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > index 5010d1ddd4bd..98003b201b45 100644
> > --- a/net/openvswitch/meter.c
> > +++ b/net/openvswitch/meter.c
> > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> >         kfree_rcu(meter, rcu);
> >  }
> >
> > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> >                                             u32 meter_id)
> >  {
> > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > +
> I do not see any need to hash meter-id, can you explain it.
>
> > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> >  }
> >
> >  /* Call with ovs_mutex or RCU read lock. */
> > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> >                                      u32 meter_id)
> >  {
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> >         struct dp_meter *meter;
> >         struct hlist_head *head;
> >
> > -       head = meter_hash_bucket(dp, meter_id);
> > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > -                               lockdep_ovsl_is_held()) {
> > +       head = meter_hash_bucket(ti, meter_id);
> > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > +                                lockdep_ovsl_is_held()) {
> >                 if (meter->id == meter_id)
> >                         return meter;
> >         }
> > +
> This patch is expanding meter table linearly with number meters added
> to datapath. so I do not see need to have hash table. it can be a
> simple array. This would also improve lookup efficiency.
> For hash collision we could find next free slot in array. let me know
> what do you think about this approach.
Hi Pravin
If we use a simple array, then on a hash collision during insertion we can
probe for the next free slot. But when there are already many meters in the
array, we may have to check many slots before finding a free one.
Likewise, when we look up a meter after a hash collision, we may have to
walk many array slots before we either find it or conclude that the meter
does not exist in the array. In that case, there may be a lookup
performance drop.

As for hashing the meter-id in meter_hash_bucket, I am not 100% sure it is
useful. It just means the hash_seed is updated when the meters are
expanded. For performance, we can remove it. Thanks.
>
> >         return NULL;
> >  }
> >
> > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > +{
> > +       struct dp_meter_instance *ti;
> > +       int i;
> > +
> > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > +       if (!ti)
> > +               return NULL;
> > +
> > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > +                                    GFP_KERNEL);
> > +       if (!ti->buckets) {
> > +               kfree(ti);
> > +               return NULL;
> > +       }
> > +
> > +       for (i = 0; i < size; i++)
> > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > +
> > +       ti->n_buckets = size;
> > +       ti->node_ver = 0;
> > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > +
> > +       return ti;
> > +}
> > +
> > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> >  {
> > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > +       struct dp_meter_instance *ti;
> >
> > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > +       kvfree(ti->buckets);
> > +       kfree(ti);
> >  }
> >
> > -static void detach_meter(struct dp_meter *meter)
> > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > +                                    struct dp_meter *meter)
> > +{
> > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > +
> > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > +}
> > +
> > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > +                                    struct dp_meter *meter)
> >  {
> > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > +}
> > +
> > +static struct dp_meter_instance *
> > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > +{
> > +       struct dp_meter_instance *new_ti;
> > +       int i;
> > +
> > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > +       if (!new_ti)
> > +               return NULL;
> > +
> > +       new_ti->node_ver = !ti->node_ver;
> > +
> > +       for (i = 0; i < ti->n_buckets; i++) {
> > +               struct hlist_head *head = &ti->buckets[i];
> > +               struct dp_meter *meter;
> > +
> > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > +                                        lockdep_ovsl_is_held())
> > +                       dp_meter_instance_insert(new_ti, meter);
> > +       }
> > +
> > +       return new_ti;
> > +}
> > +
> > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > +{
> > +       struct dp_meter_instance *new_ti;
> > +       struct dp_meter_instance *ti;
> > +
> > +       ti = rcu_dereference_ovsl(tbl->ti);
> > +       dp_meter_instance_insert(ti, meter);
> > +
> > +       /* operate the counter safely, because called with ovs_lock. */
> > +       tbl->count++;
> > +
> > +       if (tbl->count > ti->n_buckets) {
> > +               new_ti = dp_meter_instance_expand(ti);
> > +
>
>
> > +               if (new_ti) {
> > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > +               }
> > +       }
> > +}
> > +
> > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > +{
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +
> >         ASSERT_OVSL();
> > -       if (meter)
> > -               hlist_del_rcu(&meter->dp_hash_node);
> > +       if (meter) {
> > +               /* operate the counter safely, because called with ovs_lock. */
> > +               tbl->count--;
> > +               dp_meter_instance_remove(ti, meter);
> > +       }
> >  }
> >
> >  static struct sk_buff *
> > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> >
> >         /* Cannot fail after this. */
> > -       old_meter = lookup_meter(dp, meter_id);
> > -       detach_meter(old_meter);
> > -       attach_meter(dp, meter);
> > +       old_meter = lookup_meter(dp->meters, meter_id);
> > +       detach_meter(dp->meters, old_meter);
> > +       attach_meter(dp->meters, meter);
> >         ovs_unlock();
> >
> >         /* Build response with the meter_id and stats from
> > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> >         }
> >
> >         /* Locate meter, copy stats. */
> > -       meter = lookup_meter(dp, meter_id);
> > +       meter = lookup_meter(dp->meters, meter_id);
> >         if (!meter) {
> >                 err = -ENOENT;
> >                 goto exit_unlock;
> > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> >                 goto exit_unlock;
> >         }
> >
> > -       old_meter = lookup_meter(dp, meter_id);
> > +       old_meter = lookup_meter(dp->meters, meter_id);
> >         if (old_meter) {
> >                 spin_lock_bh(&old_meter->lock);
> >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> >                 WARN_ON(err);
> >                 spin_unlock_bh(&old_meter->lock);
> > -               detach_meter(old_meter);
> > +               detach_meter(dp->meters, old_meter);
> >         }
> >         ovs_unlock();
> >         ovs_meter_free(old_meter);
> > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> >         int i, band_exceeded_max = -1;
> >         u32 band_exceeded_rate = 0;
> >
> > -       meter = lookup_meter(dp, meter_id);
> > +       meter = lookup_meter(dp->meters, meter_id);
> >         /* Do not drop the packet when there is no meter. */
> >         if (!meter)
> >                 return false;
> > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> >
> >  int ovs_meters_init(struct datapath *dp)
> >  {
> > -       int i;
> > +       struct dp_meter_instance *ti;
> > +       struct dp_meter_table *tbl;
> >
> > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > +       if (!tbl)
> > +               return -ENOMEM;
> >
> > -       if (!dp->meters)
> > +       tbl->count = 0;
> > +
> > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > +       if (!ti) {
> > +               kfree(tbl);
> >                 return -ENOMEM;
> > +       }
> >
> > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > +       rcu_assign_pointer(tbl->ti, ti);
> > +       dp->meters = tbl;
> >
> >         return 0;
> >  }
> >
> >  void ovs_meters_exit(struct datapath *dp)
> >  {
> > +       struct dp_meter_table *tbl = dp->meters;
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> >         int i;
> >
> > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > -               struct hlist_head *head = &dp->meters[i];
> > +       for (i = 0; i < ti->n_buckets; i++) {
> > +               struct hlist_head *head = &ti->buckets[i];
> >                 struct dp_meter *meter;
> >                 struct hlist_node *n;
> >
> > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > -                       kfree(meter);
> > +               hlist_for_each_entry_safe(meter, n, head,
> > +                                         hash_node[ti->node_ver])
> > +                       ovs_meter_free(meter);
> >         }
> >
> > -       kfree(dp->meters);
> > +       kvfree(ti->buckets);
> > +       kfree(ti);
> > +       kfree(tbl);
> >  }
> > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > index f645913870bd..bc84796d7d4d 100644
> > --- a/net/openvswitch/meter.h
> > +++ b/net/openvswitch/meter.h
> > @@ -30,9 +30,7 @@ struct dp_meter_band {
> >  struct dp_meter {
> >         spinlock_t lock;    /* Per meter lock */
> >         struct rcu_head rcu;
> > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > -                                        * hash table.
> > -                                        */
> > +       struct hlist_node hash_node[2];
> >         u32 id;
> >         u16 kbps:1, keep_stats:1;
> >         u16 n_bands;
> > @@ -42,6 +40,19 @@ struct dp_meter {
> >         struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_meter_instance {
> > +       struct hlist_head *buckets;
> > +       struct rcu_head rcu;
> > +       u32 n_buckets;
> > +       u32 hash_seed;
> > +       u8 node_ver;
> > +};
> > +
> > +struct dp_meter_table {
> > +       struct dp_meter_instance __rcu *ti;
> > +       u32 count;
> > +};
> > +
> >  extern struct genl_family dp_meter_genl_family;
> >  int ovs_meters_init(struct datapath *dp);
> >  void ovs_meters_exit(struct datapath *dp);
> > --
> > 2.23.0
> >



-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-03-30  0:34   ` Tonghao Zhang
@ 2020-03-31  3:57     ` Pravin Shelar
  2020-04-01 10:50       ` Tonghao Zhang
  0 siblings, 1 reply; 48+ messages in thread
From: Pravin Shelar @ 2020-03-31  3:57 UTC (permalink / raw)
  To: Tonghao Zhang; +Cc: Linux Kernel Network Developers, ovs dev, Andy Zhou

On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> >
> > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > >
> > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >
> > > In kernel datapath of Open vSwitch, there are only 1024
> > > buckets of meter in one dp. If installing more than 1024
> > > (e.g. 8192) meters, it may lead to the performance drop.
> > > But in some case, for example, Open vSwitch used as edge
> > > gateway, there should be 200,000+ at least, meters used for
> > > IP address bandwidth limitation.
> > >
> > > [Open vSwitch userspace datapath has this issue too.]
> > >
> > > For more scalable meter, this patch expands the buckets
> > > when necessary, so we can install more meters in the datapath.
> > >
> > > * Introducing the struct *dp_meter_instance*, it's easy to
> > >   expand meter though change the *ti* point in the struct
> > >   *dp_meter_table*.
> > > * Using kvmalloc_array instead of kmalloc_array.
> > >
> > Thanks for working on this, I have couple of comments.
> >
> > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > Cc: Andy Zhou <azhou@ovn.org>
> > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > ---
> > >  net/openvswitch/datapath.h |   2 +-
> > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > >  net/openvswitch/meter.h    |  17 +++-
> > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > >
> > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > index e239a46c2f94..785105578448 100644
> > > --- a/net/openvswitch/datapath.h
> > > +++ b/net/openvswitch/datapath.h
> > > @@ -82,7 +82,7 @@ struct datapath {
> > >         u32 max_headroom;
> > >
> > >         /* Switch meters. */
> > > -       struct hlist_head *meters;
> > > +       struct dp_meter_table *meters;
> > >  };
> > >
> > >  /**
> > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > index 5010d1ddd4bd..98003b201b45 100644
> > > --- a/net/openvswitch/meter.c
> > > +++ b/net/openvswitch/meter.c
> > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > >         kfree_rcu(meter, rcu);
> > >  }
> > >
> > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > >                                             u32 meter_id)
> > >  {
> > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > +
> > I do not see any need to hash meter-id, can you explain it.
> >
> > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > >  }
> > >
> > >  /* Call with ovs_mutex or RCU read lock. */
> > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > >                                      u32 meter_id)
> > >  {
> > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > >         struct dp_meter *meter;
> > >         struct hlist_head *head;
> > >
> > > -       head = meter_hash_bucket(dp, meter_id);
> > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > -                               lockdep_ovsl_is_held()) {
> > > +       head = meter_hash_bucket(ti, meter_id);
> > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > +                                lockdep_ovsl_is_held()) {
> > >                 if (meter->id == meter_id)
> > >                         return meter;
> > >         }
> > > +
> > This patch is expanding meter table linearly with number meters added
> > to datapath. so I do not see need to have hash table. it can be a
> > simple array. This would also improve lookup efficiency.
> > For hash collision we could find next free slot in array. let me know
> > what do you think about this approach.
> Hi Pravin
> If we use the simple array, when inserting the meter, for hash collision, we can
> find next free slot, but one case, when there are many meters in the array.
> we may find many slot for the free slot.
> And when we lookup the meter, for hash collision, we may find many
> array slots, and
> then find it, or that meter does not exist in the array, In that case,
> there may be a lookup performance
> drop.
>
I was thinking that users could ensure that there are no hash collisions,
but the time complexity of the negative case is expensive, so I am fine
with the hash table.

> For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> useful. it just update
> hash_seed when expand meters. For performance, we can remove it. Thanks.
ok.

> > >         return NULL;
> > >  }
> > >
> > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > > +{
> > > +       struct dp_meter_instance *ti;
> > > +       int i;
> > > +
> > > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > > +       if (!ti)
> > > +               return NULL;
> > > +
> > > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > > +                                    GFP_KERNEL);
> > > +       if (!ti->buckets) {
> > > +               kfree(ti);
> > > +               return NULL;
> > > +       }
> > > +
> > > +       for (i = 0; i < size; i++)
> > > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > > +
> > > +       ti->n_buckets = size;
> > > +       ti->node_ver = 0;
> > > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > > +
> > > +       return ti;
> > > +}
> > > +
> > > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > >  {
> > > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > > +       struct dp_meter_instance *ti;
> > >
> > > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > > +       kvfree(ti->buckets);
> > > +       kfree(ti);
> > >  }
> > >
> > > -static void detach_meter(struct dp_meter *meter)
> > > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > > +                                    struct dp_meter *meter)
> > > +{
> > > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > > +
> > > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > > +}
> > > +
> > > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > > +                                    struct dp_meter *meter)
> > >  {
> > > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > > +}
> > > +
> > > +static struct dp_meter_instance *
> > > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > > +{
> > > +       struct dp_meter_instance *new_ti;
> > > +       int i;
> > > +
> > > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > > +       if (!new_ti)
> > > +               return NULL;
> > > +
> > > +       new_ti->node_ver = !ti->node_ver;
> > > +
> > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > +               struct hlist_head *head = &ti->buckets[i];
> > > +               struct dp_meter *meter;
> > > +
> > > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > +                                        lockdep_ovsl_is_held())
> > > +                       dp_meter_instance_insert(new_ti, meter);
> > > +       }
> > > +
> > > +       return new_ti;
> > > +}
> > > +
> > > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > +{
> > > +       struct dp_meter_instance *new_ti;
> > > +       struct dp_meter_instance *ti;
> > > +
> > > +       ti = rcu_dereference_ovsl(tbl->ti);
> > > +       dp_meter_instance_insert(ti, meter);
> > > +
> > > +       /* operate the counter safely, because called with ovs_lock. */
> > > +       tbl->count++;
> > > +
> > > +       if (tbl->count > ti->n_buckets) {
> > > +               new_ti = dp_meter_instance_expand(ti);
> > > +
> >
> >
> > > +               if (new_ti) {
> > > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > > +               }
> > > +       }
> > > +}
> > > +
> > > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > +{
> > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > +
> > >         ASSERT_OVSL();
> > > -       if (meter)
> > > -               hlist_del_rcu(&meter->dp_hash_node);
> > > +       if (meter) {
> > > +               /* operate the counter safely, because called with ovs_lock. */
> > > +               tbl->count--;
> > > +               dp_meter_instance_remove(ti, meter);
> > > +       }
> > >  }
> > >
> > >  static struct sk_buff *
> > > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > >
> > >         /* Cannot fail after this. */
> > > -       old_meter = lookup_meter(dp, meter_id);
> > > -       detach_meter(old_meter);
> > > -       attach_meter(dp, meter);
> > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > +       detach_meter(dp->meters, old_meter);
> > > +       attach_meter(dp->meters, meter);
> > >         ovs_unlock();
> > >
> > >         /* Build response with the meter_id and stats from
> > > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> > >         }
> > >
> > >         /* Locate meter, copy stats. */
> > > -       meter = lookup_meter(dp, meter_id);
> > > +       meter = lookup_meter(dp->meters, meter_id);
> > >         if (!meter) {
> > >                 err = -ENOENT;
> > >                 goto exit_unlock;
> > > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> > >                 goto exit_unlock;
> > >         }
> > >
> > > -       old_meter = lookup_meter(dp, meter_id);
> > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > >         if (old_meter) {
> > >                 spin_lock_bh(&old_meter->lock);
> > >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> > >                 WARN_ON(err);
> > >                 spin_unlock_bh(&old_meter->lock);
> > > -               detach_meter(old_meter);
> > > +               detach_meter(dp->meters, old_meter);
> > >         }
> > >         ovs_unlock();
> > >         ovs_meter_free(old_meter);
> > > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> > >         int i, band_exceeded_max = -1;
> > >         u32 band_exceeded_rate = 0;
> > >
> > > -       meter = lookup_meter(dp, meter_id);
> > > +       meter = lookup_meter(dp->meters, meter_id);
> > >         /* Do not drop the packet when there is no meter. */
> > >         if (!meter)
> > >                 return false;
> > > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> > >
> > >  int ovs_meters_init(struct datapath *dp)
> > >  {
> > > -       int i;
> > > +       struct dp_meter_instance *ti;
> > > +       struct dp_meter_table *tbl;
> > >
> > > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > > +       if (!tbl)
> > > +               return -ENOMEM;
> > >
> > > -       if (!dp->meters)
> > > +       tbl->count = 0;
> > > +
> > > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > > +       if (!ti) {
> > > +               kfree(tbl);
> > >                 return -ENOMEM;
> > > +       }
> > >
> > > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > > +       rcu_assign_pointer(tbl->ti, ti);
> > > +       dp->meters = tbl;
> > >
> > >         return 0;
> > >  }
> > >
> > >  void ovs_meters_exit(struct datapath *dp)
> > >  {
> > > +       struct dp_meter_table *tbl = dp->meters;
> > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > >         int i;
> > >
> > > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > > -               struct hlist_head *head = &dp->meters[i];
> > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > +               struct hlist_head *head = &ti->buckets[i];
> > >                 struct dp_meter *meter;
> > >                 struct hlist_node *n;
> > >
> > > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > > -                       kfree(meter);
> > > +               hlist_for_each_entry_safe(meter, n, head,
> > > +                                         hash_node[ti->node_ver])
> > > +                       ovs_meter_free(meter);
> > >         }
> > >
> > > -       kfree(dp->meters);
> > > +       kvfree(ti->buckets);
> > > +       kfree(ti);
> > > +       kfree(tbl);
> > >  }
> > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > index f645913870bd..bc84796d7d4d 100644
> > > --- a/net/openvswitch/meter.h
> > > +++ b/net/openvswitch/meter.h
> > > @@ -30,9 +30,7 @@ struct dp_meter_band {
> > >  struct dp_meter {
> > >         spinlock_t lock;    /* Per meter lock */
> > >         struct rcu_head rcu;
> > > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > > -                                        * hash table.
> > > -                                        */
> > > +       struct hlist_node hash_node[2];
> > >         u32 id;
> > >         u16 kbps:1, keep_stats:1;
> > >         u16 n_bands;
> > > @@ -42,6 +40,19 @@ struct dp_meter {
> > >         struct dp_meter_band bands[];
> > >  };
> > >
> > > +struct dp_meter_instance {
> > > +       struct hlist_head *buckets;
> > > +       struct rcu_head rcu;
> > > +       u32 n_buckets;
> > > +       u32 hash_seed;
> > > +       u8 node_ver;
> > > +};
> > > +
> > > +struct dp_meter_table {
> > > +       struct dp_meter_instance __rcu *ti;
> > > +       u32 count;
> > > +};
> > > +
> > >  extern struct genl_family dp_meter_genl_family;
> > >  int ovs_meters_init(struct datapath *dp);
> > >  void ovs_meters_exit(struct datapath *dp);
> > > --
> > > 2.23.0
> > >
>
>
>
> --
> Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-03-31  3:57     ` Pravin Shelar
@ 2020-04-01 10:50       ` Tonghao Zhang
  2020-04-01 21:12         ` Pravin Shelar
  2020-04-08 15:09         ` [ovs-dev] " William Tu
  0 siblings, 2 replies; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-01 10:50 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: Linux Kernel Network Developers, ovs dev, Andy Zhou

On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> >
> > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > >
> > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > >
> > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > >
> > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > buckets of meter in one dp. If installing more than 1024
> > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > But in some case, for example, Open vSwitch used as edge
> > > > gateway, there should be 200,000+ at least, meters used for
> > > > IP address bandwidth limitation.
> > > >
> > > > [Open vSwitch userspace datapath has this issue too.]
> > > >
> > > > For more scalable meter, this patch expands the buckets
> > > > when necessary, so we can install more meters in the datapath.
> > > >
> > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > >   expand meter though change the *ti* point in the struct
> > > >   *dp_meter_table*.
> > > > * Using kvmalloc_array instead of kmalloc_array.
> > > >
> > > Thanks for working on this, I have couple of comments.
> > >
> > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > ---
> > > >  net/openvswitch/datapath.h |   2 +-
> > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > >  net/openvswitch/meter.h    |  17 +++-
> > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > >
> > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > index e239a46c2f94..785105578448 100644
> > > > --- a/net/openvswitch/datapath.h
> > > > +++ b/net/openvswitch/datapath.h
> > > > @@ -82,7 +82,7 @@ struct datapath {
> > > >         u32 max_headroom;
> > > >
> > > >         /* Switch meters. */
> > > > -       struct hlist_head *meters;
> > > > +       struct dp_meter_table *meters;
> > > >  };
> > > >
> > > >  /**
> > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > --- a/net/openvswitch/meter.c
> > > > +++ b/net/openvswitch/meter.c
> > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > >         kfree_rcu(meter, rcu);
> > > >  }
> > > >
> > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > >                                             u32 meter_id)
> > > >  {
> > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > +
> > > I do not see any need to hash meter-id, can you explain it.
> > >
> > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > >  }
> > > >
> > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > >                                      u32 meter_id)
> > > >  {
> > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > >         struct dp_meter *meter;
> > > >         struct hlist_head *head;
> > > >
> > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > -                               lockdep_ovsl_is_held()) {
> > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > +                                lockdep_ovsl_is_held()) {
> > > >                 if (meter->id == meter_id)
> > > >                         return meter;
> > > >         }
> > > > +
> > > This patch is expanding meter table linearly with number meters added
> > > to datapath. so I do not see need to have hash table. it can be a
> > > simple array. This would also improve lookup efficiency.
> > > For hash collision we could find next free slot in array. let me know
> > > what do you think about this approach.
> > Hi Pravin
> > If we use the simple array, when inserting the meter, for hash collision, we can
> > find next free slot, but one case, when there are many meters in the array.
> > we may find many slot for the free slot.
> > And when we lookup the meter, for hash collision, we may find many
> > array slots, and
> > then find it, or that meter does not exist in the array, In that case,
> > there may be a lookup performance
> > drop.
> >
> I was thinking that users can insure that there are no hash collision,
> but time complexity of negative case is expensive. so I am fine with
> the hash table.
Hi Pravin
I checked the meter implementation of OVS again: ovs-vswitchd uses the id-pool to
get a valid meter-id, which is passed to the kernel, so there is no hash collision.
You are right: using a single array is the better solution.
> > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > useful. it just update
> > hash_seed when expand meters. For performance, we can remove it. Thanks.
> ok.
>
> > > >         return NULL;
> > > >  }
> > > >
> > > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > > > +{
> > > > +       struct dp_meter_instance *ti;
> > > > +       int i;
> > > > +
> > > > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > > > +       if (!ti)
> > > > +               return NULL;
> > > > +
> > > > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > > > +                                    GFP_KERNEL);
> > > > +       if (!ti->buckets) {
> > > > +               kfree(ti);
> > > > +               return NULL;
> > > > +       }
> > > > +
> > > > +       for (i = 0; i < size; i++)
> > > > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > > > +
> > > > +       ti->n_buckets = size;
> > > > +       ti->node_ver = 0;
> > > > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > > > +
> > > > +       return ti;
> > > > +}
> > > > +
> > > > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > > >  {
> > > > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > > > +       struct dp_meter_instance *ti;
> > > >
> > > > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > > > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > > > +       kvfree(ti->buckets);
> > > > +       kfree(ti);
> > > >  }
> > > >
> > > > -static void detach_meter(struct dp_meter *meter)
> > > > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > > > +                                    struct dp_meter *meter)
> > > > +{
> > > > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > > > +
> > > > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > > > +}
> > > > +
> > > > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > > > +                                    struct dp_meter *meter)
> > > >  {
> > > > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > > > +}
> > > > +
> > > > +static struct dp_meter_instance *
> > > > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > > > +{
> > > > +       struct dp_meter_instance *new_ti;
> > > > +       int i;
> > > > +
> > > > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > > > +       if (!new_ti)
> > > > +               return NULL;
> > > > +
> > > > +       new_ti->node_ver = !ti->node_ver;
> > > > +
> > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > +               struct dp_meter *meter;
> > > > +
> > > > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > +                                        lockdep_ovsl_is_held())
> > > > +                       dp_meter_instance_insert(new_ti, meter);
> > > > +       }
> > > > +
> > > > +       return new_ti;
> > > > +}
> > > > +
> > > > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > +{
> > > > +       struct dp_meter_instance *new_ti;
> > > > +       struct dp_meter_instance *ti;
> > > > +
> > > > +       ti = rcu_dereference_ovsl(tbl->ti);
> > > > +       dp_meter_instance_insert(ti, meter);
> > > > +
> > > > +       /* operate the counter safely, because called with ovs_lock. */
> > > > +       tbl->count++;
> > > > +
> > > > +       if (tbl->count > ti->n_buckets) {
> > > > +               new_ti = dp_meter_instance_expand(ti);
> > > > +
> > >
> > >
> > > > +               if (new_ti) {
> > > > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > > > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > > > +               }
> > > > +       }
> > > > +}
> > > > +
> > > > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > +{
> > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > +
> > > >         ASSERT_OVSL();
> > > > -       if (meter)
> > > > -               hlist_del_rcu(&meter->dp_hash_node);
> > > > +       if (meter) {
> > > > +               /* operate the counter safely, because called with ovs_lock. */
> > > > +               tbl->count--;
> > > > +               dp_meter_instance_remove(ti, meter);
> > > > +       }
> > > >  }
> > > >
> > > >  static struct sk_buff *
> > > > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > > >
> > > >         /* Cannot fail after this. */
> > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > -       detach_meter(old_meter);
> > > > -       attach_meter(dp, meter);
> > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > +       detach_meter(dp->meters, old_meter);
> > > > +       attach_meter(dp->meters, meter);
> > > >         ovs_unlock();
> > > >
> > > >         /* Build response with the meter_id and stats from
> > > > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> > > >         }
> > > >
> > > >         /* Locate meter, copy stats. */
> > > > -       meter = lookup_meter(dp, meter_id);
> > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > >         if (!meter) {
> > > >                 err = -ENOENT;
> > > >                 goto exit_unlock;
> > > > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> > > >                 goto exit_unlock;
> > > >         }
> > > >
> > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > >         if (old_meter) {
> > > >                 spin_lock_bh(&old_meter->lock);
> > > >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> > > >                 WARN_ON(err);
> > > >                 spin_unlock_bh(&old_meter->lock);
> > > > -               detach_meter(old_meter);
> > > > +               detach_meter(dp->meters, old_meter);
> > > >         }
> > > >         ovs_unlock();
> > > >         ovs_meter_free(old_meter);
> > > > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> > > >         int i, band_exceeded_max = -1;
> > > >         u32 band_exceeded_rate = 0;
> > > >
> > > > -       meter = lookup_meter(dp, meter_id);
> > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > >         /* Do not drop the packet when there is no meter. */
> > > >         if (!meter)
> > > >                 return false;
> > > > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> > > >
> > > >  int ovs_meters_init(struct datapath *dp)
> > > >  {
> > > > -       int i;
> > > > +       struct dp_meter_instance *ti;
> > > > +       struct dp_meter_table *tbl;
> > > >
> > > > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > > > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > > > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > > > +       if (!tbl)
> > > > +               return -ENOMEM;
> > > >
> > > > -       if (!dp->meters)
> > > > +       tbl->count = 0;
> > > > +
> > > > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > > > +       if (!ti) {
> > > > +               kfree(tbl);
> > > >                 return -ENOMEM;
> > > > +       }
> > > >
> > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > > > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > > > +       rcu_assign_pointer(tbl->ti, ti);
> > > > +       dp->meters = tbl;
> > > >
> > > >         return 0;
> > > >  }
> > > >
> > > >  void ovs_meters_exit(struct datapath *dp)
> > > >  {
> > > > +       struct dp_meter_table *tbl = dp->meters;
> > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > >         int i;
> > > >
> > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > > > -               struct hlist_head *head = &dp->meters[i];
> > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > +               struct hlist_head *head = &ti->buckets[i];
> > > >                 struct dp_meter *meter;
> > > >                 struct hlist_node *n;
> > > >
> > > > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > > > -                       kfree(meter);
> > > > +               hlist_for_each_entry_safe(meter, n, head,
> > > > +                                         hash_node[ti->node_ver])
> > > > +                       ovs_meter_free(meter);
> > > >         }
> > > >
> > > > -       kfree(dp->meters);
> > > > +       kvfree(ti->buckets);
> > > > +       kfree(ti);
> > > > +       kfree(tbl);
> > > >  }
> > > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > > index f645913870bd..bc84796d7d4d 100644
> > > > --- a/net/openvswitch/meter.h
> > > > +++ b/net/openvswitch/meter.h
> > > > @@ -30,9 +30,7 @@ struct dp_meter_band {
> > > >  struct dp_meter {
> > > >         spinlock_t lock;    /* Per meter lock */
> > > >         struct rcu_head rcu;
> > > > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > > > -                                        * hash table.
> > > > -                                        */
> > > > +       struct hlist_node hash_node[2];
> > > >         u32 id;
> > > >         u16 kbps:1, keep_stats:1;
> > > >         u16 n_bands;
> > > > @@ -42,6 +40,19 @@ struct dp_meter {
> > > >         struct dp_meter_band bands[];
> > > >  };
> > > >
> > > > +struct dp_meter_instance {
> > > > +       struct hlist_head *buckets;
> > > > +       struct rcu_head rcu;
> > > > +       u32 n_buckets;
> > > > +       u32 hash_seed;
> > > > +       u8 node_ver;
> > > > +};
> > > > +
> > > > +struct dp_meter_table {
> > > > +       struct dp_meter_instance __rcu *ti;
> > > > +       u32 count;
> > > > +};
> > > > +
> > > >  extern struct genl_family dp_meter_genl_family;
> > > >  int ovs_meters_init(struct datapath *dp);
> > > >  void ovs_meters_exit(struct datapath *dp);
> > > > --
> > > > 2.23.0
> > > >
> >
> >
> >
> > --
> > Best regards, Tonghao



-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-01 10:50       ` Tonghao Zhang
@ 2020-04-01 21:12         ` Pravin Shelar
  2020-04-08 15:09         ` [ovs-dev] " William Tu
  1 sibling, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-01 21:12 UTC (permalink / raw)
  To: Tonghao Zhang; +Cc: Linux Kernel Network Developers, ovs dev, Andy Zhou

Ok, thanks.

On Wed, Apr 1, 2020 at 3:50 AM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> >
> > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > >
> > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > >
> > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > >
> > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > >
> > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > buckets of meter in one dp. If installing more than 1024
> > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > But in some case, for example, Open vSwitch used as edge
> > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > IP address bandwidth limitation.
> > > > >
> > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > >
> > > > > For more scalable meter, this patch expands the buckets
> > > > > when necessary, so we can install more meters in the datapath.
> > > > >
> > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > >   expand meter though change the *ti* point in the struct
> > > > >   *dp_meter_table*.
> > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > >
> > > > Thanks for working on this, I have couple of comments.
> > > >
> > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > ---
> > > > >  net/openvswitch/datapath.h |   2 +-
> > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > >
> > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > index e239a46c2f94..785105578448 100644
> > > > > --- a/net/openvswitch/datapath.h
> > > > > +++ b/net/openvswitch/datapath.h
> > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > >         u32 max_headroom;
> > > > >
> > > > >         /* Switch meters. */
> > > > > -       struct hlist_head *meters;
> > > > > +       struct dp_meter_table *meters;
> > > > >  };
> > > > >
> > > > >  /**
> > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > --- a/net/openvswitch/meter.c
> > > > > +++ b/net/openvswitch/meter.c
> > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > >         kfree_rcu(meter, rcu);
> > > > >  }
> > > > >
> > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > >                                             u32 meter_id)
> > > > >  {
> > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > +
> > > > I do not see any need to hash meter-id, can you explain it.
> > > >
> > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > >  }
> > > > >
> > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > >                                      u32 meter_id)
> > > > >  {
> > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > >         struct dp_meter *meter;
> > > > >         struct hlist_head *head;
> > > > >
> > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > -                               lockdep_ovsl_is_held()) {
> > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > +                                lockdep_ovsl_is_held()) {
> > > > >                 if (meter->id == meter_id)
> > > > >                         return meter;
> > > > >         }
> > > > > +
> > > > This patch is expanding meter table linearly with number meters added
> > > > to datapath. so I do not see need to have hash table. it can be a
> > > > simple array. This would also improve lookup efficiency.
> > > > For hash collision we could find next free slot in array. let me know
> > > > what do you think about this approach.
> > > Hi Pravin
> > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > find next free slot, but one case, when there are many meters in the array.
> > > we may find many slot for the free slot.
> > > And when we lookup the meter, for hash collision, we may find many
> > > array slots, and
> > > then find it, or that meter does not exist in the array, In that case,
> > > there may be a lookup performance
> > > drop.
> > >
> > I was thinking that users can insure that there are no hash collision,
> > but time complexity of negative case is expensive. so I am fine with
> > the hash table.
> Hi Pravi
> I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> get a valid meter-id which passed to kernel, so there is no hash collision. You
> are right. we use the single array is the better solution.
> > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > useful. it just update
> > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > ok.
> >
> > > > >         return NULL;
> > > > >  }
> > > > >
> > > > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > > > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > > > > +{
> > > > > +       struct dp_meter_instance *ti;
> > > > > +       int i;
> > > > > +
> > > > > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > > > > +       if (!ti)
> > > > > +               return NULL;
> > > > > +
> > > > > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > > > > +                                    GFP_KERNEL);
> > > > > +       if (!ti->buckets) {
> > > > > +               kfree(ti);
> > > > > +               return NULL;
> > > > > +       }
> > > > > +
> > > > > +       for (i = 0; i < size; i++)
> > > > > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > > > > +
> > > > > +       ti->n_buckets = size;
> > > > > +       ti->node_ver = 0;
> > > > > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > > > > +
> > > > > +       return ti;
> > > > > +}
> > > > > +
> > > > > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > > > >  {
> > > > > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > > > > +       struct dp_meter_instance *ti;
> > > > >
> > > > > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > > > > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > > > > +       kvfree(ti->buckets);
> > > > > +       kfree(ti);
> > > > >  }
> > > > >
> > > > > -static void detach_meter(struct dp_meter *meter)
> > > > > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > > > > +                                    struct dp_meter *meter)
> > > > > +{
> > > > > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > > > > +
> > > > > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > > > > +}
> > > > > +
> > > > > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > > > > +                                    struct dp_meter *meter)
> > > > >  {
> > > > > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > > > > +}
> > > > > +
> > > > > +static struct dp_meter_instance *
> > > > > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > > > > +{
> > > > > +       struct dp_meter_instance *new_ti;
> > > > > +       int i;
> > > > > +
> > > > > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > > > > +       if (!new_ti)
> > > > > +               return NULL;
> > > > > +
> > > > > +       new_ti->node_ver = !ti->node_ver;
> > > > > +
> > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > > +               struct dp_meter *meter;
> > > > > +
> > > > > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > +                                        lockdep_ovsl_is_held())
> > > > > +                       dp_meter_instance_insert(new_ti, meter);
> > > > > +       }
> > > > > +
> > > > > +       return new_ti;
> > > > > +}
> > > > > +
> > > > > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > +{
> > > > > +       struct dp_meter_instance *new_ti;
> > > > > +       struct dp_meter_instance *ti;
> > > > > +
> > > > > +       ti = rcu_dereference_ovsl(tbl->ti);
> > > > > +       dp_meter_instance_insert(ti, meter);
> > > > > +
> > > > > +       /* operate the counter safely, because called with ovs_lock. */
> > > > > +       tbl->count++;
> > > > > +
> > > > > +       if (tbl->count > ti->n_buckets) {
> > > > > +               new_ti = dp_meter_instance_expand(ti);
> > > > > +
> > > >
> > > >
> > > > > +               if (new_ti) {
> > > > > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > > > > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > > > > +               }
> > > > > +       }
> > > > > +}
> > > > > +
> > > > > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > +{
> > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > +
> > > > >         ASSERT_OVSL();
> > > > > -       if (meter)
> > > > > -               hlist_del_rcu(&meter->dp_hash_node);
> > > > > +       if (meter) {
> > > > > +               /* operate the counter safely, because called with ovs_lock. */
> > > > > +               tbl->count--;
> > > > > +               dp_meter_instance_remove(ti, meter);
> > > > > +       }
> > > > >  }
> > > > >
> > > > >  static struct sk_buff *
> > > > > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > > > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > > > >
> > > > >         /* Cannot fail after this. */
> > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > -       detach_meter(old_meter);
> > > > > -       attach_meter(dp, meter);
> > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > > +       detach_meter(dp->meters, old_meter);
> > > > > +       attach_meter(dp->meters, meter);
> > > > >         ovs_unlock();
> > > > >
> > > > >         /* Build response with the meter_id and stats from
> > > > > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> > > > >         }
> > > > >
> > > > >         /* Locate meter, copy stats. */
> > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > >         if (!meter) {
> > > > >                 err = -ENOENT;
> > > > >                 goto exit_unlock;
> > > > > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> > > > >                 goto exit_unlock;
> > > > >         }
> > > > >
> > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > >         if (old_meter) {
> > > > >                 spin_lock_bh(&old_meter->lock);
> > > > >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> > > > >                 WARN_ON(err);
> > > > >                 spin_unlock_bh(&old_meter->lock);
> > > > > -               detach_meter(old_meter);
> > > > > +               detach_meter(dp->meters, old_meter);
> > > > >         }
> > > > >         ovs_unlock();
> > > > >         ovs_meter_free(old_meter);
> > > > > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> > > > >         int i, band_exceeded_max = -1;
> > > > >         u32 band_exceeded_rate = 0;
> > > > >
> > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > >         /* Do not drop the packet when there is no meter. */
> > > > >         if (!meter)
> > > > >                 return false;
> > > > > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> > > > >
> > > > >  int ovs_meters_init(struct datapath *dp)
> > > > >  {
> > > > > -       int i;
> > > > > +       struct dp_meter_instance *ti;
> > > > > +       struct dp_meter_table *tbl;
> > > > >
> > > > > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > > > > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > > > > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > > > > +       if (!tbl)
> > > > > +               return -ENOMEM;
> > > > >
> > > > > -       if (!dp->meters)
> > > > > +       tbl->count = 0;
> > > > > +
> > > > > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > > > > +       if (!ti) {
> > > > > +               kfree(tbl);
> > > > >                 return -ENOMEM;
> > > > > +       }
> > > > >
> > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > > > > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > > > > +       rcu_assign_pointer(tbl->ti, ti);
> > > > > +       dp->meters = tbl;
> > > > >
> > > > >         return 0;
> > > > >  }
> > > > >
> > > > >  void ovs_meters_exit(struct datapath *dp)
> > > > >  {
> > > > > +       struct dp_meter_table *tbl = dp->meters;
> > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > >         int i;
> > > > >
> > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > > > > -               struct hlist_head *head = &dp->meters[i];
> > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > >                 struct dp_meter *meter;
> > > > >                 struct hlist_node *n;
> > > > >
> > > > > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > > > > -                       kfree(meter);
> > > > > +               hlist_for_each_entry_safe(meter, n, head,
> > > > > +                                         hash_node[ti->node_ver])
> > > > > +                       ovs_meter_free(meter);
> > > > >         }
> > > > >
> > > > > -       kfree(dp->meters);
> > > > > +       kvfree(ti->buckets);
> > > > > +       kfree(ti);
> > > > > +       kfree(tbl);
> > > > >  }
> > > > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > > > index f645913870bd..bc84796d7d4d 100644
> > > > > --- a/net/openvswitch/meter.h
> > > > > +++ b/net/openvswitch/meter.h
> > > > > @@ -30,9 +30,7 @@ struct dp_meter_band {
> > > > >  struct dp_meter {
> > > > >         spinlock_t lock;    /* Per meter lock */
> > > > >         struct rcu_head rcu;
> > > > > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > > > > -                                        * hash table.
> > > > > -                                        */
> > > > > +       struct hlist_node hash_node[2];
> > > > >         u32 id;
> > > > >         u16 kbps:1, keep_stats:1;
> > > > >         u16 n_bands;
> > > > > @@ -42,6 +40,19 @@ struct dp_meter {
> > > > >         struct dp_meter_band bands[];
> > > > >  };
> > > > >
> > > > > +struct dp_meter_instance {
> > > > > +       struct hlist_head *buckets;
> > > > > +       struct rcu_head rcu;
> > > > > +       u32 n_buckets;
> > > > > +       u32 hash_seed;
> > > > > +       u8 node_ver;
> > > > > +};
> > > > > +
> > > > > +struct dp_meter_table {
> > > > > +       struct dp_meter_instance __rcu *ti;
> > > > > +       u32 count;
> > > > > +};
> > > > > +
> > > > >  extern struct genl_family dp_meter_genl_family;
> > > > >  int ovs_meters_init(struct datapath *dp);
> > > > >  void ovs_meters_exit(struct datapath *dp);
> > > > > --
> > > > > 2.23.0
> > > > >
> > >
> > >
> > >
> > > --
> > > Best regards, Tonghao
>
>
>
> --
> Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [ovs-dev] [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-01 10:50       ` Tonghao Zhang
  2020-04-01 21:12         ` Pravin Shelar
@ 2020-04-08 15:09         ` William Tu
  2020-04-08 15:59           ` Tonghao Zhang
  1 sibling, 1 reply; 48+ messages in thread
From: William Tu @ 2020-04-08 15:09 UTC (permalink / raw)
  To: Tonghao Zhang; +Cc: Pravin Shelar, ovs dev, Linux Kernel Network Developers

On Wed, Apr 01, 2020 at 06:50:09PM +0800, Tonghao Zhang wrote:
> On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> >
> > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > >
> > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > >
> > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > >
> > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > >
> > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > buckets of meter in one dp. If installing more than 1024
> > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > But in some case, for example, Open vSwitch used as edge
> > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > IP address bandwidth limitation.
> > > > >
> > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > >
> > > > > For more scalable meter, this patch expands the buckets
> > > > > when necessary, so we can install more meters in the datapath.
> > > > >
> > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > >   expand meter though change the *ti* point in the struct
> > > > >   *dp_meter_table*.
> > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > >
> > > > Thanks for working on this, I have couple of comments.
> > > >
> > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > ---
> > > > >  net/openvswitch/datapath.h |   2 +-
> > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > >
> > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > index e239a46c2f94..785105578448 100644
> > > > > --- a/net/openvswitch/datapath.h
> > > > > +++ b/net/openvswitch/datapath.h
> > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > >         u32 max_headroom;
> > > > >
> > > > >         /* Switch meters. */
> > > > > -       struct hlist_head *meters;
> > > > > +       struct dp_meter_table *meters;
> > > > >  };
> > > > >
> > > > >  /**
> > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > --- a/net/openvswitch/meter.c
> > > > > +++ b/net/openvswitch/meter.c
> > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > >         kfree_rcu(meter, rcu);
> > > > >  }
> > > > >
> > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > >                                             u32 meter_id)
> > > > >  {
> > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > +
> > > > I do not see any need to hash meter-id, can you explain it.
> > > >
> > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > >  }
> > > > >
> > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > >                                      u32 meter_id)
> > > > >  {
> > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > >         struct dp_meter *meter;
> > > > >         struct hlist_head *head;
> > > > >
> > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > -                               lockdep_ovsl_is_held()) {
> > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > +                                lockdep_ovsl_is_held()) {
> > > > >                 if (meter->id == meter_id)
> > > > >                         return meter;
> > > > >         }
> > > > > +
> > > > This patch is expanding meter table linearly with number meters added
> > > > to datapath. so I do not see need to have hash table. it can be a
> > > > simple array. This would also improve lookup efficiency.
> > > > For hash collision we could find next free slot in array. let me know
> > > > what do you think about this approach.
> > > Hi Pravin
> > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > find next free slot, but one case, when there are many meters in the array.
> > > we may find many slot for the free slot.
> > > And when we lookup the meter, for hash collision, we may find many
> > > array slots, and
> > > then find it, or that meter does not exist in the array, In that case,
> > > there may be a lookup performance
> > > drop.
> > >
> > I was thinking that users can insure that there are no hash collision,
> > but time complexity of negative case is expensive. so I am fine with
> > the hash table.

IIUC, there will be hash collisions. The meter id is a 32-bit value.
Currently in lib/dpif-netdev.c, MAX_METERS = 65536.

I think what Pravin suggests is to use another hash function to make
the hash table more condensed. Ex: hash1 and hash2.
For lookup, if hash1(key) misses, then try hash2(key).

William

> Hi Pravi
> I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> get a valid meter-id which passed to kernel, so there is no hash collision. You
> are right. we use the single array is the better solution.
> > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > useful. it just update
> > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > ok.
> >
> > > > >         return NULL;
> > > > >  }
> > > > >
> > > > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > > > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > > > > +{
> > > > > +       struct dp_meter_instance *ti;
> > > > > +       int i;
> > > > > +
> > > > > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > > > > +       if (!ti)
> > > > > +               return NULL;
> > > > > +
> > > > > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > > > > +                                    GFP_KERNEL);
> > > > > +       if (!ti->buckets) {
> > > > > +               kfree(ti);
> > > > > +               return NULL;
> > > > > +       }
> > > > > +
> > > > > +       for (i = 0; i < size; i++)
> > > > > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > > > > +
> > > > > +       ti->n_buckets = size;
> > > > > +       ti->node_ver = 0;
> > > > > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > > > > +
> > > > > +       return ti;
> > > > > +}
> > > > > +
> > > > > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > > > >  {
> > > > > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > > > > +       struct dp_meter_instance *ti;
> > > > >
> > > > > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > > > > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > > > > +       kvfree(ti->buckets);
> > > > > +       kfree(ti);
> > > > >  }
> > > > >
> > > > > -static void detach_meter(struct dp_meter *meter)
> > > > > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > > > > +                                    struct dp_meter *meter)
> > > > > +{
> > > > > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > > > > +
> > > > > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > > > > +}
> > > > > +
> > > > > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > > > > +                                    struct dp_meter *meter)
> > > > >  {
> > > > > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > > > > +}
> > > > > +
> > > > > +static struct dp_meter_instance *
> > > > > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > > > > +{
> > > > > +       struct dp_meter_instance *new_ti;
> > > > > +       int i;
> > > > > +
> > > > > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > > > > +       if (!new_ti)
> > > > > +               return NULL;
> > > > > +
> > > > > +       new_ti->node_ver = !ti->node_ver;
> > > > > +
> > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > > +               struct dp_meter *meter;
> > > > > +
> > > > > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > +                                        lockdep_ovsl_is_held())
> > > > > +                       dp_meter_instance_insert(new_ti, meter);
> > > > > +       }
> > > > > +
> > > > > +       return new_ti;
> > > > > +}
> > > > > +
> > > > > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > +{
> > > > > +       struct dp_meter_instance *new_ti;
> > > > > +       struct dp_meter_instance *ti;
> > > > > +
> > > > > +       ti = rcu_dereference_ovsl(tbl->ti);
> > > > > +       dp_meter_instance_insert(ti, meter);
> > > > > +
> > > > > +       /* operate the counter safely, because called with ovs_lock. */
> > > > > +       tbl->count++;
> > > > > +
> > > > > +       if (tbl->count > ti->n_buckets) {
> > > > > +               new_ti = dp_meter_instance_expand(ti);
> > > > > +
> > > >
> > > >
> > > > > +               if (new_ti) {
> > > > > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > > > > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > > > > +               }
> > > > > +       }
> > > > > +}
> > > > > +
> > > > > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > +{
> > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > +
> > > > >         ASSERT_OVSL();
> > > > > -       if (meter)
> > > > > -               hlist_del_rcu(&meter->dp_hash_node);
> > > > > +       if (meter) {
> > > > > +               /* operate the counter safely, because called with ovs_lock. */
> > > > > +               tbl->count--;
> > > > > +               dp_meter_instance_remove(ti, meter);
> > > > > +       }
> > > > >  }
> > > > >
> > > > >  static struct sk_buff *
> > > > > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > > > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > > > >
> > > > >         /* Cannot fail after this. */
> > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > -       detach_meter(old_meter);
> > > > > -       attach_meter(dp, meter);
> > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > > +       detach_meter(dp->meters, old_meter);
> > > > > +       attach_meter(dp->meters, meter);
> > > > >         ovs_unlock();
> > > > >
> > > > >         /* Build response with the meter_id and stats from
> > > > > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> > > > >         }
> > > > >
> > > > >         /* Locate meter, copy stats. */
> > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > >         if (!meter) {
> > > > >                 err = -ENOENT;
> > > > >                 goto exit_unlock;
> > > > > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> > > > >                 goto exit_unlock;
> > > > >         }
> > > > >
> > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > >         if (old_meter) {
> > > > >                 spin_lock_bh(&old_meter->lock);
> > > > >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> > > > >                 WARN_ON(err);
> > > > >                 spin_unlock_bh(&old_meter->lock);
> > > > > -               detach_meter(old_meter);
> > > > > +               detach_meter(dp->meters, old_meter);
> > > > >         }
> > > > >         ovs_unlock();
> > > > >         ovs_meter_free(old_meter);
> > > > > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> > > > >         int i, band_exceeded_max = -1;
> > > > >         u32 band_exceeded_rate = 0;
> > > > >
> > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > >         /* Do not drop the packet when there is no meter. */
> > > > >         if (!meter)
> > > > >                 return false;
> > > > > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> > > > >
> > > > >  int ovs_meters_init(struct datapath *dp)
> > > > >  {
> > > > > -       int i;
> > > > > +       struct dp_meter_instance *ti;
> > > > > +       struct dp_meter_table *tbl;
> > > > >
> > > > > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > > > > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > > > > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > > > > +       if (!tbl)
> > > > > +               return -ENOMEM;
> > > > >
> > > > > -       if (!dp->meters)
> > > > > +       tbl->count = 0;
> > > > > +
> > > > > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > > > > +       if (!ti) {
> > > > > +               kfree(tbl);
> > > > >                 return -ENOMEM;
> > > > > +       }
> > > > >
> > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > > > > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > > > > +       rcu_assign_pointer(tbl->ti, ti);
> > > > > +       dp->meters = tbl;
> > > > >
> > > > >         return 0;
> > > > >  }
> > > > >
> > > > >  void ovs_meters_exit(struct datapath *dp)
> > > > >  {
> > > > > +       struct dp_meter_table *tbl = dp->meters;
> > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > >         int i;
> > > > >
> > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > > > > -               struct hlist_head *head = &dp->meters[i];
> > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > >                 struct dp_meter *meter;
> > > > >                 struct hlist_node *n;
> > > > >
> > > > > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > > > > -                       kfree(meter);
> > > > > +               hlist_for_each_entry_safe(meter, n, head,
> > > > > +                                         hash_node[ti->node_ver])
> > > > > +                       ovs_meter_free(meter);
> > > > >         }
> > > > >
> > > > > -       kfree(dp->meters);
> > > > > +       kvfree(ti->buckets);
> > > > > +       kfree(ti);
> > > > > +       kfree(tbl);
> > > > >  }
> > > > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > > > index f645913870bd..bc84796d7d4d 100644
> > > > > --- a/net/openvswitch/meter.h
> > > > > +++ b/net/openvswitch/meter.h
> > > > > @@ -30,9 +30,7 @@ struct dp_meter_band {
> > > > >  struct dp_meter {
> > > > >         spinlock_t lock;    /* Per meter lock */
> > > > >         struct rcu_head rcu;
> > > > > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > > > > -                                        * hash table.
> > > > > -                                        */
> > > > > +       struct hlist_node hash_node[2];
> > > > >         u32 id;
> > > > >         u16 kbps:1, keep_stats:1;
> > > > >         u16 n_bands;
> > > > > @@ -42,6 +40,19 @@ struct dp_meter {
> > > > >         struct dp_meter_band bands[];
> > > > >  };
> > > > >
> > > > > +struct dp_meter_instance {
> > > > > +       struct hlist_head *buckets;
> > > > > +       struct rcu_head rcu;
> > > > > +       u32 n_buckets;
> > > > > +       u32 hash_seed;
> > > > > +       u8 node_ver;
> > > > > +};
> > > > > +
> > > > > +struct dp_meter_table {
> > > > > +       struct dp_meter_instance __rcu *ti;
> > > > > +       u32 count;
> > > > > +};
> > > > > +
> > > > >  extern struct genl_family dp_meter_genl_family;
> > > > >  int ovs_meters_init(struct datapath *dp);
> > > > >  void ovs_meters_exit(struct datapath *dp);
> > > > > --
> > > > > 2.23.0
> > > > >
> > >
> > >
> > >
> > > --
> > > Best regards, Tonghao
> 
> 
> 
> -- 
> Best regards, Tonghao
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [ovs-dev] [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-08 15:09         ` [ovs-dev] " William Tu
@ 2020-04-08 15:59           ` Tonghao Zhang
  2020-04-08 16:01             ` Tonghao Zhang
  2020-04-09 21:41             ` William Tu
  0 siblings, 2 replies; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-08 15:59 UTC (permalink / raw)
  To: William Tu; +Cc: Pravin Shelar, ovs dev, Linux Kernel Network Developers

On Wed, Apr 8, 2020 at 11:09 PM William Tu <u9012063@gmail.com> wrote:
>
> On Wed, Apr 01, 2020 at 06:50:09PM +0800, Tonghao Zhang wrote:
> > On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > >
> > > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > > >
> > > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > >
> > > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > > >
> > > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > >
> > > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > > buckets of meter in one dp. If installing more than 1024
> > > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > > But in some case, for example, Open vSwitch used as edge
> > > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > > IP address bandwidth limitation.
> > > > > >
> > > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > > >
> > > > > > For more scalable meter, this patch expands the buckets
> > > > > > when necessary, so we can install more meters in the datapath.
> > > > > >
> > > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > > >   expand meter though change the *ti* point in the struct
> > > > > >   *dp_meter_table*.
> > > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > > >
> > > > > Thanks for working on this, I have couple of comments.
> > > > >
> > > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > ---
> > > > > >  net/openvswitch/datapath.h |   2 +-
> > > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > > >
> > > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > > index e239a46c2f94..785105578448 100644
> > > > > > --- a/net/openvswitch/datapath.h
> > > > > > +++ b/net/openvswitch/datapath.h
> > > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > > >         u32 max_headroom;
> > > > > >
> > > > > >         /* Switch meters. */
> > > > > > -       struct hlist_head *meters;
> > > > > > +       struct dp_meter_table *meters;
> > > > > >  };
> > > > > >
> > > > > >  /**
> > > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > > --- a/net/openvswitch/meter.c
> > > > > > +++ b/net/openvswitch/meter.c
> > > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > > >         kfree_rcu(meter, rcu);
> > > > > >  }
> > > > > >
> > > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > > >                                             u32 meter_id)
> > > > > >  {
> > > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > > +
> > > > > I do not see any need to hash meter-id, can you explain it.
> > > > >
> > > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > > >  }
> > > > > >
> > > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > > >                                      u32 meter_id)
> > > > > >  {
> > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > >         struct dp_meter *meter;
> > > > > >         struct hlist_head *head;
> > > > > >
> > > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > > -                               lockdep_ovsl_is_held()) {
> > > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > +                                lockdep_ovsl_is_held()) {
> > > > > >                 if (meter->id == meter_id)
> > > > > >                         return meter;
> > > > > >         }
> > > > > > +
> > > > > This patch is expanding meter table linearly with number meters added
> > > > > to datapath. so I do not see need to have hash table. it can be a
> > > > > simple array. This would also improve lookup efficiency.
> > > > > For hash collision we could find next free slot in array. let me know
> > > > > what do you think about this approach.
> > > > Hi Pravin
> > > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > > find next free slot, but one case, when there are many meters in the array.
> > > > we may find many slot for the free slot.
> > > > And when we lookup the meter, for hash collision, we may find many
> > > > array slots, and
> > > > then find it, or that meter does not exist in the array, In that case,
> > > > there may be a lookup performance
> > > > drop.
> > > >
> > > I was thinking that users can insure that there are no hash collision,
> > > but time complexity of negative case is expensive. so I am fine with
> > > the hash table.
>
> IIUC, there will be hash collision. meter id is an 32-bit value.
> Currenly in lib/dpif-netdev.c, MAX_METERS = 65536.
Hi, William
but the id-pool makes sure the meter ids are allocated as 0, 1, 2, 3 ... n, not as arbitrary values n, m, y.
So if we alloc 1024 meters, the last meter id should be 1023. In that case,
isn't using the simple array to expand the meters better?

> I think what Pravin suggest is to use another hash function to make
> the hash table more condense. Ex: hash1 and hash2.
> For lookup, if hash1(key) misses, then try hash2(key).
>
> William
>
> > Hi Pravi
> > I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> > get a valid meter-id which passed to kernel, so there is no hash collision. You
> > are right. we use the single array is the better solution.
> > > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > > useful. it just update
> > > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > > ok.
> > >
> > > > > >         return NULL;
> > > > > >  }
> > > > > >
> > > > > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > > > > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > > > > > +{
> > > > > > +       struct dp_meter_instance *ti;
> > > > > > +       int i;
> > > > > > +
> > > > > > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > > > > > +       if (!ti)
> > > > > > +               return NULL;
> > > > > > +
> > > > > > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > > > > > +                                    GFP_KERNEL);
> > > > > > +       if (!ti->buckets) {
> > > > > > +               kfree(ti);
> > > > > > +               return NULL;
> > > > > > +       }
> > > > > > +
> > > > > > +       for (i = 0; i < size; i++)
> > > > > > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > > > > > +
> > > > > > +       ti->n_buckets = size;
> > > > > > +       ti->node_ver = 0;
> > > > > > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > > > > > +
> > > > > > +       return ti;
> > > > > > +}
> > > > > > +
> > > > > > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > > > > >  {
> > > > > > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > > > > > +       struct dp_meter_instance *ti;
> > > > > >
> > > > > > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > > > > > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > > > > > +       kvfree(ti->buckets);
> > > > > > +       kfree(ti);
> > > > > >  }
> > > > > >
> > > > > > -static void detach_meter(struct dp_meter *meter)
> > > > > > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > > > > > +                                    struct dp_meter *meter)
> > > > > > +{
> > > > > > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > > > > > +
> > > > > > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > > > > > +}
> > > > > > +
> > > > > > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > > > > > +                                    struct dp_meter *meter)
> > > > > >  {
> > > > > > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > > > > > +}
> > > > > > +
> > > > > > +static struct dp_meter_instance *
> > > > > > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > > > > > +{
> > > > > > +       struct dp_meter_instance *new_ti;
> > > > > > +       int i;
> > > > > > +
> > > > > > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > > > > > +       if (!new_ti)
> > > > > > +               return NULL;
> > > > > > +
> > > > > > +       new_ti->node_ver = !ti->node_ver;
> > > > > > +
> > > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > > > +               struct dp_meter *meter;
> > > > > > +
> > > > > > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > +                                        lockdep_ovsl_is_held())
> > > > > > +                       dp_meter_instance_insert(new_ti, meter);
> > > > > > +       }
> > > > > > +
> > > > > > +       return new_ti;
> > > > > > +}
> > > > > > +
> > > > > > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > > +{
> > > > > > +       struct dp_meter_instance *new_ti;
> > > > > > +       struct dp_meter_instance *ti;
> > > > > > +
> > > > > > +       ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > +       dp_meter_instance_insert(ti, meter);
> > > > > > +
> > > > > > +       /* operate the counter safely, because called with ovs_lock. */
> > > > > > +       tbl->count++;
> > > > > > +
> > > > > > +       if (tbl->count > ti->n_buckets) {
> > > > > > +               new_ti = dp_meter_instance_expand(ti);
> > > > > > +
> > > > >
> > > > >
> > > > > > +               if (new_ti) {
> > > > > > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > > > > > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > > > > > +               }
> > > > > > +       }
> > > > > > +}
> > > > > > +
> > > > > > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > > +{
> > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > +
> > > > > >         ASSERT_OVSL();
> > > > > > -       if (meter)
> > > > > > -               hlist_del_rcu(&meter->dp_hash_node);
> > > > > > +       if (meter) {
> > > > > > +               /* operate the counter safely, because called with ovs_lock. */
> > > > > > +               tbl->count--;
> > > > > > +               dp_meter_instance_remove(ti, meter);
> > > > > > +       }
> > > > > >  }
> > > > > >
> > > > > >  static struct sk_buff *
> > > > > > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > > > > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > > > > >
> > > > > >         /* Cannot fail after this. */
> > > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > > -       detach_meter(old_meter);
> > > > > > -       attach_meter(dp, meter);
> > > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > > > +       detach_meter(dp->meters, old_meter);
> > > > > > +       attach_meter(dp->meters, meter);
> > > > > >         ovs_unlock();
> > > > > >
> > > > > >         /* Build response with the meter_id and stats from
> > > > > > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> > > > > >         }
> > > > > >
> > > > > >         /* Locate meter, copy stats. */
> > > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > > >         if (!meter) {
> > > > > >                 err = -ENOENT;
> > > > > >                 goto exit_unlock;
> > > > > > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> > > > > >                 goto exit_unlock;
> > > > > >         }
> > > > > >
> > > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > > >         if (old_meter) {
> > > > > >                 spin_lock_bh(&old_meter->lock);
> > > > > >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> > > > > >                 WARN_ON(err);
> > > > > >                 spin_unlock_bh(&old_meter->lock);
> > > > > > -               detach_meter(old_meter);
> > > > > > +               detach_meter(dp->meters, old_meter);
> > > > > >         }
> > > > > >         ovs_unlock();
> > > > > >         ovs_meter_free(old_meter);
> > > > > > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> > > > > >         int i, band_exceeded_max = -1;
> > > > > >         u32 band_exceeded_rate = 0;
> > > > > >
> > > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > > >         /* Do not drop the packet when there is no meter. */
> > > > > >         if (!meter)
> > > > > >                 return false;
> > > > > > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> > > > > >
> > > > > >  int ovs_meters_init(struct datapath *dp)
> > > > > >  {
> > > > > > -       int i;
> > > > > > +       struct dp_meter_instance *ti;
> > > > > > +       struct dp_meter_table *tbl;
> > > > > >
> > > > > > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > > > > > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > > > > > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > > > > > +       if (!tbl)
> > > > > > +               return -ENOMEM;
> > > > > >
> > > > > > -       if (!dp->meters)
> > > > > > +       tbl->count = 0;
> > > > > > +
> > > > > > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > > > > > +       if (!ti) {
> > > > > > +               kfree(tbl);
> > > > > >                 return -ENOMEM;
> > > > > > +       }
> > > > > >
> > > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > > > > > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > > > > > +       rcu_assign_pointer(tbl->ti, ti);
> > > > > > +       dp->meters = tbl;
> > > > > >
> > > > > >         return 0;
> > > > > >  }
> > > > > >
> > > > > >  void ovs_meters_exit(struct datapath *dp)
> > > > > >  {
> > > > > > +       struct dp_meter_table *tbl = dp->meters;
> > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > >         int i;
> > > > > >
> > > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > > > > > -               struct hlist_head *head = &dp->meters[i];
> > > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > > >                 struct dp_meter *meter;
> > > > > >                 struct hlist_node *n;
> > > > > >
> > > > > > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > > > > > -                       kfree(meter);
> > > > > > +               hlist_for_each_entry_safe(meter, n, head,
> > > > > > +                                         hash_node[ti->node_ver])
> > > > > > +                       ovs_meter_free(meter);
> > > > > >         }
> > > > > >
> > > > > > -       kfree(dp->meters);
> > > > > > +       kvfree(ti->buckets);
> > > > > > +       kfree(ti);
> > > > > > +       kfree(tbl);
> > > > > >  }
> > > > > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > > > > index f645913870bd..bc84796d7d4d 100644
> > > > > > --- a/net/openvswitch/meter.h
> > > > > > +++ b/net/openvswitch/meter.h
> > > > > > @@ -30,9 +30,7 @@ struct dp_meter_band {
> > > > > >  struct dp_meter {
> > > > > >         spinlock_t lock;    /* Per meter lock */
> > > > > >         struct rcu_head rcu;
> > > > > > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > > > > > -                                        * hash table.
> > > > > > -                                        */
> > > > > > +       struct hlist_node hash_node[2];
> > > > > >         u32 id;
> > > > > >         u16 kbps:1, keep_stats:1;
> > > > > >         u16 n_bands;
> > > > > > @@ -42,6 +40,19 @@ struct dp_meter {
> > > > > >         struct dp_meter_band bands[];
> > > > > >  };
> > > > > >
> > > > > > +struct dp_meter_instance {
> > > > > > +       struct hlist_head *buckets;
> > > > > > +       struct rcu_head rcu;
> > > > > > +       u32 n_buckets;
> > > > > > +       u32 hash_seed;
> > > > > > +       u8 node_ver;
> > > > > > +};
> > > > > > +
> > > > > > +struct dp_meter_table {
> > > > > > +       struct dp_meter_instance __rcu *ti;
> > > > > > +       u32 count;
> > > > > > +};
> > > > > > +
> > > > > >  extern struct genl_family dp_meter_genl_family;
> > > > > >  int ovs_meters_init(struct datapath *dp);
> > > > > >  void ovs_meters_exit(struct datapath *dp);
> > > > > > --
> > > > > > 2.23.0
> > > > > >
> > > >
> > > >
> > > >
> > > > --
> > > > Best regards, Tonghao
> >
> >
> >
> > --
> > Best regards, Tonghao
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev



-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [ovs-dev] [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-08 15:59           ` Tonghao Zhang
@ 2020-04-08 16:01             ` Tonghao Zhang
  2020-04-09 21:41             ` William Tu
  1 sibling, 0 replies; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-08 16:01 UTC (permalink / raw)
  To: William Tu; +Cc: Pravin Shelar, ovs dev, Linux Kernel Network Developers

On Wed, Apr 8, 2020 at 11:59 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Wed, Apr 8, 2020 at 11:09 PM William Tu <u9012063@gmail.com> wrote:
> >
> > On Wed, Apr 01, 2020 at 06:50:09PM +0800, Tonghao Zhang wrote:
> > > On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > >
> > > > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > > > >
> > > > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > > >
> > > > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > > > >
> > > > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > >
> > > > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > > > buckets of meter in one dp. If installing more than 1024
> > > > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > > > But in some case, for example, Open vSwitch used as edge
> > > > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > > > IP address bandwidth limitation.
> > > > > > >
> > > > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > > > >
> > > > > > > For more scalable meter, this patch expands the buckets
> > > > > > > when necessary, so we can install more meters in the datapath.
> > > > > > >
> > > > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > > > >   expand meter though change the *ti* point in the struct
> > > > > > >   *dp_meter_table*.
> > > > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > > > >
> > > > > > Thanks for working on this, I have couple of comments.
> > > > > >
> > > > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > > ---
> > > > > > >  net/openvswitch/datapath.h |   2 +-
> > > > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > > > >
> > > > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > > > index e239a46c2f94..785105578448 100644
> > > > > > > --- a/net/openvswitch/datapath.h
> > > > > > > +++ b/net/openvswitch/datapath.h
> > > > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > > > >         u32 max_headroom;
> > > > > > >
> > > > > > >         /* Switch meters. */
> > > > > > > -       struct hlist_head *meters;
> > > > > > > +       struct dp_meter_table *meters;
> > > > > > >  };
> > > > > > >
> > > > > > >  /**
> > > > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > > > --- a/net/openvswitch/meter.c
> > > > > > > +++ b/net/openvswitch/meter.c
> > > > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > > > >         kfree_rcu(meter, rcu);
> > > > > > >  }
> > > > > > >
> > > > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > > > >                                             u32 meter_id)
> > > > > > >  {
> > > > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > > > +
> > > > > > I do not see any need to hash meter-id, can you explain it.
> > > > > >
> > > > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > > > >  }
> > > > > > >
> > > > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > > > >                                      u32 meter_id)
> > > > > > >  {
> > > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > >         struct dp_meter *meter;
> > > > > > >         struct hlist_head *head;
> > > > > > >
> > > > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > > > -                               lockdep_ovsl_is_held()) {
> > > > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > > +                                lockdep_ovsl_is_held()) {
> > > > > > >                 if (meter->id == meter_id)
> > > > > > >                         return meter;
> > > > > > >         }
> > > > > > > +
> > > > > > This patch is expanding meter table linearly with number meters added
> > > > > > to datapath. so I do not see need to have hash table. it can be a
> > > > > > simple array. This would also improve lookup efficiency.
> > > > > > For hash collision we could find next free slot in array. let me know
> > > > > > what do you think about this approach.
> > > > > Hi Pravin
> > > > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > > > find next free slot, but one case, when there are many meters in the array.
> > > > > we may find many slot for the free slot.
> > > > > And when we lookup the meter, for hash collision, we may find many
> > > > > array slots, and
> > > > > then find it, or that meter does not exist in the array, In that case,
> > > > > there may be a lookup performance
> > > > > drop.
> > > > >
> > > > I was thinking that users can insure that there are no hash collision,
> > > > but time complexity of negative case is expensive. so I am fine with
> > > > the hash table.
> >
> > IIUC, there will be hash collision. meter id is an 32-bit value.
> > Currenly in lib/dpif-netdev.c, MAX_METERS = 65536.
> Hi, William
> but id-pool makes sure the meter id is from 0, 1, 2, 3 ... n, but not n, m, y.
> so if we alloc 1024 meters, the last meter id should be 1023, and then
> use the simple array to expand the meter is better ?
v2 is ready, will be sent when net-next is opened ?

> > I think what Pravin suggest is to use another hash function to make
> > the hash table more condense. Ex: hash1 and hash2.
> > For lookup, if hash1(key) misses, then try hash2(key).
> >
> > William
> >
> > > Hi Pravi
> > > I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> > > get a valid meter-id which passed to kernel, so there is no hash collision. You
> > > are right. we use the single array is the better solution.
> > > > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > > > useful. it just update
> > > > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > > > ok.
> > > >
> > > > > > >         return NULL;
> > > > > > >  }
> > > > > > >
> > > > > > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > > > > > +static struct dp_meter_instance *dp_meter_instance_alloc(const int size)
> > > > > > > +{
> > > > > > > +       struct dp_meter_instance *ti;
> > > > > > > +       int i;
> > > > > > > +
> > > > > > > +       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > > > > > > +       if (!ti)
> > > > > > > +               return NULL;
> > > > > > > +
> > > > > > > +       ti->buckets = kvmalloc_array(size, sizeof(struct hlist_head),
> > > > > > > +                                    GFP_KERNEL);
> > > > > > > +       if (!ti->buckets) {
> > > > > > > +               kfree(ti);
> > > > > > > +               return NULL;
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       for (i = 0; i < size; i++)
> > > > > > > +               INIT_HLIST_HEAD(&ti->buckets[i]);
> > > > > > > +
> > > > > > > +       ti->n_buckets = size;
> > > > > > > +       ti->node_ver = 0;
> > > > > > > +       get_random_bytes(&ti->hash_seed, sizeof(u32));
> > > > > > > +
> > > > > > > +       return ti;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > > > > > >  {
> > > > > > > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > > > > > > +       struct dp_meter_instance *ti;
> > > > > > >
> > > > > > > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > > > > > > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > > > > > > +       kvfree(ti->buckets);
> > > > > > > +       kfree(ti);
> > > > > > >  }
> > > > > > >
> > > > > > > -static void detach_meter(struct dp_meter *meter)
> > > > > > > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > > > > > > +                                    struct dp_meter *meter)
> > > > > > > +{
> > > > > > > +       struct hlist_head *head = meter_hash_bucket(ti, meter->id);
> > > > > > > +
> > > > > > > +       hlist_add_head_rcu(&meter->hash_node[ti->node_ver], head);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > > > > > > +                                    struct dp_meter *meter)
> > > > > > >  {
> > > > > > > +       hlist_del_rcu(&meter->hash_node[ti->node_ver]);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static struct dp_meter_instance *
> > > > > > > +dp_meter_instance_expand(struct dp_meter_instance *ti)
> > > > > > > +{
> > > > > > > +       struct dp_meter_instance *new_ti;
> > > > > > > +       int i;
> > > > > > > +
> > > > > > > +       new_ti = dp_meter_instance_alloc(ti->n_buckets * 2);
> > > > > > > +       if (!new_ti)
> > > > > > > +               return NULL;
> > > > > > > +
> > > > > > > +       new_ti->node_ver = !ti->node_ver;
> > > > > > > +
> > > > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > > > > +               struct dp_meter *meter;
> > > > > > > +
> > > > > > > +               hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > > +                                        lockdep_ovsl_is_held())
> > > > > > > +                       dp_meter_instance_insert(new_ti, meter);
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       return new_ti;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > > > +{
> > > > > > > +       struct dp_meter_instance *new_ti;
> > > > > > > +       struct dp_meter_instance *ti;
> > > > > > > +
> > > > > > > +       ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > > +       dp_meter_instance_insert(ti, meter);
> > > > > > > +
> > > > > > > +       /* operate the counter safely, because called with ovs_lock. */
> > > > > > > +       tbl->count++;
> > > > > > > +
> > > > > > > +       if (tbl->count > ti->n_buckets) {
> > > > > > > +               new_ti = dp_meter_instance_expand(ti);
> > > > > > > +
> > > > > >
> > > > > >
> > > > > > > +               if (new_ti) {
> > > > > > > +                       rcu_assign_pointer(tbl->ti, new_ti);
> > > > > > > +                       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > > > > > > +               }
> > > > > > > +       }
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > > > > > +{
> > > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > > +
> > > > > > >         ASSERT_OVSL();
> > > > > > > -       if (meter)
> > > > > > > -               hlist_del_rcu(&meter->dp_hash_node);
> > > > > > > +       if (meter) {
> > > > > > > +               /* operate the counter safely, because called with ovs_lock. */
> > > > > > > +               tbl->count--;
> > > > > > > +               dp_meter_instance_remove(ti, meter);
> > > > > > > +       }
> > > > > > >  }
> > > > > > >
> > > > > > >  static struct sk_buff *
> > > > > > > @@ -303,9 +399,9 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > > > > > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > > > > > >
> > > > > > >         /* Cannot fail after this. */
> > > > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > > > -       detach_meter(old_meter);
> > > > > > > -       attach_meter(dp, meter);
> > > > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > > > > +       detach_meter(dp->meters, old_meter);
> > > > > > > +       attach_meter(dp->meters, meter);
> > > > > > >         ovs_unlock();
> > > > > > >
> > > > > > >         /* Build response with the meter_id and stats from
> > > > > > > @@ -365,7 +461,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> > > > > > >         }
> > > > > > >
> > > > > > >         /* Locate meter, copy stats. */
> > > > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > > > >         if (!meter) {
> > > > > > >                 err = -ENOENT;
> > > > > > >                 goto exit_unlock;
> > > > > > > @@ -416,13 +512,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> > > > > > >                 goto exit_unlock;
> > > > > > >         }
> > > > > > >
> > > > > > > -       old_meter = lookup_meter(dp, meter_id);
> > > > > > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > > > > > >         if (old_meter) {
> > > > > > >                 spin_lock_bh(&old_meter->lock);
> > > > > > >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> > > > > > >                 WARN_ON(err);
> > > > > > >                 spin_unlock_bh(&old_meter->lock);
> > > > > > > -               detach_meter(old_meter);
> > > > > > > +               detach_meter(dp->meters, old_meter);
> > > > > > >         }
> > > > > > >         ovs_unlock();
> > > > > > >         ovs_meter_free(old_meter);
> > > > > > > @@ -452,7 +548,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> > > > > > >         int i, band_exceeded_max = -1;
> > > > > > >         u32 band_exceeded_rate = 0;
> > > > > > >
> > > > > > > -       meter = lookup_meter(dp, meter_id);
> > > > > > > +       meter = lookup_meter(dp->meters, meter_id);
> > > > > > >         /* Do not drop the packet when there is no meter. */
> > > > > > >         if (!meter)
> > > > > > >                 return false;
> > > > > > > @@ -570,32 +666,44 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> > > > > > >
> > > > > > >  int ovs_meters_init(struct datapath *dp)
> > > > > > >  {
> > > > > > > -       int i;
> > > > > > > +       struct dp_meter_instance *ti;
> > > > > > > +       struct dp_meter_table *tbl;
> > > > > > >
> > > > > > > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > > > > > > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > > > > > > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > > > > > > +       if (!tbl)
> > > > > > > +               return -ENOMEM;
> > > > > > >
> > > > > > > -       if (!dp->meters)
> > > > > > > +       tbl->count = 0;
> > > > > > > +
> > > > > > > +       ti = dp_meter_instance_alloc(METER_HASH_BUCKETS);
> > > > > > > +       if (!ti) {
> > > > > > > +               kfree(tbl);
> > > > > > >                 return -ENOMEM;
> > > > > > > +       }
> > > > > > >
> > > > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > > > > > > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > > > > > > +       rcu_assign_pointer(tbl->ti, ti);
> > > > > > > +       dp->meters = tbl;
> > > > > > >
> > > > > > >         return 0;
> > > > > > >  }
> > > > > > >
> > > > > > >  void ovs_meters_exit(struct datapath *dp)
> > > > > > >  {
> > > > > > > +       struct dp_meter_table *tbl = dp->meters;
> > > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > >         int i;
> > > > > > >
> > > > > > > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > > > > > > -               struct hlist_head *head = &dp->meters[i];
> > > > > > > +       for (i = 0; i < ti->n_buckets; i++) {
> > > > > > > +               struct hlist_head *head = &ti->buckets[i];
> > > > > > >                 struct dp_meter *meter;
> > > > > > >                 struct hlist_node *n;
> > > > > > >
> > > > > > > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > > > > > > -                       kfree(meter);
> > > > > > > +               hlist_for_each_entry_safe(meter, n, head,
> > > > > > > +                                         hash_node[ti->node_ver])
> > > > > > > +                       ovs_meter_free(meter);
> > > > > > >         }
> > > > > > >
> > > > > > > -       kfree(dp->meters);
> > > > > > > +       kvfree(ti->buckets);
> > > > > > > +       kfree(ti);
> > > > > > > +       kfree(tbl);
> > > > > > >  }
> > > > > > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > > > > > index f645913870bd..bc84796d7d4d 100644
> > > > > > > --- a/net/openvswitch/meter.h
> > > > > > > +++ b/net/openvswitch/meter.h
> > > > > > > @@ -30,9 +30,7 @@ struct dp_meter_band {
> > > > > > >  struct dp_meter {
> > > > > > >         spinlock_t lock;    /* Per meter lock */
> > > > > > >         struct rcu_head rcu;
> > > > > > > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > > > > > > -                                        * hash table.
> > > > > > > -                                        */
> > > > > > > +       struct hlist_node hash_node[2];
> > > > > > >         u32 id;
> > > > > > >         u16 kbps:1, keep_stats:1;
> > > > > > >         u16 n_bands;
> > > > > > > @@ -42,6 +40,19 @@ struct dp_meter {
> > > > > > >         struct dp_meter_band bands[];
> > > > > > >  };
> > > > > > >
> > > > > > > +struct dp_meter_instance {
> > > > > > > +       struct hlist_head *buckets;
> > > > > > > +       struct rcu_head rcu;
> > > > > > > +       u32 n_buckets;
> > > > > > > +       u32 hash_seed;
> > > > > > > +       u8 node_ver;
> > > > > > > +};
> > > > > > > +
> > > > > > > +struct dp_meter_table {
> > > > > > > +       struct dp_meter_instance __rcu *ti;
> > > > > > > +       u32 count;
> > > > > > > +};
> > > > > > > +
> > > > > > >  extern struct genl_family dp_meter_genl_family;
> > > > > > >  int ovs_meters_init(struct datapath *dp);
> > > > > > >  void ovs_meters_exit(struct datapath *dp);
> > > > > > > --
> > > > > > > 2.23.0
> > > > > > >
> > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Best regards, Tonghao
> > >
> > >
> > >
> > > --
> > > Best regards, Tonghao
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
>
> --
> Best regards, Tonghao



-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [ovs-dev] [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-08 15:59           ` Tonghao Zhang
  2020-04-08 16:01             ` Tonghao Zhang
@ 2020-04-09 21:41             ` William Tu
  2020-04-09 23:29               ` Tonghao Zhang
  1 sibling, 1 reply; 48+ messages in thread
From: William Tu @ 2020-04-09 21:41 UTC (permalink / raw)
  To: Tonghao Zhang; +Cc: Pravin Shelar, ovs dev, Linux Kernel Network Developers

On Wed, Apr 08, 2020 at 11:59:25PM +0800, Tonghao Zhang wrote:
> On Wed, Apr 8, 2020 at 11:09 PM William Tu <u9012063@gmail.com> wrote:
> >
> > On Wed, Apr 01, 2020 at 06:50:09PM +0800, Tonghao Zhang wrote:
> > > On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > >
> > > > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > > > >
> > > > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > > >
> > > > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > > > >
> > > > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > >
> > > > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > > > buckets of meter in one dp. If installing more than 1024
> > > > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > > > But in some case, for example, Open vSwitch used as edge
> > > > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > > > IP address bandwidth limitation.
> > > > > > >
> > > > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > > > >
> > > > > > > For more scalable meter, this patch expands the buckets
> > > > > > > when necessary, so we can install more meters in the datapath.
> > > > > > >
> > > > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > > > >   expand meter though change the *ti* point in the struct
> > > > > > >   *dp_meter_table*.
> > > > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > > > >
> > > > > > Thanks for working on this, I have couple of comments.
> > > > > >
> > > > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > > ---
> > > > > > >  net/openvswitch/datapath.h |   2 +-
> > > > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > > > >
> > > > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > > > index e239a46c2f94..785105578448 100644
> > > > > > > --- a/net/openvswitch/datapath.h
> > > > > > > +++ b/net/openvswitch/datapath.h
> > > > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > > > >         u32 max_headroom;
> > > > > > >
> > > > > > >         /* Switch meters. */
> > > > > > > -       struct hlist_head *meters;
> > > > > > > +       struct dp_meter_table *meters;
> > > > > > >  };
> > > > > > >
> > > > > > >  /**
> > > > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > > > --- a/net/openvswitch/meter.c
> > > > > > > +++ b/net/openvswitch/meter.c
> > > > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > > > >         kfree_rcu(meter, rcu);
> > > > > > >  }
> > > > > > >
> > > > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > > > >                                             u32 meter_id)
> > > > > > >  {
> > > > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > > > +
> > > > > > I do not see any need to hash meter-id, can you explain it.
> > > > > >
> > > > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > > > >  }
> > > > > > >
> > > > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > > > >                                      u32 meter_id)
> > > > > > >  {
> > > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > >         struct dp_meter *meter;
> > > > > > >         struct hlist_head *head;
> > > > > > >
> > > > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > > > -                               lockdep_ovsl_is_held()) {
> > > > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > > +                                lockdep_ovsl_is_held()) {
> > > > > > >                 if (meter->id == meter_id)
> > > > > > >                         return meter;
> > > > > > >         }
> > > > > > > +
> > > > > > This patch is expanding meter table linearly with number meters added
> > > > > > to datapath. so I do not see need to have hash table. it can be a
> > > > > > simple array. This would also improve lookup efficiency.
> > > > > > For hash collision we could find next free slot in array. let me know
> > > > > > what do you think about this approach.
> > > > > Hi Pravin
> > > > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > > > find next free slot, but one case, when there are many meters in the array.
> > > > > we may find many slot for the free slot.
> > > > > And when we lookup the meter, for hash collision, we may find many
> > > > > array slots, and
> > > > > then find it, or that meter does not exist in the array, In that case,
> > > > > there may be a lookup performance
> > > > > drop.
> > > > >
> > > > I was thinking that users can insure that there are no hash collision,
> > > > but time complexity of negative case is expensive. so I am fine with
> > > > the hash table.
> >
> > IIUC, there will be hash collision. meter id is an 32-bit value.
> > Currenly in lib/dpif-netdev.c, MAX_METERS = 65536.
> Hi, William
> but id-pool makes sure the meter id is from 0, 1, 2, 3 ... n, but not n, m, y.
> so if we alloc 1024 meters, the last meter id should be 1023, and then
> use the simple array to expand the meter is better ?
> 

I see, so you want to set the number of hash buckets equal to the maximum
number of meter ids, so that there are no hash collisions (at the cost of
using more memory). I don't have a strong opinion on which design is better.
Let's wait for Pravin's feedback.

William

> > I think what Pravin suggest is to use another hash function to make
> > the hash table more condense. Ex: hash1 and hash2.
> > For lookup, if hash1(key) misses, then try hash2(key).
> >
> > William
> >
> > > Hi Pravi
> > > I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> > > get a valid meter-id which passed to kernel, so there is no hash collision. You
> > > are right. we use the single array is the better solution.
> > > > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > > > useful. it just update
> > > > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > > > ok.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [ovs-dev] [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-09 21:41             ` William Tu
@ 2020-04-09 23:29               ` Tonghao Zhang
  2020-04-11  8:14                 ` Pravin Shelar
  0 siblings, 1 reply; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-09 23:29 UTC (permalink / raw)
  To: William Tu; +Cc: Pravin Shelar, ovs dev, Linux Kernel Network Developers

On Fri, Apr 10, 2020 at 5:41 AM William Tu <u9012063@gmail.com> wrote:
>
> On Wed, Apr 08, 2020 at 11:59:25PM +0800, Tonghao Zhang wrote:
> > On Wed, Apr 8, 2020 at 11:09 PM William Tu <u9012063@gmail.com> wrote:
> > >
> > > On Wed, Apr 01, 2020 at 06:50:09PM +0800, Tonghao Zhang wrote:
> > > > On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > >
> > > > > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > > > >
> > > > > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > > > > >
> > > > > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > > >
> > > > > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > > > > buckets of meter in one dp. If installing more than 1024
> > > > > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > > > > But in some case, for example, Open vSwitch used as edge
> > > > > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > > > > IP address bandwidth limitation.
> > > > > > > >
> > > > > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > > > > >
> > > > > > > > For more scalable meter, this patch expands the buckets
> > > > > > > > when necessary, so we can install more meters in the datapath.
> > > > > > > >
> > > > > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > > > > >   expand meter though change the *ti* point in the struct
> > > > > > > >   *dp_meter_table*.
> > > > > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > > > > >
> > > > > > > Thanks for working on this, I have couple of comments.
> > > > > > >
> > > > > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > > > ---
> > > > > > > >  net/openvswitch/datapath.h |   2 +-
> > > > > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > > > > index e239a46c2f94..785105578448 100644
> > > > > > > > --- a/net/openvswitch/datapath.h
> > > > > > > > +++ b/net/openvswitch/datapath.h
> > > > > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > > > > >         u32 max_headroom;
> > > > > > > >
> > > > > > > >         /* Switch meters. */
> > > > > > > > -       struct hlist_head *meters;
> > > > > > > > +       struct dp_meter_table *meters;
> > > > > > > >  };
> > > > > > > >
> > > > > > > >  /**
> > > > > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > > > > --- a/net/openvswitch/meter.c
> > > > > > > > +++ b/net/openvswitch/meter.c
> > > > > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > > > > >         kfree_rcu(meter, rcu);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > > > > >                                             u32 meter_id)
> > > > > > > >  {
> > > > > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > > > > +
> > > > > > > I do not see any need to hash meter-id, can you explain it.
> > > > > > >
> > > > > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > > > > >                                      u32 meter_id)
> > > > > > > >  {
> > > > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > > >         struct dp_meter *meter;
> > > > > > > >         struct hlist_head *head;
> > > > > > > >
> > > > > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > > > > -                               lockdep_ovsl_is_held()) {
> > > > > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > > > +                                lockdep_ovsl_is_held()) {
> > > > > > > >                 if (meter->id == meter_id)
> > > > > > > >                         return meter;
> > > > > > > >         }
> > > > > > > > +
> > > > > > > This patch is expanding meter table linearly with number meters added
> > > > > > > to datapath. so I do not see need to have hash table. it can be a
> > > > > > > simple array. This would also improve lookup efficiency.
> > > > > > > For hash collision we could find next free slot in array. let me know
> > > > > > > what do you think about this approach.
> > > > > > Hi Pravin
> > > > > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > > > > find next free slot, but one case, when there are many meters in the array.
> > > > > > we may find many slot for the free slot.
> > > > > > And when we lookup the meter, for hash collision, we may find many
> > > > > > array slots, and
> > > > > > then find it, or that meter does not exist in the array, In that case,
> > > > > > there may be a lookup performance
> > > > > > drop.
> > > > > >
> > > > > I was thinking that users can insure that there are no hash collision,
> > > > > but time complexity of negative case is expensive. so I am fine with
> > > > > the hash table.
> > >
> > > IIUC, there will be hash collision. meter id is an 32-bit value.
> > > Currenly in lib/dpif-netdev.c, MAX_METERS = 65536.
> > Hi, William
> > but id-pool makes sure the meter id is from 0, 1, 2, 3 ... n, but not n, m, y.
> > so if we alloc 1024 meters, the last meter id should be 1023, and then
> > use the simple array to expand the meter is better ?
> >
>
> I see, so you want to set the # of hash bucket = max # of meter id,
> so there is no hash collision, (with the cost of using more memory)
Not really. There are 1024 buckets by default, and the table will expand to
1024*2, and then 1024*2*2, if necessary.
If most of the meters are deleted, we will shrink it.

> I don't have strong opinion on which design is better. Let's wait for
> Pravin's feedback.
>
> William
>
> > > I think what Pravin suggest is to use another hash function to make
> > > the hash table more condense. Ex: hash1 and hash2.
> > > For lookup, if hash1(key) misses, then try hash2(key).
> > >
> > > William
> > >
> > > > Hi Pravi
> > > > I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> > > > get a valid meter-id which passed to kernel, so there is no hash collision. You
> > > > are right. we use the single array is the better solution.
> > > > > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > > > > useful. it just update
> > > > > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > > > > ok.
>


-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [ovs-dev] [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported
  2020-04-09 23:29               ` Tonghao Zhang
@ 2020-04-11  8:14                 ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-11  8:14 UTC (permalink / raw)
  To: Tonghao Zhang; +Cc: William Tu, ovs dev, Linux Kernel Network Developers

Given that we already use an id-pool, we can significantly reduce the
probability of the negative case of meter lookup. Therefore, I do not
see a need to use a hash table in the datapath.

On Thu, Apr 9, 2020 at 4:29 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Fri, Apr 10, 2020 at 5:41 AM William Tu <u9012063@gmail.com> wrote:
> >
> > On Wed, Apr 08, 2020 at 11:59:25PM +0800, Tonghao Zhang wrote:
> > > On Wed, Apr 8, 2020 at 11:09 PM William Tu <u9012063@gmail.com> wrote:
> > > >
> > > > On Wed, Apr 01, 2020 at 06:50:09PM +0800, Tonghao Zhang wrote:
> > > > > On Tue, Mar 31, 2020 at 11:57 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > > >
> > > > > > On Sun, Mar 29, 2020 at 5:35 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > > > > > >
> > > > > > > On Mon, Mar 30, 2020 at 12:46 AM Pravin Shelar <pshelar@ovn.org> wrote:
> > > > > > > >
> > > > > > > > On Sat, Mar 28, 2020 at 8:46 AM <xiangxia.m.yue@gmail.com> wrote:
> > > > > > > > >
> > > > > > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > > > >
> > > > > > > > > In kernel datapath of Open vSwitch, there are only 1024
> > > > > > > > > buckets of meter in one dp. If installing more than 1024
> > > > > > > > > (e.g. 8192) meters, it may lead to the performance drop.
> > > > > > > > > But in some case, for example, Open vSwitch used as edge
> > > > > > > > > gateway, there should be 200,000+ at least, meters used for
> > > > > > > > > IP address bandwidth limitation.
> > > > > > > > >
> > > > > > > > > [Open vSwitch userspace datapath has this issue too.]
> > > > > > > > >
> > > > > > > > > For more scalable meter, this patch expands the buckets
> > > > > > > > > when necessary, so we can install more meters in the datapath.
> > > > > > > > >
> > > > > > > > > * Introducing the struct *dp_meter_instance*, it's easy to
> > > > > > > > >   expand meter though change the *ti* point in the struct
> > > > > > > > >   *dp_meter_table*.
> > > > > > > > > * Using kvmalloc_array instead of kmalloc_array.
> > > > > > > > >
> > > > > > > > Thanks for working on this, I have couple of comments.
> > > > > > > >
> > > > > > > > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > > > > > > > Cc: Andy Zhou <azhou@ovn.org>
> > > > > > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > > > > > > > ---
> > > > > > > > >  net/openvswitch/datapath.h |   2 +-
> > > > > > > > >  net/openvswitch/meter.c    | 168 ++++++++++++++++++++++++++++++-------
> > > > > > > > >  net/openvswitch/meter.h    |  17 +++-
> > > > > > > > >  3 files changed, 153 insertions(+), 34 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > > > > > > > index e239a46c2f94..785105578448 100644
> > > > > > > > > --- a/net/openvswitch/datapath.h
> > > > > > > > > +++ b/net/openvswitch/datapath.h
> > > > > > > > > @@ -82,7 +82,7 @@ struct datapath {
> > > > > > > > >         u32 max_headroom;
> > > > > > > > >
> > > > > > > > >         /* Switch meters. */
> > > > > > > > > -       struct hlist_head *meters;
> > > > > > > > > +       struct dp_meter_table *meters;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  /**
> > > > > > > > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > > > > > > > index 5010d1ddd4bd..98003b201b45 100644
> > > > > > > > > --- a/net/openvswitch/meter.c
> > > > > > > > > +++ b/net/openvswitch/meter.c
> > > > > > > > > @@ -47,40 +47,136 @@ static void ovs_meter_free(struct dp_meter *meter)
> > > > > > > > >         kfree_rcu(meter, rcu);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > > > > > > > +static struct hlist_head *meter_hash_bucket(struct dp_meter_instance *ti,
> > > > > > > > >                                             u32 meter_id)
> > > > > > > > >  {
> > > > > > > > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > > > > > > > +       u32 hash = jhash_1word(meter_id, ti->hash_seed);
> > > > > > > > > +
> > > > > > > > I do not see any need to hash meter-id, can you explain it.
> > > > > > > >
> > > > > > > > > +       return &ti->buckets[hash & (ti->n_buckets - 1)];
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  /* Call with ovs_mutex or RCU read lock. */
> > > > > > > > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > > > > > > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > > > > > > > >                                      u32 meter_id)
> > > > > > > > >  {
> > > > > > > > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > > > > > > >         struct dp_meter *meter;
> > > > > > > > >         struct hlist_head *head;
> > > > > > > > >
> > > > > > > > > -       head = meter_hash_bucket(dp, meter_id);
> > > > > > > > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > > > > > > > -                               lockdep_ovsl_is_held()) {
> > > > > > > > > +       head = meter_hash_bucket(ti, meter_id);
> > > > > > > > > +       hlist_for_each_entry_rcu(meter, head, hash_node[ti->node_ver],
> > > > > > > > > +                                lockdep_ovsl_is_held()) {
> > > > > > > > >                 if (meter->id == meter_id)
> > > > > > > > >                         return meter;
> > > > > > > > >         }
> > > > > > > > > +
> > > > > > > > This patch is expanding meter table linearly with number meters added
> > > > > > > > to datapath. so I do not see need to have hash table. it can be a
> > > > > > > > simple array. This would also improve lookup efficiency.
> > > > > > > > For hash collision we could find next free slot in array. let me know
> > > > > > > > what do you think about this approach.
> > > > > > > Hi Pravin
> > > > > > > If we use the simple array, when inserting the meter, for hash collision, we can
> > > > > > > find next free slot, but one case, when there are many meters in the array.
> > > > > > > we may find many slot for the free slot.
> > > > > > > And when we lookup the meter, for hash collision, we may find many
> > > > > > > array slots, and
> > > > > > > then find it, or that meter does not exist in the array, In that case,
> > > > > > > there may be a lookup performance
> > > > > > > drop.
> > > > > > >
> > > > > > I was thinking that users can insure that there are no hash collision,
> > > > > > but time complexity of negative case is expensive. so I am fine with
> > > > > > the hash table.
> > > >
> > > > IIUC, there will be hash collision. meter id is an 32-bit value.
> > > > Currenly in lib/dpif-netdev.c, MAX_METERS = 65536.
> > > Hi, William
> > > but id-pool makes sure the meter id is from 0, 1, 2, 3 ... n, but not n, m, y.
> > > so if we alloc 1024 meters, the last meter id should be 1023, and then
> > > use the simple array to expand the meter is better ?
> > >
> >
> > I see, so you want to set the # of hash bucket = max # of meter id,
> > so there is no hash collision, (with the cost of using more memory)
> Not really, there are 1024 buckets as default, and will expand to
> 1024*2, and then 1024*2*2  if necessary
> if the most meter is deleted, we will shrink it.
>
> > I don't have strong opinion on which design is better. Let's wait for
> > Pravin's feedback.
> >
> > William
> >
> > > > I think what Pravin suggest is to use another hash function to make
> > > > the hash table more condense. Ex: hash1 and hash2.
> > > > For lookup, if hash1(key) misses, then try hash2(key).
> > > >
> > > > William
> > > >
> > > > > Hi Pravi
> > > > > I check again the meter implementation of ovs, ovs-vswitchd use the id-pool to
> > > > > get a valid meter-id which passed to kernel, so there is no hash collision. You
> > > > > are right. we use the single array is the better solution.
> > > > > > > For hash meter-id in meter_hash_bucket, I am not 100% sure it is
> > > > > > > useful. it just update
> > > > > > > hash_seed when expand meters. For performance, we can remove it. Thanks.
> > > > > > ok.
> >
>
>
> --
> Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH net-next v2 0/5] expand meter tables and fix bug
  2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
                   ` (2 preceding siblings ...)
  2020-03-29 16:46 ` [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported Pravin Shelar
@ 2020-04-16 10:16 ` xiangxia.m.yue
  2020-04-16 10:16   ` [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
                     ` (5 more replies)
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
  5 siblings, 6 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-16 10:16 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch set expands or shrinks the meter table when necessary,
and the other patches fix bugs or improve the code.

Tonghao Zhang (5):
  net: openvswitch: expand the meters supported number
  net: openvswitch: set max limitation to meters
  net: openvswitch: remove the unnecessary check
  net: openvswitch: make EINVAL return value more obvious
  net: openvswitch: use u64 for meter bucket

 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 227 ++++++++++++++++++++++++++++---------
 net/openvswitch/meter.h    |  18 ++-
 3 files changed, 188 insertions(+), 59 deletions(-)

--
v2
* change the hash table to meter array
* add shrink meter codes
* add patch 4 and 5
--
2.23.0


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
@ 2020-04-16 10:16   ` xiangxia.m.yue
  2020-04-19 17:29     ` Pravin Shelar
  2020-04-16 10:17   ` [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-16 10:16 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

In kernel datapath of Open vSwitch, there are only 1024
buckets of meter in one dp. If installing more than 1024
(e.g. 8192) meters, it may lead to the performance drop.
But in some case, for example, Open vSwitch used as edge
gateway, there should be 200,000+ at least, meters used for
IP address bandwidth limitation.

[Open vSwitch userspace datapath has this issue too.]

For more scalable meter, this patch expands the buckets
when necessary, so we can install more meters in the datapath.
By introducing the struct *dp_meter_instance*, it is easy to
expand the meters through changing the *ti* pointer in the struct
*dp_meter_table*.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 200 +++++++++++++++++++++++++++++--------
 net/openvswitch/meter.h    |  15 ++-
 3 files changed, 169 insertions(+), 48 deletions(-)

diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index e239a46c2f94..785105578448 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -82,7 +82,7 @@ struct datapath {
 	u32 max_headroom;
 
 	/* Switch meters. */
-	struct hlist_head *meters;
+	struct dp_meter_table *meters;
 };
 
 /**
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5010d1ddd4bd..494a0014ecd8 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -19,8 +19,6 @@
 #include "datapath.h"
 #include "meter.h"
 
-#define METER_HASH_BUCKETS 1024
-
 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
 	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
 	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
@@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
 	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
 };
 
+static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
+{
+	return id % ti->n_meters;
+}
+
 static void ovs_meter_free(struct dp_meter *meter)
 {
 	if (!meter)
@@ -47,40 +50,141 @@ static void ovs_meter_free(struct dp_meter *meter)
 	kfree_rcu(meter, rcu);
 }
 
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
-					    u32 meter_id)
-{
-	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
-}
-
 /* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
 				     u32 meter_id)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter_id);
 	struct dp_meter *meter;
-	struct hlist_head *head;
 
-	head = meter_hash_bucket(dp, meter_id);
-	hlist_for_each_entry_rcu(meter, head, dp_hash_node,
-				lockdep_ovsl_is_held()) {
-		if (meter->id == meter_id)
-			return meter;
-	}
+	meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
+	if (meter && likely(meter->id == meter_id))
+		return meter;
+
 	return NULL;
 }
 
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
+{
+	struct dp_meter_instance *ti;
+
+	ti = kvzalloc(sizeof(*ti) +
+		      sizeof(struct dp_meter *) * size,
+		      GFP_KERNEL);
+	if (!ti)
+		return NULL;
+
+	ti->n_meters = size;
+
+	return ti;
+}
+
+static void dp_meter_instance_free(struct dp_meter_instance *ti)
+{
+	kvfree(ti);
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
+{
+	struct dp_meter_instance *ti;
+
+	ti = container_of(rcu, struct dp_meter_instance, rcu);
+	kvfree(ti);
+}
+
+static int
+dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	int n_meters = min(size, ti->n_meters);
+	struct dp_meter_instance *new_ti;
+	int i;
+
+	new_ti = dp_meter_instance_alloc(size);
+	if (!new_ti)
+		return -ENOMEM;
+
+	for (i = 0; i < n_meters; i++)
+		new_ti->dp_meters[i] =
+			rcu_dereference_ovsl(ti->dp_meters[i]);
+
+	rcu_assign_pointer(tbl->ti, new_ti);
+	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+
+	return 0;
+}
+
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	u32 hash;
+
+	hash = meter_hash(ti, meter->id);
+	rcu_assign_pointer(ti->dp_meters[hash], meter);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
 {
-	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+	u32 hash;
 
-	hlist_add_head_rcu(&meter->dp_hash_node, head);
+	hash = meter_hash(ti, meter->id);
+	RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
 }
 
-static void detach_meter(struct dp_meter *meter)
+static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter->id);
+
+	/*
+	 * In generally, slot selected should be empty, because
+	 * OvS uses id-pool to fetch a available id.
+	 */
+	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
+		return -EINVAL;
+
+	dp_meter_instance_insert(ti, meter);
+
+	/* That function is thread-safe. */
+	if (++tbl->count >= ti->n_meters)
+		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
+			goto expand_err;
+
+	return 0;
+
+expand_err:
+	dp_meter_instance_remove(ti, meter);
+	tbl->count--;
+	return -ENOMEM;
+}
+
+static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti;
+
 	ASSERT_OVSL();
-	if (meter)
-		hlist_del_rcu(&meter->dp_hash_node);
+	if (!meter)
+		return;
+
+	ti = rcu_dereference_ovsl(tbl->ti);
+	dp_meter_instance_remove(ti, meter);
+
+	tbl->count--;
+
+	/* Shrink the meter array if necessary. */
+	if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+	    tbl->count <= (ti->n_meters / 4)) {
+		int half_size = ti->n_meters / 2;
+		int i;
+
+		for (i = half_size; i < ti->n_meters; i++)
+			if (rcu_dereference_ovsl(ti->dp_meters[i]))
+				return;
+
+		dp_meter_instance_realloc(tbl, half_size);
+	}
 }
 
 static struct sk_buff *
@@ -303,9 +407,13 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
 	/* Cannot fail after this. */
-	old_meter = lookup_meter(dp, meter_id);
-	detach_meter(old_meter);
-	attach_meter(dp, meter);
+	old_meter = lookup_meter(dp->meters, meter_id);
+	detach_meter(dp->meters, old_meter);
+
+	err = attach_meter(dp->meters, meter);
+	if (err)
+		goto exit_unlock;
+
 	ovs_unlock();
 
 	/* Build response with the meter_id and stats from
@@ -365,7 +473,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Locate meter, copy stats. */
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(dp->meters, meter_id);
 	if (!meter) {
 		err = -ENOENT;
 		goto exit_unlock;
@@ -416,13 +524,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	old_meter = lookup_meter(dp, meter_id);
+	old_meter = lookup_meter(dp->meters, meter_id);
 	if (old_meter) {
 		spin_lock_bh(&old_meter->lock);
 		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 		WARN_ON(err);
 		spin_unlock_bh(&old_meter->lock);
-		detach_meter(old_meter);
+		detach_meter(dp->meters, old_meter);
 	}
 	ovs_unlock();
 	ovs_meter_free(old_meter);
@@ -452,7 +560,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 	int i, band_exceeded_max = -1;
 	u32 band_exceeded_rate = 0;
 
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(dp->meters, meter_id);
 	/* Do not drop the packet when there is no meter. */
 	if (!meter)
 		return false;
@@ -570,32 +678,36 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
 
 int ovs_meters_init(struct datapath *dp)
 {
-	int i;
+	struct dp_meter_instance *ti;
+	struct dp_meter_table *tbl;
+
+	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
+	if (!tbl)
+		return -ENOMEM;
 
-	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
-				   sizeof(struct hlist_head), GFP_KERNEL);
+	tbl->count = 0;
 
-	if (!dp->meters)
+	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+	if (!ti) {
+		kfree(tbl);
 		return -ENOMEM;
+	}
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->meters[i]);
+	rcu_assign_pointer(tbl->ti, ti);
+	dp->meters = tbl;
 
 	return 0;
 }
 
 void ovs_meters_exit(struct datapath *dp)
 {
+	struct dp_meter_table *tbl = dp->meters;
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	int i;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++) {
-		struct hlist_head *head = &dp->meters[i];
-		struct dp_meter *meter;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
-			kfree(meter);
-	}
+	for (i = 0; i < ti->n_meters; i++)
+		ovs_meter_free(ti->dp_meters[i]);
 
-	kfree(dp->meters);
+	dp_meter_instance_free(ti);
+	kfree(tbl);
 }
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f645913870bd..d91940383bbe 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -18,6 +18,7 @@
 struct datapath;
 
 #define DP_MAX_BANDS		1
+#define DP_METER_ARRAY_SIZE_MIN	(1ULL << 10)
 
 struct dp_meter_band {
 	u32 type;
@@ -30,9 +31,6 @@ struct dp_meter_band {
 struct dp_meter {
 	spinlock_t lock;    /* Per meter lock */
 	struct rcu_head rcu;
-	struct hlist_node dp_hash_node; /*Element in datapath->meters
-					 * hash table.
-					 */
 	u32 id;
 	u16 kbps:1, keep_stats:1;
 	u16 n_bands;
@@ -42,6 +40,17 @@ struct dp_meter {
 	struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+	struct rcu_head rcu;
+	u32 n_meters;
+	struct dp_meter __rcu *dp_meters[];
+};
+
+struct dp_meter_table {
+	struct dp_meter_instance __rcu *ti;
+	u32 count;
+};
+
 extern struct genl_family dp_meter_genl_family;
 int ovs_meters_init(struct datapath *dp);
 void ovs_meters_exit(struct datapath *dp);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
  2020-04-16 10:16   ` [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
@ 2020-04-16 10:17   ` xiangxia.m.yue
  2020-04-19 17:30     ` Pravin Shelar
  2020-04-16 10:17   ` [PATCH net-next v2 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-16 10:17 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Don't allow users to create an unlimited number of meters,
which may consume a large amount of kernel memory.
A limit of 200,000 meters should be fine in the general case.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 21 +++++++++++++++------
 net/openvswitch/meter.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 494a0014ecd8..1b6776f9c109 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -137,6 +137,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	u32 hash = meter_hash(ti, meter->id);
+	int err;
 
 	/*
 	 * In generally, slot selected should be empty, because
@@ -148,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 	dp_meter_instance_insert(ti, meter);
 
 	/* That function is thread-safe. */
-	if (++tbl->count >= ti->n_meters)
-		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
-			goto expand_err;
+	tbl->count++;
+	if (tbl->count > DP_METER_NUM_MAX) {
+		err = -EFBIG;
+		goto attach_err;
+	}
+
+	if (tbl->count >= ti->n_meters &&
+	    dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
+		err = -ENOMEM;
+		goto attach_err;
+	}
 
 	return 0;
 
-expand_err:
+attach_err:
 	dp_meter_instance_remove(ti, meter);
 	tbl->count--;
-	return -ENOMEM;
+	return err;
 }
 
 static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
@@ -264,7 +273,7 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
 
-	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, DP_METER_NUM_MAX) ||
 	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 		goto nla_put_failure;
 
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index d91940383bbe..cdfc6b9dbd42 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -19,6 +19,7 @@ struct datapath;
 
 #define DP_MAX_BANDS		1
 #define DP_METER_ARRAY_SIZE_MIN	(1ULL << 10)
+#define DP_METER_NUM_MAX	(200000ULL)
 
 struct dp_meter_band {
 	u32 type;
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v2 3/5] net: openvswitch: remove the unnecessary check
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
  2020-04-16 10:16   ` [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
  2020-04-16 10:17   ` [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
@ 2020-04-16 10:17   ` xiangxia.m.yue
  2020-04-16 10:17   ` [PATCH net-next v2 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-16 10:17 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Before calling ovs_meter_cmd_reply_stats(), "meter" is already
checked, so don't check it again in that function.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 1b6776f9c109..f552c64ae8df 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -229,12 +229,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 		goto error;
 
-	if (!meter)
-		return 0;
-
 	if (nla_put(reply, OVS_METER_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &meter->stats) ||
-	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+		    sizeof(struct ovs_flow_stats), &meter->stats))
+		goto error;
+
+	if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 			      OVS_METER_ATTR_PAD))
 		goto error;
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v2 4/5] net: openvswitch: make EINVAL return value more obvious
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
                     ` (2 preceding siblings ...)
  2020-04-16 10:17   ` [PATCH net-next v2 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
@ 2020-04-16 10:17   ` xiangxia.m.yue
  2020-04-16 10:17   ` [PATCH net-next v2 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
  2020-04-18 22:39   ` [PATCH net-next v2 0/5] expand meter tables and fix bug David Miller
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-16 10:17 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index f552c64ae8df..77fe39cf4f18 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -390,9 +390,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	u32 meter_id;
 	bool failed;
 
-	if (!a[OVS_METER_ATTR_ID]) {
-		return -ENODEV;
-	}
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
 
 	meter = dp_meter_create(a);
 	if (IS_ERR_OR_NULL(meter))
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v2 5/5] net: openvswitch: use u64 for meter bucket
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
                     ` (3 preceding siblings ...)
  2020-04-16 10:17   ` [PATCH net-next v2 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
@ 2020-04-16 10:17   ` xiangxia.m.yue
  2020-04-18 22:39   ` [PATCH net-next v2 0/5] expand meter tables and fix bug David Miller
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-16 10:17 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

When the meter rate is set to 4+ Gbps, the bucket value
overflows and the meters don't work as expected.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 2 +-
 net/openvswitch/meter.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 77fe39cf4f18..51cfe8a52b5a 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -364,7 +364,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 *
 		 * Start with a full bucket.
 		 */
-		band->bucket = (band->burst_size + band->rate) * 1000;
+		band->bucket = (band->burst_size + band->rate) * 1000ULL;
 		band_max_delta_t = band->bucket / band->rate;
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index cdfc6b9dbd42..b1a50d988e59 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -25,7 +25,7 @@ struct dp_meter_band {
 	u32 type;
 	u32 rate;
 	u32 burst_size;
-	u32 bucket; /* 1/1000 packets, or in bits */
+	u64 bucket; /* 1/1000 packets, or in bits */
 	struct ovs_flow_stats stats;
 };
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 0/5] expand meter tables and fix bug
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
                     ` (4 preceding siblings ...)
  2020-04-16 10:17   ` [PATCH net-next v2 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
@ 2020-04-18 22:39   ` David Miller
  5 siblings, 0 replies; 48+ messages in thread
From: David Miller @ 2020-04-18 22:39 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: pshelar, azhou, blp, u9012063, netdev, dev

From: xiangxia.m.yue@gmail.com
Date: Thu, 16 Apr 2020 18:16:58 +0800

> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> The patch set expands or shrink the meter table when necessary.
> and other patch fixes bug or improve codes.

Pravin et al. please review this series.

Thank you.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number
  2020-04-16 10:16   ` [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
@ 2020-04-19 17:29     ` Pravin Shelar
  2020-04-20  0:23       ` Tonghao Zhang
  0 siblings, 1 reply; 48+ messages in thread
From: Pravin Shelar @ 2020-04-19 17:29 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> In kernel datapath of Open vSwitch, there are only 1024
> buckets of meter in one dp. If installing more than 1024
> (e.g. 8192) meters, it may lead to the performance drop.
> But in some case, for example, Open vSwitch used as edge
> gateway, there should be 200,000+ at least, meters used for
> IP address bandwidth limitation.
>
> [Open vSwitch userspace datapath has this issue too.]
>
> For more scalable meter, this patch expands the buckets
> when necessary, so we can install more meters in the datapath.
> Introducing the struct *dp_meter_instance*, it's easy to
> expand meter though changing the *ti* point in the struct
> *dp_meter_table*.
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/datapath.h |   2 +-
>  net/openvswitch/meter.c    | 200 +++++++++++++++++++++++++++++--------
>  net/openvswitch/meter.h    |  15 ++-
>  3 files changed, 169 insertions(+), 48 deletions(-)
>
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index e239a46c2f94..785105578448 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -82,7 +82,7 @@ struct datapath {
>         u32 max_headroom;
>
>         /* Switch meters. */
> -       struct hlist_head *meters;
> +       struct dp_meter_table *meters;
Let's define it as part of this struct to avoid indirection.

>  };
>
>  /**
> diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> index 5010d1ddd4bd..494a0014ecd8 100644
> --- a/net/openvswitch/meter.c
> +++ b/net/openvswitch/meter.c
> @@ -19,8 +19,6 @@
>  #include "datapath.h"
>  #include "meter.h"
>
> -#define METER_HASH_BUCKETS 1024
> -
>  static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
>         [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
>         [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
> @@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
>         [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
>  };
>
> +static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
> +{
> +       return id % ti->n_meters;
> +}
> +
>  static void ovs_meter_free(struct dp_meter *meter)
>  {
>         if (!meter)
> @@ -47,40 +50,141 @@ static void ovs_meter_free(struct dp_meter *meter)
>         kfree_rcu(meter, rcu);
>  }
>
> -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> -                                           u32 meter_id)
> -{
> -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> -}
> -
>  /* Call with ovs_mutex or RCU read lock. */
> -static struct dp_meter *lookup_meter(const struct datapath *dp,
> +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
>                                      u32 meter_id)
>  {
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +       u32 hash = meter_hash(ti, meter_id);
>         struct dp_meter *meter;
> -       struct hlist_head *head;
>
> -       head = meter_hash_bucket(dp, meter_id);
> -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> -                               lockdep_ovsl_is_held()) {
> -               if (meter->id == meter_id)
> -                       return meter;
> -       }
> +       meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
> +       if (meter && likely(meter->id == meter_id))
> +               return meter;
> +
>         return NULL;
>  }
>
> -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> +static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
> +{
> +       struct dp_meter_instance *ti;
> +
> +       ti = kvzalloc(sizeof(*ti) +
> +                     sizeof(struct dp_meter *) * size,
> +                     GFP_KERNEL);
> +       if (!ti)
> +               return NULL;
Given this is a kernel-space array, we need to have a hard limit in place.

> +
> +       ti->n_meters = size;
> +
> +       return ti;
> +}
> +
> +static void dp_meter_instance_free(struct dp_meter_instance *ti)
> +{
> +       kvfree(ti);
> +}
> +
> +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> +{
> +       struct dp_meter_instance *ti;
> +
> +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> +       kvfree(ti);
> +}
> +
> +static int
> +dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
> +{
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +       int n_meters = min(size, ti->n_meters);
> +       struct dp_meter_instance *new_ti;
> +       int i;
> +
> +       new_ti = dp_meter_instance_alloc(size);
> +       if (!new_ti)
> +               return -ENOMEM;
> +
> +       for (i = 0; i < n_meters; i++)
> +               new_ti->dp_meters[i] =
> +                       rcu_dereference_ovsl(ti->dp_meters[i]);
> +
> +       rcu_assign_pointer(tbl->ti, new_ti);
> +       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> +
> +       return 0;
> +}
> +
> +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> +                                    struct dp_meter *meter)
> +{
> +       u32 hash;
> +
> +       hash = meter_hash(ti, meter->id);
> +       rcu_assign_pointer(ti->dp_meters[hash], meter);
> +}
> +
> +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> +                                    struct dp_meter *meter)
>  {
> -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> +       u32 hash;
>
> -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> +       hash = meter_hash(ti, meter->id);
> +       RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
>  }
>
> -static void detach_meter(struct dp_meter *meter)
> +static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
>  {
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +       u32 hash = meter_hash(ti, meter->id);
> +
> +       /*
> +        * In generally, slot selected should be empty, because
> +        * OvS uses id-pool to fetch a available id.
> +        */
> +       if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
> +               return -EINVAL;
we could return -EBUSY instead.
> +
> +       dp_meter_instance_insert(ti, meter);
> +
> +       /* That function is thread-safe. */
> +       if (++tbl->count >= ti->n_meters)
> +               if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> +                       goto expand_err;
> +
> +       return 0;
> +
> +expand_err:
> +       dp_meter_instance_remove(ti, meter);
> +       tbl->count--;
> +       return -ENOMEM;
> +}
> +
> +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> +{
> +       struct dp_meter_instance *ti;
> +
>         ASSERT_OVSL();
> -       if (meter)
> -               hlist_del_rcu(&meter->dp_hash_node);
> +       if (!meter)
> +               return;
> +
> +       ti = rcu_dereference_ovsl(tbl->ti);
> +       dp_meter_instance_remove(ti, meter);
> +
> +       tbl->count--;
> +
> +       /* Shrink the meter array if necessary. */
> +       if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> +           tbl->count <= (ti->n_meters / 4)) {
> +               int half_size = ti->n_meters / 2;
> +               int i;
> +
Let's add a comment about this.
> +               for (i = half_size; i < ti->n_meters; i++)
> +                       if (rcu_dereference_ovsl(ti->dp_meters[i]))
> +                               return;
> +
> +               dp_meter_instance_realloc(tbl, half_size);
> +       }
>  }
>
>  static struct sk_buff *
> @@ -303,9 +407,13 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
>         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
>
>         /* Cannot fail after this. */
> -       old_meter = lookup_meter(dp, meter_id);
> -       detach_meter(old_meter);
> -       attach_meter(dp, meter);
> +       old_meter = lookup_meter(dp->meters, meter_id);
In the new scheme this can fail due to a hash collision; let's check for NULL.

> +       detach_meter(dp->meters, old_meter);
> +
> +       err = attach_meter(dp->meters, meter);
> +       if (err)
> +               goto exit_unlock;
> +
>         ovs_unlock();
>
>         /* Build response with the meter_id and stats from
> @@ -365,7 +473,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
>         }
>
>         /* Locate meter, copy stats. */
> -       meter = lookup_meter(dp, meter_id);
> +       meter = lookup_meter(dp->meters, meter_id);
>         if (!meter) {
>                 err = -ENOENT;
>                 goto exit_unlock;
> @@ -416,13 +524,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
>                 goto exit_unlock;
>         }
>
> -       old_meter = lookup_meter(dp, meter_id);
> +       old_meter = lookup_meter(dp->meters, meter_id);
>         if (old_meter) {
>                 spin_lock_bh(&old_meter->lock);
>                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
>                 WARN_ON(err);
>                 spin_unlock_bh(&old_meter->lock);
> -               detach_meter(old_meter);
> +               detach_meter(dp->meters, old_meter);
>         }
>         ovs_unlock();
>         ovs_meter_free(old_meter);
> @@ -452,7 +560,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
>         int i, band_exceeded_max = -1;
>         u32 band_exceeded_rate = 0;
>
> -       meter = lookup_meter(dp, meter_id);
> +       meter = lookup_meter(dp->meters, meter_id);
>         /* Do not drop the packet when there is no meter. */
>         if (!meter)
>                 return false;
> @@ -570,32 +678,36 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
>
>  int ovs_meters_init(struct datapath *dp)
>  {
> -       int i;
> +       struct dp_meter_instance *ti;
> +       struct dp_meter_table *tbl;
> +
> +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> +       if (!tbl)
> +               return -ENOMEM;
>
> -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> -                                  sizeof(struct hlist_head), GFP_KERNEL);
> +       tbl->count = 0;
>
> -       if (!dp->meters)
> +       ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> +       if (!ti) {
> +               kfree(tbl);
>                 return -ENOMEM;
> +       }
>
> -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> -               INIT_HLIST_HEAD(&dp->meters[i]);
> +       rcu_assign_pointer(tbl->ti, ti);
> +       dp->meters = tbl;
>
>         return 0;
>  }
>
>  void ovs_meters_exit(struct datapath *dp)
>  {
> +       struct dp_meter_table *tbl = dp->meters;
> +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
>         int i;
>
> -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> -               struct hlist_head *head = &dp->meters[i];
> -               struct dp_meter *meter;
> -               struct hlist_node *n;
> -
> -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> -                       kfree(meter);
> -       }
> +       for (i = 0; i < ti->n_meters; i++)
> +               ovs_meter_free(ti->dp_meters[i]);
>
> -       kfree(dp->meters);
> +       dp_meter_instance_free(ti);
> +       kfree(tbl);
>  }
> diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> index f645913870bd..d91940383bbe 100644
> --- a/net/openvswitch/meter.h
> +++ b/net/openvswitch/meter.h
> @@ -18,6 +18,7 @@
>  struct datapath;
>
>  #define DP_MAX_BANDS           1
> +#define DP_METER_ARRAY_SIZE_MIN        (1ULL << 10)
>
>  struct dp_meter_band {
>         u32 type;
> @@ -30,9 +31,6 @@ struct dp_meter_band {
>  struct dp_meter {
>         spinlock_t lock;    /* Per meter lock */
>         struct rcu_head rcu;
> -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> -                                        * hash table.
> -                                        */
>         u32 id;
>         u16 kbps:1, keep_stats:1;
>         u16 n_bands;
> @@ -42,6 +40,17 @@ struct dp_meter {
>         struct dp_meter_band bands[];
>  };
>
> +struct dp_meter_instance {
> +       struct rcu_head rcu;
> +       u32 n_meters;
> +       struct dp_meter __rcu *dp_meters[];
> +};
> +
> +struct dp_meter_table {
> +       struct dp_meter_instance __rcu *ti;
> +       u32 count;
> +};
> +
>  extern struct genl_family dp_meter_genl_family;
>  int ovs_meters_init(struct datapath *dp);
>  void ovs_meters_exit(struct datapath *dp);
> --
> 2.23.0
>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters
  2020-04-16 10:17   ` [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
@ 2020-04-19 17:30     ` Pravin Shelar
  2020-04-20  0:28       ` Tonghao Zhang
  0 siblings, 1 reply; 48+ messages in thread
From: Pravin Shelar @ 2020-04-19 17:30 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Don't allow user to create meter unlimitedly,
> which may cause to consume a large amount of kernel memory.
> The 200,000 meters may be fine in general case.
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/meter.c | 21 +++++++++++++++------
>  net/openvswitch/meter.h |  1 +
>  2 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> index 494a0014ecd8..1b6776f9c109 100644
> --- a/net/openvswitch/meter.c
> +++ b/net/openvswitch/meter.c
> @@ -137,6 +137,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
>  {
>         struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
>         u32 hash = meter_hash(ti, meter->id);
> +       int err;
>
>         /*
>          * In generally, slot selected should be empty, because
> @@ -148,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
>         dp_meter_instance_insert(ti, meter);
>
>         /* That function is thread-safe. */
> -       if (++tbl->count >= ti->n_meters)
> -               if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> -                       goto expand_err;
> +       tbl->count++;
> +       if (tbl->count > DP_METER_NUM_MAX) {
> +               err = -EFBIG;
> +               goto attach_err;
> +       }
> +
> +       if (tbl->count >= ti->n_meters &&
> +           dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
> +               err = -ENOMEM;
> +               goto attach_err;
> +       }
>
>         return 0;
>
> -expand_err:
> +attach_err:
>         dp_meter_instance_remove(ti, meter);
>         tbl->count--;
> -       return -ENOMEM;
> +       return err;
>  }
>
>  static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> @@ -264,7 +273,7 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
>         if (IS_ERR(reply))
>                 return PTR_ERR(reply);
>
> -       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
> +       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, DP_METER_NUM_MAX) ||
>             nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
>                 goto nla_put_failure;
>
> diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> index d91940383bbe..cdfc6b9dbd42 100644
> --- a/net/openvswitch/meter.h
> +++ b/net/openvswitch/meter.h
> @@ -19,6 +19,7 @@ struct datapath;
>
>  #define DP_MAX_BANDS           1
>  #define DP_METER_ARRAY_SIZE_MIN        (1ULL << 10)
> +#define DP_METER_NUM_MAX       (200000ULL)
>
Let's make it configurable; the default could be 200k, to allow
customization on different memory configurations.


>  struct dp_meter_band {
>         u32 type;
> --
> 2.23.0
>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number
  2020-04-19 17:29     ` Pravin Shelar
@ 2020-04-20  0:23       ` Tonghao Zhang
  2020-04-20 21:43         ` Pravin Shelar
  0 siblings, 1 reply; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-20  0:23 UTC (permalink / raw)
  To: Pravin Shelar
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Mon, Apr 20, 2020 at 1:29 AM Pravin Shelar <pravin.ovn@gmail.com> wrote:
>
> On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > In kernel datapath of Open vSwitch, there are only 1024
> > buckets of meter in one dp. If installing more than 1024
> > (e.g. 8192) meters, it may lead to the performance drop.
> > But in some case, for example, Open vSwitch used as edge
> > gateway, there should be 200,000+ at least, meters used for
> > IP address bandwidth limitation.
> >
> > [Open vSwitch userspace datapath has this issue too.]
> >
> > For more scalable meter, this patch expands the buckets
> > when necessary, so we can install more meters in the datapath.
> > Introducing the struct *dp_meter_instance*, it's easy to
> > expand meter though changing the *ti* point in the struct
> > *dp_meter_table*.
> >
> > Cc: Pravin B Shelar <pshelar@ovn.org>
> > Cc: Andy Zhou <azhou@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> >  net/openvswitch/datapath.h |   2 +-
> >  net/openvswitch/meter.c    | 200 +++++++++++++++++++++++++++++--------
> >  net/openvswitch/meter.h    |  15 ++-
> >  3 files changed, 169 insertions(+), 48 deletions(-)
> >
> > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > index e239a46c2f94..785105578448 100644
> > --- a/net/openvswitch/datapath.h
> > +++ b/net/openvswitch/datapath.h
> > @@ -82,7 +82,7 @@ struct datapath {
> >         u32 max_headroom;
> >
> >         /* Switch meters. */
> > -       struct hlist_head *meters;
> > +       struct dp_meter_table *meters;
> lets define it as part of this struct to avoid indirection.
>
> >  };
> >
> >  /**
> > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > index 5010d1ddd4bd..494a0014ecd8 100644
> > --- a/net/openvswitch/meter.c
> > +++ b/net/openvswitch/meter.c
> > @@ -19,8 +19,6 @@
> >  #include "datapath.h"
> >  #include "meter.h"
> >
> > -#define METER_HASH_BUCKETS 1024
> > -
> >  static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
> >         [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
> >         [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
> > @@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
> >         [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
> >  };
> >
> > +static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
> > +{
> > +       return id % ti->n_meters;
> > +}
> > +
> >  static void ovs_meter_free(struct dp_meter *meter)
> >  {
> >         if (!meter)
> > @@ -47,40 +50,141 @@ static void ovs_meter_free(struct dp_meter *meter)
> >         kfree_rcu(meter, rcu);
> >  }
> >
> > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > -                                           u32 meter_id)
> > -{
> > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > -}
> > -
> >  /* Call with ovs_mutex or RCU read lock. */
> > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> >                                      u32 meter_id)
> >  {
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +       u32 hash = meter_hash(ti, meter_id);
> >         struct dp_meter *meter;
> > -       struct hlist_head *head;
> >
> > -       head = meter_hash_bucket(dp, meter_id);
> > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > -                               lockdep_ovsl_is_held()) {
> > -               if (meter->id == meter_id)
> > -                       return meter;
> > -       }
> > +       meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
> > +       if (meter && likely(meter->id == meter_id))
> > +               return meter;
> > +
> >         return NULL;
> >  }
> >
> > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > +static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
> > +{
> > +       struct dp_meter_instance *ti;
> > +
> > +       ti = kvzalloc(sizeof(*ti) +
> > +                     sizeof(struct dp_meter *) * size,
> > +                     GFP_KERNEL);
> > +       if (!ti)
> > +               return NULL;
> Given this is a kernel space array we need to have hard limit inplace.
In patch 2, I limited the number of meters; should we add a hard limit here?
> > +
> > +       ti->n_meters = size;
> > +
> > +       return ti;
> > +}
> > +
> > +static void dp_meter_instance_free(struct dp_meter_instance *ti)
> > +{
> > +       kvfree(ti);
> > +}
> > +
> > +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> > +{
> > +       struct dp_meter_instance *ti;
> > +
> > +       ti = container_of(rcu, struct dp_meter_instance, rcu);
> > +       kvfree(ti);
> > +}
> > +
> > +static int
> > +dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
> > +{
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +       int n_meters = min(size, ti->n_meters);
> > +       struct dp_meter_instance *new_ti;
> > +       int i;
> > +
> > +       new_ti = dp_meter_instance_alloc(size);
> > +       if (!new_ti)
> > +               return -ENOMEM;
> > +
> > +       for (i = 0; i < n_meters; i++)
> > +               new_ti->dp_meters[i] =
> > +                       rcu_dereference_ovsl(ti->dp_meters[i]);
> > +
> > +       rcu_assign_pointer(tbl->ti, new_ti);
> > +       call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> > +
> > +       return 0;
> > +}
> > +
> > +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> > +                                    struct dp_meter *meter)
> > +{
> > +       u32 hash;
> > +
> > +       hash = meter_hash(ti, meter->id);
> > +       rcu_assign_pointer(ti->dp_meters[hash], meter);
> > +}
> > +
> > +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> > +                                    struct dp_meter *meter)
> >  {
> > -       struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> > +       u32 hash;
> >
> > -       hlist_add_head_rcu(&meter->dp_hash_node, head);
> > +       hash = meter_hash(ti, meter->id);
> > +       RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
> >  }
> >
> > -static void detach_meter(struct dp_meter *meter)
> > +static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> >  {
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > +       u32 hash = meter_hash(ti, meter->id);
> > +
> > +       /*
> > +        * In generally, slot selected should be empty, because
> > +        * OvS uses id-pool to fetch a available id.
> > +        */
> > +       if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
> > +               return -EINVAL;
> we could return -EBUSY instead.
> > +
> > +       dp_meter_instance_insert(ti, meter);
> > +
> > +       /* That function is thread-safe. */
> > +       if (++tbl->count >= ti->n_meters)
> > +               if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> > +                       goto expand_err;
> > +
> > +       return 0;
> > +
> > +expand_err:
> > +       dp_meter_instance_remove(ti, meter);
> > +       tbl->count--;
> > +       return -ENOMEM;
> > +}
> > +
> > +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > +{
> > +       struct dp_meter_instance *ti;
> > +
> >         ASSERT_OVSL();
> > -       if (meter)
> > -               hlist_del_rcu(&meter->dp_hash_node);
> > +       if (!meter)
> > +               return;
> > +
> > +       ti = rcu_dereference_ovsl(tbl->ti);
> > +       dp_meter_instance_remove(ti, meter);
> > +
> > +       tbl->count--;
> > +
> > +       /* Shrink the meter array if necessary. */
> > +       if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> > +           tbl->count <= (ti->n_meters / 4)) {
> > +               int half_size = ti->n_meters / 2;
> > +               int i;
> > +
> Lets add a comment about this.
> > +               for (i = half_size; i < ti->n_meters; i++)
> > +                       if (rcu_dereference_ovsl(ti->dp_meters[i]))
> > +                               return;
> > +
> > +               dp_meter_instance_realloc(tbl, half_size);
> > +       }
> >  }
> >
> >  static struct sk_buff *
> > @@ -303,9 +407,13 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> >
> >         /* Cannot fail after this. */
> > -       old_meter = lookup_meter(dp, meter_id);
> > -       detach_meter(old_meter);
> > -       attach_meter(dp, meter);
> > +       old_meter = lookup_meter(dp->meters, meter_id);
> in new scheme this can fail due to hash collision, lets check for NULL.
If old_meter is NULL, detach_meter will do nothing.
> > +       detach_meter(dp->meters, old_meter);
> > +
> > +       err = attach_meter(dp->meters, meter);
> > +       if (err)
> > +               goto exit_unlock;
> > +
> >         ovs_unlock();
> >
> >         /* Build response with the meter_id and stats from
> > @@ -365,7 +473,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> >         }
> >
> >         /* Locate meter, copy stats. */
> > -       meter = lookup_meter(dp, meter_id);
> > +       meter = lookup_meter(dp->meters, meter_id);
> >         if (!meter) {
> >                 err = -ENOENT;
> >                 goto exit_unlock;
> > @@ -416,13 +524,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> >                 goto exit_unlock;
> >         }
> >
> > -       old_meter = lookup_meter(dp, meter_id);
> > +       old_meter = lookup_meter(dp->meters, meter_id);
> >         if (old_meter) {
> >                 spin_lock_bh(&old_meter->lock);
> >                 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> >                 WARN_ON(err);
> >                 spin_unlock_bh(&old_meter->lock);
> > -               detach_meter(old_meter);
> > +               detach_meter(dp->meters, old_meter);
> >         }
> >         ovs_unlock();
> >         ovs_meter_free(old_meter);
> > @@ -452,7 +560,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> >         int i, band_exceeded_max = -1;
> >         u32 band_exceeded_rate = 0;
> >
> > -       meter = lookup_meter(dp, meter_id);
> > +       meter = lookup_meter(dp->meters, meter_id);
> >         /* Do not drop the packet when there is no meter. */
> >         if (!meter)
> >                 return false;
> > @@ -570,32 +678,36 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
> >
> >  int ovs_meters_init(struct datapath *dp)
> >  {
> > -       int i;
> > +       struct dp_meter_instance *ti;
> > +       struct dp_meter_table *tbl;
> > +
> > +       tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> > +       if (!tbl)
> > +               return -ENOMEM;
> >
> > -       dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> > -                                  sizeof(struct hlist_head), GFP_KERNEL);
> > +       tbl->count = 0;
> >
> > -       if (!dp->meters)
> > +       ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> > +       if (!ti) {
> > +               kfree(tbl);
> >                 return -ENOMEM;
> > +       }
> >
> > -       for (i = 0; i < METER_HASH_BUCKETS; i++)
> > -               INIT_HLIST_HEAD(&dp->meters[i]);
> > +       rcu_assign_pointer(tbl->ti, ti);
> > +       dp->meters = tbl;
> >
> >         return 0;
> >  }
> >
> >  void ovs_meters_exit(struct datapath *dp)
> >  {
> > +       struct dp_meter_table *tbl = dp->meters;
> > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> >         int i;
> >
> > -       for (i = 0; i < METER_HASH_BUCKETS; i++) {
> > -               struct hlist_head *head = &dp->meters[i];
> > -               struct dp_meter *meter;
> > -               struct hlist_node *n;
> > -
> > -               hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> > -                       kfree(meter);
> > -       }
> > +       for (i = 0; i < ti->n_meters; i++)
> > +               ovs_meter_free(ti->dp_meters[i]);
> >
> > -       kfree(dp->meters);
> > +       dp_meter_instance_free(ti);
> > +       kfree(tbl);
> >  }
> > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > index f645913870bd..d91940383bbe 100644
> > --- a/net/openvswitch/meter.h
> > +++ b/net/openvswitch/meter.h
> > @@ -18,6 +18,7 @@
> >  struct datapath;
> >
> >  #define DP_MAX_BANDS           1
> > +#define DP_METER_ARRAY_SIZE_MIN        (1ULL << 10)
> >
> >  struct dp_meter_band {
> >         u32 type;
> > @@ -30,9 +31,6 @@ struct dp_meter_band {
> >  struct dp_meter {
> >         spinlock_t lock;    /* Per meter lock */
> >         struct rcu_head rcu;
> > -       struct hlist_node dp_hash_node; /*Element in datapath->meters
> > -                                        * hash table.
> > -                                        */
> >         u32 id;
> >         u16 kbps:1, keep_stats:1;
> >         u16 n_bands;
> > @@ -42,6 +40,17 @@ struct dp_meter {
> >         struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_meter_instance {
> > +       struct rcu_head rcu;
> > +       u32 n_meters;
> > +       struct dp_meter __rcu *dp_meters[];
> > +};
> > +
> > +struct dp_meter_table {
> > +       struct dp_meter_instance __rcu *ti;
> > +       u32 count;
> > +};
> > +
> >  extern struct genl_family dp_meter_genl_family;
> >  int ovs_meters_init(struct datapath *dp);
> >  void ovs_meters_exit(struct datapath *dp);
> > --
> > 2.23.0
> >



-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters
  2020-04-19 17:30     ` Pravin Shelar
@ 2020-04-20  0:28       ` Tonghao Zhang
  2020-04-20 21:44         ` Pravin Shelar
  0 siblings, 1 reply; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-20  0:28 UTC (permalink / raw)
  To: Pravin Shelar
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Mon, Apr 20, 2020 at 1:31 AM Pravin Shelar <pravin.ovn@gmail.com> wrote:
>
> On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > Don't allow user to create meter unlimitedly,
> > which may cause to consume a large amount of kernel memory.
> > The 200,000 meters may be fine in general case.
> >
> > Cc: Pravin B Shelar <pshelar@ovn.org>
> > Cc: Andy Zhou <azhou@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> >  net/openvswitch/meter.c | 21 +++++++++++++++------
> >  net/openvswitch/meter.h |  1 +
> >  2 files changed, 16 insertions(+), 6 deletions(-)
> >
> > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > index 494a0014ecd8..1b6776f9c109 100644
> > --- a/net/openvswitch/meter.c
> > +++ b/net/openvswitch/meter.c
> > @@ -137,6 +137,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> >  {
> >         struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> >         u32 hash = meter_hash(ti, meter->id);
> > +       int err;
> >
> >         /*
> >          * In generally, slot selected should be empty, because
> > @@ -148,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> >         dp_meter_instance_insert(ti, meter);
> >
> >         /* That function is thread-safe. */
> > -       if (++tbl->count >= ti->n_meters)
> > -               if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> > -                       goto expand_err;
> > +       tbl->count++;
> > +       if (tbl->count > DP_METER_NUM_MAX) {
> > +               err = -EFBIG;
> > +               goto attach_err;
> > +       }
> > +
> > +       if (tbl->count >= ti->n_meters &&
> > +           dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
> > +               err = -ENOMEM;
> > +               goto attach_err;
> > +       }
> >
> >         return 0;
> >
> > -expand_err:
> > +attach_err:
> >         dp_meter_instance_remove(ti, meter);
> >         tbl->count--;
> > -       return -ENOMEM;
> > +       return err;
> >  }
> >
> >  static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > @@ -264,7 +273,7 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
> >         if (IS_ERR(reply))
> >                 return PTR_ERR(reply);
> >
> > -       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
> > +       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, DP_METER_NUM_MAX) ||
> >             nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
> >                 goto nla_put_failure;
> >
> > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > index d91940383bbe..cdfc6b9dbd42 100644
> > --- a/net/openvswitch/meter.h
> > +++ b/net/openvswitch/meter.h
> > @@ -19,6 +19,7 @@ struct datapath;
> >
> >  #define DP_MAX_BANDS           1
> >  #define DP_METER_ARRAY_SIZE_MIN        (1ULL << 10)
> > +#define DP_METER_NUM_MAX       (200000ULL)
> >
> Lets make it configurable and default could 200k to allow
> customization on different memory configurations.
Great. Should we set a different limit depending on the current system memory size, like TCP does?
>
> >  struct dp_meter_band {
> >         u32 type;
> > --
> > 2.23.0
> >



-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number
  2020-04-20  0:23       ` Tonghao Zhang
@ 2020-04-20 21:43         ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-20 21:43 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Sun, Apr 19, 2020 at 5:23 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Mon, Apr 20, 2020 at 1:29 AM Pravin Shelar <pravin.ovn@gmail.com> wrote:
> >
> > On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@gmail.com> wrote:
> > >
> > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >
> > > In kernel datapath of Open vSwitch, there are only 1024
> > > buckets of meter in one dp. If installing more than 1024
> > > (e.g. 8192) meters, it may lead to the performance drop.
> > > But in some case, for example, Open vSwitch used as edge
> > > gateway, there should be 200,000+ at least, meters used for
> > > IP address bandwidth limitation.
> > >
> > > [Open vSwitch userspace datapath has this issue too.]
> > >
> > > For more scalable meter, this patch expands the buckets
> > > when necessary, so we can install more meters in the datapath.
> > > Introducing the struct *dp_meter_instance*, it's easy to
> > > expand meter though changing the *ti* point in the struct
> > > *dp_meter_table*.
> > >
> > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > Cc: Andy Zhou <azhou@ovn.org>
> > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > ---
> > >  net/openvswitch/datapath.h |   2 +-
> > >  net/openvswitch/meter.c    | 200 +++++++++++++++++++++++++++++--------
> > >  net/openvswitch/meter.h    |  15 ++-
> > >  3 files changed, 169 insertions(+), 48 deletions(-)
> > >
> > > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > > index e239a46c2f94..785105578448 100644
> > > --- a/net/openvswitch/datapath.h
> > > +++ b/net/openvswitch/datapath.h
> > > @@ -82,7 +82,7 @@ struct datapath {
> > >         u32 max_headroom;
> > >
> > >         /* Switch meters. */
> > > -       struct hlist_head *meters;
> > > +       struct dp_meter_table *meters;
> > lets define it as part of this struct to avoid indirection.
> >
> > >  };
> > >
> > >  /**
> > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > index 5010d1ddd4bd..494a0014ecd8 100644
> > > --- a/net/openvswitch/meter.c
> > > +++ b/net/openvswitch/meter.c
> > > @@ -19,8 +19,6 @@
> > >  #include "datapath.h"
> > >  #include "meter.h"
> > >
> > > -#define METER_HASH_BUCKETS 1024
> > > -
> > >  static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
> > >         [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
> > >         [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
> > > @@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
> > >         [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
> > >  };
> > >
> > > +static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
> > > +{
> > > +       return id % ti->n_meters;
> > > +}
> > > +
> > >  static void ovs_meter_free(struct dp_meter *meter)
> > >  {
> > >         if (!meter)
> > > @@ -47,40 +50,141 @@ static void ovs_meter_free(struct dp_meter *meter)
> > >         kfree_rcu(meter, rcu);
> > >  }
> > >
> > > -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> > > -                                           u32 meter_id)
> > > -{
> > > -       return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> > > -}
> > > -
> > >  /* Call with ovs_mutex or RCU read lock. */
> > > -static struct dp_meter *lookup_meter(const struct datapath *dp,
> > > +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> > >                                      u32 meter_id)
> > >  {
> > > +       struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > > +       u32 hash = meter_hash(ti, meter_id);
> > >         struct dp_meter *meter;
> > > -       struct hlist_head *head;
> > >
> > > -       head = meter_hash_bucket(dp, meter_id);
> > > -       hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> > > -                               lockdep_ovsl_is_held()) {
> > > -               if (meter->id == meter_id)
> > > -                       return meter;
> > > -       }
> > > +       meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
> > > +       if (meter && likely(meter->id == meter_id))
> > > +               return meter;
> > > +
> > >         return NULL;
> > >  }
> > >
> > > -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> > > +static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
> > > +{
> > > +       struct dp_meter_instance *ti;
> > > +
> > > +       ti = kvzalloc(sizeof(*ti) +
> > > +                     sizeof(struct dp_meter *) * size,
> > > +                     GFP_KERNEL);
> > > +       if (!ti)
> > > +               return NULL;
> > Given this is a kernel space array we need to have hard limit inplace.
> In patch 2, I limited the meter number, should we add hard limit here ?
I guess it's not needed here.
...

> > >  static struct sk_buff *
> > > @@ -303,9 +407,13 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> > >         meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
> > >
> > >         /* Cannot fail after this. */
> > > -       old_meter = lookup_meter(dp, meter_id);
> > > -       detach_meter(old_meter);
> > > -       attach_meter(dp, meter);
> > > +       old_meter = lookup_meter(dp->meters, meter_id);
> > in new scheme this can fail due to hash collision, lets check for NULL.
> If old_meter is NULL, detach_meter will do nothing.

Let's return an error.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters
  2020-04-20  0:28       ` Tonghao Zhang
@ 2020-04-20 21:44         ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-20 21:44 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Sun, Apr 19, 2020 at 5:28 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Mon, Apr 20, 2020 at 1:31 AM Pravin Shelar <pravin.ovn@gmail.com> wrote:
> >
> > On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@gmail.com> wrote:
> > >
> > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >
> > > Don't allow user to create meter unlimitedly,
> > > which may cause to consume a large amount of kernel memory.
> > > The 200,000 meters may be fine in general case.
> > >
> > > Cc: Pravin B Shelar <pshelar@ovn.org>
> > > Cc: Andy Zhou <azhou@ovn.org>
> > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > ---
> > >  net/openvswitch/meter.c | 21 +++++++++++++++------
> > >  net/openvswitch/meter.h |  1 +
> > >  2 files changed, 16 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> > > index 494a0014ecd8..1b6776f9c109 100644
> > > --- a/net/openvswitch/meter.c
> > > +++ b/net/openvswitch/meter.c
> > > @@ -137,6 +137,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > >  {
> > >         struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> > >         u32 hash = meter_hash(ti, meter->id);
> > > +       int err;
> > >
> > >         /*
> > >          * In generally, slot selected should be empty, because
> > > @@ -148,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > >         dp_meter_instance_insert(ti, meter);
> > >
> > >         /* That function is thread-safe. */
> > > -       if (++tbl->count >= ti->n_meters)
> > > -               if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> > > -                       goto expand_err;
> > > +       tbl->count++;
> > > +       if (tbl->count > DP_METER_NUM_MAX) {
> > > +               err = -EFBIG;
> > > +               goto attach_err;
> > > +       }
> > > +
> > > +       if (tbl->count >= ti->n_meters &&
> > > +           dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
> > > +               err = -ENOMEM;
> > > +               goto attach_err;
> > > +       }
> > >
> > >         return 0;
> > >
> > > -expand_err:
> > > +attach_err:
> > >         dp_meter_instance_remove(ti, meter);
> > >         tbl->count--;
> > > -       return -ENOMEM;
> > > +       return err;
> > >  }
> > >
> > >  static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> > > @@ -264,7 +273,7 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
> > >         if (IS_ERR(reply))
> > >                 return PTR_ERR(reply);
> > >
> > > -       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
> > > +       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, DP_METER_NUM_MAX) ||
> > >             nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
> > >                 goto nla_put_failure;
> > >
> > > diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> > > index d91940383bbe..cdfc6b9dbd42 100644
> > > --- a/net/openvswitch/meter.h
> > > +++ b/net/openvswitch/meter.h
> > > @@ -19,6 +19,7 @@ struct datapath;
> > >
> > >  #define DP_MAX_BANDS           1
> > >  #define DP_METER_ARRAY_SIZE_MIN        (1ULL << 10)
> > > +#define DP_METER_NUM_MAX       (200000ULL)
> > >
> > Lets make it configurable and default could 200k to allow
> > customization on different memory configurations.
> Great, set different limit depend on current system memory size like tcp ?

Yes, that could be useful.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH net-next v3 0/5] expand meter tables and fix bug
  2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
                   ` (3 preceding siblings ...)
  2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
@ 2020-04-22 17:08 ` xiangxia.m.yue
  2020-04-22 17:08   ` [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
                     ` (5 more replies)
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
  5 siblings, 6 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-22 17:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch set expands or shrinks the meter table when necessary.
The other patches fix bugs or improve the code.

Tonghao Zhang (5):
  net: openvswitch: expand the meters supported number
  net: openvswitch: set max limitation to meters
  net: openvswitch: remove the unnecessary check
  net: openvswitch: make EINVAL return value more obvious
  net: openvswitch: use u64 for meter bucket

 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 303 ++++++++++++++++++++++++++++---------
 net/openvswitch/meter.h    |  20 ++-
 3 files changed, 247 insertions(+), 78 deletions(-)

-- 
v3:
* attach_meter return -EBUSY, not -EINVAL
* change the return type of detach_meter
* add comments
* the meter max number limited by memory and DP_METER_NUM_MAX
* fix checkpatch warning
v2:
* change the hash table to meter array
* add shrink meter codes
* add patch 4 and 5
--
2.23.0


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
@ 2020-04-22 17:08   ` xiangxia.m.yue
  2020-04-23  3:54     ` Pravin Shelar
  2020-04-22 17:08   ` [PATCH net-next v3 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-22 17:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

In the kernel datapath of Open vSwitch, there are only 1024
meter buckets in one datapath. Installing more than 1024
(e.g. 8192) meters may lead to a performance drop.
But in some cases, for example when Open vSwitch is used as an
edge gateway, at least 20K meters are needed for
IP address bandwidth limitation.

[Open vSwitch userspace datapath has this issue too.]

To make meters more scalable, this patch uses a meter array instead
of a hash table, and expands/shrinks the array when necessary, so we
can install more meters than before in the datapath.
By introducing struct dp_meter_instance, it is easy to
expand the meter array by changing the *ti pointer in
struct dp_meter_table.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 240 ++++++++++++++++++++++++++++---------
 net/openvswitch/meter.h    |  16 ++-
 3 files changed, 195 insertions(+), 63 deletions(-)

diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index e239a46c2f94..2016dd107939 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -82,7 +82,7 @@ struct datapath {
 	u32 max_headroom;
 
 	/* Switch meters. */
-	struct hlist_head *meters;
+	struct dp_meter_table meter_tbl;
 };
 
 /**
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5010d1ddd4bd..f806ded1dd0a 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -19,8 +19,6 @@
 #include "datapath.h"
 #include "meter.h"
 
-#define METER_HASH_BUCKETS 1024
-
 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
 	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
 	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
@@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
 	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
 };
 
+static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
+{
+	return id % ti->n_meters;
+}
+
 static void ovs_meter_free(struct dp_meter *meter)
 {
 	if (!meter)
@@ -47,40 +50,153 @@ static void ovs_meter_free(struct dp_meter *meter)
 	kfree_rcu(meter, rcu);
 }
 
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
-					    u32 meter_id)
-{
-	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
-}
-
 /* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
 				     u32 meter_id)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter_id);
 	struct dp_meter *meter;
-	struct hlist_head *head;
 
-	head = meter_hash_bucket(dp, meter_id);
-	hlist_for_each_entry_rcu(meter, head, dp_hash_node,
-				lockdep_ovsl_is_held()) {
-		if (meter->id == meter_id)
-			return meter;
-	}
+	meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
+	if (meter && likely(meter->id == meter_id))
+		return meter;
+
 	return NULL;
 }
 
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
+{
+	struct dp_meter_instance *ti;
+
+	ti = kvzalloc(sizeof(*ti) +
+		      sizeof(struct dp_meter *) * size,
+		      GFP_KERNEL);
+	if (!ti)
+		return NULL;
+
+	ti->n_meters = size;
+
+	return ti;
+}
+
+static void dp_meter_instance_free(struct dp_meter_instance *ti)
+{
+	kvfree(ti);
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
+{
+	struct dp_meter_instance *ti;
+
+	ti = container_of(rcu, struct dp_meter_instance, rcu);
+	kvfree(ti);
+}
+
+static int
+dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	int n_meters = min(size, ti->n_meters);
+	struct dp_meter_instance *new_ti;
+	int i;
+
+	new_ti = dp_meter_instance_alloc(size);
+	if (!new_ti)
+		return -ENOMEM;
+
+	for (i = 0; i < n_meters; i++)
+		new_ti->dp_meters[i] =
+			rcu_dereference_ovsl(ti->dp_meters[i]);
+
+	rcu_assign_pointer(tbl->ti, new_ti);
+	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+
+	return 0;
+}
+
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	u32 hash;
+
+	hash = meter_hash(ti, meter->id);
+	rcu_assign_pointer(ti->dp_meters[hash], meter);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
 {
-	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+	u32 hash;
 
-	hlist_add_head_rcu(&meter->dp_hash_node, head);
+	hash = meter_hash(ti, meter->id);
+	RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
 }
 
-static void detach_meter(struct dp_meter *meter)
+static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter->id);
+
+	/* In generally, slots selected should be empty, because
+	 * OvS uses id-pool to fetch a available id.
+	 */
+	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
+		return -EBUSY;
+
+	dp_meter_instance_insert(ti, meter);
+
+	/* That function is thread-safe. */
+	if (++tbl->count >= ti->n_meters)
+		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
+			goto expand_err;
+
+	return 0;
+
+expand_err:
+	dp_meter_instance_remove(ti, meter);
+	tbl->count--;
+	return -ENOMEM;
+}
+
+static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti;
+
 	ASSERT_OVSL();
-	if (meter)
-		hlist_del_rcu(&meter->dp_hash_node);
+	if (!meter)
+		return 0;
+
+	ti = rcu_dereference_ovsl(tbl->ti);
+	dp_meter_instance_remove(ti, meter);
+
+	tbl->count--;
+
+	/* Shrink the meter array if necessary. */
+	if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+	    tbl->count <= (ti->n_meters / 4)) {
+		int half_size = ti->n_meters / 2;
+		int i;
+
+		/* Avoid hash collision, don't move slots to other place.
+		 * Make sure there are no references of meters in array
+		 * which will be released.
+		 */
+		for (i = half_size; i < ti->n_meters; i++)
+			if (rcu_dereference_ovsl(ti->dp_meters[i]))
+				goto out;
+
+		if (dp_meter_instance_realloc(tbl, half_size))
+			goto shrink_err;
+	}
+
+out:
+	return 0;
+
+shrink_err:
+	dp_meter_instance_insert(ti, meter);
+	tbl->count++;
+	return -ENOMEM;
 }
 
 static struct sk_buff *
@@ -273,6 +389,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *reply;
 	struct ovs_header *ovs_reply_header;
 	struct ovs_header *ovs_header = info->userhdr;
+	struct dp_meter_table *meter_tbl;
 	struct datapath *dp;
 	int err;
 	u32 meter_id;
@@ -300,12 +417,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
+	meter_tbl = &dp->meter_tbl;
 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
-	/* Cannot fail after this. */
-	old_meter = lookup_meter(dp, meter_id);
-	detach_meter(old_meter);
-	attach_meter(dp, meter);
+	old_meter = lookup_meter(meter_tbl, meter_id);
+	err = detach_meter(meter_tbl, old_meter);
+	if (err)
+		goto exit_unlock;
+
+	err = attach_meter(meter_tbl, meter);
+	if (err)
+		goto exit_unlock;
+
 	ovs_unlock();
 
 	/* Build response with the meter_id and stats from
@@ -337,14 +460,14 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 
 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
-	struct nlattr **a = info->attrs;
-	u32 meter_id;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *meter;
+	struct sk_buff *reply;
 	struct datapath *dp;
+	u32 meter_id;
 	int err;
-	struct sk_buff *reply;
-	struct dp_meter *meter;
 
 	if (!a[OVS_METER_ATTR_ID])
 		return -EINVAL;
@@ -365,7 +488,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Locate meter, copy stats. */
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
 	if (!meter) {
 		err = -ENOENT;
 		goto exit_unlock;
@@ -390,18 +513,17 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 
 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
-	struct nlattr **a = info->attrs;
-	u32 meter_id;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *old_meter;
+	struct sk_buff *reply;
 	struct datapath *dp;
+	u32 meter_id;
 	int err;
-	struct sk_buff *reply;
-	struct dp_meter *old_meter;
 
 	if (!a[OVS_METER_ATTR_ID])
 		return -EINVAL;
-	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
 					  &ovs_reply_header);
@@ -416,14 +538,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	old_meter = lookup_meter(dp, meter_id);
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	old_meter = lookup_meter(&dp->meter_tbl, meter_id);
 	if (old_meter) {
 		spin_lock_bh(&old_meter->lock);
 		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 		WARN_ON(err);
 		spin_unlock_bh(&old_meter->lock);
-		detach_meter(old_meter);
+
+		err = detach_meter(&dp->meter_tbl, old_meter);
+		if (err)
+			goto exit_unlock;
 	}
+
 	ovs_unlock();
 	ovs_meter_free(old_meter);
 	genlmsg_end(reply, ovs_reply_header);
@@ -443,16 +570,16 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 		       struct sw_flow_key *key, u32 meter_id)
 {
-	struct dp_meter *meter;
-	struct dp_meter_band *band;
 	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
 	long long int long_delta_ms;
-	u32 delta_ms;
-	u32 cost;
+	struct dp_meter_band *band;
+	struct dp_meter *meter;
 	int i, band_exceeded_max = -1;
 	u32 band_exceeded_rate = 0;
+	u32 delta_ms;
+	u32 cost;
 
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
 	/* Do not drop the packet when there is no meter. */
 	if (!meter)
 		return false;
@@ -570,32 +697,27 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
 
 int ovs_meters_init(struct datapath *dp)
 {
-	int i;
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti;
 
-	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
-				   sizeof(struct hlist_head), GFP_KERNEL);
-
-	if (!dp->meters)
+	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+	if (!ti)
 		return -ENOMEM;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->meters[i]);
+	rcu_assign_pointer(tbl->ti, ti);
+	tbl->count = 0;
 
 	return 0;
 }
 
 void ovs_meters_exit(struct datapath *dp)
 {
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
 	int i;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++) {
-		struct hlist_head *head = &dp->meters[i];
-		struct dp_meter *meter;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
-			kfree(meter);
-	}
+	for (i = 0; i < ti->n_meters; i++)
+		ovs_meter_free(ti->dp_meters[i]);
 
-	kfree(dp->meters);
+	dp_meter_instance_free(ti);
 }
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f645913870bd..f52052d30a16 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -13,11 +13,13 @@
 #include <linux/openvswitch.h>
 #include <linux/genetlink.h>
 #include <linux/skbuff.h>
+#include <linux/bits.h>
 
 #include "flow.h"
 struct datapath;
 
 #define DP_MAX_BANDS		1
+#define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
 
 struct dp_meter_band {
 	u32 type;
@@ -30,9 +32,6 @@ struct dp_meter_band {
 struct dp_meter {
 	spinlock_t lock;    /* Per meter lock */
 	struct rcu_head rcu;
-	struct hlist_node dp_hash_node; /*Element in datapath->meters
-					 * hash table.
-					 */
 	u32 id;
 	u16 kbps:1, keep_stats:1;
 	u16 n_bands;
@@ -42,6 +41,17 @@ struct dp_meter {
 	struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+	struct rcu_head rcu;
+	u32 n_meters;
+	struct dp_meter __rcu *dp_meters[];
+};
+
+struct dp_meter_table {
+	struct dp_meter_instance __rcu *ti;
+	u32 count;
+};
+
 extern struct genl_family dp_meter_genl_family;
 int ovs_meters_init(struct datapath *dp);
 void ovs_meters_exit(struct datapath *dp);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v3 2/5] net: openvswitch: set max limitation to meters
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
  2020-04-22 17:08   ` [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
@ 2020-04-22 17:08   ` xiangxia.m.yue
  2020-04-23  3:54     ` Pravin Shelar
  2020-04-22 17:08   ` [PATCH net-next v3 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-22 17:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Don't allow users to create an unlimited number of meters, which
could consume a large amount of kernel memory. The maximum number
supported is bounded by physical memory and capped at 200,000 meters
(DP_METER_NUM_MAX) by default.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 57 +++++++++++++++++++++++++++++++++--------
 net/openvswitch/meter.h |  2 ++
 2 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index f806ded1dd0a..372f4565872d 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -12,6 +12,7 @@
 #include <linux/openvswitch.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
+#include <linux/swap.h>
 
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -137,6 +138,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	u32 hash = meter_hash(ti, meter->id);
+	int err;
 
 	/* In generally, slots selected should be empty, because
 	 * OvS uses id-pool to fetch a available id.
@@ -147,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 	dp_meter_instance_insert(ti, meter);
 
 	/* That function is thread-safe. */
-	if (++tbl->count >= ti->n_meters)
-		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
-			goto expand_err;
+	tbl->count++;
+	if (tbl->count >= tbl->max_meters_allowed) {
+		err = -EFBIG;
+		goto attach_err;
+	}
+
+	if (tbl->count >= ti->n_meters &&
+	    dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
+		err = -ENOMEM;
+		goto attach_err;
+	}
 
 	return 0;
 
-expand_err:
+attach_err:
 	dp_meter_instance_remove(ti, meter);
 	tbl->count--;
-	return -ENOMEM;
+	return err;
 }
 
 static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
@@ -266,18 +276,32 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 
 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 {
-	struct sk_buff *reply;
+	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
 	struct nlattr *nla, *band_nla;
-	int err;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err = -EMSGSIZE;
 
 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
 					  &ovs_reply_header);
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
 
-	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
-	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
+			dp->meter_tbl.max_meters_allowed))
+		goto exit_unlock;
+
+	ovs_unlock();
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 		goto nla_put_failure;
 
 	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
@@ -296,9 +320,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 	genlmsg_end(reply, ovs_reply_header);
 	return genlmsg_reply(reply, info);
 
+exit_unlock:
+	ovs_unlock();
 nla_put_failure:
 	nlmsg_free(reply);
-	err = -EMSGSIZE;
 	return err;
 }
 
@@ -699,15 +724,27 @@ int ovs_meters_init(struct datapath *dp)
 {
 	struct dp_meter_table *tbl = &dp->meter_tbl;
 	struct dp_meter_instance *ti;
+	unsigned long free_mem_bytes;
 
 	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
 	if (!ti)
 		return -ENOMEM;
 
+	/* Allow meters in a datapath to use ~3.12% of physical memory. */
+	free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
+	tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
+				      DP_METER_NUM_MAX);
+	if (!tbl->max_meters_allowed)
+		goto out_err;
+
 	rcu_assign_pointer(tbl->ti, ti);
 	tbl->count = 0;
 
 	return 0;
+
+out_err:
+	dp_meter_instance_free(ti);
+	return -ENOMEM;
 }
 
 void ovs_meters_exit(struct datapath *dp)
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f52052d30a16..fcde5ee647da 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -20,6 +20,7 @@ struct datapath;
 
 #define DP_MAX_BANDS		1
 #define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
+#define DP_METER_NUM_MAX	(200000ULL)
 
 struct dp_meter_band {
 	u32 type;
@@ -50,6 +51,7 @@ struct dp_meter_instance {
 struct dp_meter_table {
 	struct dp_meter_instance __rcu *ti;
 	u32 count;
+	u32 max_meters_allowed;
 };
 
 extern struct genl_family dp_meter_genl_family;
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v3 3/5] net: openvswitch: remove the unnecessary check
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
  2020-04-22 17:08   ` [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
  2020-04-22 17:08   ` [PATCH net-next v3 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
@ 2020-04-22 17:08   ` xiangxia.m.yue
  2020-04-23  3:54     ` Pravin Shelar
  2020-04-22 17:08   ` [PATCH net-next v3 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-22 17:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Before invoking ovs_meter_cmd_reply_stats(), "meter" has
already been checked, so don't check it again in that function.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 372f4565872d..b7893b0d6423 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -242,12 +242,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 		goto error;
 
-	if (!meter)
-		return 0;
-
 	if (nla_put(reply, OVS_METER_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &meter->stats) ||
-	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+		    sizeof(struct ovs_flow_stats), &meter->stats))
+		goto error;
+
+	if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 			      OVS_METER_ATTR_PAD))
 		goto error;
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v3 4/5] net: openvswitch: make EINVAL return value more obvious
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
                     ` (2 preceding siblings ...)
  2020-04-22 17:08   ` [PATCH net-next v3 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
@ 2020-04-22 17:08   ` xiangxia.m.yue
  2020-04-23  3:54     ` Pravin Shelar
  2020-04-22 17:09   ` [PATCH net-next v3 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
  2020-04-23 19:45   ` [PATCH net-next v3 0/5] expand meter tables and fix bug David Miller
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-22 17:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index b7893b0d6423..e36b464b32a5 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -419,9 +419,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	u32 meter_id;
 	bool failed;
 
-	if (!a[OVS_METER_ATTR_ID]) {
-		return -ENODEV;
-	}
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
 
 	meter = dp_meter_create(a);
 	if (IS_ERR_OR_NULL(meter))
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v3 5/5] net: openvswitch: use u64 for meter bucket
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
                     ` (3 preceding siblings ...)
  2020-04-22 17:08   ` [PATCH net-next v3 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
@ 2020-04-22 17:09   ` xiangxia.m.yue
  2020-04-23  3:54     ` Pravin Shelar
  2020-04-23 19:45   ` [PATCH net-next v3 0/5] expand meter tables and fix bug David Miller
  5 siblings, 1 reply; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-22 17:09 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

When setting the meter rate to 4+ Gbps, the bucket value
overflows and the meters don't work as expected.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 net/openvswitch/meter.c | 2 +-
 net/openvswitch/meter.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index e36b464b32a5..915f31123f23 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 *
 		 * Start with a full bucket.
 		 */
-		band->bucket = (band->burst_size + band->rate) * 1000;
+		band->bucket = (band->burst_size + band->rate) * 1000ULL;
 		band_max_delta_t = band->bucket / band->rate;
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index fcde5ee647da..9ca50bfd1142 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -26,7 +26,7 @@ struct dp_meter_band {
 	u32 type;
 	u32 rate;
 	u32 burst_size;
-	u32 bucket; /* 1/1000 packets, or in bits */
+	u64 bucket; /* 1/1000 packets, or in bits */
 	struct ovs_flow_stats stats;
 };
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number
  2020-04-22 17:08   ` [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
@ 2020-04-23  3:54     ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-23  3:54 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Wed, Apr 22, 2020 at 10:10 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> In kernel datapath of Open vSwitch, there are only 1024
> buckets of meter in one datapath. If installing more than
> 1024 (e.g. 8192) meters, it may lead to the performance drop.
> But in some case, for example, Open vSwitch used as edge
> gateway, there should be 20K at least, where meters used for
> IP address bandwidth limitation.
>
> [Open vSwitch userspace datapath has this issue too.]
>
> For more scalable meter, this patch use meter array instead of
> hash tables, and expand/shrink the array when necessary. So we
> can install more meters than before in the datapath.
> Introducing the struct *dp_meter_instance, it's easy to
> expand meter though changing the *ti point in the struct
> *dp_meter_table.
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/datapath.h |   2 +-
>  net/openvswitch/meter.c    | 240 ++++++++++++++++++++++++++++---------
>  net/openvswitch/meter.h    |  16 ++-
>  3 files changed, 195 insertions(+), 63 deletions(-)
>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 2/5] net: openvswitch: set max limitation to meters
  2020-04-22 17:08   ` [PATCH net-next v3 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
@ 2020-04-23  3:54     ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-23  3:54 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Wed, Apr 22, 2020 at 10:10 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Don't allow user to create meter unlimitedly, which may cause
> to consume a large amount of kernel memory. The max number
> supported is decided by physical memory and 20K meters as default.
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/meter.c | 57 +++++++++++++++++++++++++++++++++--------
>  net/openvswitch/meter.h |  2 ++
>  2 files changed, 49 insertions(+), 10 deletions(-)
>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 3/5] net: openvswitch: remove the unnecessary check
  2020-04-22 17:08   ` [PATCH net-next v3 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
@ 2020-04-23  3:54     ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-23  3:54 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Wed, Apr 22, 2020 at 10:10 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Before invoking the ovs_meter_cmd_reply_stats, "meter"
> was checked, so don't check it agin in that function.
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/meter.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 4/5] net: openvswitch: make EINVAL return value more obvious
  2020-04-22 17:08   ` [PATCH net-next v3 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
@ 2020-04-23  3:54     ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-23  3:54 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Wed, Apr 22, 2020 at 10:10 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/meter.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 5/5] net: openvswitch: use u64 for meter bucket
  2020-04-22 17:09   ` [PATCH net-next v3 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
@ 2020-04-23  3:54     ` Pravin Shelar
  0 siblings, 0 replies; 48+ messages in thread
From: Pravin Shelar @ 2020-04-23  3:54 UTC (permalink / raw)
  To: Tonghao Zhang
  Cc: Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Wed, Apr 22, 2020 at 10:10 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> When setting the meter rate to 4+Gbps, there is an
> overflow, the meters don't work as expected.
>
> Cc: Pravin B Shelar <pshelar@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  net/openvswitch/meter.c | 2 +-
>  net/openvswitch/meter.h | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 0/5] expand meter tables and fix bug
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
                     ` (4 preceding siblings ...)
  2020-04-22 17:09   ` [PATCH net-next v3 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
@ 2020-04-23 19:45   ` David Miller
  2020-04-23 19:49     ` David Miller
  5 siblings, 1 reply; 48+ messages in thread
From: David Miller @ 2020-04-23 19:45 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: pshelar, azhou, blp, u9012063, netdev, dev

From: xiangxia.m.yue@gmail.com
Date: Thu, 23 Apr 2020 01:08:55 +0800

> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> The patch set expand or shrink the meter table when necessary.
> and other patches fix bug or improve codes.

Series applied, thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 0/5] expand meter tables and fix bug
  2020-04-23 19:45   ` [PATCH net-next v3 0/5] expand meter tables and fix bug David Miller
@ 2020-04-23 19:49     ` David Miller
  2020-04-23 22:56       ` Tonghao Zhang
  0 siblings, 1 reply; 48+ messages in thread
From: David Miller @ 2020-04-23 19:49 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: pshelar, azhou, blp, u9012063, netdev, dev

From: David Miller <davem@davemloft.net>
Date: Thu, 23 Apr 2020 12:45:29 -0700 (PDT)

> From: xiangxia.m.yue@gmail.com
> Date: Thu, 23 Apr 2020 01:08:55 +0800
> 
>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>> 
>> The patch set expand or shrink the meter table when necessary.
>> and other patches fix bug or improve codes.
> 
> Series applied, thanks.

Actually I had to revert, this adds build warnings:

In file included from ./include/linux/uio.h:8,
                 from ./include/linux/socket.h:8,
                 from ./include/uapi/linux/if.h:25,
                 from net/openvswitch/meter.c:8:
net/openvswitch/meter.c: In function ‘ovs_meters_init’:
./include/linux/kernel.h:842:29: warning: comparison of distinct pointer types lacks a cast
   (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
                             ^~
./include/linux/kernel.h:856:4: note: in expansion of macro ‘__typecheck’
   (__typecheck(x, y) && __no_side_effects(x, y))
    ^~~~~~~~~~~
./include/linux/kernel.h:866:24: note: in expansion of macro ‘__safe_cmp’
  __builtin_choose_expr(__safe_cmp(x, y), \
                        ^~~~~~~~~~
./include/linux/kernel.h:875:19: note: in expansion of macro ‘__careful_cmp’
 #define min(x, y) __careful_cmp(x, y, <)
                   ^~~~~~~~~~~~~
net/openvswitch/meter.c:733:28: note: in expansion of macro ‘min’
  tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
                            ^~~

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v3 0/5] expand meter tables and fix bug
  2020-04-23 19:49     ` David Miller
@ 2020-04-23 22:56       ` Tonghao Zhang
  0 siblings, 0 replies; 48+ messages in thread
From: Tonghao Zhang @ 2020-04-23 22:56 UTC (permalink / raw)
  To: David Miller
  Cc: Pravin Shelar, Andy Zhou, Ben Pfaff, William Tu,
	Linux Kernel Network Developers, ovs dev

On Fri, Apr 24, 2020 at 3:49 AM David Miller <davem@davemloft.net> wrote:
>
> From: David Miller <davem@davemloft.net>
> Date: Thu, 23 Apr 2020 12:45:29 -0700 (PDT)
>
> > From: xiangxia.m.yue@gmail.com
> > Date: Thu, 23 Apr 2020 01:08:55 +0800
> >
> >> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >>
> >> The patch set expand or shrink the meter table when necessary.
> >> and other patches fix bug or improve codes.
> >
> > Series applied, thanks.
>
> Actually I had to revert, this adds build warnings:
>
> In file included from ./include/linux/uio.h:8,
>                  from ./include/linux/socket.h:8,
>                  from ./include/uapi/linux/if.h:25,
>                  from net/openvswitch/meter.c:8:
> net/openvswitch/meter.c: In function ‘ovs_meters_init’:
> ./include/linux/kernel.h:842:29: warning: comparison of distinct pointer types lacks a cast
>    (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
>                              ^~
> ./include/linux/kernel.h:856:4: note: in expansion of macro ‘__typecheck’
>    (__typecheck(x, y) && __no_side_effects(x, y))
>     ^~~~~~~~~~~
> ./include/linux/kernel.h:866:24: note: in expansion of macro ‘__safe_cmp’
>   __builtin_choose_expr(__safe_cmp(x, y), \
>                         ^~~~~~~~~~
> ./include/linux/kernel.h:875:19: note: in expansion of macro ‘__careful_cmp’
>  #define min(x, y) __careful_cmp(x, y, <)
>                    ^~~~~~~~~~~~~
> net/openvswitch/meter.c:733:28: note: in expansion of macro ‘min’
>   tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
>                             ^~~
My gcc compiler was too old (4.8), so I did not see that build warning.
I then used gcc 9.3 to build the kernel and fixed the warning; the min()
macro checks that both arguments have the same type.
Patch 2 introduced it. A v4 will be sent. Thanks.
-- 
Best regards, Tonghao

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH net-next v4 0/5] expand meter tables and fix bug
  2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
                   ` (4 preceding siblings ...)
  2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
@ 2020-04-24  0:08 ` xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
                     ` (5 more replies)
  5 siblings, 6 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-24  0:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch set expands or shrinks the meter table when necessary;
the other patches fix bugs or improve the code.

Tonghao Zhang (5):
  net: openvswitch: expand the meters supported number
  net: openvswitch: set max limitation to meters
  net: openvswitch: remove the unnecessary check
  net: openvswitch: make EINVAL return value more obvious
  net: openvswitch: use u64 for meter bucket

 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 303 ++++++++++++++++++++++++++++---------
 net/openvswitch/meter.h    |  20 ++-
 3 files changed, 247 insertions(+), 78 deletions(-)

-- 
v4:
* patch 2, fix the min() build warning found with gcc-9.x
v3:
* attach_meter return -EBUSY, not -EINVAL
* change the return type of detach_meter
* add comments
* the meter max number limited by memory and DP_METER_NUM_MAX
* fix checkpatch warning
v2:
* change the hash table to meter array
* add shrink meter codes
* add patch 4 and 5
--
2.23.0


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH net-next v4 1/5] net: openvswitch: expand the meters supported number
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
@ 2020-04-24  0:08   ` xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-24  0:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

In the kernel datapath of Open vSwitch, there are only 1024
meter buckets in one datapath. Installing more than 1024
(e.g. 8192) meters may lead to a performance drop.
But in some cases, for example when Open vSwitch is used as an
edge gateway, at least 20K meters are needed, where the meters
are used for IP address bandwidth limitation.

[Open vSwitch userspace datapath has this issue too.]

For more scalable meters, this patch uses a meter array instead of
a hash table, and expands/shrinks the array when necessary, so we
can install more meters than before in the datapath.
With the newly introduced struct dp_meter_instance, it is easy to
expand the meters by changing the *ti pointer in struct
dp_meter_table.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
---
 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 240 ++++++++++++++++++++++++++++---------
 net/openvswitch/meter.h    |  16 ++-
 3 files changed, 195 insertions(+), 63 deletions(-)

diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index e239a46c2f94..2016dd107939 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -82,7 +82,7 @@ struct datapath {
 	u32 max_headroom;
 
 	/* Switch meters. */
-	struct hlist_head *meters;
+	struct dp_meter_table meter_tbl;
 };
 
 /**
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5010d1ddd4bd..f806ded1dd0a 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -19,8 +19,6 @@
 #include "datapath.h"
 #include "meter.h"
 
-#define METER_HASH_BUCKETS 1024
-
 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
 	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
 	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
@@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
 	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
 };
 
+static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
+{
+	return id % ti->n_meters;
+}
+
 static void ovs_meter_free(struct dp_meter *meter)
 {
 	if (!meter)
@@ -47,40 +50,153 @@ static void ovs_meter_free(struct dp_meter *meter)
 	kfree_rcu(meter, rcu);
 }
 
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
-					    u32 meter_id)
-{
-	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
-}
-
 /* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
 				     u32 meter_id)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter_id);
 	struct dp_meter *meter;
-	struct hlist_head *head;
 
-	head = meter_hash_bucket(dp, meter_id);
-	hlist_for_each_entry_rcu(meter, head, dp_hash_node,
-				lockdep_ovsl_is_held()) {
-		if (meter->id == meter_id)
-			return meter;
-	}
+	meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
+	if (meter && likely(meter->id == meter_id))
+		return meter;
+
 	return NULL;
 }
 
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
+{
+	struct dp_meter_instance *ti;
+
+	ti = kvzalloc(sizeof(*ti) +
+		      sizeof(struct dp_meter *) * size,
+		      GFP_KERNEL);
+	if (!ti)
+		return NULL;
+
+	ti->n_meters = size;
+
+	return ti;
+}
+
+static void dp_meter_instance_free(struct dp_meter_instance *ti)
+{
+	kvfree(ti);
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
+{
+	struct dp_meter_instance *ti;
+
+	ti = container_of(rcu, struct dp_meter_instance, rcu);
+	kvfree(ti);
+}
+
+static int
+dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	int n_meters = min(size, ti->n_meters);
+	struct dp_meter_instance *new_ti;
+	int i;
+
+	new_ti = dp_meter_instance_alloc(size);
+	if (!new_ti)
+		return -ENOMEM;
+
+	for (i = 0; i < n_meters; i++)
+		new_ti->dp_meters[i] =
+			rcu_dereference_ovsl(ti->dp_meters[i]);
+
+	rcu_assign_pointer(tbl->ti, new_ti);
+	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+
+	return 0;
+}
+
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	u32 hash;
+
+	hash = meter_hash(ti, meter->id);
+	rcu_assign_pointer(ti->dp_meters[hash], meter);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
 {
-	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+	u32 hash;
 
-	hlist_add_head_rcu(&meter->dp_hash_node, head);
+	hash = meter_hash(ti, meter->id);
+	RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
 }
 
-static void detach_meter(struct dp_meter *meter)
+static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter->id);
+
+	/* In generally, slots selected should be empty, because
+	 * OvS uses id-pool to fetch a available id.
+	 */
+	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
+		return -EBUSY;
+
+	dp_meter_instance_insert(ti, meter);
+
+	/* That function is thread-safe. */
+	if (++tbl->count >= ti->n_meters)
+		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
+			goto expand_err;
+
+	return 0;
+
+expand_err:
+	dp_meter_instance_remove(ti, meter);
+	tbl->count--;
+	return -ENOMEM;
+}
+
+static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti;
+
 	ASSERT_OVSL();
-	if (meter)
-		hlist_del_rcu(&meter->dp_hash_node);
+	if (!meter)
+		return 0;
+
+	ti = rcu_dereference_ovsl(tbl->ti);
+	dp_meter_instance_remove(ti, meter);
+
+	tbl->count--;
+
+	/* Shrink the meter array if necessary. */
+	if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+	    tbl->count <= (ti->n_meters / 4)) {
+		int half_size = ti->n_meters / 2;
+		int i;
+
+		/* Avoid hash collision, don't move slots to other place.
+		 * Make sure there are no references of meters in array
+		 * which will be released.
+		 */
+		for (i = half_size; i < ti->n_meters; i++)
+			if (rcu_dereference_ovsl(ti->dp_meters[i]))
+				goto out;
+
+		if (dp_meter_instance_realloc(tbl, half_size))
+			goto shrink_err;
+	}
+
+out:
+	return 0;
+
+shrink_err:
+	dp_meter_instance_insert(ti, meter);
+	tbl->count++;
+	return -ENOMEM;
 }
 
 static struct sk_buff *
@@ -273,6 +389,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *reply;
 	struct ovs_header *ovs_reply_header;
 	struct ovs_header *ovs_header = info->userhdr;
+	struct dp_meter_table *meter_tbl;
 	struct datapath *dp;
 	int err;
 	u32 meter_id;
@@ -300,12 +417,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
+	meter_tbl = &dp->meter_tbl;
 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
-	/* Cannot fail after this. */
-	old_meter = lookup_meter(dp, meter_id);
-	detach_meter(old_meter);
-	attach_meter(dp, meter);
+	old_meter = lookup_meter(meter_tbl, meter_id);
+	err = detach_meter(meter_tbl, old_meter);
+	if (err)
+		goto exit_unlock;
+
+	err = attach_meter(meter_tbl, meter);
+	if (err)
+		goto exit_unlock;
+
 	ovs_unlock();
 
 	/* Build response with the meter_id and stats from
@@ -337,14 +460,14 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 
 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
-	struct nlattr **a = info->attrs;
-	u32 meter_id;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *meter;
+	struct sk_buff *reply;
 	struct datapath *dp;
+	u32 meter_id;
 	int err;
-	struct sk_buff *reply;
-	struct dp_meter *meter;
 
 	if (!a[OVS_METER_ATTR_ID])
 		return -EINVAL;
@@ -365,7 +488,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Locate meter, copy stats. */
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
 	if (!meter) {
 		err = -ENOENT;
 		goto exit_unlock;
@@ -390,18 +513,17 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 
 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
-	struct nlattr **a = info->attrs;
-	u32 meter_id;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *old_meter;
+	struct sk_buff *reply;
 	struct datapath *dp;
+	u32 meter_id;
 	int err;
-	struct sk_buff *reply;
-	struct dp_meter *old_meter;
 
 	if (!a[OVS_METER_ATTR_ID])
 		return -EINVAL;
-	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
 					  &ovs_reply_header);
@@ -416,14 +538,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	old_meter = lookup_meter(dp, meter_id);
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	old_meter = lookup_meter(&dp->meter_tbl, meter_id);
 	if (old_meter) {
 		spin_lock_bh(&old_meter->lock);
 		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 		WARN_ON(err);
 		spin_unlock_bh(&old_meter->lock);
-		detach_meter(old_meter);
+
+		err = detach_meter(&dp->meter_tbl, old_meter);
+		if (err)
+			goto exit_unlock;
 	}
+
 	ovs_unlock();
 	ovs_meter_free(old_meter);
 	genlmsg_end(reply, ovs_reply_header);
@@ -443,16 +570,16 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 		       struct sw_flow_key *key, u32 meter_id)
 {
-	struct dp_meter *meter;
-	struct dp_meter_band *band;
 	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
 	long long int long_delta_ms;
-	u32 delta_ms;
-	u32 cost;
+	struct dp_meter_band *band;
+	struct dp_meter *meter;
 	int i, band_exceeded_max = -1;
 	u32 band_exceeded_rate = 0;
+	u32 delta_ms;
+	u32 cost;
 
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
 	/* Do not drop the packet when there is no meter. */
 	if (!meter)
 		return false;
@@ -570,32 +697,27 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
 
 int ovs_meters_init(struct datapath *dp)
 {
-	int i;
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti;
 
-	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
-				   sizeof(struct hlist_head), GFP_KERNEL);
-
-	if (!dp->meters)
+	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+	if (!ti)
 		return -ENOMEM;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->meters[i]);
+	rcu_assign_pointer(tbl->ti, ti);
+	tbl->count = 0;
 
 	return 0;
 }
 
 void ovs_meters_exit(struct datapath *dp)
 {
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
 	int i;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++) {
-		struct hlist_head *head = &dp->meters[i];
-		struct dp_meter *meter;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
-			kfree(meter);
-	}
+	for (i = 0; i < ti->n_meters; i++)
+		ovs_meter_free(ti->dp_meters[i]);
 
-	kfree(dp->meters);
+	dp_meter_instance_free(ti);
 }
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f645913870bd..f52052d30a16 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -13,11 +13,13 @@
 #include <linux/openvswitch.h>
 #include <linux/genetlink.h>
 #include <linux/skbuff.h>
+#include <linux/bits.h>
 
 #include "flow.h"
 struct datapath;
 
 #define DP_MAX_BANDS		1
+#define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
 
 struct dp_meter_band {
 	u32 type;
@@ -30,9 +32,6 @@ struct dp_meter_band {
 struct dp_meter {
 	spinlock_t lock;    /* Per meter lock */
 	struct rcu_head rcu;
-	struct hlist_node dp_hash_node; /*Element in datapath->meters
-					 * hash table.
-					 */
 	u32 id;
 	u16 kbps:1, keep_stats:1;
 	u16 n_bands;
@@ -42,6 +41,17 @@ struct dp_meter {
 	struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+	struct rcu_head rcu;
+	u32 n_meters;
+	struct dp_meter __rcu *dp_meters[];
+};
+
+struct dp_meter_table {
+	struct dp_meter_instance __rcu *ti;
+	u32 count;
+};
+
 extern struct genl_family dp_meter_genl_family;
 int ovs_meters_init(struct datapath *dp);
 void ovs_meters_exit(struct datapath *dp);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v4 2/5] net: openvswitch: set max limitation to meters
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
@ 2020-04-24  0:08   ` xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-24  0:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Don't allow users to create an unlimited number of meters, which
may consume a large amount of kernel memory. The maximum number
supported is derived from physical memory and is capped at 200K
meters (DP_METER_NUM_MAX).

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
---
 net/openvswitch/meter.c | 57 +++++++++++++++++++++++++++++++++--------
 net/openvswitch/meter.h |  2 ++
 2 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index f806ded1dd0a..372f4565872d 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -12,6 +12,7 @@
 #include <linux/openvswitch.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
+#include <linux/swap.h>
 
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -137,6 +138,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	u32 hash = meter_hash(ti, meter->id);
+	int err;
 
 	/* In generally, slots selected should be empty, because
 	 * OvS uses id-pool to fetch a available id.
@@ -147,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 	dp_meter_instance_insert(ti, meter);
 
 	/* That function is thread-safe. */
-	if (++tbl->count >= ti->n_meters)
-		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
-			goto expand_err;
+	tbl->count++;
+	if (tbl->count >= tbl->max_meters_allowed) {
+		err = -EFBIG;
+		goto attach_err;
+	}
+
+	if (tbl->count >= ti->n_meters &&
+	    dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
+		err = -ENOMEM;
+		goto attach_err;
+	}
 
 	return 0;
 
-expand_err:
+attach_err:
 	dp_meter_instance_remove(ti, meter);
 	tbl->count--;
-	return -ENOMEM;
+	return err;
 }
 
 static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
@@ -266,18 +276,32 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 
 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 {
-	struct sk_buff *reply;
+	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
 	struct nlattr *nla, *band_nla;
-	int err;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err = -EMSGSIZE;
 
 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
 					  &ovs_reply_header);
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
 
-	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
-	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
+			dp->meter_tbl.max_meters_allowed))
+		goto exit_unlock;
+
+	ovs_unlock();
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 		goto nla_put_failure;
 
 	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
@@ -296,9 +320,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 	genlmsg_end(reply, ovs_reply_header);
 	return genlmsg_reply(reply, info);
 
+exit_unlock:
+	ovs_unlock();
 nla_put_failure:
 	nlmsg_free(reply);
-	err = -EMSGSIZE;
 	return err;
 }
 
@@ -699,15 +724,27 @@ int ovs_meters_init(struct datapath *dp)
 {
 	struct dp_meter_table *tbl = &dp->meter_tbl;
 	struct dp_meter_instance *ti;
+	unsigned long free_mem_bytes;
 
 	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
 	if (!ti)
 		return -ENOMEM;
 
+	/* Allow meters in a datapath to use ~3.12% of physical memory. */
+	free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
+	tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
+				      DP_METER_NUM_MAX);
+	if (!tbl->max_meters_allowed)
+		goto out_err;
+
 	rcu_assign_pointer(tbl->ti, ti);
 	tbl->count = 0;
 
 	return 0;
+
+out_err:
+	dp_meter_instance_free(ti);
+	return -ENOMEM;
 }
 
 void ovs_meters_exit(struct datapath *dp)
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f52052d30a16..61a3ca43cd77 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -20,6 +20,7 @@ struct datapath;
 
 #define DP_MAX_BANDS		1
 #define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
+#define DP_METER_NUM_MAX	(200000UL)
 
 struct dp_meter_band {
 	u32 type;
@@ -50,6 +51,7 @@ struct dp_meter_instance {
 struct dp_meter_table {
 	struct dp_meter_instance __rcu *ti;
 	u32 count;
+	u32 max_meters_allowed;
 };
 
 extern struct genl_family dp_meter_genl_family;
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v4 3/5] net: openvswitch: remove the unnecessary check
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
@ 2020-04-24  0:08   ` xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-24  0:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Callers check "meter" before invoking ovs_meter_cmd_reply_stats(),
so don't check it again in that function.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
---
 net/openvswitch/meter.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 372f4565872d..b7893b0d6423 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -242,12 +242,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 		goto error;
 
-	if (!meter)
-		return 0;
-
 	if (nla_put(reply, OVS_METER_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &meter->stats) ||
-	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+		    sizeof(struct ovs_flow_stats), &meter->stats))
+		goto error;
+
+	if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 			      OVS_METER_ATTR_PAD))
 		goto error;
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v4 4/5] net: openvswitch: make EINVAL return value more obvious
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
                     ` (2 preceding siblings ...)
  2020-04-24  0:08   ` [PATCH net-next v4 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
@ 2020-04-24  0:08   ` xiangxia.m.yue
  2020-04-24  0:08   ` [PATCH net-next v4 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
  2020-04-24  1:29   ` [PATCH net-next v4 0/5] expand meter tables and fix bug David Miller
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-24  0:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
---
 net/openvswitch/meter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index b7893b0d6423..e36b464b32a5 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -419,9 +419,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	u32 meter_id;
 	bool failed;
 
-	if (!a[OVS_METER_ATTR_ID]) {
-		return -ENODEV;
-	}
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
 
 	meter = dp_meter_create(a);
 	if (IS_ERR_OR_NULL(meter))
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH net-next v4 5/5] net: openvswitch: use u64 for meter bucket
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
                     ` (3 preceding siblings ...)
  2020-04-24  0:08   ` [PATCH net-next v4 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
@ 2020-04-24  0:08   ` xiangxia.m.yue
  2020-04-24  1:29   ` [PATCH net-next v4 0/5] expand meter tables and fix bug David Miller
  5 siblings, 0 replies; 48+ messages in thread
From: xiangxia.m.yue @ 2020-04-24  0:08 UTC (permalink / raw)
  To: pshelar, azhou, blp, u9012063; +Cc: netdev, dev, Tonghao Zhang

From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

When the meter rate is set to 4 Gbps or more, the u32 bucket
overflows and the meters don't work as expected.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
---
 net/openvswitch/meter.c | 2 +-
 net/openvswitch/meter.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index e36b464b32a5..915f31123f23 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 *
 		 * Start with a full bucket.
 		 */
-		band->bucket = (band->burst_size + band->rate) * 1000;
+		band->bucket = (band->burst_size + band->rate) * 1000ULL;
 		band_max_delta_t = band->bucket / band->rate;
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index 61a3ca43cd77..0c33889a8515 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -26,7 +26,7 @@ struct dp_meter_band {
 	u32 type;
 	u32 rate;
 	u32 burst_size;
-	u32 bucket; /* 1/1000 packets, or in bits */
+	u64 bucket; /* 1/1000 packets, or in bits */
 	struct ovs_flow_stats stats;
 };
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH net-next v4 0/5] expand meter tables and fix bug
  2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
                     ` (4 preceding siblings ...)
  2020-04-24  0:08   ` [PATCH net-next v4 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
@ 2020-04-24  1:29   ` David Miller
  5 siblings, 0 replies; 48+ messages in thread
From: David Miller @ 2020-04-24  1:29 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: pshelar, azhou, blp, u9012063, netdev, dev

From: xiangxia.m.yue@gmail.com
Date: Fri, 24 Apr 2020 08:08:01 +0800

> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> The patch set expand or shrink the meter table when necessary.
> and other patches fix bug or improve codes.

This looks better, series applied, thank you.

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2020-04-24  1:29 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-23 13:10 [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported xiangxia.m.yue
2020-03-23 13:10 ` [PATCH net-next v1 2/3] net: openvswitch: set max limitation to meters xiangxia.m.yue
2020-03-23 13:10 ` [PATCH net-next v1 3/3] net: openvswitch: remove the unnecessary check xiangxia.m.yue
2020-03-29 16:46 ` [PATCH net-next v1 1/3] net: openvswitch: expand the meters number supported Pravin Shelar
2020-03-30  0:34   ` Tonghao Zhang
2020-03-31  3:57     ` Pravin Shelar
2020-04-01 10:50       ` Tonghao Zhang
2020-04-01 21:12         ` Pravin Shelar
2020-04-08 15:09         ` [ovs-dev] " William Tu
2020-04-08 15:59           ` Tonghao Zhang
2020-04-08 16:01             ` Tonghao Zhang
2020-04-09 21:41             ` William Tu
2020-04-09 23:29               ` Tonghao Zhang
2020-04-11  8:14                 ` Pravin Shelar
2020-04-16 10:16 ` [PATCH net-next v2 0/5] expand meter tables and fix bug xiangxia.m.yue
2020-04-16 10:16   ` [PATCH net-next v2 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
2020-04-19 17:29     ` Pravin Shelar
2020-04-20  0:23       ` Tonghao Zhang
2020-04-20 21:43         ` Pravin Shelar
2020-04-16 10:17   ` [PATCH net-next v2 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
2020-04-19 17:30     ` Pravin Shelar
2020-04-20  0:28       ` Tonghao Zhang
2020-04-20 21:44         ` Pravin Shelar
2020-04-16 10:17   ` [PATCH net-next v2 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
2020-04-16 10:17   ` [PATCH net-next v2 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
2020-04-16 10:17   ` [PATCH net-next v2 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
2020-04-18 22:39   ` [PATCH net-next v2 0/5] expand meter tables and fix bug David Miller
2020-04-22 17:08 ` [PATCH net-next v3 " xiangxia.m.yue
2020-04-22 17:08   ` [PATCH net-next v3 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
2020-04-23  3:54     ` Pravin Shelar
2020-04-22 17:08   ` [PATCH net-next v3 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
2020-04-23  3:54     ` Pravin Shelar
2020-04-22 17:08   ` [PATCH net-next v3 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
2020-04-23  3:54     ` Pravin Shelar
2020-04-22 17:08   ` [PATCH net-next v3 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
2020-04-23  3:54     ` Pravin Shelar
2020-04-22 17:09   ` [PATCH net-next v3 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
2020-04-23  3:54     ` Pravin Shelar
2020-04-23 19:45   ` [PATCH net-next v3 0/5] expand meter tables and fix bug David Miller
2020-04-23 19:49     ` David Miller
2020-04-23 22:56       ` Tonghao Zhang
2020-04-24  0:08 ` [PATCH net-next v4 " xiangxia.m.yue
2020-04-24  0:08   ` [PATCH net-next v4 1/5] net: openvswitch: expand the meters supported number xiangxia.m.yue
2020-04-24  0:08   ` [PATCH net-next v4 2/5] net: openvswitch: set max limitation to meters xiangxia.m.yue
2020-04-24  0:08   ` [PATCH net-next v4 3/5] net: openvswitch: remove the unnecessary check xiangxia.m.yue
2020-04-24  0:08   ` [PATCH net-next v4 4/5] net: openvswitch: make EINVAL return value more obvious xiangxia.m.yue
2020-04-24  0:08   ` [PATCH net-next v4 5/5] net: openvswitch: use u64 for meter bucket xiangxia.m.yue
2020-04-24  1:29   ` [PATCH net-next v4 0/5] expand meter tables and fix bug David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.