Netdev Archive on lore.kernel.org
* [PATCH net v3] net: openvswitch: fix to make sure flow_lookup() is not preempted
@ 2020-10-15 17:09 Eelco Chaudron
  2020-10-16 23:46 ` Jakub Kicinski
From: Eelco Chaudron @ 2020-10-15 17:09 UTC
  To: netdev; +Cc: davem, dev, kuba, pabeni, pshelar, jlelli, bigeasy, i.maximets

The flow_lookup() function uses per-CPU variables, so it must be called
with BH disabled. This is already the case in the general NAPI path,
where local BH is disabled, but the function is also called from the
netlink context. This patch makes sure that BH is disabled in the
netlink path as well.
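
For context, the fix boils down to a common kernel pattern for touching
per-CPU data from process context. Below is a minimal sketch of that
pattern; the names are illustrative and not taken from the patch:

#include <linux/bottom_half.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU stats slot; assume it was set up at init time
 * with alloc_percpu(struct hit_stats).
 */
struct hit_stats {
	u64 hits;
};

static struct hit_stats __percpu *hit_stats;

/* Fine when BH is already disabled (NAPI); racy from process context,
 * where a softirq on the same CPU could interleave with the update.
 */
static void hit_stats_inc(void)
{
	struct hit_stats *s = this_cpu_ptr(hit_stats);

	s->hits++;
}

/* Process-context callers (e.g. a netlink request) disable BH first,
 * which is exactly what ovs_flow_tbl_lookup() does below around
 * flow_lookup().
 */
static void hit_stats_inc_from_process(void)
{
	local_bh_disable();
	hit_stats_inc();
	local_bh_enable();
}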

In addition, u64_stats_update_begin() requires the caller to ensure that
there is only a single writer, which is not guaranteed here. Making the
u64_stats_sync structure per CPU and disabling BH (softirq) ensures that
there is always only one writer at a time.
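
The per-CPU u64_stats_sync pattern adopted here looks roughly as follows
(again a sketch with made-up names; the reader loop mirrors the ones in
tbl_mask_array_reset_counters() and ovs_flow_masks_rebalance() in the
diff below):

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

/* Hypothetical per-CPU counter; each CPU has its own syncp, so the
 * softirq path is the only writer for that seqcount. The syncp must
 * be set up with u64_stats_init() after the per-CPU allocation.
 */
struct pcpu_cnt {
	struct u64_stats_sync syncp;
	u64 cnt;
};

/* Writer: runs with BH disabled, so there is one writer per CPU. */
static void pcpu_cnt_inc(struct pcpu_cnt __percpu *stats)
{
	struct pcpu_cnt *s = this_cpu_ptr(stats);

	u64_stats_update_begin(&s->syncp);
	s->cnt++;
	u64_stats_update_end(&s->syncp);
}

/* Reader: sums all CPUs, retrying any slot a writer raced with; the
 * retry loop matters on 32-bit SMP, where a u64 load is not atomic.
 */
static u64 pcpu_cnt_read(struct pcpu_cnt __percpu *stats)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct pcpu_cnt *s = per_cpu_ptr(stats, cpu);
		unsigned int start;
		u64 v;

		do {
			start = u64_stats_fetch_begin_irq(&s->syncp);
			v = s->cnt;
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));

		total += v;
	}
	return total;
}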

Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage")
Reported-by: Juri Lelli <jlelli@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
v3: - Add comment to flow_lookup() call
    - Some updates to code comments and the commit message

v2: - Add u64_stats_update_begin() sync point protection
    - Moved patch to net from net-next branch

 net/openvswitch/flow_table.c |   58 +++++++++++++++++++++++++-----------------
 net/openvswitch/flow_table.h |    8 ++++--
 2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index e2235849a57e..7d50c45fea37 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -172,7 +172,7 @@ static struct table_instance *table_instance_alloc(int new_size)
 
 static void __mask_array_destroy(struct mask_array *ma)
 {
-	free_percpu(ma->masks_usage_cntr);
+	free_percpu(ma->masks_usage_stats);
 	kfree(ma);
 }
 
@@ -196,15 +196,15 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma)
 		ma->masks_usage_zero_cntr[i] = 0;
 
 		for_each_possible_cpu(cpu) {
-			u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
-							  cpu);
+			struct mask_array_stats *stats;
 			unsigned int start;
 			u64 counter;
 
+			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
 			do {
-				start = u64_stats_fetch_begin_irq(&ma->syncp);
-				counter = usage_counters[i];
-			} while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+				start = u64_stats_fetch_begin_irq(&stats->syncp);
+				counter = stats->usage_cntrs[i];
+			} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
 
 			ma->masks_usage_zero_cntr[i] += counter;
 		}
@@ -227,9 +227,10 @@ static struct mask_array *tbl_mask_array_alloc(int size)
 					     sizeof(struct sw_flow_mask *) *
 					     size);
 
-	new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
-					       __alignof__(u64));
-	if (!new->masks_usage_cntr) {
+	new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
+						sizeof(u64) * size,
+						__alignof__(u64));
+	if (!new->masks_usage_stats) {
 		kfree(new);
 		return NULL;
 	}
@@ -723,6 +724,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 
 /* Flow lookup does full lookup on flow table. It starts with
  * mask from index passed in *index.
+ * This function MUST be called with BH disabled due to the use
+ * of CPU-specific variables.
  */
 static struct sw_flow *flow_lookup(struct flow_table *tbl,
 				   struct table_instance *ti,
@@ -732,7 +735,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 				   u32 *n_cache_hit,
 				   u32 *index)
 {
-	u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
+	struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
 	struct sw_flow *flow;
 	struct sw_flow_mask *mask;
 	int i;
@@ -742,9 +745,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 		if (mask) {
 			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
 			if (flow) {
-				u64_stats_update_begin(&ma->syncp);
-				usage_counters[*index]++;
-				u64_stats_update_end(&ma->syncp);
+				u64_stats_update_begin(&stats->syncp);
+				stats->usage_cntrs[*index]++;
+				u64_stats_update_end(&stats->syncp);
 				(*n_cache_hit)++;
 				return flow;
 			}
@@ -763,9 +766,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
 		if (flow) { /* Found */
 			*index = i;
-			u64_stats_update_begin(&ma->syncp);
-			usage_counters[*index]++;
-			u64_stats_update_end(&ma->syncp);
+			u64_stats_update_begin(&stats->syncp);
+			stats->usage_cntrs[*index]++;
+			u64_stats_update_end(&stats->syncp);
 			return flow;
 		}
 	}
@@ -851,9 +854,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
 	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
 	u32 __always_unused n_mask_hit;
 	u32 __always_unused n_cache_hit;
+	struct sw_flow *flow;
 	u32 index = 0;
 
-	return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+	/* This function gets called through the netlink interface and therefore
+	 * is preemptible. However, the flow_lookup() function needs to be called
+	 * with BH disabled due to CPU-specific variables.
+	 */
+	local_bh_disable();
+	flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+	local_bh_enable();
+	return flow;
 }
 
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
@@ -1109,7 +1120,6 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
 
 	for (i = 0; i < ma->max; i++)  {
 		struct sw_flow_mask *mask;
-		unsigned int start;
 		int cpu;
 
 		mask = rcu_dereference_ovsl(ma->masks[i]);
@@ -1120,14 +1130,16 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
 		masks_and_count[i].counter = 0;
 
 		for_each_possible_cpu(cpu) {
-			u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
-							  cpu);
+			struct mask_array_stats *stats;
+			unsigned int start;
 			u64 counter;
 
+			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
 			do {
-				start = u64_stats_fetch_begin_irq(&ma->syncp);
-				counter = usage_counters[i];
-			} while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+				start = u64_stats_fetch_begin_irq(&stats->syncp);
+				counter = stats->usage_cntrs[i];
+			} while (u64_stats_fetch_retry_irq(&stats->syncp,
+							   start));
 
 			masks_and_count[i].counter += counter;
 		}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 6e7d4ac59353..43144396e192 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -38,12 +38,16 @@ struct mask_count {
 	u64 counter;
 };
 
+struct mask_array_stats {
+	struct u64_stats_sync syncp;
+	u64 usage_cntrs[];
+};
+
 struct mask_array {
 	struct rcu_head rcu;
 	int count, max;
-	u64 __percpu *masks_usage_cntr;
+	struct mask_array_stats __percpu *masks_usage_stats;
 	u64 *masks_usage_zero_cntr;
-	struct u64_stats_sync syncp;
 	struct sw_flow_mask __rcu *masks[];
 };
 



* Re: [PATCH net v3] net: openvswitch: fix to make sure flow_lookup() is not preempted
  2020-10-15 17:09 [PATCH net v3] net: openvswitch: fix to make sure flow_lookup() is not preempted Eelco Chaudron
@ 2020-10-16 23:46 ` Jakub Kicinski
  2020-10-17 18:27   ` Eelco Chaudron
From: Jakub Kicinski @ 2020-10-16 23:46 UTC
  To: Eelco Chaudron
  Cc: netdev, davem, dev, pabeni, pshelar, jlelli, bigeasy, i.maximets

On Thu, 15 Oct 2020 19:09:33 +0200 Eelco Chaudron wrote:
> The flow_lookup() function uses per-CPU variables, so it must be called
> with BH disabled. This is already the case in the general NAPI path,
> where local BH is disabled, but the function is also called from the
> netlink context. This patch makes sure that BH is disabled in the
> netlink path as well.
> 
> In addition, u64_stats_update_begin() requires the caller to ensure that
> there is only a single writer, which is not guaranteed here. Making the
> u64_stats_sync structure per CPU and disabling BH (softirq) ensures that
> there is always only one writer at a time.
> 
> Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage")
> Reported-by: Juri Lelli <jlelli@redhat.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

Hi Eelco, looks like this doesn't apply after the 5.10 merges.

Please respin on top of the current net.


* Re: [PATCH net v3] net: openvswitch: fix to make sure flow_lookup() is not preempted
  2020-10-16 23:46 ` Jakub Kicinski
@ 2020-10-17 18:27   ` Eelco Chaudron
From: Eelco Chaudron @ 2020-10-17 18:27 UTC
  To: Jakub Kicinski
  Cc: netdev, davem, dev, pabeni, pshelar, jlelli, bigeasy, i.maximets



----- Original Message -----
> From: "Jakub Kicinski" <kuba@kernel.org>
> To: "Eelco Chaudron" <echaudro@redhat.com>
> Cc: netdev@vger.kernel.org, davem@davemloft.net, dev@openvswitch.org, pabeni@redhat.com, pshelar@ovn.org,
> jlelli@redhat.com, bigeasy@linutronix.de, "i maximets" <i.maximets@ovn.org>
> Sent: Saturday, October 17, 2020 1:46:07 AM
> Subject: Re: [PATCH net v3] net: openvswitch: fix to make sure flow_lookup() is not preempted
> 
> On Thu, 15 Oct 2020 19:09:33 +0200 Eelco Chaudron wrote:
> > The flow_lookup() function uses per-CPU variables, so it must be called
> > with BH disabled. This is already the case in the general NAPI path,
> > where local BH is disabled, but the function is also called from the
> > netlink context. This patch makes sure that BH is disabled in the
> > netlink path as well.
> > 
> > In addition, u64_stats_update_begin() requires the caller to ensure that
> > there is only a single writer, which is not guaranteed here. Making the
> > u64_stats_sync structure per CPU and disabling BH (softirq) ensures that
> > there is always only one writer at a time.
> > 
> > Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage")
> > Reported-by: Juri Lelli <jlelli@redhat.com>
> > Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> 
> Hi Eelco, looks like this doesn't apply after the 5.10 merges.
> 
> Please respin on top of the current net.

Done, just sent out a v4.

//Eelco

