All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Vlad Buslov <vladbu@nvidia.com>, Jianbo Liu <jianbol@nvidia.com>,
	Saeed Mahameed <saeedm@nvidia.com>,
	Sasha Levin <sashal@kernel.org>,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, roid@nvidia.com, maord@nvidia.com,
	shayd@nvidia.com, gal@nvidia.com, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org
Subject: [PATCH AUTOSEL 6.5 25/52] net/mlx5: Bridge, fix peer entry ageing in LAG mode
Date: Sun, 29 Oct 2023 18:53:12 -0400	[thread overview]
Message-ID: <20231029225441.789781-25-sashal@kernel.org> (raw)
In-Reply-To: <20231029225441.789781-1-sashal@kernel.org>

From: Vlad Buslov <vladbu@nvidia.com>

[ Upstream commit 7a3ce8074878a68a75ceacec93d9ae05906eec86 ]

With the current implementation, in single FDB LAG mode all packets are
processed by eswitch 0 rules. As such, 'peer' FDB entries receive the
packets for rules of other eswitches and are responsible for updating the
main entry by sending a SWITCHDEV_FDB_ADD_TO_BRIDGE notification from their
background update wq task. However, this introduces a race condition when
a non-zero eswitch instance decides to delete an FDB entry and sends a
SWITCHDEV_FDB_DEL_TO_BRIDGE notification, but another eswitch's update task
refreshes the same entry concurrently while its async delete work is still
pending on the workqueue. In such a case another SWITCHDEV_FDB_ADD_TO_BRIDGE
event may be generated and the entry will remain stuck in the FDB marked as
'offloaded', since no more SWITCHDEV_FDB_DEL_TO_BRIDGE notifications are
sent for deleting the peer entries.

Fix the issue by synchronously marking deleted entries with the
MLX5_ESW_BRIDGE_FLAG_DELETED flag and skipping them in the background
update job.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../mellanox/mlx5/core/en/rep/bridge.c        | 11 ++++++++
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 25 ++++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |  3 +++
 .../mellanox/mlx5/core/esw/bridge_priv.h      |  1 +
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 5608002465734..285c13edc09f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -463,6 +463,17 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
 		/* only handle the event on peers */
 		if (mlx5_esw_bridge_is_local(dev, rep, esw))
 			break;
+
+		fdb_info = container_of(info,
+					struct switchdev_notifier_fdb_info,
+					info);
+		/* Mark for deletion to prevent the update wq task from
+		 * spuriously refreshing the entry which would mark it again as
+		 * offloaded in SW bridge. After this fallthrough to regular
+		 * async delete code.
+		 */
+		mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads,
+						 fdb_info);
 		fallthrough;
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index f4fe1daa4afd5..de1ed59239da8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -1748,6 +1748,28 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16
 	entry->lastuse = jiffies;
 }
 
+void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+				      struct mlx5_esw_bridge_offloads *br_offloads,
+				      struct switchdev_notifier_fdb_info *fdb_info)
+{
+	struct mlx5_esw_bridge_fdb_entry *entry;
+	struct mlx5_esw_bridge *bridge;
+
+	bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+	if (!bridge)
+		return;
+
+	entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
+	if (!entry) {
+		esw_debug(br_offloads->esw->dev,
+			  "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+			  fdb_info->addr, fdb_info->vid, vport_num);
+		return;
+	}
+
+	entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED;
+}
+
 void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
 				struct mlx5_esw_bridge_offloads *br_offloads,
 				struct switchdev_notifier_fdb_info *fdb_info)
@@ -1810,7 +1832,8 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
 			unsigned long lastuse =
 				(unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter);
 
-			if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+			if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER |
+					    MLX5_ESW_BRIDGE_FLAG_DELETED))
 				continue;
 
 			if (time_after(lastuse, entry->lastuse))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index c2c7c70d99eb7..d6f5391619930 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -62,6 +62,9 @@ int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_nu
 void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
 				     struct mlx5_esw_bridge_offloads *br_offloads,
 				     struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+				      struct mlx5_esw_bridge_offloads *br_offloads,
+				      struct switchdev_notifier_fdb_info *fdb_info);
 void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
 				struct mlx5_esw_bridge_offloads *br_offloads,
 				struct switchdev_notifier_fdb_info *fdb_info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
index 4911cc32161b4..7c251af566c6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -133,6 +133,7 @@ struct mlx5_esw_bridge_mdb_key {
 enum {
 	MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
 	MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
+	MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2),
 };
 
 enum {
-- 
2.42.0


  parent reply	other threads:[~2023-10-29 23:33 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-29 22:52 [PATCH AUTOSEL 6.5 01/52] fbdev: atyfb: only use ioremap_uc() on i386 and ia64 Sasha Levin
2023-10-29 22:52 ` Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 02/52] fs/ntfs3: Add ckeck in ni_update_parent() Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 03/52] fs/ntfs3: Write immediately updated ntfs state Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 04/52] fs/ntfs3: Use kvmalloc instead of kmalloc(... __GFP_NOWARN) Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 05/52] fs/ntfs3: Add more attributes checks in mi_enum_attr() Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 06/52] fs/ntfs3: fix deadlock in mark_as_free_ex Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 07/52] fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 08/52] fs/ntfs3: Fix alternative boot searching Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 09/52] fs/ntfs3: Add more info into /proc/fs/ntfs3/<dev>/volinfo Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 10/52] fs/ntfs3: Do not allow to change label if volume is read-only Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 11/52] fs/ntfs3: Fix possible NULL-ptr-deref in ni_readpage_cmpr() Sasha Levin
2023-10-29 22:52 ` [PATCH AUTOSEL 6.5 12/52] fs/ntfs3: Fix NULL pointer dereference on error in attr_allocate_frame() Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 13/52] fs/ntfs3: Fix possible null-pointer dereference in hdr_find_e() Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 14/52] fs/ntfs3: Fix directory element type detection Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 15/52] fs/ntfs3: Avoid possible memory leak Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 16/52] spi: npcm-fiu: Fix UMA reads when dummy.nbytes == 0 Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 17/52] nvme-rdma: do not try to stop unallocated queues Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 18/52] ASoC: soc-dapm: Add helper for comparing widget name Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 19/52] ASoC: codecs: wsa-macro: handle component name prefix Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 20/52] net: ipv6: fix return value check in esp_remove_trailer Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 21/52] net: ipv4: " Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 22/52] nfs42: client needs to strip file mode's suid/sgid bit after ALLOCATE op Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 23/52] Bluetooth: vhci: Fix race when opening vhci device Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 24/52] netfilter: nfnetlink_log: silence bogus compiler warning Sasha Levin
2023-10-29 22:53 ` Sasha Levin [this message]
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 26/52] x86/efistub: Don't try to print after ExitBootService() Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 27/52] efi: fix memory leak in krealloc failure handling Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 28/52] ASoC: rt5650: fix the wrong result of key button Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 29/52] ASoC: codecs: tas2780: Fix log of failed reset via I2C Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 30/52] s390/kasan: handle DCSS mapping in memory holes Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 31/52] drm/ttm: Reorder sys manager cleanup step Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 32/52] fbdev: omapfb: fix some error codes Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 33/52] fbdev: uvesafb: Call cn_del_callback() at the end of uvesafb_exit() Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 34/52] fbdev: core: cfbcopyarea: fix sloppy typing Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-30  8:39   ` Sergey Shtylyov
2023-10-30  8:39     ` Sergey Shtylyov
2023-11-04  2:07     ` Sasha Levin
2023-11-04  2:07       ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 35/52] fbdev: core: syscopyarea: " Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-30  8:40   ` Sergey Shtylyov
2023-10-30  8:40     ` Sergey Shtylyov
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 36/52] scsi: mpt3sas: Fix in error path Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 37/52] ASoC: da7219: Correct the process of setting up Gnd switch in AAD Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 38/52] drm/amdgpu: Unset context priority is now invalid Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 39/52] gpu/drm: Eliminate DRM_SCHED_PRIORITY_UNSET Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 40/52] LoongArch: Use SYM_CODE_* to annotate exception handlers Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 41/52] LoongArch: Export symbol invalid_pud_table for modules building Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 42/52] LoongArch: Replace kmap_atomic() with kmap_local_page() in copy_user_highpage() Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 43/52] LoongArch: Disable WUC for pgprot_writecombine() like ioremap_wc() Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 44/52] platform/x86: asus-wmi: Change ASUS_WMI_BRN_DOWN code from 0x20 to 0x2e Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 45/52] platform/x86: asus-wmi: Only map brightness codes when using asus-wmi backlight control Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 46/52] platform/x86: asus-wmi: Map 0x2a code, Ignore 0x2b and 0x2c events Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 47/52] netfilter: nf_tables: audit log object reset once per table Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 48/52] platform/mellanox: mlxbf-tmfifo: Fix a warning message Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 49/52] nvme-pci: add BOGUS_NID for Intel 0a54 device Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 50/52] Revert "accel/ivpu: Use cached buffers for FW loading" Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 51/52] Revert "pinctrl: avoid unsafe code pattern in find_pinctrl()" Sasha Levin
2023-10-29 22:53 ` [PATCH AUTOSEL 6.5 52/52] drm/amdgpu: Reserve fences for VM update Sasha Levin
2023-10-29 22:53   ` Sasha Levin
2023-10-29 22:53   ` Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231029225441.789781-25-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=jianbol@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=maord@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=roid@nvidia.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    --cc=stable@vger.kernel.org \
    --cc=vladbu@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.