linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net 0/2] Fix VF to VM attach detach
@ 2023-08-07  9:48 Petr Oros
  2023-08-07  9:48 ` [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization" Petr Oros
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Petr Oros @ 2023-08-07  9:48 UTC (permalink / raw)
  To: netdev
  Cc: jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
	pabeni, Jacob.e.keller, przemyslawx.patynowski, kamil.maziarz,
	dawidx.wesierski, mateusz.palczewski, slawomirx.laba,
	norbertx.zulinski, intel-wired-lan, linux-kernel

Petr Oros (2):
  Revert "ice: Fix ice VF reset during iavf initialization"
  ice: Fix NULL pointer deref during VF reset

 drivers/net/ethernet/intel/ice/ice_sriov.c    |  8 ++---
 drivers/net/ethernet/intel/ice/ice_vf_lib.c   | 34 +++++--------------
 drivers/net/ethernet/intel/ice/ice_vf_lib.h   |  1 -
 drivers/net/ethernet/intel/ice/ice_virtchnl.c |  1 -
 4 files changed, 12 insertions(+), 32 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization"
  2023-08-07  9:48 [PATCH net 0/2] Fix VF to VM attach detach Petr Oros
@ 2023-08-07  9:48 ` Petr Oros
  2023-08-08 12:51   ` Simon Horman
  2023-08-07  9:48 ` [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset Petr Oros
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Petr Oros @ 2023-08-07  9:48 UTC (permalink / raw)
  To: netdev
  Cc: jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
	pabeni, Jacob.e.keller, przemyslawx.patynowski, kamil.maziarz,
	dawidx.wesierski, mateusz.palczewski, slawomirx.laba,
	norbertx.zulinski, intel-wired-lan, linux-kernel

This reverts commit 7255355a0636b4eff08d5e8139c77d98f151c4fc.

After this commit we are not able to attach VF to VM:
virsh attach-interface v0 hostdev --managed 0000:41:01.0 --mac 52:52:52:52:52:52
error: Failed to attach interface
error: Cannot set interface MAC to 52:52:52:52:52:52 for ifname enp65s0f0np0 vf 0: Resource temporarily unavailable

ice_check_vf_ready_for_cfg() already contain waiting for reset.
New condition in ice_check_vf_ready_for_reset() causing only problems.

Fixes: 7255355a0636 ("ice: Fix ice VF reset during iavf initialization")
Signed-off-by: Petr Oros <poros@redhat.com>
---
 drivers/net/ethernet/intel/ice/ice_sriov.c    |  8 ++++----
 drivers/net/ethernet/intel/ice/ice_vf_lib.c   | 19 -------------------
 drivers/net/ethernet/intel/ice/ice_vf_lib.h   |  1 -
 drivers/net/ethernet/intel/ice/ice_virtchnl.c |  1 -
 4 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 1f66914c7a202a..31314e7540f8cf 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1131,7 +1131,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
 	if (!vf)
 		return -EINVAL;
 
-	ret = ice_check_vf_ready_for_reset(vf);
+	ret = ice_check_vf_ready_for_cfg(vf);
 	if (ret)
 		goto out_put_vf;
 
@@ -1246,7 +1246,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 		goto out_put_vf;
 	}
 
-	ret = ice_check_vf_ready_for_reset(vf);
+	ret = ice_check_vf_ready_for_cfg(vf);
 	if (ret)
 		goto out_put_vf;
 
@@ -1300,7 +1300,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
 		return -EOPNOTSUPP;
 	}
 
-	ret = ice_check_vf_ready_for_reset(vf);
+	ret = ice_check_vf_ready_for_cfg(vf);
 	if (ret)
 		goto out_put_vf;
 
@@ -1613,7 +1613,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 	if (!vf)
 		return -EINVAL;
 
-	ret = ice_check_vf_ready_for_reset(vf);
+	ret = ice_check_vf_ready_for_cfg(vf);
 	if (ret)
 		goto out_put_vf;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index b26ce4425f4559..294e91c3453ccd 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -185,25 +185,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
 	return 0;
 }
 
-/**
- * ice_check_vf_ready_for_reset - check if VF is ready to be reset
- * @vf: VF to check if it's ready to be reset
- *
- * The purpose of this function is to ensure that the VF is not in reset,
- * disabled, and is both initialized and active, thus enabling us to safely
- * initialize another reset.
- */
-int ice_check_vf_ready_for_reset(struct ice_vf *vf)
-{
-	int ret;
-
-	ret = ice_check_vf_ready_for_cfg(vf);
-	if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
-		ret = -EAGAIN;
-
-	return ret;
-}
-
 /**
  * ice_trigger_vf_reset - Reset a VF on HW
  * @vf: pointer to the VF structure
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 67172fdd9bc27f..48fea6fa036210 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf);
 struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
 bool ice_is_vf_disabled(struct ice_vf *vf);
 int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
-int ice_check_vf_ready_for_reset(struct ice_vf *vf);
 void ice_set_vf_state_dis(struct ice_vf *vf);
 bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
 void
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index efbc2968a7bf6a..dcf628b1fccd93 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -3947,7 +3947,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
 		ice_vc_notify_vf_link_state(vf);
 		break;
 	case VIRTCHNL_OP_RESET_VF:
-		clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
 		ops->reset_vf(vf);
 		break;
 	case VIRTCHNL_OP_ADD_ETH_ADDR:
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset
  2023-08-07  9:48 [PATCH net 0/2] Fix VF to VM attach detach Petr Oros
  2023-08-07  9:48 ` [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization" Petr Oros
@ 2023-08-07  9:48 ` Petr Oros
  2023-08-08 12:51   ` Simon Horman
  2023-08-08 20:54   ` [Intel-wired-lan] " Przemek Kitszel
  2023-08-08 20:56 ` [Intel-wired-lan] [PATCH net 0/2] Fix VF to VM attach detach Przemek Kitszel
  2023-08-08 22:10 ` Jacob Keller
  3 siblings, 2 replies; 9+ messages in thread
From: Petr Oros @ 2023-08-07  9:48 UTC (permalink / raw)
  To: netdev
  Cc: jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
	pabeni, Jacob.e.keller, przemyslawx.patynowski, kamil.maziarz,
	dawidx.wesierski, mateusz.palczewski, slawomirx.laba,
	norbertx.zulinski, intel-wired-lan, linux-kernel

During stress test with attaching and detaching VF from KVM and
simultaneously changing VFs spoofcheck and trust there was a
NULL pointer dereference in ice_reset_vf that VF's VSI is null.

More than one instance of ice_reset_vf() can be running at a given
time. When we rebuild the VSI in ice_reset_vf, another reset can be
triaged from ice_service_task. In this case we can access the currently
uninitialized VSI and couse panic. The window for this racing condition
has been around for a long time but it's much worse after commit
227bf4500aaa ("ice: move VSI delete outside deconfig") because
the reset runs faster. ice_reset_vf() using vf->cfg_lock and when
we move this lock before accessing to the VF VSI, we can fix
BUG for all cases.

Panic occurs sometimes in ice_vsi_is_rx_queue_active() and sometimes
in ice_vsi_stop_all_rx_rings()

With our reproducer, we can hint BUG:
~8h before commit 227bf4500aaa ("ice: move VSI delete outside deconfig").
~20m after commit 227bf4500aaa ("ice: move VSI delete outside deconfig").
After this fix we are not able to reproduce it after ~48h

There was commit cf90b74341ee ("ice: Fix call trace with null VSI during
VF reset") which also tried to fix this issue, but it was only
partially resolved and the bug still exists.

[ 6420.658415] BUG: kernel NULL pointer dereference, address: 0000000000000000
[ 6420.665382] #PF: supervisor read access in kernel mode
[ 6420.670521] #PF: error_code(0x0000) - not-present page
[ 6420.675659] PGD 0
[ 6420.677679] Oops: 0000 [#1] PREEMPT SMP NOPTI
[ 6420.682038] CPU: 53 PID: 326472 Comm: kworker/53:0 Kdump: loaded Not tainted 5.14.0-317.el9.x86_64 #1
[ 6420.691250] Hardware name: Dell Inc. PowerEdge R750/04V528, BIOS 1.6.5 04/15/2022
[ 6420.698729] Workqueue: ice ice_service_task [ice]
[ 6420.703462] RIP: 0010:ice_vsi_is_rx_queue_active+0x2d/0x60 [ice]
[ 6420.705860] ice 0000:ca:00.0: VF 0 is now untrusted
[ 6420.709494] Code: 00 00 66 83 bf 76 04 00 00 00 48 8b 77 10 74 3e 31 c0 eb 0f 0f b7 97 76 04 00 00 48 83 c0 01 39 c2 7e 2b 48 8b 97 68 04 00 00 <0f> b7 0c 42 48 8b 96 20 13 00 00 48 8d 94 8a 00 00 12 00 8b 12 83
[ 6420.714426] ice 0000:ca:00.0 ens7f0: Setting MAC 22:22:22:22:22:00 on VF 0. VF driver will be reinitialized
[ 6420.733120] RSP: 0018:ff778d2ff383fdd8 EFLAGS: 00010246
[ 6420.733123] RAX: 0000000000000000 RBX: ff2acf1916294000 RCX: 0000000000000000
[ 6420.733125] RDX: 0000000000000000 RSI: ff2acf1f2c6401a0 RDI: ff2acf1a27301828
[ 6420.762346] RBP: ff2acf1a27301828 R08: 0000000000000010 R09: 0000000000001000
[ 6420.769476] R10: ff2acf1916286000 R11: 00000000019eba3f R12: ff2acf19066460d0
[ 6420.776611] R13: ff2acf1f2c6401a0 R14: ff2acf1f2c6401a0 R15: 00000000ffffffff
[ 6420.783742] FS:  0000000000000000(0000) GS:ff2acf28ffa80000(0000) knlGS:0000000000000000
[ 6420.791829] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6420.797575] CR2: 0000000000000000 CR3: 00000016ad410003 CR4: 0000000000773ee0
[ 6420.804708] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 6420.811034] vfio-pci 0000:ca:01.0: enabling device (0000 -> 0002)
[ 6420.811840] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 6420.811841] PKRU: 55555554
[ 6420.811842] Call Trace:
[ 6420.811843]  <TASK>
[ 6420.811844]  ice_reset_vf+0x9a/0x450 [ice]
[ 6420.811876]  ice_process_vflr_event+0x8f/0xc0 [ice]
[ 6420.841343]  ice_service_task+0x23b/0x600 [ice]
[ 6420.845884]  ? __schedule+0x212/0x550
[ 6420.849550]  process_one_work+0x1e2/0x3b0
[ 6420.853563]  ? rescuer_thread+0x390/0x390
[ 6420.857577]  worker_thread+0x50/0x3a0
[ 6420.861242]  ? rescuer_thread+0x390/0x390
[ 6420.865253]  kthread+0xdd/0x100
[ 6420.868400]  ? kthread_complete_and_exit+0x20/0x20
[ 6420.873194]  ret_from_fork+0x1f/0x30
[ 6420.876774]  </TASK>
[ 6420.878967] Modules linked in: vfio_pci vfio_pci_core vfio_iommu_type1 vfio iavf vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables bridge stp llc sctp ip6_udp_tunnel udp_tunnel nfp tls nfnetlink bluetooth mlx4_en mlx4_core rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp irdma kvm_intel i40e kvm iTCO_wdt dcdbas ib_uverbs irqbypass iTCO_vendor_support mgag200 mei_me ib_core dell_smbios isst_if_mmio isst_if_mbox_pci rapl i2c_algo_bit drm_shmem_helper intel_cstate drm_kms_helper syscopyarea sysfillrect isst_if_common sysimgblt intel_uncore fb_sys_fops dell_wmi_descriptor wmi_bmof intel_vsec mei i2c_i801 acpi_ipmi ipmi_si i2c_smbus ipmi_devintf intel_pch_thermal acpi_power_meter pcspkr

Fixes: efe41860008e ("ice: Fix memory corruption in VF driver")
Fixes: f23df5220d2b ("ice: Fix spurious interrupt during removal of trusted VF")
Signed-off-by: Petr Oros <poros@redhat.com>
---
 drivers/net/ethernet/intel/ice/ice_vf_lib.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 294e91c3453ccd..ea3310be8354cf 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -612,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
 		return 0;
 	}
 
+	if (flags & ICE_VF_RESET_LOCK)
+		mutex_lock(&vf->cfg_lock);
+	else
+		lockdep_assert_held(&vf->cfg_lock);
+
 	if (ice_is_vf_disabled(vf)) {
 		vsi = ice_get_vf_vsi(vf);
 		if (!vsi) {
 			dev_dbg(dev, "VF is already removed\n");
-			return -EINVAL;
+			err = -EINVAL;
+			goto out_unlock;
 		}
 		ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
 
@@ -625,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
 
 		dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
 			vf->vf_id);
-		return 0;
+		goto out_unlock;
 	}
 
-	if (flags & ICE_VF_RESET_LOCK)
-		mutex_lock(&vf->cfg_lock);
-	else
-		lockdep_assert_held(&vf->cfg_lock);
-
 	/* Set VF disable bit state here, before triggering reset */
 	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
 	ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset
  2023-08-07  9:48 ` [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset Petr Oros
@ 2023-08-08 12:51   ` Simon Horman
  2023-08-08 20:54   ` [Intel-wired-lan] " Przemek Kitszel
  1 sibling, 0 replies; 9+ messages in thread
From: Simon Horman @ 2023-08-08 12:51 UTC (permalink / raw)
  To: Petr Oros
  Cc: netdev, jesse.brandeburg, anthony.l.nguyen, davem, edumazet,
	kuba, pabeni, Jacob.e.keller, przemyslawx.patynowski,
	kamil.maziarz, dawidx.wesierski, mateusz.palczewski,
	slawomirx.laba, norbertx.zulinski, intel-wired-lan, linux-kernel

On Mon, Aug 07, 2023 at 11:48:31AM +0200, Petr Oros wrote:
> During stress test with attaching and detaching VF from KVM and
> simultaneously changing VFs spoofcheck and trust there was a
> NULL pointer dereference in ice_reset_vf that VF's VSI is null.
> 
> More than one instance of ice_reset_vf() can be running at a given
> time. When we rebuild the VSI in ice_reset_vf, another reset can be
> triaged from ice_service_task. In this case we can access the currently
> uninitialized VSI and couse panic. The window for this racing condition

nit: couse -> cause

> has been around for a long time but it's much worse after commit
> 227bf4500aaa ("ice: move VSI delete outside deconfig") because
> the reset runs faster. ice_reset_vf() using vf->cfg_lock and when
> we move this lock before accessing to the VF VSI, we can fix
> BUG for all cases.
> 
> Panic occurs sometimes in ice_vsi_is_rx_queue_active() and sometimes
> in ice_vsi_stop_all_rx_rings()
> 
> With our reproducer, we can hint BUG:
> ~8h before commit 227bf4500aaa ("ice: move VSI delete outside deconfig").
> ~20m after commit 227bf4500aaa ("ice: move VSI delete outside deconfig").
> After this fix we are not able to reproduce it after ~48h
> 
> There was commit cf90b74341ee ("ice: Fix call trace with null VSI during
> VF reset") which also tried to fix this issue, but it was only
> partially resolved and the bug still exists.
> 
> [ 6420.658415] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [ 6420.665382] #PF: supervisor read access in kernel mode
> [ 6420.670521] #PF: error_code(0x0000) - not-present page
> [ 6420.675659] PGD 0
> [ 6420.677679] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [ 6420.682038] CPU: 53 PID: 326472 Comm: kworker/53:0 Kdump: loaded Not tainted 5.14.0-317.el9.x86_64 #1
> [ 6420.691250] Hardware name: Dell Inc. PowerEdge R750/04V528, BIOS 1.6.5 04/15/2022
> [ 6420.698729] Workqueue: ice ice_service_task [ice]
> [ 6420.703462] RIP: 0010:ice_vsi_is_rx_queue_active+0x2d/0x60 [ice]
> [ 6420.705860] ice 0000:ca:00.0: VF 0 is now untrusted
> [ 6420.709494] Code: 00 00 66 83 bf 76 04 00 00 00 48 8b 77 10 74 3e 31 c0 eb 0f 0f b7 97 76 04 00 00 48 83 c0 01 39 c2 7e 2b 48 8b 97 68 04 00 00 <0f> b7 0c 42 48 8b 96 20 13 00 00 48 8d 94 8a 00 00 12 00 8b 12 83
> [ 6420.714426] ice 0000:ca:00.0 ens7f0: Setting MAC 22:22:22:22:22:00 on VF 0. VF driver will be reinitialized
> [ 6420.733120] RSP: 0018:ff778d2ff383fdd8 EFLAGS: 00010246
> [ 6420.733123] RAX: 0000000000000000 RBX: ff2acf1916294000 RCX: 0000000000000000
> [ 6420.733125] RDX: 0000000000000000 RSI: ff2acf1f2c6401a0 RDI: ff2acf1a27301828
> [ 6420.762346] RBP: ff2acf1a27301828 R08: 0000000000000010 R09: 0000000000001000
> [ 6420.769476] R10: ff2acf1916286000 R11: 00000000019eba3f R12: ff2acf19066460d0
> [ 6420.776611] R13: ff2acf1f2c6401a0 R14: ff2acf1f2c6401a0 R15: 00000000ffffffff
> [ 6420.783742] FS:  0000000000000000(0000) GS:ff2acf28ffa80000(0000) knlGS:0000000000000000
> [ 6420.791829] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 6420.797575] CR2: 0000000000000000 CR3: 00000016ad410003 CR4: 0000000000773ee0
> [ 6420.804708] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 6420.811034] vfio-pci 0000:ca:01.0: enabling device (0000 -> 0002)
> [ 6420.811840] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 6420.811841] PKRU: 55555554
> [ 6420.811842] Call Trace:
> [ 6420.811843]  <TASK>
> [ 6420.811844]  ice_reset_vf+0x9a/0x450 [ice]
> [ 6420.811876]  ice_process_vflr_event+0x8f/0xc0 [ice]
> [ 6420.841343]  ice_service_task+0x23b/0x600 [ice]
> [ 6420.845884]  ? __schedule+0x212/0x550
> [ 6420.849550]  process_one_work+0x1e2/0x3b0
> [ 6420.853563]  ? rescuer_thread+0x390/0x390
> [ 6420.857577]  worker_thread+0x50/0x3a0
> [ 6420.861242]  ? rescuer_thread+0x390/0x390
> [ 6420.865253]  kthread+0xdd/0x100
> [ 6420.868400]  ? kthread_complete_and_exit+0x20/0x20
> [ 6420.873194]  ret_from_fork+0x1f/0x30
> [ 6420.876774]  </TASK>
> [ 6420.878967] Modules linked in: vfio_pci vfio_pci_core vfio_iommu_type1 vfio iavf vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables bridge stp llc sctp ip6_udp_tunnel udp_tunnel nfp tls nfnetlink bluetooth mlx4_en mlx4_core rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp irdma kvm_intel i40e kvm iTCO_wdt dcdbas ib_uverbs irqbypass iTCO_vendor_support mgag200 mei_me ib_core dell_smbios isst_if_mmio isst_if_mbox_pci rapl i2c_algo_bit drm_shmem_helper intel_cstate drm_kms_helper syscopyarea sysfillrect isst_if_common sysimgblt intel_uncore fb_sys_fops dell_wmi_descriptor wmi_bmof intel_vsec mei i2c_i801 acpi_ipmi ipmi_si i2c_smbus ipmi_devintf intel_pch_thermal acpi_power_meter pcspkr
> 
> Fixes: efe41860008e ("ice: Fix memory corruption in VF driver")
> Fixes: f23df5220d2b ("ice: Fix spurious interrupt during removal of trusted VF")
> Signed-off-by: Petr Oros <poros@redhat.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization"
  2023-08-07  9:48 ` [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization" Petr Oros
@ 2023-08-08 12:51   ` Simon Horman
  0 siblings, 0 replies; 9+ messages in thread
From: Simon Horman @ 2023-08-08 12:51 UTC (permalink / raw)
  To: Petr Oros
  Cc: netdev, jesse.brandeburg, anthony.l.nguyen, davem, edumazet,
	kuba, pabeni, Jacob.e.keller, przemyslawx.patynowski,
	kamil.maziarz, dawidx.wesierski, mateusz.palczewski,
	slawomirx.laba, norbertx.zulinski, intel-wired-lan, linux-kernel

On Mon, Aug 07, 2023 at 11:48:30AM +0200, Petr Oros wrote:
> This reverts commit 7255355a0636b4eff08d5e8139c77d98f151c4fc.
> 
> After this commit we are not able to attach VF to VM:
> virsh attach-interface v0 hostdev --managed 0000:41:01.0 --mac 52:52:52:52:52:52
> error: Failed to attach interface
> error: Cannot set interface MAC to 52:52:52:52:52:52 for ifname enp65s0f0np0 vf 0: Resource temporarily unavailable
> 
> ice_check_vf_ready_for_cfg() already contain waiting for reset.
> New condition in ice_check_vf_ready_for_reset() causing only problems.
> 
> Fixes: 7255355a0636 ("ice: Fix ice VF reset during iavf initialization")
> Signed-off-by: Petr Oros <poros@redhat.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-wired-lan] [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset
  2023-08-07  9:48 ` [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset Petr Oros
  2023-08-08 12:51   ` Simon Horman
@ 2023-08-08 20:54   ` Przemek Kitszel
  2023-08-09 18:14     ` Petr Oros
  1 sibling, 1 reply; 9+ messages in thread
From: Przemek Kitszel @ 2023-08-08 20:54 UTC (permalink / raw)
  To: Petr Oros, netdev
  Cc: slawomirx.laba, przemyslawx.patynowski, kamil.maziarz,
	jesse.brandeburg, norbertx.zulinski, dawidx.wesierski, edumazet,
	anthony.l.nguyen, intel-wired-lan, kuba, pabeni, davem,
	linux-kernel

On 8/7/23 11:48, Petr Oros wrote:
> During stress test with attaching and detaching VF from KVM and
> simultaneously changing VFs spoofcheck and trust there was a
> NULL pointer dereference in ice_reset_vf that VF's VSI is null.
> 
> More than one instance of ice_reset_vf() can be running at a given
> time. When we rebuild the VSI in ice_reset_vf, another reset can be
> triaged from ice_service_task. In this case we can access the currently
> uninitialized VSI and couse panic. The window for this racing condition
> has been around for a long time but it's much worse after commit
> 227bf4500aaa ("ice: move VSI delete outside deconfig") because
> the reset runs faster. ice_reset_vf() using vf->cfg_lock and when
> we move this lock before accessing to the VF VSI, we can fix
> BUG for all cases.
> 
> Panic occurs sometimes in ice_vsi_is_rx_queue_active() and sometimes
> in ice_vsi_stop_all_rx_rings()
> 
> With our reproducer, we can hint BUG:

s/hint/hit/

> ~8h before commit 227bf4500aaa ("ice: move VSI delete outside deconfig").
> ~20m after commit 227bf4500aaa ("ice: move VSI delete outside deconfig").
> After this fix we are not able to reproduce it after ~48h
> 
> There was commit cf90b74341ee ("ice: Fix call trace with null VSI during
> VF reset") which also tried to fix this issue, but it was only
> partially resolved and the bug still exists.
> 
> [ 6420.658415] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [ 6420.665382] #PF: supervisor read access in kernel mode
> [ 6420.670521] #PF: error_code(0x0000) - not-present page
> [ 6420.675659] PGD 0
> [ 6420.677679] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [ 6420.682038] CPU: 53 PID: 326472 Comm: kworker/53:0 Kdump: loaded Not tainted 5.14.0-317.el9.x86_64 #1
> [ 6420.691250] Hardware name: Dell Inc. PowerEdge R750/04V528, BIOS 1.6.5 04/15/2022
> [ 6420.698729] Workqueue: ice ice_service_task [ice]
> [ 6420.703462] RIP: 0010:ice_vsi_is_rx_queue_active+0x2d/0x60 [ice]
> [ 6420.705860] ice 0000:ca:00.0: VF 0 is now untrusted
> [ 6420.709494] Code: 00 00 66 83 bf 76 04 00 00 00 48 8b 77 10 74 3e 31 c0 eb 0f 0f b7 97 76 04 00 00 48 83 c0 01 39 c2 7e 2b 48 8b 97 68 04 00 00 <0f> b7 0c 42 48 8b 96 20 13 00 00 48 8d 94 8a 00 00 12 00 8b 12 83
> [ 6420.714426] ice 0000:ca:00.0 ens7f0: Setting MAC 22:22:22:22:22:00 on VF 0. VF driver will be reinitialized
> [ 6420.733120] RSP: 0018:ff778d2ff383fdd8 EFLAGS: 00010246
> [ 6420.733123] RAX: 0000000000000000 RBX: ff2acf1916294000 RCX: 0000000000000000
> [ 6420.733125] RDX: 0000000000000000 RSI: ff2acf1f2c6401a0 RDI: ff2acf1a27301828
> [ 6420.762346] RBP: ff2acf1a27301828 R08: 0000000000000010 R09: 0000000000001000
> [ 6420.769476] R10: ff2acf1916286000 R11: 00000000019eba3f R12: ff2acf19066460d0
> [ 6420.776611] R13: ff2acf1f2c6401a0 R14: ff2acf1f2c6401a0 R15: 00000000ffffffff
> [ 6420.783742] FS:  0000000000000000(0000) GS:ff2acf28ffa80000(0000) knlGS:0000000000000000
> [ 6420.791829] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 6420.797575] CR2: 0000000000000000 CR3: 00000016ad410003 CR4: 0000000000773ee0
> [ 6420.804708] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 6420.811034] vfio-pci 0000:ca:01.0: enabling device (0000 -> 0002)
> [ 6420.811840] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 6420.811841] PKRU: 55555554
> [ 6420.811842] Call Trace:
> [ 6420.811843]  <TASK>
> [ 6420.811844]  ice_reset_vf+0x9a/0x450 [ice]
> [ 6420.811876]  ice_process_vflr_event+0x8f/0xc0 [ice]
> [ 6420.841343]  ice_service_task+0x23b/0x600 [ice]
> [ 6420.845884]  ? __schedule+0x212/0x550
> [ 6420.849550]  process_one_work+0x1e2/0x3b0
> [ 6420.853563]  ? rescuer_thread+0x390/0x390
> [ 6420.857577]  worker_thread+0x50/0x3a0
> [ 6420.861242]  ? rescuer_thread+0x390/0x390
> [ 6420.865253]  kthread+0xdd/0x100
> [ 6420.868400]  ? kthread_complete_and_exit+0x20/0x20
> [ 6420.873194]  ret_from_fork+0x1f/0x30
> [ 6420.876774]  </TASK>
> [ 6420.878967] Modules linked in: vfio_pci vfio_pci_core vfio_iommu_type1 vfio iavf vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables bridge stp llc sctp ip6_udp_tunnel udp_tunnel nfp tls nfnetlink bluetooth mlx4_en mlx4_core rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp irdma kvm_intel i40e kvm iTCO_wdt dcdbas ib_uverbs irqbypass iTCO_vendor_support mgag200 mei_me ib_core dell_smbios isst_if_mmio isst_if_mbox_pci rapl i2c_algo_bit drm_shmem_helper intel_cstate drm_kms_helper syscopyarea sysfillrect isst_if_common sysimgblt intel_uncore fb_sys_fops dell_wmi_descriptor wmi_bmof intel_vsec mei i2c_i801 acpi_ipmi ipmi_si i2c_smbus ipmi_devintf intel_pch_thermal acpi_power_meter pcspk
>   r
> 
> Fixes: efe41860008e ("ice: Fix memory corruption in VF driver")
> Fixes: f23df5220d2b ("ice: Fix spurious interrupt during removal of trusted VF")
> Signed-off-by: Petr Oros <poros@redhat.com>

Thank you for all of your testing efforts, detailed explanation,
and the fix!

Is there anything interesting about your setup/methodology?
(Asking rather to explore and extend our tests, no doubts here :)

> ---
>   drivers/net/ethernet/intel/ice/ice_vf_lib.c | 15 ++++++++-------
>   1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> index 294e91c3453ccd..ea3310be8354cf 100644
> --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> @@ -612,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
>   		return 0;
>   	}
>   
> +	if (flags & ICE_VF_RESET_LOCK)
> +		mutex_lock(&vf->cfg_lock);
> +	else
> +		lockdep_assert_held(&vf->cfg_lock);
> +
>   	if (ice_is_vf_disabled(vf)) {
>   		vsi = ice_get_vf_vsi(vf);
>   		if (!vsi) {
>   			dev_dbg(dev, "VF is already removed\n");
> -			return -EINVAL;
> +			err = -EINVAL;
> +			goto out_unlock;
>   		}
>   		ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
>   
> @@ -625,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
>   
>   		dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
>   			vf->vf_id);
> -		return 0;
> +		goto out_unlock;
>   	}
>   
> -	if (flags & ICE_VF_RESET_LOCK)
> -		mutex_lock(&vf->cfg_lock);
> -	else
> -		lockdep_assert_held(&vf->cfg_lock);
> -
>   	/* Set VF disable bit state here, before triggering reset */
>   	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
>   	ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-wired-lan] [PATCH net 0/2] Fix VF to VM attach detach
  2023-08-07  9:48 [PATCH net 0/2] Fix VF to VM attach detach Petr Oros
  2023-08-07  9:48 ` [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization" Petr Oros
  2023-08-07  9:48 ` [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset Petr Oros
@ 2023-08-08 20:56 ` Przemek Kitszel
  2023-08-08 22:10 ` Jacob Keller
  3 siblings, 0 replies; 9+ messages in thread
From: Przemek Kitszel @ 2023-08-08 20:56 UTC (permalink / raw)
  To: Petr Oros, netdev
  Cc: slawomirx.laba, przemyslawx.patynowski, kamil.maziarz,
	jesse.brandeburg, norbertx.zulinski, dawidx.wesierski, edumazet,
	anthony.l.nguyen, intel-wired-lan, kuba, pabeni, davem,
	linux-kernel

On 8/7/23 11:48, Petr Oros wrote:
> Petr Oros (2):
>    Revert "ice: Fix ice VF reset during iavf initialization"
>    ice: Fix NULL pointer deref during VF reset
> 
>   drivers/net/ethernet/intel/ice/ice_sriov.c    |  8 ++---
>   drivers/net/ethernet/intel/ice/ice_vf_lib.c   | 34 +++++--------------
>   drivers/net/ethernet/intel/ice/ice_vf_lib.h   |  1 -
>   drivers/net/ethernet/intel/ice/ice_virtchnl.c |  1 -
>   4 files changed, 12 insertions(+), 32 deletions(-)
> 

There were 3 typos reported, but this series is good anyway

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 0/2] Fix VF to VM attach detach
  2023-08-07  9:48 [PATCH net 0/2] Fix VF to VM attach detach Petr Oros
                   ` (2 preceding siblings ...)
  2023-08-08 20:56 ` [Intel-wired-lan] [PATCH net 0/2] Fix VF to VM attach detach Przemek Kitszel
@ 2023-08-08 22:10 ` Jacob Keller
  3 siblings, 0 replies; 9+ messages in thread
From: Jacob Keller @ 2023-08-08 22:10 UTC (permalink / raw)
  To: Petr Oros, netdev
  Cc: jesse.brandeburg, anthony.l.nguyen, davem, edumazet, kuba,
	pabeni, przemyslawx.patynowski, kamil.maziarz, dawidx.wesierski,
	mateusz.palczewski, slawomirx.laba, norbertx.zulinski,
	intel-wired-lan, linux-kernel



On 8/7/2023 2:48 AM, Petr Oros wrote:
> Petr Oros (2):
>   Revert "ice: Fix ice VF reset during iavf initialization"
>   ice: Fix NULL pointer deref during VF reset
> 
>  drivers/net/ethernet/intel/ice/ice_sriov.c    |  8 ++---
>  drivers/net/ethernet/intel/ice/ice_vf_lib.c   | 34 +++++--------------
>  drivers/net/ethernet/intel/ice/ice_vf_lib.h   |  1 -
>  drivers/net/ethernet/intel/ice/ice_virtchnl.c |  1 -
>  4 files changed, 12 insertions(+), 32 deletions(-)
> 


I reviewed the commit message for the reverted commit and I concur that
any sort of issue that it fixed with respect to concurrent resets is
better fixed by the 2nd commit of this series.

The motivation for the original commit appears to be to prevent a reset
from happening while the VF is still resetting. I think this motivation
is incorrect for a few reasons:

1) as described by the revert above, if the VF has not had iAVF load on
it yet (such as when its been assigned to the pass through PCI driver),
it will never initialize, and thus we can't ever reset the device.

2) It does not make sense for the PF to rely on or assume behavior of
the VF (i.e. that it will properly initialize) and I believe it should
be allowed to reset the device without regard to the state of the VF
driver. It is a bug in the VF driver if this causes problems for it. I
think we recently fixed several issues here in iAVF.

With that in mind, I think this series of fixes is good.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-wired-lan] [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset
  2023-08-08 20:54   ` [Intel-wired-lan] " Przemek Kitszel
@ 2023-08-09 18:14     ` Petr Oros
  0 siblings, 0 replies; 9+ messages in thread
From: Petr Oros @ 2023-08-09 18:14 UTC (permalink / raw)
  To: Przemek Kitszel, netdev
  Cc: slawomirx.laba, przemyslawx.patynowski, kamil.maziarz,
	jesse.brandeburg, norbertx.zulinski, dawidx.wesierski, edumazet,
	anthony.l.nguyen, intel-wired-lan, kuba, pabeni, davem,
	linux-kernel

Przemek Kitszel píše v Čt 01. 01. 1970 v 00:00 +0000:
> On 8/7/23 11:48, Petr Oros wrote:
> > During stress test with attaching and detaching VF from KVM and
> > simultaneously changing VFs spoofcheck and trust there was a
> > NULL pointer dereference in ice_reset_vf that VF's VSI is null.
> > 
> > More than one instance of ice_reset_vf() can be running at a given
> > time. When we rebuild the VSI in ice_reset_vf, another reset can be
> > triaged from ice_service_task. In this case we can access the
> > currently
> > uninitialized VSI and couse panic. The window for this racing
> > condition
> > has been around for a long time but it's much worse after commit
> > 227bf4500aaa ("ice: move VSI delete outside deconfig") because
> > the reset runs faster. ice_reset_vf() using vf->cfg_lock and when
> > we move this lock before accessing to the VF VSI, we can fix
> > BUG for all cases.
> > 
> > Panic occurs sometimes in ice_vsi_is_rx_queue_active() and
> > sometimes
> > in ice_vsi_stop_all_rx_rings()
> > 
> > With our reproducer, we can hint BUG:
> 
> s/hint/hit/
> 
> > ~8h before commit 227bf4500aaa ("ice: move VSI delete outside
> > deconfig").
> > ~20m after commit 227bf4500aaa ("ice: move VSI delete outside
> > deconfig").
> > After this fix we are not able to reproduce it after ~48h
> > 
> > There was commit cf90b74341ee ("ice: Fix call trace with null VSI
> > during
> > VF reset") which also tried to fix this issue, but it was only
> > partially resolved and the bug still exists.
> > 
> > [ 6420.658415] BUG: kernel NULL pointer dereference, address:
> > 0000000000000000
> > [ 6420.665382] #PF: supervisor read access in kernel mode
> > [ 6420.670521] #PF: error_code(0x0000) - not-present page
> > [ 6420.675659] PGD 0
> > [ 6420.677679] Oops: 0000 [#1] PREEMPT SMP NOPTI
> > [ 6420.682038] CPU: 53 PID: 326472 Comm: kworker/53:0 Kdump: loaded
> > Not tainted 5.14.0-317.el9.x86_64 #1
> > [ 6420.691250] Hardware name: Dell Inc. PowerEdge R750/04V528, BIOS
> > 1.6.5 04/15/2022
> > [ 6420.698729] Workqueue: ice ice_service_task [ice]
> > [ 6420.703462] RIP: 0010:ice_vsi_is_rx_queue_active+0x2d/0x60 [ice]
> > [ 6420.705860] ice 0000:ca:00.0: VF 0 is now untrusted
> > [ 6420.709494] Code: 00 00 66 83 bf 76 04 00 00 00 48 8b 77 10 74
> > 3e 31 c0 eb 0f 0f b7 97 76 04 00 00 48 83 c0 01 39 c2 7e 2b 48 8b
> > 97 68 04 00 00 <0f> b7 0c 42 48 8b 96 20 13 00 00 48 8d 94 8a 00 00
> > 12 00 8b 12 83
> > [ 6420.714426] ice 0000:ca:00.0 ens7f0: Setting MAC
> > 22:22:22:22:22:00 on VF 0. VF driver will be reinitialized
> > [ 6420.733120] RSP: 0018:ff778d2ff383fdd8 EFLAGS: 00010246
> > [ 6420.733123] RAX: 0000000000000000 RBX: ff2acf1916294000 RCX:
> > 0000000000000000
> > [ 6420.733125] RDX: 0000000000000000 RSI: ff2acf1f2c6401a0 RDI:
> > ff2acf1a27301828
> > [ 6420.762346] RBP: ff2acf1a27301828 R08: 0000000000000010 R09:
> > 0000000000001000
> > [ 6420.769476] R10: ff2acf1916286000 R11: 00000000019eba3f R12:
> > ff2acf19066460d0
> > [ 6420.776611] R13: ff2acf1f2c6401a0 R14: ff2acf1f2c6401a0 R15:
> > 00000000ffffffff
> > [ 6420.783742] FS:  0000000000000000(0000)
> > GS:ff2acf28ffa80000(0000) knlGS:0000000000000000
> > [ 6420.791829] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 6420.797575] CR2: 0000000000000000 CR3: 00000016ad410003 CR4:
> > 0000000000773ee0
> > [ 6420.804708] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> > 0000000000000000
> > [ 6420.811034] vfio-pci 0000:ca:01.0: enabling device (0000 ->
> > 0002)
> > [ 6420.811840] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> > 0000000000000400
> > [ 6420.811841] PKRU: 55555554
> > [ 6420.811842] Call Trace:
> > [ 6420.811843]  <TASK>
> > [ 6420.811844]  ice_reset_vf+0x9a/0x450 [ice]
> > [ 6420.811876]  ice_process_vflr_event+0x8f/0xc0 [ice]
> > [ 6420.841343]  ice_service_task+0x23b/0x600 [ice]
> > [ 6420.845884]  ? __schedule+0x212/0x550
> > [ 6420.849550]  process_one_work+0x1e2/0x3b0
> > [ 6420.853563]  ? rescuer_thread+0x390/0x390
> > [ 6420.857577]  worker_thread+0x50/0x3a0
> > [ 6420.861242]  ? rescuer_thread+0x390/0x390
> > [ 6420.865253]  kthread+0xdd/0x100
> > [ 6420.868400]  ? kthread_complete_and_exit+0x20/0x20
> > [ 6420.873194]  ret_from_fork+0x1f/0x30
> > [ 6420.876774]  </TASK>
> > [ 6420.878967] Modules linked in: vfio_pci vfio_pci_core
> > vfio_iommu_type1 vfio iavf vhost_net vhost vhost_iotlb tap tun
> > xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4
> > nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6
> > nf_defrag_ipv4 nft_counter nf_tables bridge stp llc sctp
> > ip6_udp_tunnel udp_tunnel nfp tls nfnetlink bluetooth mlx4_en
> > mlx4_core rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd
> > grace fscache netfs rfkill sunrpc intel_rapl_msr intel_rapl_common
> > i10nm_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal
> > intel_powerclamp coretemp irdma kvm_intel i40e kvm iTCO_wdt dcdbas
> > ib_uverbs irqbypass iTCO_vendor_support mgag200 mei_me ib_core
> > dell_smbios isst_if_mmio isst_if_mbox_pci rapl i2c_algo_bit
> > drm_shmem_helper intel_cstate drm_kms_helper syscopyarea
> > sysfillrect isst_if_common sysimgblt intel_uncore fb_sys_fops
> > dell_wmi_descriptor wmi_bmof intel_vsec mei i2c_i801 acpi_ipmi
> > ipmi_si i2c_smbus ipmi_devintf intel_pch_thermal acpi_power_meter
> > pcspk
> >   r
> > 
> > Fixes: efe41860008e ("ice: Fix memory corruption in VF driver")
> > Fixes: f23df5220d2b ("ice: Fix spurious interrupt during removal of
> > trusted VF")
> > Signed-off-by: Petr Oros <poros@redhat.com>
> 
> Thank you for all of your testing efforts, detailed explanation,
> and the fix!
> 
> Is there anything interesting about your setup/methodology?
> (Asking rather to explore and extend our tests, no doubts here :)

This test is part of our QE test suite. There are a large number of
"cfg stress" tests.
I can't explain all the tests, but this one in particular just created
2 threads.
One attaching/detaching VF from VM and the other sets VF parameters in
a loop (trust, spoof check, mac addr) This causes concurrent access to
resources during reset. ice/iavf are very sensitive to this type of
stress ;) 

We also using LNST, but it currently catches other types of bugs.

> 
> > ---
> >   drivers/net/ethernet/intel/ice/ice_vf_lib.c | 15 ++++++++-------
> >   1 file changed, 8 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> > b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> > index 294e91c3453ccd..ea3310be8354cf 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
> > @@ -612,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32
> > flags)
> >                 return 0;
> >         }
> >   
> > +       if (flags & ICE_VF_RESET_LOCK)
> > +               mutex_lock(&vf->cfg_lock);
> > +       else
> > +               lockdep_assert_held(&vf->cfg_lock);
> > +
> >         if (ice_is_vf_disabled(vf)) {
> >                 vsi = ice_get_vf_vsi(vf);
> >                 if (!vsi) {
> >                         dev_dbg(dev, "VF is already removed\n");
> > -                       return -EINVAL;
> > +                       err = -EINVAL;
> > +                       goto out_unlock;
> >                 }
> >                 ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf-
> > >vf_id);
> >   
> > @@ -625,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
> >   
> >                 dev_dbg(dev, "VF is already disabled, there is no
> > need for resetting it, telling VM, all is fine %d\n",
> >                         vf->vf_id);
> > -               return 0;
> > +               goto out_unlock;
> >         }
> >   
> > -       if (flags & ICE_VF_RESET_LOCK)
> > -               mutex_lock(&vf->cfg_lock);
> > -       else
> > -               lockdep_assert_held(&vf->cfg_lock);
> > -
> >         /* Set VF disable bit state here, before triggering reset
> > */
> >         set_bit(ICE_VF_STATE_DIS, vf->vf_states);
> >         ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-08-09 18:14 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-07  9:48 [PATCH net 0/2] Fix VF to VM attach detach Petr Oros
2023-08-07  9:48 ` [PATCH net 1/2] Revert "ice: Fix ice VF reset during iavf initialization" Petr Oros
2023-08-08 12:51   ` Simon Horman
2023-08-07  9:48 ` [PATCH net 2/2] ice: Fix NULL pointer deref during VF reset Petr Oros
2023-08-08 12:51   ` Simon Horman
2023-08-08 20:54   ` [Intel-wired-lan] " Przemek Kitszel
2023-08-09 18:14     ` Petr Oros
2023-08-08 20:56 ` [Intel-wired-lan] [PATCH net 0/2] Fix VF to VM attach detach Przemek Kitszel
2023-08-08 22:10 ` Jacob Keller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).