All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/4] Clean-up stale/unused hci_request.c code
@ 2022-07-27 13:58 Brian Gix
  2022-07-27 13:58 ` [PATCH v4 1/4] Bluetooth: Convert le_scan_disable timeout to hci_sync Brian Gix
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Brian Gix @ 2022-07-27 13:58 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: marcel, luiz.dentz, brian.gix

This will be a growing patch-set of conversions and dead-code removal
towards the goal of retiring hci_request.c

The patch sets will be split among the work queues and delayed work
queues as initialized in hci_request_setup(), with the ultimate goal of
eliminating hci_request.c entirely.

v2: Published

v3: Continuing work.  This does include one conversion
(SCO configure_datapath) that has been tested with mgmt-tester and
sco-tester, but has not been tested with a controller with an
off-loadable codec.

v4: Clean-up checkpatch warnings.

Brian Gix (4):
  Bluetooth: Convert le_scan_disable timeout to hci_sync
  Bluetooth: Rework le_scan_restart for hci_sync
  Bluetooth: Delete unused hci_req_stop_discovery()
  Bluetooth: Convert SCO configure_datapath to hci_sync

 net/bluetooth/hci_conn.c    |  86 +++++++++--
 net/bluetooth/hci_request.c | 282 +-----------------------------------
 net/bluetooth/hci_request.h |   4 -
 net/bluetooth/hci_sync.c    | 148 +++++++++++++++++++
 4 files changed, 224 insertions(+), 296 deletions(-)

-- 
2.37.1


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v4 1/4] Bluetooth: Convert le_scan_disable timeout to hci_sync
  2022-07-27 13:58 [PATCH v4 0/4] Clean-up stale/unused hci_request.c code Brian Gix
@ 2022-07-27 13:58 ` Brian Gix
  2022-07-27 14:30   ` Clean-up stale/unused hci_request.c code bluez.test.bot
  2022-07-27 13:58 ` [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync Brian Gix
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Brian Gix @ 2022-07-27 13:58 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: marcel, luiz.dentz, brian.gix

The le_scan_disable timeout was being performed using the deprecated
hci_request.c mechanism.  This timeout is now performed in hci_sync.c

Signed-off-by: Brian Gix <brian.gix@intel.com>
---
 net/bluetooth/hci_request.c | 98 +------------------------------------
 net/bluetooth/hci_sync.c    | 73 +++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 97 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e64d558e5d69..32fefaa0d3ca 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -269,6 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
 		 const void *param)
 {
+	bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode);
 	hci_req_add_ev(req, opcode, plen, param, 0);
 }
 
@@ -1974,101 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
 	return 0;
 }
 
-static int le_scan_disable(struct hci_request *req, unsigned long opt)
-{
-	hci_req_add_le_scan_disable(req, false);
-	return 0;
-}
-
-static int bredr_inquiry(struct hci_request *req, unsigned long opt)
-{
-	u8 length = opt;
-	const u8 giac[3] = { 0x33, 0x8b, 0x9e };
-	const u8 liac[3] = { 0x00, 0x8b, 0x9e };
-	struct hci_cp_inquiry cp;
-
-	if (test_bit(HCI_INQUIRY, &req->hdev->flags))
-		return 0;
-
-	bt_dev_dbg(req->hdev, "");
-
-	hci_dev_lock(req->hdev);
-	hci_inquiry_cache_flush(req->hdev);
-	hci_dev_unlock(req->hdev);
-
-	memset(&cp, 0, sizeof(cp));
-
-	if (req->hdev->discovery.limited)
-		memcpy(&cp.lap, liac, sizeof(cp.lap));
-	else
-		memcpy(&cp.lap, giac, sizeof(cp.lap));
-
-	cp.length = length;
-
-	hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
-
-	return 0;
-}
-
-static void le_scan_disable_work(struct work_struct *work)
-{
-	struct hci_dev *hdev = container_of(work, struct hci_dev,
-					    le_scan_disable.work);
-	u8 status;
-
-	bt_dev_dbg(hdev, "");
-
-	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
-		return;
-
-	cancel_delayed_work(&hdev->le_scan_restart);
-
-	hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
-	if (status) {
-		bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
-			   status);
-		return;
-	}
-
-	hdev->discovery.scan_start = 0;
-
-	/* If we were running LE only scan, change discovery state. If
-	 * we were running both LE and BR/EDR inquiry simultaneously,
-	 * and BR/EDR inquiry is already finished, stop discovery,
-	 * otherwise BR/EDR inquiry will stop discovery when finished.
-	 * If we will resolve remote device name, do not change
-	 * discovery state.
-	 */
-
-	if (hdev->discovery.type == DISCOV_TYPE_LE)
-		goto discov_stopped;
-
-	if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
-		return;
-
-	if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
-		if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
-		    hdev->discovery.state != DISCOVERY_RESOLVING)
-			goto discov_stopped;
-
-		return;
-	}
-
-	hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
-		     HCI_CMD_TIMEOUT, &status);
-	if (status) {
-		bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
-		goto discov_stopped;
-	}
-
-	return;
-
-discov_stopped:
-	hci_dev_lock(hdev);
-	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
-	hci_dev_unlock(hdev);
-}
-
 static int le_scan_restart(struct hci_request *req, unsigned long opt)
 {
 	struct hci_dev *hdev = req->hdev;
@@ -2252,7 +2158,6 @@ int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec)
 
 void hci_request_setup(struct hci_dev *hdev)
 {
-	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
 	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
 	INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
 	INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
@@ -2262,7 +2167,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
 {
 	__hci_cmd_sync_cancel(hdev, ENODEV);
 
-	cancel_delayed_work_sync(&hdev->le_scan_disable);
 	cancel_delayed_work_sync(&hdev->le_scan_restart);
 
 	if (hdev->adv_instance_timeout) {
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 148ce629a59f..7dae2ee1bb82 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -321,6 +321,77 @@ static void hci_cmd_sync_cancel_work(struct work_struct *work)
 	wake_up_interruptible(&hdev->req_wait_q);
 }
 
+static int hci_scan_disable_sync(struct hci_dev *hdev);
+static int scan_disable_sync(struct hci_dev *hdev, void *data)
+{
+	return hci_scan_disable_sync(hdev);
+}
+
+static int hci_inquiry_sync(struct hci_dev *hdev, u8 length);
+static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data)
+{
+	return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN);
+}
+
+static void le_scan_disable(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev,
+					    le_scan_disable.work);
+	int status;
+
+	bt_dev_dbg(hdev, "");
+	hci_dev_lock(hdev);
+
+	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+		goto _return;
+
+	cancel_delayed_work(&hdev->le_scan_restart);
+
+	status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL);
+	if (status) {
+		bt_dev_err(hdev, "failed to disable LE scan: %d", status);
+		goto _return;
+	}
+
+	hdev->discovery.scan_start = 0;
+
+	/* If we were running LE only scan, change discovery state. If
+	 * we were running both LE and BR/EDR inquiry simultaneously,
+	 * and BR/EDR inquiry is already finished, stop discovery,
+	 * otherwise BR/EDR inquiry will stop discovery when finished.
+	 * If we will resolve remote device name, do not change
+	 * discovery state.
+	 */
+
+	if (hdev->discovery.type == DISCOV_TYPE_LE)
+		goto discov_stopped;
+
+	if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
+		goto _return;
+
+	if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
+		if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
+		    hdev->discovery.state != DISCOVERY_RESOLVING)
+			goto discov_stopped;
+
+		goto _return;
+	}
+
+	status = hci_cmd_sync_queue(hdev, interleaved_inquiry_sync, NULL, NULL);
+	if (status) {
+		bt_dev_err(hdev, "inquiry failed: status %d", status);
+		goto discov_stopped;
+	}
+
+	goto _return;
+
+discov_stopped:
+	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+
+_return:
+	hci_dev_unlock(hdev);
+}
+
 void hci_cmd_sync_init(struct hci_dev *hdev)
 {
 	INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
@@ -328,6 +399,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
 	mutex_init(&hdev->cmd_sync_work_lock);
 
 	INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
+	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
 }
 
 void hci_cmd_sync_clear(struct hci_dev *hdev)
@@ -4415,6 +4487,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
 
 	cancel_delayed_work(&hdev->power_off);
 	cancel_delayed_work(&hdev->ncmd_timer);
+	cancel_delayed_work(&hdev->le_scan_disable);
 
 	hci_request_cancel_all(hdev);
 
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2022-07-27 13:58 [PATCH v4 0/4] Clean-up stale/unused hci_request.c code Brian Gix
  2022-07-27 13:58 ` [PATCH v4 1/4] Bluetooth: Convert le_scan_disable timeout to hci_sync Brian Gix
@ 2022-07-27 13:58 ` Brian Gix
  2023-06-15 12:06   ` Stefan Agner
  2022-07-27 13:58 ` [PATCH v4 3/4] Bluetooth: Delete unused hci_req_stop_discovery() Brian Gix
  2022-07-27 13:58 ` [PATCH v4 4/4] Bluetooth: Convert SCO configure_datapath to hci_sync Brian Gix
  3 siblings, 1 reply; 19+ messages in thread
From: Brian Gix @ 2022-07-27 13:58 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: marcel, luiz.dentz, brian.gix

le_scan_restart delayed work queue was running as a deprecated
hci_request instead of on the newer thread-safe hci_sync mechanism.

Signed-off-by: Brian Gix <brian.gix@intel.com>
---
 net/bluetooth/hci_request.c | 89 -------------------------------------
 net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 89 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 32fefaa0d3ca..114af7350363 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
 	return 0;
 }
 
-static int le_scan_restart(struct hci_request *req, unsigned long opt)
-{
-	struct hci_dev *hdev = req->hdev;
-
-	/* If controller is not scanning we are done. */
-	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
-		return 0;
-
-	if (hdev->scanning_paused) {
-		bt_dev_dbg(hdev, "Scanning is paused for suspend");
-		return 0;
-	}
-
-	hci_req_add_le_scan_disable(req, false);
-
-	if (use_ext_scan(hdev)) {
-		struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
-
-		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
-		ext_enable_cp.enable = LE_SCAN_ENABLE;
-		ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
-
-		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
-			    sizeof(ext_enable_cp), &ext_enable_cp);
-	} else {
-		struct hci_cp_le_set_scan_enable cp;
-
-		memset(&cp, 0, sizeof(cp));
-		cp.enable = LE_SCAN_ENABLE;
-		cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
-		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
-	}
-
-	return 0;
-}
-
-static void le_scan_restart_work(struct work_struct *work)
-{
-	struct hci_dev *hdev = container_of(work, struct hci_dev,
-					    le_scan_restart.work);
-	unsigned long timeout, duration, scan_start, now;
-	u8 status;
-
-	bt_dev_dbg(hdev, "");
-
-	hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
-	if (status) {
-		bt_dev_err(hdev, "failed to restart LE scan: status %d",
-			   status);
-		return;
-	}
-
-	hci_dev_lock(hdev);
-
-	if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
-	    !hdev->discovery.scan_start)
-		goto unlock;
-
-	/* When the scan was started, hdev->le_scan_disable has been queued
-	 * after duration from scan_start. During scan restart this job
-	 * has been canceled, and we need to queue it again after proper
-	 * timeout, to make sure that scan does not run indefinitely.
-	 */
-	duration = hdev->discovery.scan_duration;
-	scan_start = hdev->discovery.scan_start;
-	now = jiffies;
-	if (now - scan_start <= duration) {
-		int elapsed;
-
-		if (now >= scan_start)
-			elapsed = now - scan_start;
-		else
-			elapsed = ULONG_MAX - scan_start + now;
-
-		timeout = duration - elapsed;
-	} else {
-		timeout = 0;
-	}
-
-	queue_delayed_work(hdev->req_workqueue,
-			   &hdev->le_scan_disable, timeout);
-
-unlock:
-	hci_dev_unlock(hdev);
-}
-
 bool hci_req_stop_discovery(struct hci_request *req)
 {
 	struct hci_dev *hdev = req->hdev;
@@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec)
 
 void hci_request_setup(struct hci_dev *hdev)
 {
-	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
 	INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
 	INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
 }
@@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
 {
 	__hci_cmd_sync_cancel(hdev, ENODEV);
 
-	cancel_delayed_work_sync(&hdev->le_scan_restart);
-
 	if (hdev->adv_instance_timeout) {
 		cancel_delayed_work_sync(&hdev->adv_instance_expire);
 		hdev->adv_instance_timeout = 0;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 7dae2ee1bb82..19d57ec0feb8 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
 	hci_dev_unlock(hdev);
 }
 
+static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
+				       u8 filter_dup);
+static int hci_le_scan_restart_sync(struct hci_dev *hdev)
+{
+	/* If controller is not scanning we are done. */
+	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+		return 0;
+
+	if (hdev->scanning_paused) {
+		bt_dev_dbg(hdev, "Scanning is paused for suspend");
+		return 0;
+	}
+
+	hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
+	return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
+					   LE_SCAN_FILTER_DUP_ENABLE);
+}
+
+static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
+{
+	return hci_le_scan_restart_sync(hdev);
+}
+
+static void le_scan_restart(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev,
+					    le_scan_restart.work);
+	unsigned long timeout, duration, scan_start, now;
+	int status;
+
+	bt_dev_dbg(hdev, "");
+
+	hci_dev_lock(hdev);
+
+	status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
+	if (status) {
+		bt_dev_err(hdev, "failed to restart LE scan: status %d",
+			   status);
+		goto unlock;
+	}
+
+	if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
+	    !hdev->discovery.scan_start)
+		goto unlock;
+
+	/* When the scan was started, hdev->le_scan_disable has been queued
+	 * after duration from scan_start. During scan restart this job
+	 * has been canceled, and we need to queue it again after proper
+	 * timeout, to make sure that scan does not run indefinitely.
+	 */
+	duration = hdev->discovery.scan_duration;
+	scan_start = hdev->discovery.scan_start;
+	now = jiffies;
+	if (now - scan_start <= duration) {
+		int elapsed;
+
+		if (now >= scan_start)
+			elapsed = now - scan_start;
+		else
+			elapsed = ULONG_MAX - scan_start + now;
+
+		timeout = duration - elapsed;
+	} else {
+		timeout = 0;
+	}
+
+	queue_delayed_work(hdev->req_workqueue,
+			   &hdev->le_scan_disable, timeout);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
 void hci_cmd_sync_init(struct hci_dev *hdev)
 {
 	INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
@@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
 
 	INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
 	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
+	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
 }
 
 void hci_cmd_sync_clear(struct hci_dev *hdev)
@@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
 	cancel_delayed_work(&hdev->power_off);
 	cancel_delayed_work(&hdev->ncmd_timer);
 	cancel_delayed_work(&hdev->le_scan_disable);
+	cancel_delayed_work(&hdev->le_scan_restart);
 
 	hci_request_cancel_all(hdev);
 
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v4 3/4] Bluetooth: Delete unused hci_req_stop_discovery()
  2022-07-27 13:58 [PATCH v4 0/4] Clean-up stale/unused hci_request.c code Brian Gix
  2022-07-27 13:58 ` [PATCH v4 1/4] Bluetooth: Convert le_scan_disable timeout to hci_sync Brian Gix
  2022-07-27 13:58 ` [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync Brian Gix
@ 2022-07-27 13:58 ` Brian Gix
  2022-07-27 13:58 ` [PATCH v4 4/4] Bluetooth: Convert SCO configure_datapath to hci_sync Brian Gix
  3 siblings, 0 replies; 19+ messages in thread
From: Brian Gix @ 2022-07-27 13:58 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: marcel, luiz.dentz, brian.gix

hci_req_stop_discovery has been deprecated in favor of
hci_stop_discovery_sync() as part of transition to hci_sync.c

Signed-off-by: Brian Gix <brian.gix@intel.com>
---
 net/bluetooth/hci_request.c | 48 -------------------------------------
 net/bluetooth/hci_request.h |  2 --
 2 files changed, 50 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 114af7350363..ef0a5ec067b6 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1975,54 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
 	return 0;
 }
 
-bool hci_req_stop_discovery(struct hci_request *req)
-{
-	struct hci_dev *hdev = req->hdev;
-	struct discovery_state *d = &hdev->discovery;
-	struct hci_cp_remote_name_req_cancel cp;
-	struct inquiry_entry *e;
-	bool ret = false;
-
-	bt_dev_dbg(hdev, "state %u", hdev->discovery.state);
-
-	if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) {
-		if (test_bit(HCI_INQUIRY, &hdev->flags))
-			hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
-
-		if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
-			cancel_delayed_work(&hdev->le_scan_disable);
-			cancel_delayed_work(&hdev->le_scan_restart);
-			hci_req_add_le_scan_disable(req, false);
-		}
-
-		ret = true;
-	} else {
-		/* Passive scanning */
-		if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
-			hci_req_add_le_scan_disable(req, false);
-			ret = true;
-		}
-	}
-
-	/* No further actions needed for LE-only discovery */
-	if (d->type == DISCOV_TYPE_LE)
-		return ret;
-
-	if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) {
-		e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
-						     NAME_PENDING);
-		if (!e)
-			return ret;
-
-		bacpy(&cp.bdaddr, &e->data.bdaddr);
-		hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
-			    &cp);
-		ret = true;
-	}
-
-	return ret;
-}
-
 static void config_data_path_complete(struct hci_dev *hdev, u8 status,
 				      u16 opcode)
 {
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 39d001fa3acf..faf6d9a51a91 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -113,8 +113,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
 void __hci_req_update_class(struct hci_request *req);
 
 /* Returns true if HCI commands were queued */
-bool hci_req_stop_discovery(struct hci_request *req);
-
 int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec);
 
 void __hci_req_update_scan(struct hci_request *req);
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v4 4/4] Bluetooth: Convert SCO configure_datapath to hci_sync
  2022-07-27 13:58 [PATCH v4 0/4] Clean-up stale/unused hci_request.c code Brian Gix
                   ` (2 preceding siblings ...)
  2022-07-27 13:58 ` [PATCH v4 3/4] Bluetooth: Delete unused hci_req_stop_discovery() Brian Gix
@ 2022-07-27 13:58 ` Brian Gix
  3 siblings, 0 replies; 19+ messages in thread
From: Brian Gix @ 2022-07-27 13:58 UTC (permalink / raw)
  To: linux-bluetooth; +Cc: marcel, luiz.dentz, brian.gix

Recode the HCI commands that offload the SCO codec to use the hci_sync
mechanism rather than the deprecated hci_request mechanism.

Signed-off-by: Brian Gix <brian.gix@intel.com>
---
 net/bluetooth/hci_conn.c    | 86 ++++++++++++++++++++++++++++++++-----
 net/bluetooth/hci_request.c | 47 --------------------
 net/bluetooth/hci_request.h |  2 -
 3 files changed, 75 insertions(+), 60 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f54864e19866..2bda50d9e3ab 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -44,6 +44,11 @@ struct sco_param {
 	u8  retrans_effort;
 };
 
+struct conn_handle_t {
+	struct hci_conn *conn;
+	__u16 handle;
+};
+
 static const struct sco_param esco_param_cvsd[] = {
 	{ EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a,	0x01 }, /* S3 */
 	{ EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007,	0x01 }, /* S2 */
@@ -316,17 +321,60 @@ static bool find_next_esco_param(struct hci_conn *conn,
 	return conn->attempt <= size;
 }
 
-static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
+static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
 {
-	struct hci_dev *hdev = conn->hdev;
+	int err;
+	__u8 vnd_len, *vnd_data = NULL;
+	struct hci_op_configure_data_path *cmd = NULL;
+
+	err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
+					  &vnd_data);
+	if (err < 0)
+		goto error;
+
+	cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL);
+	if (!cmd) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	err = hdev->get_data_path_id(hdev, &cmd->data_path_id);
+	if (err < 0)
+		goto error;
+
+	cmd->vnd_len = vnd_len;
+	memcpy(cmd->vnd_data, vnd_data, vnd_len);
+
+	cmd->direction = 0x00;
+	__hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH,
+			      sizeof(*cmd) + vnd_len, cmd, HCI_CMD_TIMEOUT);
+
+	cmd->direction = 0x01;
+	err = __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH,
+				    sizeof(*cmd) + vnd_len, cmd,
+				    HCI_CMD_TIMEOUT);
+error:
+
+	kfree(cmd);
+	kfree(vnd_data);
+	return err;
+}
+
+static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data)
+{
+	struct conn_handle_t *conn_handle = data;
+	struct hci_conn *conn = conn_handle->conn;
+	__u16 handle = conn_handle->handle;
 	struct hci_cp_enhanced_setup_sync_conn cp;
 	const struct sco_param *param;
 
+	kfree(conn_handle);
+
 	bt_dev_dbg(hdev, "hcon %p", conn);
 
 	/* for offload use case, codec needs to configured before opening SCO */
 	if (conn->codec.data_path)
-		hci_req_configure_datapath(hdev, &conn->codec);
+		configure_datapath_sync(hdev, &conn->codec);
 
 	conn->state = BT_CONNECT;
 	conn->out = true;
@@ -344,7 +392,7 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
 	case BT_CODEC_MSBC:
 		if (!find_next_esco_param(conn, esco_param_msbc,
 					  ARRAY_SIZE(esco_param_msbc)))
-			return false;
+			return -EINVAL;
 
 		param = &esco_param_msbc[conn->attempt - 1];
 		cp.tx_coding_format.id = 0x05;
@@ -396,11 +444,11 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
 		if (lmp_esco_capable(conn->link)) {
 			if (!find_next_esco_param(conn, esco_param_cvsd,
 						  ARRAY_SIZE(esco_param_cvsd)))
-				return false;
+				return -EINVAL;
 			param = &esco_param_cvsd[conn->attempt - 1];
 		} else {
 			if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
-				return false;
+				return -EINVAL;
 			param = &sco_param_cvsd[conn->attempt - 1];
 		}
 		cp.tx_coding_format.id = 2;
@@ -423,7 +471,7 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
 		cp.out_transport_unit_size = 16;
 		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	cp.retrans_effort = param->retrans_effort;
@@ -431,9 +479,9 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
 	cp.max_latency = __cpu_to_le16(param->max_latency);
 
 	if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0)
-		return false;
+		return -EIO;
 
-	return true;
+	return 0;
 }
 
 static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
@@ -490,8 +538,24 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
 
 bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
 {
-	if (enhanced_sync_conn_capable(conn->hdev))
-		return hci_enhanced_setup_sync_conn(conn, handle);
+	int result;
+	struct conn_handle_t *conn_handle;
+
+	if (enhanced_sync_conn_capable(conn->hdev)) {
+		conn_handle = kzalloc(sizeof(*conn_handle), GFP_KERNEL);
+
+		if (!conn_handle)
+			return false;
+
+		conn_handle->conn = conn;
+		conn_handle->handle = handle;
+		result = hci_cmd_sync_queue(conn->hdev, hci_enhanced_setup_sync,
+					    conn_handle, NULL);
+		if (result < 0)
+			kfree(conn_handle);
+
+		return result == 0;
+	}
 
 	return hci_setup_sync_conn(conn, handle);
 }
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index ef0a5ec067b6..d14e50951aec 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1975,53 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
 	return 0;
 }
 
-static void config_data_path_complete(struct hci_dev *hdev, u8 status,
-				      u16 opcode)
-{
-	bt_dev_dbg(hdev, "status %u", status);
-}
-
-int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec)
-{
-	struct hci_request req;
-	int err;
-	__u8 vnd_len, *vnd_data = NULL;
-	struct hci_op_configure_data_path *cmd = NULL;
-
-	hci_req_init(&req, hdev);
-
-	err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
-					  &vnd_data);
-	if (err < 0)
-		goto error;
-
-	cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL);
-	if (!cmd) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	err = hdev->get_data_path_id(hdev, &cmd->data_path_id);
-	if (err < 0)
-		goto error;
-
-	cmd->vnd_len = vnd_len;
-	memcpy(cmd->vnd_data, vnd_data, vnd_len);
-
-	cmd->direction = 0x00;
-	hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd);
-
-	cmd->direction = 0x01;
-	hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd);
-
-	err = hci_req_run(&req, config_data_path_complete);
-error:
-
-	kfree(cmd);
-	kfree(vnd_data);
-	return err;
-}
-
 void hci_request_setup(struct hci_dev *hdev)
 {
 	INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index faf6d9a51a91..41e0b84f2042 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -113,8 +113,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
 void __hci_req_update_class(struct hci_request *req);
 
 /* Returns true if HCI commands were queued */
-int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec);
-
 void __hci_req_update_scan(struct hci_request *req);
 
 int hci_update_random_address(struct hci_request *req, bool require_privacy,
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* RE: Clean-up stale/unused hci_request.c code
  2022-07-27 13:58 ` [PATCH v4 1/4] Bluetooth: Convert le_scan_disable timeout to hci_sync Brian Gix
@ 2022-07-27 14:30   ` bluez.test.bot
  0 siblings, 0 replies; 19+ messages in thread
From: bluez.test.bot @ 2022-07-27 14:30 UTC (permalink / raw)
  To: linux-bluetooth, brian.gix

[-- Attachment #1: Type: text/plain, Size: 1100 bytes --]

This is an automated email; please do not reply to it!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
These are the CI test results for your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=663448

---Test result---

Test Summary:
CheckPatch                    PASS      6.94 seconds
GitLint                       PASS      3.40 seconds
SubjectPrefix                 PASS      2.37 seconds
BuildKernel                   PASS      40.29 seconds
BuildKernel32                 PASS      35.21 seconds
Incremental Build with patches PASS      84.66 seconds
TestRunner: Setup             PASS      582.65 seconds
TestRunner: l2cap-tester      PASS      19.53 seconds
TestRunner: bnep-tester       PASS      7.70 seconds
TestRunner: mgmt-tester       PASS      115.78 seconds
TestRunner: rfcomm-tester     PASS      11.30 seconds
TestRunner: sco-tester        PASS      11.03 seconds
TestRunner: smp-tester        PASS      11.06 seconds
TestRunner: userchan-tester   PASS      8.14 seconds



---
Regards,
Linux Bluetooth


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2022-07-27 13:58 ` [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync Brian Gix
@ 2023-06-15 12:06   ` Stefan Agner
  2023-06-15 12:47     ` Linux regression tracking #adding (Thorsten Leemhuis)
  2023-06-15 17:27     ` Luiz Augusto von Dentz
  0 siblings, 2 replies; 19+ messages in thread
From: Stefan Agner @ 2023-06-15 12:06 UTC (permalink / raw)
  To: Brian Gix
  Cc: linux-bluetooth, marcel, luiz.dentz, Regressions, Jan Čermák

Hi Brian, hi all,

We experienced quite a few Bluetooth issues after moving from Linux 5.15
to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
is a popular choice in our community, so it might just be that). When
continuously scanning/listening for BLE packets, the packet flow
suddenly ends. Depending on which and how many devices (and possibly
other factors), this happens within minutes or hours.

Jan (in cc) was able to bisect the issue, and was able to pinpoint the
problem to this change.

Meanwhile I was able to confirm, that reverting this single commit on
the latest 6.1.34 seems to resolve the issue.

I've reviewed the change and surrounding code, and one thing I've
noticed is that the if statements that set cp.filter_dup in
hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
different. Not sure if that needs to be the way it is, but my outside
gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
hci_dev_test_flag(hdev, HCI_MESH))" as well.

However, that did not fix the problem (but maybe it is wrong
nonetheless?).

Does anyone have an idea what could be the problem here?

--
Stefan

On 2022-07-27 15:58, Brian Gix wrote:
> le_scan_restart delayed work queue was running as a deprecated
> hci_request instead of on the newer thread-safe hci_sync mechanism.
> 
> Signed-off-by: Brian Gix <brian.gix@intel.com>
> ---
>  net/bluetooth/hci_request.c | 89 -------------------------------------
>  net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
>  2 files changed, 75 insertions(+), 89 deletions(-)
> 
> diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
> index 32fefaa0d3ca..114af7350363 100644
> --- a/net/bluetooth/hci_request.c
> +++ b/net/bluetooth/hci_request.c
> @@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
>  	return 0;
>  }
>  
> -static int le_scan_restart(struct hci_request *req, unsigned long opt)
> -{
> -	struct hci_dev *hdev = req->hdev;
> -
> -	/* If controller is not scanning we are done. */
> -	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> -		return 0;
> -
> -	if (hdev->scanning_paused) {
> -		bt_dev_dbg(hdev, "Scanning is paused for suspend");
> -		return 0;
> -	}
> -
> -	hci_req_add_le_scan_disable(req, false);
> -
> -	if (use_ext_scan(hdev)) {
> -		struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
> -
> -		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
> -		ext_enable_cp.enable = LE_SCAN_ENABLE;
> -		ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> -
> -		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
> -			    sizeof(ext_enable_cp), &ext_enable_cp);
> -	} else {
> -		struct hci_cp_le_set_scan_enable cp;
> -
> -		memset(&cp, 0, sizeof(cp));
> -		cp.enable = LE_SCAN_ENABLE;
> -		cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> -		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
> -	}
> -
> -	return 0;
> -}
> -
> -static void le_scan_restart_work(struct work_struct *work)
> -{
> -	struct hci_dev *hdev = container_of(work, struct hci_dev,
> -					    le_scan_restart.work);
> -	unsigned long timeout, duration, scan_start, now;
> -	u8 status;
> -
> -	bt_dev_dbg(hdev, "");
> -
> -	hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
> -	if (status) {
> -		bt_dev_err(hdev, "failed to restart LE scan: status %d",
> -			   status);
> -		return;
> -	}
> -
> -	hci_dev_lock(hdev);
> -
> -	if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> -	    !hdev->discovery.scan_start)
> -		goto unlock;
> -
> -	/* When the scan was started, hdev->le_scan_disable has been queued
> -	 * after duration from scan_start. During scan restart this job
> -	 * has been canceled, and we need to queue it again after proper
> -	 * timeout, to make sure that scan does not run indefinitely.
> -	 */
> -	duration = hdev->discovery.scan_duration;
> -	scan_start = hdev->discovery.scan_start;
> -	now = jiffies;
> -	if (now - scan_start <= duration) {
> -		int elapsed;
> -
> -		if (now >= scan_start)
> -			elapsed = now - scan_start;
> -		else
> -			elapsed = ULONG_MAX - scan_start + now;
> -
> -		timeout = duration - elapsed;
> -	} else {
> -		timeout = 0;
> -	}
> -
> -	queue_delayed_work(hdev->req_workqueue,
> -			   &hdev->le_scan_disable, timeout);
> -
> -unlock:
> -	hci_dev_unlock(hdev);
> -}
> -
>  bool hci_req_stop_discovery(struct hci_request *req)
>  {
>  	struct hci_dev *hdev = req->hdev;
> @@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev
> *hdev, struct bt_codec *codec)
>  
>  void hci_request_setup(struct hci_dev *hdev)
>  {
> -	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
>  	INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
>  	INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
>  }
> @@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
>  {
>  	__hci_cmd_sync_cancel(hdev, ENODEV);
>  
> -	cancel_delayed_work_sync(&hdev->le_scan_restart);
> -
>  	if (hdev->adv_instance_timeout) {
>  		cancel_delayed_work_sync(&hdev->adv_instance_expire);
>  		hdev->adv_instance_timeout = 0;
> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> index 7dae2ee1bb82..19d57ec0feb8 100644
> --- a/net/bluetooth/hci_sync.c
> +++ b/net/bluetooth/hci_sync.c
> @@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
>  	hci_dev_unlock(hdev);
>  }
>  
> +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
> +				       u8 filter_dup);
> +static int hci_le_scan_restart_sync(struct hci_dev *hdev)
> +{
> +	/* If controller is not scanning we are done. */
> +	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> +		return 0;
> +
> +	if (hdev->scanning_paused) {
> +		bt_dev_dbg(hdev, "Scanning is paused for suspend");
> +		return 0;
> +	}
> +
> +	hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
> +	return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
> +					   LE_SCAN_FILTER_DUP_ENABLE);
> +}
> +
> +static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
> +{
> +	return hci_le_scan_restart_sync(hdev);
> +}
> +
> +static void le_scan_restart(struct work_struct *work)
> +{
> +	struct hci_dev *hdev = container_of(work, struct hci_dev,
> +					    le_scan_restart.work);
> +	unsigned long timeout, duration, scan_start, now;
> +	int status;
> +
> +	bt_dev_dbg(hdev, "");
> +
> +	hci_dev_lock(hdev);
> +
> +	status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
> +	if (status) {
> +		bt_dev_err(hdev, "failed to restart LE scan: status %d",
> +			   status);
> +		goto unlock;
> +	}
> +
> +	if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> +	    !hdev->discovery.scan_start)
> +		goto unlock;
> +
> +	/* When the scan was started, hdev->le_scan_disable has been queued
> +	 * after duration from scan_start. During scan restart this job
> +	 * has been canceled, and we need to queue it again after proper
> +	 * timeout, to make sure that scan does not run indefinitely.
> +	 */
> +	duration = hdev->discovery.scan_duration;
> +	scan_start = hdev->discovery.scan_start;
> +	now = jiffies;
> +	if (now - scan_start <= duration) {
> +		int elapsed;
> +
> +		if (now >= scan_start)
> +			elapsed = now - scan_start;
> +		else
> +			elapsed = ULONG_MAX - scan_start + now;
> +
> +		timeout = duration - elapsed;
> +	} else {
> +		timeout = 0;
> +	}
> +
> +	queue_delayed_work(hdev->req_workqueue,
> +			   &hdev->le_scan_disable, timeout);
> +
> +unlock:
> +	hci_dev_unlock(hdev);
> +}
> +
>  void hci_cmd_sync_init(struct hci_dev *hdev)
>  {
>  	INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
> @@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
>  
>  	INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
>  	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
> +	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
>  }
>  
>  void hci_cmd_sync_clear(struct hci_dev *hdev)
> @@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
>  	cancel_delayed_work(&hdev->power_off);
>  	cancel_delayed_work(&hdev->ncmd_timer);
>  	cancel_delayed_work(&hdev->le_scan_disable);
> +	cancel_delayed_work(&hdev->le_scan_restart);
>  
>  	hci_request_cancel_all(hdev);

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-06-15 12:06   ` Stefan Agner
@ 2023-06-15 12:47     ` Linux regression tracking #adding (Thorsten Leemhuis)
  2023-06-15 14:47       ` Jan Čermák
  2023-06-15 17:27     ` Luiz Augusto von Dentz
  1 sibling, 1 reply; 19+ messages in thread
From: Linux regression tracking #adding (Thorsten Leemhuis) @ 2023-06-15 12:47 UTC (permalink / raw)
  To: Stefan Agner, Brian Gix
  Cc: linux-bluetooth, marcel, luiz.dentz, Regressions, Jan Čermák

[TLDR: I'm adding this report to the list of tracked Linux kernel
regressions; the text you find below is based on a few template
paragraphs you might have encountered already in similar form.
See link in footer if these mails annoy you.]

On 15.06.23 14:06, Stefan Agner wrote:
> Hi Brian, hi all,
> 
> We experienced quite some Bluetooth issues after moving from Linux 5.15
> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
> is a popular choice in our community, so it might just be that). When
> continuously scanning/listening for BLE packets, the packet flow
> suddenly ends. Depending on which and how many devices (possibly also
> other factors) within minutes or hours.
> 
> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
> problem to this change.
> 
> Meanwhile I was able to confirm, that reverting this single commit on
> the latest 6.1.34 seems to resolve the issue.

FWIW & BTW: might be wise to also check if latest mainline still shows
the problem, as explained on this page:

https://linux-regtracking.leemhuis.info/post/frequent-reasons-why-linux-kernel-bug-reports-are-ignored/

> I've reviewed the change and surrounding code, and one thing I've
> noticed is that the if statement to set cp.filter_dup in
> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
> different. Not sure if that needs to be the way it is, but my outside
> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
> hci_dev_test_flag(hdev, HCI_MESH))" as well. 
> 
> However, that did not fix the problem (but maybe it is wrong
> nonetheless?).
> 
> Anyone has an idea what could be the problem here?

Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:

#regzbot ^introduced 27d54b778ad
#regzbot title net/bluetooth: packet flow suddenly ends when
continuously scanning/listening for BLE packets
#regzbot ignore-activity

This isn't a regression? This issue or a fix for it are already
discussed somewhere else? It was fixed already? You want to clarify when
the regression started to happen? Or point out I got the title or
something else totally wrong? Then just reply and tell me -- ideally
while also telling regzbot about it, as explained by the page listed in
the footer of this mail.

Developers: When fixing the issue, remember to add 'Link:' tags pointing
to the report (the parent of this mail). See page linked in footer for
details.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.

> On 2022-07-27 15:58, Brian Gix wrote:
>> le_scan_restart delayed work queue was running as a deprecated
>> hci_request instead of on the newer thread-safe hci_sync mechanism.
>>
>> Signed-off-by: Brian Gix <brian.gix@intel.com>
>> ---
>>  net/bluetooth/hci_request.c | 89 -------------------------------------
>>  net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
>>  2 files changed, 75 insertions(+), 89 deletions(-)
>>
>> diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
>> index 32fefaa0d3ca..114af7350363 100644
>> --- a/net/bluetooth/hci_request.c
>> +++ b/net/bluetooth/hci_request.c
>> @@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
>>  	return 0;
>>  }
>>  
>> -static int le_scan_restart(struct hci_request *req, unsigned long opt)
>> -{
>> -	struct hci_dev *hdev = req->hdev;
>> -
>> -	/* If controller is not scanning we are done. */
>> -	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
>> -		return 0;
>> -
>> -	if (hdev->scanning_paused) {
>> -		bt_dev_dbg(hdev, "Scanning is paused for suspend");
>> -		return 0;
>> -	}
>> -
>> -	hci_req_add_le_scan_disable(req, false);
>> -
>> -	if (use_ext_scan(hdev)) {
>> -		struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
>> -
>> -		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
>> -		ext_enable_cp.enable = LE_SCAN_ENABLE;
>> -		ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
>> -
>> -		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
>> -			    sizeof(ext_enable_cp), &ext_enable_cp);
>> -	} else {
>> -		struct hci_cp_le_set_scan_enable cp;
>> -
>> -		memset(&cp, 0, sizeof(cp));
>> -		cp.enable = LE_SCAN_ENABLE;
>> -		cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
>> -		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
>> -	}
>> -
>> -	return 0;
>> -}
>> -
>> -static void le_scan_restart_work(struct work_struct *work)
>> -{
>> -	struct hci_dev *hdev = container_of(work, struct hci_dev,
>> -					    le_scan_restart.work);
>> -	unsigned long timeout, duration, scan_start, now;
>> -	u8 status;
>> -
>> -	bt_dev_dbg(hdev, "");
>> -
>> -	hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
>> -	if (status) {
>> -		bt_dev_err(hdev, "failed to restart LE scan: status %d",
>> -			   status);
>> -		return;
>> -	}
>> -
>> -	hci_dev_lock(hdev);
>> -
>> -	if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
>> -	    !hdev->discovery.scan_start)
>> -		goto unlock;
>> -
>> -	/* When the scan was started, hdev->le_scan_disable has been queued
>> -	 * after duration from scan_start. During scan restart this job
>> -	 * has been canceled, and we need to queue it again after proper
>> -	 * timeout, to make sure that scan does not run indefinitely.
>> -	 */
>> -	duration = hdev->discovery.scan_duration;
>> -	scan_start = hdev->discovery.scan_start;
>> -	now = jiffies;
>> -	if (now - scan_start <= duration) {
>> -		int elapsed;
>> -
>> -		if (now >= scan_start)
>> -			elapsed = now - scan_start;
>> -		else
>> -			elapsed = ULONG_MAX - scan_start + now;
>> -
>> -		timeout = duration - elapsed;
>> -	} else {
>> -		timeout = 0;
>> -	}
>> -
>> -	queue_delayed_work(hdev->req_workqueue,
>> -			   &hdev->le_scan_disable, timeout);
>> -
>> -unlock:
>> -	hci_dev_unlock(hdev);
>> -}
>> -
>>  bool hci_req_stop_discovery(struct hci_request *req)
>>  {
>>  	struct hci_dev *hdev = req->hdev;
>> @@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev
>> *hdev, struct bt_codec *codec)
>>  
>>  void hci_request_setup(struct hci_dev *hdev)
>>  {
>> -	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
>>  	INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
>>  	INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
>>  }
>> @@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
>>  {
>>  	__hci_cmd_sync_cancel(hdev, ENODEV);
>>  
>> -	cancel_delayed_work_sync(&hdev->le_scan_restart);
>> -
>>  	if (hdev->adv_instance_timeout) {
>>  		cancel_delayed_work_sync(&hdev->adv_instance_expire);
>>  		hdev->adv_instance_timeout = 0;
>> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
>> index 7dae2ee1bb82..19d57ec0feb8 100644
>> --- a/net/bluetooth/hci_sync.c
>> +++ b/net/bluetooth/hci_sync.c
>> @@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
>>  	hci_dev_unlock(hdev);
>>  }
>>  
>> +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
>> +				       u8 filter_dup);
>> +static int hci_le_scan_restart_sync(struct hci_dev *hdev)
>> +{
>> +	/* If controller is not scanning we are done. */
>> +	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
>> +		return 0;
>> +
>> +	if (hdev->scanning_paused) {
>> +		bt_dev_dbg(hdev, "Scanning is paused for suspend");
>> +		return 0;
>> +	}
>> +
>> +	hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
>> +	return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
>> +					   LE_SCAN_FILTER_DUP_ENABLE);
>> +}
>> +
>> +static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
>> +{
>> +	return hci_le_scan_restart_sync(hdev);
>> +}
>> +
>> +static void le_scan_restart(struct work_struct *work)
>> +{
>> +	struct hci_dev *hdev = container_of(work, struct hci_dev,
>> +					    le_scan_restart.work);
>> +	unsigned long timeout, duration, scan_start, now;
>> +	int status;
>> +
>> +	bt_dev_dbg(hdev, "");
>> +
>> +	hci_dev_lock(hdev);
>> +
>> +	status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
>> +	if (status) {
>> +		bt_dev_err(hdev, "failed to restart LE scan: status %d",
>> +			   status);
>> +		goto unlock;
>> +	}
>> +
>> +	if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
>> +	    !hdev->discovery.scan_start)
>> +		goto unlock;
>> +
>> +	/* When the scan was started, hdev->le_scan_disable has been queued
>> +	 * after duration from scan_start. During scan restart this job
>> +	 * has been canceled, and we need to queue it again after proper
>> +	 * timeout, to make sure that scan does not run indefinitely.
>> +	 */
>> +	duration = hdev->discovery.scan_duration;
>> +	scan_start = hdev->discovery.scan_start;
>> +	now = jiffies;
>> +	if (now - scan_start <= duration) {
>> +		int elapsed;
>> +
>> +		if (now >= scan_start)
>> +			elapsed = now - scan_start;
>> +		else
>> +			elapsed = ULONG_MAX - scan_start + now;
>> +
>> +		timeout = duration - elapsed;
>> +	} else {
>> +		timeout = 0;
>> +	}
>> +
>> +	queue_delayed_work(hdev->req_workqueue,
>> +			   &hdev->le_scan_disable, timeout);
>> +
>> +unlock:
>> +	hci_dev_unlock(hdev);
>> +}
>> +
>>  void hci_cmd_sync_init(struct hci_dev *hdev)
>>  {
>>  	INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
>> @@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
>>  
>>  	INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
>>  	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
>> +	INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
>>  }
>>  
>>  void hci_cmd_sync_clear(struct hci_dev *hdev)
>> @@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
>>  	cancel_delayed_work(&hdev->power_off);
>>  	cancel_delayed_work(&hdev->ncmd_timer);
>>  	cancel_delayed_work(&hdev->le_scan_disable);
>> +	cancel_delayed_work(&hdev->le_scan_restart);
>>  
>>  	hci_request_cancel_all(hdev);
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-06-15 12:47     ` Linux regression tracking #adding (Thorsten Leemhuis)
@ 2023-06-15 14:47       ` Jan Čermák
  0 siblings, 0 replies; 19+ messages in thread
From: Jan Čermák @ 2023-06-15 14:47 UTC (permalink / raw)
  To: Linux regressions mailing list
  Cc: Stefan Agner, Brian Gix, linux-bluetooth, marcel, luiz.dentz

Hi Thorsten, hi everyone,

I confirm the regression is also reproducible in my environment on
the latest tagged mainline (6.4.0-rc6).

Cheers,
Jan

On Thu, 15 Jun 2023 at 14:47, Linux regression tracking #adding
(Thorsten Leemhuis) <regressions@leemhuis.info> wrote:
>
> [TLDR: I'm adding this report to the list of tracked Linux kernel
> regressions; the text you find below is based on a few templates
> paragraphs you might have encountered already in similar form.
> See link in footer if these mails annoy you.]
>
> On 15.06.23 14:06, Stefan Agner wrote:
> > Hi Brian, hi all,
> >
> > We experienced quite some Bluetooth issues after moving from Linux 5.15
> > to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
> > is a popular choice in our community, so it might just be that). When
> > continuously scanning/listening for BLE packets, the packet flow
> > suddenly ends. Depending on which and how many devices (possibly also
> > other factors) within minutes or hours.
> >
> > Jan (in cc) was able to bisect the issue, and was able to pinpoint the
> > problem to this change.
> >
> > Meanwhile I was able to confirm, that reverting this single commit on
> > the latest 6.1.34 seems to resolve the issue.
>
> FWIW & BTW: might be wise to also check if latest mainline still shows
> the problem, as explained on this page:
>
> https://linux-regtracking.leemhuis.info/post/frequent-reasons-why-linux-kernel-bug-reports-are-ignored/
>
> > I've reviewed the change and surrounding code, and one thing I've
> > noticed is that the if statement to set cp.filter_dup in
> > hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
> > different. Not sure if that needs to be the way it is, but my outside
> > gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
> > hci_dev_test_flag(hdev, HCI_MESH))" as well.
> >
> > However, that did not fix the problem (but maybe it is wrong
> > nonetheless?).
> >
> > Anyone has an idea what could be the problem here?
>
> Thanks for the report. To be sure the issue doesn't fall through the
> cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
> tracking bot:
>
> #regzbot ^introduced 27d54b778ad
> #regzbot title net/bluetooth: packet flow suddenly ends when
> continuously scanning/listening for BLE packets
> #regzbot ignore-activity
>
> This isn't a regression? This issue or a fix for it are already
> discussed somewhere else? It was fixed already? You want to clarify when
> the regression started to happen? Or point out I got the title or
> something else totally wrong? Then just reply and tell me -- ideally
> while also telling regzbot about it, as explained by the page listed in
> the footer of this mail.
>
> Developers: When fixing the issue, remember to add 'Link:' tags pointing
> to the report (the parent of this mail). See page linked in footer for
> details.
>
> Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
> --
> Everything you wanna know about Linux kernel regression tracking:
> https://linux-regtracking.leemhuis.info/about/#tldr
> That page also explains what to do if mails like this annoy you.
>
> > On 2022-07-27 15:58, Brian Gix wrote:
> >> le_scan_restart delayed work queue was running as a deprecated
> >> hci_request instead of on the newer thread-safe hci_sync mechanism.
> >>
> >> Signed-off-by: Brian Gix <brian.gix@intel.com>
> >> ---
> >>  net/bluetooth/hci_request.c | 89 -------------------------------------
> >>  net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
> >>  2 files changed, 75 insertions(+), 89 deletions(-)
> >>
> >> diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
> >> index 32fefaa0d3ca..114af7350363 100644
> >> --- a/net/bluetooth/hci_request.c
> >> +++ b/net/bluetooth/hci_request.c
> >> @@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
> >>      return 0;
> >>  }
> >>
> >> -static int le_scan_restart(struct hci_request *req, unsigned long opt)
> >> -{
> >> -    struct hci_dev *hdev = req->hdev;
> >> -
> >> -    /* If controller is not scanning we are done. */
> >> -    if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> >> -            return 0;
> >> -
> >> -    if (hdev->scanning_paused) {
> >> -            bt_dev_dbg(hdev, "Scanning is paused for suspend");
> >> -            return 0;
> >> -    }
> >> -
> >> -    hci_req_add_le_scan_disable(req, false);
> >> -
> >> -    if (use_ext_scan(hdev)) {
> >> -            struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
> >> -
> >> -            memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
> >> -            ext_enable_cp.enable = LE_SCAN_ENABLE;
> >> -            ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> >> -
> >> -            hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
> >> -                        sizeof(ext_enable_cp), &ext_enable_cp);
> >> -    } else {
> >> -            struct hci_cp_le_set_scan_enable cp;
> >> -
> >> -            memset(&cp, 0, sizeof(cp));
> >> -            cp.enable = LE_SCAN_ENABLE;
> >> -            cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> >> -            hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
> >> -    }
> >> -
> >> -    return 0;
> >> -}
> >> -
> >> -static void le_scan_restart_work(struct work_struct *work)
> >> -{
> >> -    struct hci_dev *hdev = container_of(work, struct hci_dev,
> >> -                                        le_scan_restart.work);
> >> -    unsigned long timeout, duration, scan_start, now;
> >> -    u8 status;
> >> -
> >> -    bt_dev_dbg(hdev, "");
> >> -
> >> -    hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
> >> -    if (status) {
> >> -            bt_dev_err(hdev, "failed to restart LE scan: status %d",
> >> -                       status);
> >> -            return;
> >> -    }
> >> -
> >> -    hci_dev_lock(hdev);
> >> -
> >> -    if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> >> -        !hdev->discovery.scan_start)
> >> -            goto unlock;
> >> -
> >> -    /* When the scan was started, hdev->le_scan_disable has been queued
> >> -     * after duration from scan_start. During scan restart this job
> >> -     * has been canceled, and we need to queue it again after proper
> >> -     * timeout, to make sure that scan does not run indefinitely.
> >> -     */
> >> -    duration = hdev->discovery.scan_duration;
> >> -    scan_start = hdev->discovery.scan_start;
> >> -    now = jiffies;
> >> -    if (now - scan_start <= duration) {
> >> -            int elapsed;
> >> -
> >> -            if (now >= scan_start)
> >> -                    elapsed = now - scan_start;
> >> -            else
> >> -                    elapsed = ULONG_MAX - scan_start + now;
> >> -
> >> -            timeout = duration - elapsed;
> >> -    } else {
> >> -            timeout = 0;
> >> -    }
> >> -
> >> -    queue_delayed_work(hdev->req_workqueue,
> >> -                       &hdev->le_scan_disable, timeout);
> >> -
> >> -unlock:
> >> -    hci_dev_unlock(hdev);
> >> -}
> >> -
> >>  bool hci_req_stop_discovery(struct hci_request *req)
> >>  {
> >>      struct hci_dev *hdev = req->hdev;
> >> @@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev
> >> *hdev, struct bt_codec *codec)
> >>
> >>  void hci_request_setup(struct hci_dev *hdev)
> >>  {
> >> -    INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
> >>      INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
> >>      INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
> >>  }
> >> @@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
> >>  {
> >>      __hci_cmd_sync_cancel(hdev, ENODEV);
> >>
> >> -    cancel_delayed_work_sync(&hdev->le_scan_restart);
> >> -
> >>      if (hdev->adv_instance_timeout) {
> >>              cancel_delayed_work_sync(&hdev->adv_instance_expire);
> >>              hdev->adv_instance_timeout = 0;
> >> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> >> index 7dae2ee1bb82..19d57ec0feb8 100644
> >> --- a/net/bluetooth/hci_sync.c
> >> +++ b/net/bluetooth/hci_sync.c
> >> @@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
> >>      hci_dev_unlock(hdev);
> >>  }
> >>
> >> +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
> >> +                                   u8 filter_dup);
> >> +static int hci_le_scan_restart_sync(struct hci_dev *hdev)
> >> +{
> >> +    /* If controller is not scanning we are done. */
> >> +    if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> >> +            return 0;
> >> +
> >> +    if (hdev->scanning_paused) {
> >> +            bt_dev_dbg(hdev, "Scanning is paused for suspend");
> >> +            return 0;
> >> +    }
> >> +
> >> +    hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
> >> +    return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
> >> +                                       LE_SCAN_FILTER_DUP_ENABLE);
> >> +}
> >> +
> >> +static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
> >> +{
> >> +    return hci_le_scan_restart_sync(hdev);
> >> +}
> >> +
> >> +static void le_scan_restart(struct work_struct *work)
> >> +{
> >> +    struct hci_dev *hdev = container_of(work, struct hci_dev,
> >> +                                        le_scan_restart.work);
> >> +    unsigned long timeout, duration, scan_start, now;
> >> +    int status;
> >> +
> >> +    bt_dev_dbg(hdev, "");
> >> +
> >> +    hci_dev_lock(hdev);
> >> +
> >> +    status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
> >> +    if (status) {
> >> +            bt_dev_err(hdev, "failed to restart LE scan: status %d",
> >> +                       status);
> >> +            goto unlock;
> >> +    }
> >> +
> >> +    if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> >> +        !hdev->discovery.scan_start)
> >> +            goto unlock;
> >> +
> >> +    /* When the scan was started, hdev->le_scan_disable has been queued
> >> +     * after duration from scan_start. During scan restart this job
> >> +     * has been canceled, and we need to queue it again after proper
> >> +     * timeout, to make sure that scan does not run indefinitely.
> >> +     */
> >> +    duration = hdev->discovery.scan_duration;
> >> +    scan_start = hdev->discovery.scan_start;
> >> +    now = jiffies;
> >> +    if (now - scan_start <= duration) {
> >> +            int elapsed;
> >> +
> >> +            if (now >= scan_start)
> >> +                    elapsed = now - scan_start;
> >> +            else
> >> +                    elapsed = ULONG_MAX - scan_start + now;
> >> +
> >> +            timeout = duration - elapsed;
> >> +    } else {
> >> +            timeout = 0;
> >> +    }
> >> +
> >> +    queue_delayed_work(hdev->req_workqueue,
> >> +                       &hdev->le_scan_disable, timeout);
> >> +
> >> +unlock:
> >> +    hci_dev_unlock(hdev);
> >> +}
> >> +
> >>  void hci_cmd_sync_init(struct hci_dev *hdev)
> >>  {
> >>      INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
> >> @@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
> >>
> >>      INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
> >>      INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
> >> +    INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
> >>  }
> >>
> >>  void hci_cmd_sync_clear(struct hci_dev *hdev)
> >> @@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
> >>      cancel_delayed_work(&hdev->power_off);
> >>      cancel_delayed_work(&hdev->ncmd_timer);
> >>      cancel_delayed_work(&hdev->le_scan_disable);
> >> +    cancel_delayed_work(&hdev->le_scan_restart);
> >>
> >>      hci_request_cancel_all(hdev);
> >
> >

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-06-15 12:06   ` Stefan Agner
  2023-06-15 12:47     ` Linux regression tracking #adding (Thorsten Leemhuis)
@ 2023-06-15 17:27     ` Luiz Augusto von Dentz
  2023-06-15 18:28       ` Luiz Augusto von Dentz
  1 sibling, 1 reply; 19+ messages in thread
From: Luiz Augusto von Dentz @ 2023-06-15 17:27 UTC (permalink / raw)
  To: Stefan Agner
  Cc: Brian Gix, linux-bluetooth, marcel, Regressions, Jan Čermák

Hi Stefan,

On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>
> Hi Brian, hi all,
>
> We experienced quite some Bluetooth issues after moving from Linux 5.15
> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
> is a popular choice in our community, so it might just be that). When
> continuously scanning/listening for BLE packets, the packet flow
> suddenly ends. Depending on which and how many devices (possibly also
> other factors) within minutes or hours.
>
> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
> problem to this change.
>
> Meanwhile I was able to confirm, that reverting this single commit on
> the latest 6.1.34 seems to resolve the issue.
>
> I've reviewed the change and surrounding code, and one thing I've
> noticed is that the if statement to set cp.filter_dup in
> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
> different. Not sure if that needs to be the way it is, but my outside
> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>
> However, that did not fix the problem (but maybe it is wrong
> nonetheless?).
>
> Anyone has an idea what could be the problem here?

Are there any logs of the problem? Does any HCI command fail, or is there
anything else that would help us track down what could be wrong?

> --
> Stefan
>
> On 2022-07-27 15:58, Brian Gix wrote:
> > le_scan_restart delayed work queue was running as a deprecated
> > hci_request instead of on the newer thread-safe hci_sync mechanism.
> >
> > Signed-off-by: Brian Gix <brian.gix@intel.com>
> > ---
> >  net/bluetooth/hci_request.c | 89 -------------------------------------
> >  net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
> >  2 files changed, 75 insertions(+), 89 deletions(-)
> >
> > diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
> > index 32fefaa0d3ca..114af7350363 100644
> > --- a/net/bluetooth/hci_request.c
> > +++ b/net/bluetooth/hci_request.c
> > @@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
> >       return 0;
> >  }
> >
> > -static int le_scan_restart(struct hci_request *req, unsigned long opt)
> > -{
> > -     struct hci_dev *hdev = req->hdev;
> > -
> > -     /* If controller is not scanning we are done. */
> > -     if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> > -             return 0;
> > -
> > -     if (hdev->scanning_paused) {
> > -             bt_dev_dbg(hdev, "Scanning is paused for suspend");
> > -             return 0;
> > -     }
> > -
> > -     hci_req_add_le_scan_disable(req, false);
> > -
> > -     if (use_ext_scan(hdev)) {
> > -             struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
> > -
> > -             memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
> > -             ext_enable_cp.enable = LE_SCAN_ENABLE;
> > -             ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> > -
> > -             hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
> > -                         sizeof(ext_enable_cp), &ext_enable_cp);
> > -     } else {
> > -             struct hci_cp_le_set_scan_enable cp;
> > -
> > -             memset(&cp, 0, sizeof(cp));
> > -             cp.enable = LE_SCAN_ENABLE;
> > -             cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> > -             hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
> > -     }
> > -
> > -     return 0;
> > -}
> > -
> > -static void le_scan_restart_work(struct work_struct *work)
> > -{
> > -     struct hci_dev *hdev = container_of(work, struct hci_dev,
> > -                                         le_scan_restart.work);
> > -     unsigned long timeout, duration, scan_start, now;
> > -     u8 status;
> > -
> > -     bt_dev_dbg(hdev, "");
> > -
> > -     hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
> > -     if (status) {
> > -             bt_dev_err(hdev, "failed to restart LE scan: status %d",
> > -                        status);
> > -             return;
> > -     }
> > -
> > -     hci_dev_lock(hdev);
> > -
> > -     if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> > -         !hdev->discovery.scan_start)
> > -             goto unlock;
> > -
> > -     /* When the scan was started, hdev->le_scan_disable has been queued
> > -      * after duration from scan_start. During scan restart this job
> > -      * has been canceled, and we need to queue it again after proper
> > -      * timeout, to make sure that scan does not run indefinitely.
> > -      */
> > -     duration = hdev->discovery.scan_duration;
> > -     scan_start = hdev->discovery.scan_start;
> > -     now = jiffies;
> > -     if (now - scan_start <= duration) {
> > -             int elapsed;
> > -
> > -             if (now >= scan_start)
> > -                     elapsed = now - scan_start;
> > -             else
> > -                     elapsed = ULONG_MAX - scan_start + now;
> > -
> > -             timeout = duration - elapsed;
> > -     } else {
> > -             timeout = 0;
> > -     }
> > -
> > -     queue_delayed_work(hdev->req_workqueue,
> > -                        &hdev->le_scan_disable, timeout);
> > -
> > -unlock:
> > -     hci_dev_unlock(hdev);
> > -}
> > -
> >  bool hci_req_stop_discovery(struct hci_request *req)
> >  {
> >       struct hci_dev *hdev = req->hdev;
> > @@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev
> > *hdev, struct bt_codec *codec)
> >
> >  void hci_request_setup(struct hci_dev *hdev)
> >  {
> > -     INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
> >       INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
> >       INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
> >  }
> > @@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
> >  {
> >       __hci_cmd_sync_cancel(hdev, ENODEV);
> >
> > -     cancel_delayed_work_sync(&hdev->le_scan_restart);
> > -
> >       if (hdev->adv_instance_timeout) {
> >               cancel_delayed_work_sync(&hdev->adv_instance_expire);
> >               hdev->adv_instance_timeout = 0;
> > diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> > index 7dae2ee1bb82..19d57ec0feb8 100644
> > --- a/net/bluetooth/hci_sync.c
> > +++ b/net/bluetooth/hci_sync.c
> > @@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
> >       hci_dev_unlock(hdev);
> >  }
> >
> > +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
> > +                                    u8 filter_dup);
> > +static int hci_le_scan_restart_sync(struct hci_dev *hdev)
> > +{
> > +     /* If controller is not scanning we are done. */
> > +     if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> > +             return 0;
> > +
> > +     if (hdev->scanning_paused) {
> > +             bt_dev_dbg(hdev, "Scanning is paused for suspend");
> > +             return 0;
> > +     }
> > +
> > +     hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
> > +     return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
> > +                                        LE_SCAN_FILTER_DUP_ENABLE);
> > +}
> > +
> > +static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
> > +{
> > +     return hci_le_scan_restart_sync(hdev);
> > +}
> > +
> > +static void le_scan_restart(struct work_struct *work)
> > +{
> > +     struct hci_dev *hdev = container_of(work, struct hci_dev,
> > +                                         le_scan_restart.work);
> > +     unsigned long timeout, duration, scan_start, now;
> > +     int status;
> > +
> > +     bt_dev_dbg(hdev, "");
> > +
> > +     hci_dev_lock(hdev);
> > +
> > +     status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
> > +     if (status) {
> > +             bt_dev_err(hdev, "failed to restart LE scan: status %d",
> > +                        status);
> > +             goto unlock;
> > +     }
> > +
> > +     if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> > +         !hdev->discovery.scan_start)
> > +             goto unlock;
> > +
> > +     /* When the scan was started, hdev->le_scan_disable has been queued
> > +      * after duration from scan_start. During scan restart this job
> > +      * has been canceled, and we need to queue it again after proper
> > +      * timeout, to make sure that scan does not run indefinitely.
> > +      */
> > +     duration = hdev->discovery.scan_duration;
> > +     scan_start = hdev->discovery.scan_start;
> > +     now = jiffies;
> > +     if (now - scan_start <= duration) {
> > +             int elapsed;
> > +
> > +             if (now >= scan_start)
> > +                     elapsed = now - scan_start;
> > +             else
> > +                     elapsed = ULONG_MAX - scan_start + now;
> > +
> > +             timeout = duration - elapsed;
> > +     } else {
> > +             timeout = 0;
> > +     }
> > +
> > +     queue_delayed_work(hdev->req_workqueue,
> > +                        &hdev->le_scan_disable, timeout);
> > +
> > +unlock:
> > +     hci_dev_unlock(hdev);
> > +}
> > +
> >  void hci_cmd_sync_init(struct hci_dev *hdev)
> >  {
> >       INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
> > @@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
> >
> >       INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
> >       INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
> > +     INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
> >  }
> >
> >  void hci_cmd_sync_clear(struct hci_dev *hdev)
> > @@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
> >       cancel_delayed_work(&hdev->power_off);
> >       cancel_delayed_work(&hdev->ncmd_timer);
> >       cancel_delayed_work(&hdev->le_scan_disable);
> > +     cancel_delayed_work(&hdev->le_scan_restart);
> >
> >       hci_request_cancel_all(hdev);



-- 
Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-06-15 17:27     ` Luiz Augusto von Dentz
@ 2023-06-15 18:28       ` Luiz Augusto von Dentz
       [not found]         ` <CABUQxGxBdAFncJ6YVb7a9gnU-_YZDGFDmpHJTtm5K1tDGEGRDQ@mail.gmail.com>
  0 siblings, 1 reply; 19+ messages in thread
From: Luiz Augusto von Dentz @ 2023-06-15 18:28 UTC (permalink / raw)
  To: Stefan Agner, Brian Gix
  Cc: Brian Gix, linux-bluetooth, marcel, Regressions, Jan Čermák

+Brian Gix

On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
<luiz.dentz@gmail.com> wrote:
>
> Hi Stefan,
>
> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
> >
> > Hi Brian, hi all,
> >
> > We experienced quite some Bluetooth issues after moving from Linux 5.15
> > to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
> > is a popular choice in our community, so it might just be that). When
> > continuously scanning/listening for BLE packets, the packet flow
> > suddenly ends. Depending on which and how many devices (possibly also
> > other factors) within minutes or hours.
> >
> > Jan (in cc) was able to bisect the issue, and was able to pinpoint the
> > problem to this change.
> >
> > Meanwhile I was able to confirm, that reverting this single commit on
> > the latest 6.1.34 seems to resolve the issue.
> >
> > I've reviewed the change and surrounding code, and one thing I've
> > noticed is that the if statement to set cp.filter_dup in
> > hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
> > different. Not sure if that needs to be the way it is, but my outside
> > gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
> > hci_dev_test_flag(hdev, HCI_MESH))" as well.
> >
> > However, that did not fix the problem (but maybe it is wrong
> > nonetheless?).
> >
> > Anyone has an idea what could be the problem here?
>
> Are there any logs of the problem? Does any HCI command fails or
> anything so that we can track down what could be wrong?

@Brian Gix perhaps you have a better idea what is going wrong here?

> > --
> > Stefan
> >
> > On 2022-07-27 15:58, Brian Gix wrote:
> > > le_scan_restart delayed work queue was running as a deprecated
> > > hci_request instead of on the newer thread-safe hci_sync mechanism.
> > >
> > > Signed-off-by: Brian Gix <brian.gix@intel.com>
> > > ---
> > >  net/bluetooth/hci_request.c | 89 -------------------------------------
> > >  net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
> > >  2 files changed, 75 insertions(+), 89 deletions(-)
> > >
> > > diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
> > > index 32fefaa0d3ca..114af7350363 100644
> > > --- a/net/bluetooth/hci_request.c
> > > +++ b/net/bluetooth/hci_request.c
> > > @@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
> > >       return 0;
> > >  }
> > >
> > > -static int le_scan_restart(struct hci_request *req, unsigned long opt)
> > > -{
> > > -     struct hci_dev *hdev = req->hdev;
> > > -
> > > -     /* If controller is not scanning we are done. */
> > > -     if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> > > -             return 0;
> > > -
> > > -     if (hdev->scanning_paused) {
> > > -             bt_dev_dbg(hdev, "Scanning is paused for suspend");
> > > -             return 0;
> > > -     }
> > > -
> > > -     hci_req_add_le_scan_disable(req, false);
> > > -
> > > -     if (use_ext_scan(hdev)) {
> > > -             struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
> > > -
> > > -             memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
> > > -             ext_enable_cp.enable = LE_SCAN_ENABLE;
> > > -             ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> > > -
> > > -             hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
> > > -                         sizeof(ext_enable_cp), &ext_enable_cp);
> > > -     } else {
> > > -             struct hci_cp_le_set_scan_enable cp;
> > > -
> > > -             memset(&cp, 0, sizeof(cp));
> > > -             cp.enable = LE_SCAN_ENABLE;
> > > -             cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
> > > -             hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
> > > -     }
> > > -
> > > -     return 0;
> > > -}
> > > -
> > > -static void le_scan_restart_work(struct work_struct *work)
> > > -{
> > > -     struct hci_dev *hdev = container_of(work, struct hci_dev,
> > > -                                         le_scan_restart.work);
> > > -     unsigned long timeout, duration, scan_start, now;
> > > -     u8 status;
> > > -
> > > -     bt_dev_dbg(hdev, "");
> > > -
> > > -     hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
> > > -     if (status) {
> > > -             bt_dev_err(hdev, "failed to restart LE scan: status %d",
> > > -                        status);
> > > -             return;
> > > -     }
> > > -
> > > -     hci_dev_lock(hdev);
> > > -
> > > -     if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> > > -         !hdev->discovery.scan_start)
> > > -             goto unlock;
> > > -
> > > -     /* When the scan was started, hdev->le_scan_disable has been queued
> > > -      * after duration from scan_start. During scan restart this job
> > > -      * has been canceled, and we need to queue it again after proper
> > > -      * timeout, to make sure that scan does not run indefinitely.
> > > -      */
> > > -     duration = hdev->discovery.scan_duration;
> > > -     scan_start = hdev->discovery.scan_start;
> > > -     now = jiffies;
> > > -     if (now - scan_start <= duration) {
> > > -             int elapsed;
> > > -
> > > -             if (now >= scan_start)
> > > -                     elapsed = now - scan_start;
> > > -             else
> > > -                     elapsed = ULONG_MAX - scan_start + now;
> > > -
> > > -             timeout = duration - elapsed;
> > > -     } else {
> > > -             timeout = 0;
> > > -     }
> > > -
> > > -     queue_delayed_work(hdev->req_workqueue,
> > > -                        &hdev->le_scan_disable, timeout);
> > > -
> > > -unlock:
> > > -     hci_dev_unlock(hdev);
> > > -}
> > > -
> > >  bool hci_req_stop_discovery(struct hci_request *req)
> > >  {
> > >       struct hci_dev *hdev = req->hdev;
> > > @@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev
> > > *hdev, struct bt_codec *codec)
> > >
> > >  void hci_request_setup(struct hci_dev *hdev)
> > >  {
> > > -     INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
> > >       INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
> > >       INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
> > >  }
> > > @@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
> > >  {
> > >       __hci_cmd_sync_cancel(hdev, ENODEV);
> > >
> > > -     cancel_delayed_work_sync(&hdev->le_scan_restart);
> > > -
> > >       if (hdev->adv_instance_timeout) {
> > >               cancel_delayed_work_sync(&hdev->adv_instance_expire);
> > >               hdev->adv_instance_timeout = 0;
> > > diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> > > index 7dae2ee1bb82..19d57ec0feb8 100644
> > > --- a/net/bluetooth/hci_sync.c
> > > +++ b/net/bluetooth/hci_sync.c
> > > @@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
> > >       hci_dev_unlock(hdev);
> > >  }
> > >
> > > +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
> > > +                                    u8 filter_dup);
> > > +static int hci_le_scan_restart_sync(struct hci_dev *hdev)
> > > +{
> > > +     /* If controller is not scanning we are done. */
> > > +     if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
> > > +             return 0;
> > > +
> > > +     if (hdev->scanning_paused) {
> > > +             bt_dev_dbg(hdev, "Scanning is paused for suspend");
> > > +             return 0;
> > > +     }
> > > +
> > > +     hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
> > > +     return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
> > > +                                        LE_SCAN_FILTER_DUP_ENABLE);
> > > +}
> > > +
> > > +static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
> > > +{
> > > +     return hci_le_scan_restart_sync(hdev);
> > > +}
> > > +
> > > +static void le_scan_restart(struct work_struct *work)
> > > +{
> > > +     struct hci_dev *hdev = container_of(work, struct hci_dev,
> > > +                                         le_scan_restart.work);
> > > +     unsigned long timeout, duration, scan_start, now;
> > > +     int status;
> > > +
> > > +     bt_dev_dbg(hdev, "");
> > > +
> > > +     hci_dev_lock(hdev);
> > > +
> > > +     status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
> > > +     if (status) {
> > > +             bt_dev_err(hdev, "failed to restart LE scan: status %d",
> > > +                        status);
> > > +             goto unlock;
> > > +     }
> > > +
> > > +     if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
> > > +         !hdev->discovery.scan_start)
> > > +             goto unlock;
> > > +
> > > +     /* When the scan was started, hdev->le_scan_disable has been queued
> > > +      * after duration from scan_start. During scan restart this job
> > > +      * has been canceled, and we need to queue it again after proper
> > > +      * timeout, to make sure that scan does not run indefinitely.
> > > +      */
> > > +     duration = hdev->discovery.scan_duration;
> > > +     scan_start = hdev->discovery.scan_start;
> > > +     now = jiffies;
> > > +     if (now - scan_start <= duration) {
> > > +             int elapsed;
> > > +
> > > +             if (now >= scan_start)
> > > +                     elapsed = now - scan_start;
> > > +             else
> > > +                     elapsed = ULONG_MAX - scan_start + now;
> > > +
> > > +             timeout = duration - elapsed;
> > > +     } else {
> > > +             timeout = 0;
> > > +     }
> > > +
> > > +     queue_delayed_work(hdev->req_workqueue,
> > > +                        &hdev->le_scan_disable, timeout);
> > > +
> > > +unlock:
> > > +     hci_dev_unlock(hdev);
> > > +}
> > > +
> > >  void hci_cmd_sync_init(struct hci_dev *hdev)
> > >  {
> > >       INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
> > > @@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
> > >
> > >       INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
> > >       INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
> > > +     INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
> > >  }
> > >
> > >  void hci_cmd_sync_clear(struct hci_dev *hdev)
> > > @@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
> > >       cancel_delayed_work(&hdev->power_off);
> > >       cancel_delayed_work(&hdev->ncmd_timer);
> > >       cancel_delayed_work(&hdev->le_scan_disable);
> > > +     cancel_delayed_work(&hdev->le_scan_restart);
> > >
> > >       hci_request_cancel_all(hdev);
>
>
>
> --
> Luiz Augusto von Dentz



-- 
Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
       [not found]         ` <CABUQxGxBdAFncJ6YVb7a9gnU-_YZDGFDmpHJTtm5K1tDGEGRDQ@mail.gmail.com>
@ 2023-06-20 14:41           ` Stefan Agner
  2023-06-30 10:59             ` Stefan Agner
  0 siblings, 1 reply; 19+ messages in thread
From: Stefan Agner @ 2023-06-20 14:41 UTC (permalink / raw)
  To: Brian Gix
  Cc: Luiz Augusto von Dentz, Brian Gix, linux-bluetooth, marcel,
	Regressions, Jan Čermák

On 2023-06-16 03:22, Brian Gix wrote:

> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote: 
> 
>> +Brian Gix
>> 
>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
>> <luiz.dentz@gmail.com> wrote:
>>> 
>>> Hi Stefan,
>>> 
>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>>>> 
>>>> Hi Brian, hi all,
>>>> 
>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
>>>> is a popular choice in our community, so it might just be that). When
>>>> continuously scanning/listening for BLE packets, the packet flow
>>>> suddenly ends. Depending on which and how many devices (possibly also
>>>> other factors) within minutes or hours.
>>>> 
>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
>>>> problem to this change.
>>>> 
>>>> Meanwhile I was able to confirm, that reverting this single commit on
>>>> the latest 6.1.34 seems to resolve the issue.
>>>> 
>>>> I've reviewed the change and surrounding code, and one thing I've
>>>> noticed is that the if statement to set cp.filter_dup in
>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
>>>> different. Not sure if that needs to be the way it is, but my outside
>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>>>> 
>>>> However, that did not fix the problem (but maybe it is wrong
>>>> nonetheless?).
>>>> 
>>>> Anyone has an idea what could be the problem here?
>>> 
>>> Are there any logs of the problem? Does any HCI command fails or
>>> anything so that we can track down what could be wrong?

No HCI command fails, and no issue is reported in the kernel log either.
BlueZ just stops receiving BLE packets, at least from certain devices.

>> 
>> @Brian Gix perhaps you have a better idea what is going wrong here?
> 
> It seems unlikely that this is Mesh related. Mesh does need for filtering to
> be FALSE, and Mesh does not use extended scanning in any case. 
> 
> But this was part of the final rewrite to retire the hci_req mechanism in
> favor of the hci_sync mechanism. So my best guess off the top of my head is
> that there was an unintended race condition that worked better than the
> synchronous single-threading mechanism?  Filtering (or not) should not

After reviewing the code I came to the same conclusion. What is a bit
surprising to me is that it is so easily reproducible. I guess it is nicer
to have a reproducible one than a hard to reproduce one :)

> prevent advertising packets from permanently wedging.  Does anyone have an
> HCI flow log with and without the offending patch?  Ideally they should be
> identical...  If they are not then I obviously did something wrong. As this
> was not specifically Mesh related, I may have missed some non-mesh corner
> cases.


I've taken two btmon captures, created using:
btmon -i hci0 -w /config/hcidump-hci-req-working.log

You can find them at:
https://os-builds.home-assistant.io/hcidump-hci-req-working.log
https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log

This is while running our user space software (Home Assistant with
Bluetooth integration). Besides some BLE devices (e.g. Xiaomi Mi
Temperature & Humidity sensor) I have an ESP32 running which sends SPAM
advertisements every 100ms (this accelerates the issue). In the
non-working case you'll see that the system doesn't receive any SPAM
advertisements after around 27 seconds. The working log shows that it
continuously receives the same packets (120 s capture).

Hope this helps.

--
Stefan



> 
>>>> --
>>>> Stefan
>>>> 
>>>> On 2022-07-27 15:58, Brian Gix wrote:
>>>>> le_scan_restart delayed work queue was running as a deprecated
>>>>> hci_request instead of on the newer thread-safe hci_sync mechanism.
>>>>>
>>>>> Signed-off-by: Brian Gix <brian.gix@intel.com>
>>>>> ---
>>>>>  net/bluetooth/hci_request.c | 89 -------------------------------------
>>>>>  net/bluetooth/hci_sync.c    | 75 +++++++++++++++++++++++++++++++
>>>>>  2 files changed, 75 insertions(+), 89 deletions(-)
>>>>>
>>>>> diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
>>>>> index 32fefaa0d3ca..114af7350363 100644
>>>>> --- a/net/bluetooth/hci_request.c
>>>>> +++ b/net/bluetooth/hci_request.c
>>>>> @@ -1975,92 +1975,6 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
>>>>>       return 0;
>>>>>  }
>>>>>
>>>>> -static int le_scan_restart(struct hci_request *req, unsigned long opt)
>>>>> -{
>>>>> -     struct hci_dev *hdev = req->hdev;
>>>>> -
>>>>> -     /* If controller is not scanning we are done. */
>>>>> -     if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
>>>>> -             return 0;
>>>>> -
>>>>> -     if (hdev->scanning_paused) {
>>>>> -             bt_dev_dbg(hdev, "Scanning is paused for suspend");
>>>>> -             return 0;
>>>>> -     }
>>>>> -
>>>>> -     hci_req_add_le_scan_disable(req, false);
>>>>> -
>>>>> -     if (use_ext_scan(hdev)) {
>>>>> -             struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
>>>>> -
>>>>> -             memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
>>>>> -             ext_enable_cp.enable = LE_SCAN_ENABLE;
>>>>> -             ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
>>>>> -
>>>>> -             hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
>>>>> -                         sizeof(ext_enable_cp), &ext_enable_cp);
>>>>> -     } else {
>>>>> -             struct hci_cp_le_set_scan_enable cp;
>>>>> -
>>>>> -             memset(&cp, 0, sizeof(cp));
>>>>> -             cp.enable = LE_SCAN_ENABLE;
>>>>> -             cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
>>>>> -             hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
>>>>> -     }
>>>>> -
>>>>> -     return 0;
>>>>> -}
>>>>> -
>>>>> -static void le_scan_restart_work(struct work_struct *work)
>>>>> -{
>>>>> -     struct hci_dev *hdev = container_of(work, struct hci_dev,
>>>>> -                                         le_scan_restart.work);
>>>>> -     unsigned long timeout, duration, scan_start, now;
>>>>> -     u8 status;
>>>>> -
>>>>> -     bt_dev_dbg(hdev, "");
>>>>> -
>>>>> -     hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
>>>>> -     if (status) {
>>>>> -             bt_dev_err(hdev, "failed to restart LE scan: status %d",
>>>>> -                        status);
>>>>> -             return;
>>>>> -     }
>>>>> -
>>>>> -     hci_dev_lock(hdev);
>>>>> -
>>>>> -     if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
>>>>> -         !hdev->discovery.scan_start)
>>>>> -             goto unlock;
>>>>> -
>>>>> -     /* When the scan was started, hdev->le_scan_disable has been queued
>>>>> -      * after duration from scan_start. During scan restart this job
>>>>> -      * has been canceled, and we need to queue it again after proper
>>>>> -      * timeout, to make sure that scan does not run indefinitely.
>>>>> -      */
>>>>> -     duration = hdev->discovery.scan_duration;
>>>>> -     scan_start = hdev->discovery.scan_start;
>>>>> -     now = jiffies;
>>>>> -     if (now - scan_start <= duration) {
>>>>> -             int elapsed;
>>>>> -
>>>>> -             if (now >= scan_start)
>>>>> -                     elapsed = now - scan_start;
>>>>> -             else
>>>>> -                     elapsed = ULONG_MAX - scan_start + now;
>>>>> -
>>>>> -             timeout = duration - elapsed;
>>>>> -     } else {
>>>>> -             timeout = 0;
>>>>> -     }
>>>>> -
>>>>> -     queue_delayed_work(hdev->req_workqueue,
>>>>> -                        &hdev->le_scan_disable, timeout);
>>>>> -
>>>>> -unlock:
>>>>> -     hci_dev_unlock(hdev);
>>>>> -}
>>>>> -
>>>>>  bool hci_req_stop_discovery(struct hci_request *req)
>>>>>  {
>>>>>       struct hci_dev *hdev = req->hdev;
>>>>> @@ -2158,7 +2072,6 @@ int hci_req_configure_datapath(struct hci_dev
>>>>> *hdev, struct bt_codec *codec)
>>>>>
>>>>>  void hci_request_setup(struct hci_dev *hdev)
>>>>>  {
>>>>> -     INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
>>>>>       INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
>>>>>       INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
>>>>>  }
>>>>> @@ -2167,8 +2080,6 @@ void hci_request_cancel_all(struct hci_dev *hdev)
>>>>>  {
>>>>>       __hci_cmd_sync_cancel(hdev, ENODEV);
>>>>>
>>>>> -     cancel_delayed_work_sync(&hdev->le_scan_restart);
>>>>> -
>>>>>       if (hdev->adv_instance_timeout) {
>>>>>               cancel_delayed_work_sync(&hdev->adv_instance_expire);
>>>>>               hdev->adv_instance_timeout = 0;
>>>>> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
>>>>> index 7dae2ee1bb82..19d57ec0feb8 100644
>>>>> --- a/net/bluetooth/hci_sync.c
>>>>> +++ b/net/bluetooth/hci_sync.c
>>>>> @@ -392,6 +392,79 @@ static void le_scan_disable(struct work_struct *work)
>>>>>       hci_dev_unlock(hdev);
>>>>>  }
>>>>>
>>>>> +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
>>>>> +                                    u8 filter_dup);
>>>>> +static int hci_le_scan_restart_sync(struct hci_dev *hdev)
>>>>> +{
>>>>> +     /* If controller is not scanning we are done. */
>>>>> +     if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
>>>>> +             return 0;
>>>>> +
>>>>> +     if (hdev->scanning_paused) {
>>>>> +             bt_dev_dbg(hdev, "Scanning is paused for suspend");
>>>>> +             return 0;
>>>>> +     }
>>>>> +
>>>>> +     hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
>>>>> +     return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
>>>>> +                                        LE_SCAN_FILTER_DUP_ENABLE);
>>>>> +}
>>>>> +
>>>>> +static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
>>>>> +{
>>>>> +     return hci_le_scan_restart_sync(hdev);
>>>>> +}
>>>>> +
>>>>> +static void le_scan_restart(struct work_struct *work)
>>>>> +{
>>>>> +     struct hci_dev *hdev = container_of(work, struct hci_dev,
>>>>> +                                         le_scan_restart.work);
>>>>> +     unsigned long timeout, duration, scan_start, now;
>>>>> +     int status;
>>>>> +
>>>>> +     bt_dev_dbg(hdev, "");
>>>>> +
>>>>> +     hci_dev_lock(hdev);
>>>>> +
>>>>> +     status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
>>>>> +     if (status) {
>>>>> +             bt_dev_err(hdev, "failed to restart LE scan: status %d",
>>>>> +                        status);
>>>>> +             goto unlock;
>>>>> +     }
>>>>> +
>>>>> +     if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
>>>>> +         !hdev->discovery.scan_start)
>>>>> +             goto unlock;
>>>>> +
>>>>> +     /* When the scan was started, hdev->le_scan_disable has been queued
>>>>> +      * after duration from scan_start. During scan restart this job
>>>>> +      * has been canceled, and we need to queue it again after proper
>>>>> +      * timeout, to make sure that scan does not run indefinitely.
>>>>> +      */
>>>>> +     duration = hdev->discovery.scan_duration;
>>>>> +     scan_start = hdev->discovery.scan_start;
>>>>> +     now = jiffies;
>>>>> +     if (now - scan_start <= duration) {
>>>>> +             int elapsed;
>>>>> +
>>>>> +             if (now >= scan_start)
>>>>> +                     elapsed = now - scan_start;
>>>>> +             else
>>>>> +                     elapsed = ULONG_MAX - scan_start + now;
>>>>> +
>>>>> +             timeout = duration - elapsed;
>>>>> +     } else {
>>>>> +             timeout = 0;
>>>>> +     }
>>>>> +
>>>>> +     queue_delayed_work(hdev->req_workqueue,
>>>>> +                        &hdev->le_scan_disable, timeout);
>>>>> +
>>>>> +unlock:
>>>>> +     hci_dev_unlock(hdev);
>>>>> +}
>>>>> +
>>>>>  void hci_cmd_sync_init(struct hci_dev *hdev)
>>>>>  {
>>>>>       INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
>>>>> @@ -400,6 +473,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
>>>>>
>>>>>       INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
>>>>>       INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
>>>>> +     INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
>>>>>  }
>>>>>
>>>>>  void hci_cmd_sync_clear(struct hci_dev *hdev)
>>>>> @@ -4488,6 +4562,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
>>>>>       cancel_delayed_work(&hdev->power_off);
>>>>>       cancel_delayed_work(&hdev->ncmd_timer);
>>>>>       cancel_delayed_work(&hdev->le_scan_disable);
>>>>> +     cancel_delayed_work(&hdev->le_scan_restart);
>>>>>
>>>>>       hci_request_cancel_all(hdev);
>>> 
>>> 
>>> 
>>> --
>>> Luiz Augusto von Dentz
>> 
>> -- 
>> Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-06-20 14:41           ` Stefan Agner
@ 2023-06-30 10:59             ` Stefan Agner
  2023-08-29 11:22               ` Linux regression tracking (Thorsten Leemhuis)
  0 siblings, 1 reply; 19+ messages in thread
From: Stefan Agner @ 2023-06-30 10:59 UTC (permalink / raw)
  To: Brian Gix
  Cc: Luiz Augusto von Dentz, linux-bluetooth, marcel, Regressions,
	Jan Čermák

Hi Brian,

Gentle ping on the issue below.

On 2023-06-20 16:41, Stefan Agner wrote:
> On 2023-06-16 03:22, Brian Gix wrote:
> 
>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
>>
>>> +Brian Gix
>>>
>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
>>> <luiz.dentz@gmail.com> wrote:
>>>>
>>>> Hi Stefan,
>>>>
>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>>>>>
>>>>> Hi Brian, hi all,
>>>>>
>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
>>>>> is a popular choice in our community, so it might just be that). When
>>>>> continuously scanning/listening for BLE packets, the packet flow
>>>>> suddenly ends. Depending on which and how many devices (possibly also
>>>>> other factors) within minutes or hours.
>>>>>
>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
>>>>> problem to this change.
>>>>>
>>>>> Meanwhile I was able to confirm, that reverting this single commit on
>>>>> the latest 6.1.34 seems to resolve the issue.
>>>>>
>>>>> I've reviewed the change and surrounding code, and one thing I've
>>>>> noticed is that the if statement to set cp.filter_dup in
>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
>>>>> different. Not sure if that needs to be the way it is, but my outside
>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>>>>>
>>>>> However, that did not fix the problem (but maybe it is wrong
>>>>> nonetheless?).
>>>>>
>>>>> Anyone has an idea what could be the problem here?
>>>>
>>>> Are there any logs of the problem? Does any HCI command fails or
>>>> anything so that we can track down what could be wrong?
> 
> No HCI command fails, there is also no issue reported in the kernel log.
> BlueZ just stops receiving BLE packets, at least from certain devices.
> 
>>>
>>> @Brian Gix perhaps you have a better idea what is going wrong here?
>>
>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
>> be FALSE, and Mesh does not use extended scanning in any case.
>>
>> But this was part of the final rewrite to retire the hci_req mechanism in
>> favor of the hci_sync mechanism. So my best guess off the top of my head is
>> that there was an unintended race condition that worked better than the
>> synchronous single-threading mechanism?  Filtering (or not) should not
> 
> After reviewing the code I concluded the same. What is a bit surprising to
> me is that it is so well reproducible. I guess it is nicer to have a
> reproducible one than a hard to reproduce one :)
> 
>> prevent advertising packets from permanently wedging.  Does anyone have an
>> HCI flow log with and without the offending patch?  Ideally they should be
>> identical...  If they are not then I obviously did something wrong. As this
>> was not specifically Mesh related, I may have missed some non-mesh corner
>> cases.
> 
> 
> I've taken two btmon captures, I created them using:
> btmon -i hci0 -w /config/hcidump-hci-req-working.log
> 
> You can find them at:
> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log

Could you gain any insights from these logs?

--
Stefan


> 
> This is while running our user space software (Home Assistant with
> Bluetooth integration). Besides some BLE devices (e.g. Xiaomi Mi
> Temperature & Humidity sensor) I have an ESP32 running which sends SPAM
> advertisements every 100ms (this accelerates the issue). In the
> non-working case you'll see that the system doesn't receive any SPAM
> advertisements after around 27 seconds. The working log shows that it
> continuously receives the same packets (capture 120s).
> 
> Hope this helps.
> 
> --
> Stefan
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-06-30 10:59             ` Stefan Agner
@ 2023-08-29 11:22               ` Linux regression tracking (Thorsten Leemhuis)
  2023-08-29 13:27                 ` Stefan Agner
  0 siblings, 1 reply; 19+ messages in thread
From: Linux regression tracking (Thorsten Leemhuis) @ 2023-08-29 11:22 UTC (permalink / raw)
  To: Stefan Agner, Brian Gix
  Cc: Luiz Augusto von Dentz, linux-bluetooth, marcel, Regressions,
	Jan Čermák

Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
for once, to make this easily accessible to everyone.

Stefan, was this regression ever addressed? Doesn't look like it from
here, but maybe I'm missing something.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.

#regzbot poke

On 30.06.23 12:59, Stefan Agner wrote:
> Hi Brian,
> 
> Gentle ping on the issue below.
> 
> On 2023-06-20 16:41, Stefan Agner wrote:
>> On 2023-06-16 03:22, Brian Gix wrote:
>>
>>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
>>>
>>>> +Brian Gix
>>>>
>>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
>>>> <luiz.dentz@gmail.com> wrote:
>>>>>
>>>>> Hi Stefan,
>>>>>
>>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>>>>>>
>>>>>> Hi Brian, hi all,
>>>>>>
>>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
>>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
>>>>>> is a popular choice in our community, so it might just be that). When
>>>>>> continuously scanning/listening for BLE packets, the packet flow
>>>>>> suddenly ends. Depending on which and how many devices (possibly also
>>>>>> other factors) within minutes or hours.
>>>>>>
>>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
>>>>>> problem to this change.
>>>>>>
>>>>>> Meanwhile I was able to confirm, that reverting this single commit on
>>>>>> the latest 6.1.34 seems to resolve the issue.
>>>>>>
>>>>>> I've reviewed the change and surrounding code, and one thing I've
>>>>>> noticed is that the if statement to set cp.filter_dup in
>>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
>>>>>> different. Not sure if that needs to be the way it is, but my outside
>>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
>>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>>>>>>
>>>>>> However, that did not fix the problem (but maybe it is wrong
>>>>>> nonetheless?).
>>>>>>
>>>>>> Anyone has an idea what could be the problem here?
>>>>>
>>>>> Are there any logs of the problem? Does any HCI command fails or
>>>>> anything so that we can track down what could be wrong?
>>
>> No HCI command fails, there is also no issue reported in the kernel log.
>> BlueZ just stops receiving BLE packets, at least from certain devices.
>>
>>>>
>>>> @Brian Gix perhaps you have a better idea what is going wrong here?
>>>
>>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
>>> be FALSE, and Mesh does not use extended scanning in any case.
>>>
>>> But this was part of the final rewrite to retire the hci_req mechanism in
>>> favor of the hci_sync mechanism. So my best guess off the top of my head is
>>> that there was an unintended race condition that worked better than the
>>> synchronous single-threading mechanism?  Filtering (or not) should not
>>
>> After review the code I concluded the same. What is a bit surprising to
>> me is that it is so well reproducible. I guess it is nicer to have a
>> reproducible one than a hard to reproduce one :)
>>
>>> prevent advertising packets from permanently wedging.  Does anyone have an
>>> HCI flow log with and without the offending patch?  Ideally they should be
>>> identical...  If they are not then I obviously did something wrong. As this
>>> was not specifically Mesh related, I may have missed some non-mesh corner
>>> cases.
>>
>>
>> I've taken two btmon captures, I created them using:
>> btmon -i hci0 -w /config/hcidump-hci-req-working.log
>>
>> You can find them at:
>> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
>> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log
> 
> Could you gain any insights from these logs?
> 
> --
> Stefan
> 
> 
>>
>> This is while running our user space software (Home Assistant with
>> Bluetooth integration). Besides some BLE devices (e.g. Xioami Mi
>> Temperature & Humidity sensor) I have a ESP32 running which sends SPAM
>> advertisements every 100ms (this accelerates the issue). In the
>> non-working case you'll see that the system doesn't receive any SPAM
>> advertisements after around 27 seconds. The working log shows that it
>> continuously receives the same packets (capture 120s).
>>
>> Hope this helps.
>>
>> --
>> Stefan
>>
>>
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-08-29 11:22               ` Linux regression tracking (Thorsten Leemhuis)
@ 2023-08-29 13:27                 ` Stefan Agner
  2023-08-29 14:34                   ` Linux regression tracking (Thorsten Leemhuis)
  2023-08-29 20:42                   ` Luiz Augusto von Dentz
  0 siblings, 2 replies; 19+ messages in thread
From: Stefan Agner @ 2023-08-29 13:27 UTC (permalink / raw)
  To: Linux regressions mailing list
  Cc: Brian Gix, Luiz Augusto von Dentz, linux-bluetooth, marcel,
	Jan Čermák

Hi Thorsten,

No, this hasn't been addressed so far. I am also not sure how we can
help solve that particular issue.

Besides this, we have other Bluetooth issues which seem to be Kernel
regressions (where downgrading to Linux 5.15 also helps), folks see
"hci0: unexpected event for opcode" on Intel but also other systems. We
haven't bisected that issue yet. But it seems that the Bluetooth stack
is really somewhat unstable in recent releases.

--
Stefan


On 2023-08-29 13:22, Linux regression tracking (Thorsten Leemhuis)
wrote:
> Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
> for once, to make this easily accessible to everyone.
> 
> Stefan, was this regression ever addressed? Doesn't look like it from
> here, but maybe I'm missing something.
> 
> Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
> --
> Everything you wanna know about Linux kernel regression tracking:
> https://linux-regtracking.leemhuis.info/about/#tldr
> If I did something stupid, please tell me, as explained on that page.
> 
> #regzbot poke
> 
> On 30.06.23 12:59, Stefan Agner wrote:
>> Hi Brian,
>>
>> Gentle ping on the issue below.
>>
>> On 2023-06-20 16:41, Stefan Agner wrote:
>>> On 2023-06-16 03:22, Brian Gix wrote:
>>>
>>>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
>>>>
>>>>> +Brian Gix
>>>>>
>>>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
>>>>> <luiz.dentz@gmail.com> wrote:
>>>>>>
>>>>>> Hi Stefan,
>>>>>>
>>>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>>>>>>>
>>>>>>> Hi Brian, hi all,
>>>>>>>
>>>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
>>>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
>>>>>>> is a popular choice in our community, so it might just be that). When
>>>>>>> continuously scanning/listening for BLE packets, the packet flow
>>>>>>> suddenly ends. Depending on which and how many devices (possibly also
>>>>>>> other factors) within minutes or hours.
>>>>>>>
>>>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
>>>>>>> problem to this change.
>>>>>>>
>>>>>>> Meanwhile I was able to confirm, that reverting this single commit on
>>>>>>> the latest 6.1.34 seems to resolve the issue.
>>>>>>>
>>>>>>> I've reviewed the change and surrounding code, and one thing I've
>>>>>>> noticed is that the if statement to set cp.filter_dup in
>>>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
>>>>>>> different. Not sure if that needs to be the way it is, but my outside
>>>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
>>>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>>>>>>>
>>>>>>> However, that did not fix the problem (but maybe it is wrong
>>>>>>> nonetheless?).
>>>>>>>
>>>>>>> Anyone has an idea what could be the problem here?
>>>>>>
>>>>>> Are there any logs of the problem? Does any HCI command fails or
>>>>>> anything so that we can track down what could be wrong?
>>>
>>> No HCI command fails, there is also no issue reported in the kernel log.
>>> BlueZ just stops receiving BLE packets, at least from certain devices.
>>>
>>>>>
>>>>> @Brian Gix perhaps you have a better idea what is going wrong here?
>>>>
>>>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
>>>> be FALSE, and Mesh does not use extended scanning in any case.
>>>>
>>>> But this was part of the final rewrite to retire the hci_req mechanism in
>>>> favor of the hci_sync mechanism. So my best guess off the top of my head is
>>>> that there was an unintended race condition that worked better than the
>>>> synchronous single-threading mechanism?  Filtering (or not) should not
>>>
>>> After review the code I concluded the same. What is a bit surprising to
>>> me is that it is so well reproducible. I guess it is nicer to have a
>>> reproducible one than a hard to reproduce one :)
>>>
>>>> prevent advertising packets from permanently wedging.  Does anyone have an
>>>> HCI flow log with and without the offending patch?  Ideally they should be
>>>> identical...  If they are not then I obviously did something wrong. As this
>>>> was not specifically Mesh related, I may have missed some non-mesh corner
>>>> cases.
>>>
>>>
>>> I've taken two btmon captures, I created them using:
>>> btmon -i hci0 -w /config/hcidump-hci-req-working.log
>>>
>>> You can find them at:
>>> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
>>> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log
>>
>> Could you gain any insights from these logs?
>>
>> --
>> Stefan
>>
>>
>>>
>>> This is while running our user space software (Home Assistant with
>>> Bluetooth integration). Besides some BLE devices (e.g. Xioami Mi
>>> Temperature & Humidity sensor) I have a ESP32 running which sends SPAM
>>> advertisements every 100ms (this accelerates the issue). In the
>>> non-working case you'll see that the system doesn't receive any SPAM
>>> advertisements after around 27 seconds. The working log shows that it
>>> continuously receives the same packets (capture 120s).
>>>
>>> Hope this helps.
>>>
>>> --
>>> Stefan
>>>
>>>
>>
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-08-29 13:27                 ` Stefan Agner
@ 2023-08-29 14:34                   ` Linux regression tracking (Thorsten Leemhuis)
  2023-08-29 20:42                   ` Luiz Augusto von Dentz
  1 sibling, 0 replies; 19+ messages in thread
From: Linux regression tracking (Thorsten Leemhuis) @ 2023-08-29 14:34 UTC (permalink / raw)
  To: Stefan Agner, Linux regressions mailing list
  Cc: Brian Gix, Luiz Augusto von Dentz, linux-bluetooth, marcel,
	Jan Čermák, Marcel Holtmann, Johan Hedberg

On 29.08.23 15:27, Stefan Agner wrote:
> 
> No, this hasn't been addressed so far.

Thx and aggh. It's vacation time, so sometimes things take longer, but
that doesn't explain why nothing seems to have happened for 9 weeks now
(at least that's how it looks from here, but maybe I missed something).

Luiz, what's up here? What do you need to get down to this?

CCing the other Bluetooth maintainers just to be sure. FWIW, the thread
starts here:
https://lore.kernel.org/linux-bluetooth/578e6d7afd676129decafba846a933f5@agner.ch/#t

Jan saw similar problems:
https://lore.kernel.org/linux-bluetooth/CAPa5EdBSzkuMRoHDJ5w9ESckvNvs68nAfvixyetKcQ5+YD50wA@mail.gmail.com/

> I am also not sure how we can
> help solving that particular issue.

Let's see if this prodding helps to get things rolling. If not, I'll
have to get higher level maintainers involved.

> Besides this, we have other Bluetooth issues which seem to be Kernel
> regressions (where downgrading to Linux 5.15 also helps), folks see
> "hci0: unexpected event for opcode" on Intel but also other systems. We
> haven't bisected that issue yet. But it seems that the Bluetooth stack
> is really somewhat unstable in recent releases.

Might be wise to create a separate thread for those and ask the
Bluetooth maintainers if they might have an idea (please CC the
regressions lists as well), maybe we are lucky; if not someone has to
bisect this to get closer to a solution.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.

> On 2023-08-29 13:22, Linux regression tracking (Thorsten Leemhuis)
> wrote:
>> Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
>> for once, to make this easily accessible to everyone.
>>
>> Stefan, was this regression ever addressed? Doesn't look like it from
>> here, but maybe I'm missing something.
>>
>> Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
>> --
>> Everything you wanna know about Linux kernel regression tracking:
>> https://linux-regtracking.leemhuis.info/about/#tldr
>> If I did something stupid, please tell me, as explained on that page.
>>
>> #regzbot poke
>>
>> On 30.06.23 12:59, Stefan Agner wrote:
>>> Hi Brian,
>>>
>>> Gentle ping on the issue below.
>>>
>>> On 2023-06-20 16:41, Stefan Agner wrote:
>>>> On 2023-06-16 03:22, Brian Gix wrote:
>>>>
>>>>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
>>>>>
>>>>>> +Brian Gix
>>>>>>
>>>>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
>>>>>> <luiz.dentz@gmail.com> wrote:
>>>>>>>
>>>>>>> Hi Stefan,
>>>>>>>
>>>>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>>>>>>>>
>>>>>>>> Hi Brian, hi all,
>>>>>>>>
>>>>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
>>>>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
>>>>>>>> is a popular choice in our community, so it might just be that). When
>>>>>>>> continuously scanning/listening for BLE packets, the packet flow
>>>>>>>> suddenly ends. Depending on which and how many devices (possibly also
>>>>>>>> other factors) within minutes or hours.
>>>>>>>>
>>>>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
>>>>>>>> problem to this change.
>>>>>>>>
>>>>>>>> Meanwhile I was able to confirm, that reverting this single commit on
>>>>>>>> the latest 6.1.34 seems to resolve the issue.
>>>>>>>>
>>>>>>>> I've reviewed the change and surrounding code, and one thing I've
>>>>>>>> noticed is that the if statement to set cp.filter_dup in
>>>>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
>>>>>>>> different. Not sure if that needs to be the way it is, but my outside
>>>>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
>>>>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>>>>>>>>
>>>>>>>> However, that did not fix the problem (but maybe it is wrong
>>>>>>>> nonetheless?).
>>>>>>>>
>>>>>>>> Anyone has an idea what could be the problem here?
>>>>>>>
>>>>>>> Are there any logs of the problem? Does any HCI command fails or
>>>>>>> anything so that we can track down what could be wrong?
>>>>
>>>> No HCI command fails, there is also no issue reported in the kernel log.
>>>> BlueZ just stops receiving BLE packets, at least from certain devices.
>>>>
>>>>>>
>>>>>> @Brian Gix perhaps you have a better idea what is going wrong here?
>>>>>
>>>>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
>>>>> be FALSE, and Mesh does not use extended scanning in any case.
>>>>>
>>>>> But this was part of the final rewrite to retire the hci_req mechanism in
>>>>> favor of the hci_sync mechanism. So my best guess off the top of my head is
>>>>> that there was an unintended race condition that worked better than the
>>>>> synchronous single-threading mechanism?  Filtering (or not) should not
>>>>
>>>> After review the code I concluded the same. What is a bit surprising to
>>>> me is that it is so well reproducible. I guess it is nicer to have a
>>>> reproducible one than a hard to reproduce one :)
>>>>
>>>>> prevent advertising packets from permanently wedging.  Does anyone have an
>>>>> HCI flow log with and without the offending patch?  Ideally they should be
>>>>> identical...  If they are not then I obviously did something wrong. As this
>>>>> was not specifically Mesh related, I may have missed some non-mesh corner
>>>>> cases.
>>>>
>>>>
>>>> I've taken two btmon captures, I created them using:
>>>> btmon -i hci0 -w /config/hcidump-hci-req-working.log
>>>>
>>>> You can find them at:
>>>> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
>>>> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log
>>>
>>> Could you gain any insights from these logs?
>>>
>>> --
>>> Stefan
>>>
>>>
>>>>
>>>> This is while running our user space software (Home Assistant with
>>>> Bluetooth integration). Besides some BLE devices (e.g. Xioami Mi
>>>> Temperature & Humidity sensor) I have a ESP32 running which sends SPAM
>>>> advertisements every 100ms (this accelerates the issue). In the
>>>> non-working case you'll see that the system doesn't receive any SPAM
>>>> advertisements after around 27 seconds. The working log shows that it
>>>> continuously receives the same packets (capture 120s).
>>>>
>>>> Hope this helps.
>>>>
>>>> --
>>>> Stefan
>>>>
>>>>
>>>
>>>
> 
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-08-29 13:27                 ` Stefan Agner
  2023-08-29 14:34                   ` Linux regression tracking (Thorsten Leemhuis)
@ 2023-08-29 20:42                   ` Luiz Augusto von Dentz
  2023-08-30 17:28                     ` Luiz Augusto von Dentz
  1 sibling, 1 reply; 19+ messages in thread
From: Luiz Augusto von Dentz @ 2023-08-29 20:42 UTC (permalink / raw)
  To: Stefan Agner
  Cc: Linux regressions mailing list, Brian Gix, linux-bluetooth,
	marcel, Jan Čermák

Hi Stefan, Brian,

On Tue, Aug 29, 2023 at 6:27 AM Stefan Agner <stefan@agner.ch> wrote:
>
> Hi Thorsten,
>
> No, this hasn't been addressed so far. I am also not sure how we can
> help solving that particular issue.
>
> Besides this, we have other Bluetooth issues which seem to be Kernel
> regressions (where downgrading to Linux 5.15 also helps), folks see
> "hci0: unexpected event for opcode" on Intel but also other systems. We
> haven't bisected that issue yet. But it seems that the Bluetooth stack
> is really somewhat unstable in recent releases.


I suspect the following change should make it behave as before, since
the use of hci_cmd_sync_queue is not equivalent to hci_req_sync:

https://gist.github.com/Vudentz/b78f34e3775c8cd2db55b868e5c8ef42

That said, I'm considering removing the whole custom handling for
HCI_QUIRK_STRICT_DUPLICATE_FILTER and just disabling duplicate filtering
when this flag is set.

> --
> Stefan
>
>
> On 2023-08-29 13:22, Linux regression tracking (Thorsten Leemhuis)
> wrote:
> > Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
> > for once, to make this easily accessible to everyone.
> >
> > Stefan, was this regression ever addressed? Doesn't look like it from
> > here, but maybe I'm missing something.
> >
> > Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
> > --
> > Everything you wanna know about Linux kernel regression tracking:
> > https://linux-regtracking.leemhuis.info/about/#tldr
> > If I did something stupid, please tell me, as explained on that page.
> >
> > #regzbot poke
> >
> > On 30.06.23 12:59, Stefan Agner wrote:
> >> Hi Brian,
> >>
> >> Gentle ping on the issue below.
> >>
> >> On 2023-06-20 16:41, Stefan Agner wrote:
> >>> On 2023-06-16 03:22, Brian Gix wrote:
> >>>
> >>>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
> >>>>
> >>>>> +Brian Gix
> >>>>>
> >>>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
> >>>>> <luiz.dentz@gmail.com> wrote:
> >>>>>>
> >>>>>> Hi Stefan,
> >>>>>>
> >>>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
> >>>>>>>
> >>>>>>> Hi Brian, hi all,
> >>>>>>>
> >>>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
> >>>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
> >>>>>>> is a popular choice in our community, so it might just be that). When
> >>>>>>> continuously scanning/listening for BLE packets, the packet flow
> >>>>>>> suddenly ends. Depending on which and how many devices (possibly also
> >>>>>>> other factors) within minutes or hours.
> >>>>>>>
> >>>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
> >>>>>>> problem to this change.
> >>>>>>>
> >>>>>>> Meanwhile I was able to confirm, that reverting this single commit on
> >>>>>>> the latest 6.1.34 seems to resolve the issue.
> >>>>>>>
> >>>>>>> I've reviewed the change and surrounding code, and one thing I've
> >>>>>>> noticed is that the if statement to set cp.filter_dup in
> >>>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
> >>>>>>> different. Not sure if that needs to be the way it is, but my outside
> >>>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
> >>>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
> >>>>>>>
> >>>>>>> However, that did not fix the problem (but maybe it is wrong
> >>>>>>> nonetheless?).
> >>>>>>>
> >>>>>>> Anyone has an idea what could be the problem here?
> >>>>>>
> >>>>>> Are there any logs of the problem? Does any HCI command fails or
> >>>>>> anything so that we can track down what could be wrong?
> >>>
> >>> No HCI command fails, there is also no issue reported in the kernel log.
> >>> BlueZ just stops receiving BLE packets, at least from certain devices.
> >>>
> >>>>>
> >>>>> @Brian Gix perhaps you have a better idea what is going wrong here?
> >>>>
> >>>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
> >>>> be FALSE, and Mesh does not use extended scanning in any case.
> >>>>
> >>>> But this was part of the final rewrite to retire the hci_req mechanism in
> >>>> favor of the hci_sync mechanism. So my best guess off the top of my head is
> >>>> that there was an unintended race condition that worked better than the
> >>>> synchronous single-threading mechanism?  Filtering (or not) should not
> >>>
> >>> After review the code I concluded the same. What is a bit surprising to
> >>> me is that it is so well reproducible. I guess it is nicer to have a
> >>> reproducible one than a hard to reproduce one :)
> >>>
> >>>> prevent advertising packets from permanently wedging.  Does anyone have an
> >>>> HCI flow log with and without the offending patch?  Ideally they should be
> >>>> identical...  If they are not then I obviously did something wrong. As this
> >>>> was not specifically Mesh related, I may have missed some non-mesh corner
> >>>> cases.
> >>>
> >>>
> >>> I've taken two btmon captures, I created them using:
> >>> btmon -i hci0 -w /config/hcidump-hci-req-working.log
> >>>
> >>> You can find them at:
> >>> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
> >>> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log
> >>
> >> Could you gain any insights from these logs?
> >>
> >> --
> >> Stefan
> >>
> >>
> >>>
> >>> This is while running our user space software (Home Assistant with
> >>> Bluetooth integration). Besides some BLE devices (e.g. Xioami Mi
> >>> Temperature & Humidity sensor) I have a ESP32 running which sends SPAM
> >>> advertisements every 100ms (this accelerates the issue). In the
> >>> non-working case you'll see that the system doesn't receive any SPAM
> >>> advertisements after around 27 seconds. The working log shows that it
> >>> continuously receives the same packets (capture 120s).
> >>>
> >>> Hope this helps.
> >>>
> >>> --
> >>> Stefan
> >>>
> >>>
> >>
> >>



-- 
Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-08-29 20:42                   ` Luiz Augusto von Dentz
@ 2023-08-30 17:28                     ` Luiz Augusto von Dentz
  2023-08-30 20:31                       ` Stefan Agner
  0 siblings, 1 reply; 19+ messages in thread
From: Luiz Augusto von Dentz @ 2023-08-30 17:28 UTC (permalink / raw)
  To: Stefan Agner
  Cc: Linux regressions mailing list, Brian Gix, linux-bluetooth,
	marcel, Jan Čermák

Hi Stefan,

On Tue, Aug 29, 2023 at 1:42 PM Luiz Augusto von Dentz
<luiz.dentz@gmail.com> wrote:
>
> Hi Stefan, Brian,
>
> On Tue, Aug 29, 2023 at 6:27 AM Stefan Agner <stefan@agner.ch> wrote:
> >
> > Hi Thorsten,
> >
> > No, this hasn't been addressed so far. I am also not sure how we can
> > help solving that particular issue.
> >
> > Besides this, we have other Bluetooth issues which seem to be Kernel
> > regressions (where downgrading to Linux 5.15 also helps), folks see
> > "hci0: unexpected event for opcode" on Intel but also other systems. We
> > haven't bisected that issue yet. But it seems that the Bluetooth stack
> > is really somewhat unstable in recent releases.
>
>
> I suspect the following change shall make it behave as before, the use
> of hci_cmd_sync_queue is not equivalent to hci_req_sync:
>
> https://gist.github.com/Vudentz/b78f34e3775c8cd2db55b868e5c8ef42
>
> That said, I'm considering removing the whole custom handling for
> HCI_QUIRK_STRICT_DUPLICATE_FILTER and just disable duplicate filtering
> when this flag is set.

Any chance to test the following changes:

https://patchwork.kernel.org/project/bluetooth/patch/20230829205936.766544-1-luiz.dentz@gmail.com/

> > --
> > Stefan
> >
> >
> > On 2023-08-29 13:22, Linux regression tracking (Thorsten Leemhuis)
> > wrote:
> > > Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
> > > for once, to make this easily accessible to everyone.
> > >
> > > Stefan, was this regression ever addressed? Doesn't look like it from
> > > here, but maybe I'm missing something.
> > >
> > > Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
> > > --
> > > Everything you wanna know about Linux kernel regression tracking:
> > > https://linux-regtracking.leemhuis.info/about/#tldr
> > > If I did something stupid, please tell me, as explained on that page.
> > >
> > > #regzbot poke
> > >
> > > On 30.06.23 12:59, Stefan Agner wrote:
> > >> Hi Brian,
> > >>
> > >> Gentle ping on the issue below.
> > >>
> > >> On 2023-06-20 16:41, Stefan Agner wrote:
> > >>> On 2023-06-16 03:22, Brian Gix wrote:
> > >>>
> > >>>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
> > >>>>
> > >>>>> +Brian Gix
> > >>>>>
> > >>>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
> > >>>>> <luiz.dentz@gmail.com> wrote:
> > >>>>>>
> > >>>>>> Hi Stefan,
> > >>>>>>
> > >>>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
> > >>>>>>>
> > >>>>>>> Hi Brian, hi all,
> > >>>>>>>
> > >>>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
> > >>>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
> > >>>>>>> is a popular choice in our community, so it might just be that). When
> > >>>>>>> continuously scanning/listening for BLE packets, the packet flow
> > >>>>>>> suddenly ends. Depending on which and how many devices (possibly also
> > >>>>>>> other factors) within minutes or hours.
> > >>>>>>>
> > >>>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
> > >>>>>>> problem to this change.
> > >>>>>>>
> > >>>>>>> Meanwhile I was able to confirm, that reverting this single commit on
> > >>>>>>> the latest 6.1.34 seems to resolve the issue.
> > >>>>>>>
> > >>>>>>> I've reviewed the change and surrounding code, and one thing I've
> > >>>>>>> noticed is that the if statement to set cp.filter_dup in
> > >>>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
> > >>>>>>> different. Not sure if that needs to be the way it is, but my outside
> > >>>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
> > >>>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
> > >>>>>>>
> > >>>>>>> However, that did not fix the problem (but maybe it is wrong
> > >>>>>>> nonetheless?).
> > >>>>>>>
> > >>>>>>> Does anyone have an idea what could be the problem here?
> > >>>>>>
> > >>>>>> Are there any logs of the problem? Does any HCI command fail or
> > >>>>>> anything so that we can track down what could be wrong?
> > >>>
> > >>> No HCI command fails, there is also no issue reported in the kernel log.
> > >>> BlueZ just stops receiving BLE packets, at least from certain devices.
> > >>>
> > >>>>>
> > >>>>> @Brian Gix perhaps you have a better idea what is going wrong here?
> > >>>>
> > >>>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
> > >>>> be FALSE, and Mesh does not use extended scanning in any case.
> > >>>>
> > >>>> But this was part of the final rewrite to retire the hci_req mechanism in
> > >>>> favor of the hci_sync mechanism. So my best guess off the top of my head is
> > >>>> that there was an unintended race condition that worked better than the
> > >>>> synchronous single-threading mechanism?  Filtering (or not) should not
> > >>>
> > >>> After reviewing the code I concluded the same. What is a bit surprising to
> > >>> me is that it is so well reproducible. I guess it is nicer to have a
> > >>> reproducible one than a hard to reproduce one :)
> > >>>
> > >>>> prevent advertising packets from permanently wedging.  Does anyone have an
> > >>>> HCI flow log with and without the offending patch?  Ideally they should be
> > >>>> identical...  If they are not then I obviously did something wrong. As this
> > >>>> was not specifically Mesh related, I may have missed some non-mesh corner
> > >>>> cases.
> > >>>
> > >>>
> > >>> I've taken two btmon captures, I created them using:
> > >>> btmon -i hci0 -w /config/hcidump-hci-req-working.log
> > >>>
> > >>> You can find them at:
> > >>> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
> > >>> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log
> > >>
> > >> Could you gain any insights from these logs?
> > >>
> > >> --
> > >> Stefan
> > >>
> > >>
> > >>>
> > >>> This is while running our user space software (Home Assistant with
> > >>> Bluetooth integration). Besides some BLE devices (e.g. Xiaomi Mi
> > >>> Temperature & Humidity sensor) I have an ESP32 running which sends SPAM
> > >>> advertisements every 100ms (this accelerates the issue). In the
> > >>> non-working case you'll see that the system doesn't receive any SPAM
> > >>> advertisements after around 27 seconds. The working log shows that it
> > >>> continuously receives the same packets (capture 120s).
> > >>>
> > >>> Hope this helps.
> > >>>
> > >>> --
> > >>> Stefan
> > >>>
> > >>>
> > >>
> > >>
>
>
>
> --
> Luiz Augusto von Dentz



-- 
Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync
  2023-08-30 17:28                     ` Luiz Augusto von Dentz
@ 2023-08-30 20:31                       ` Stefan Agner
  0 siblings, 0 replies; 19+ messages in thread
From: Stefan Agner @ 2023-08-30 20:31 UTC (permalink / raw)
  To: Luiz Augusto von Dentz
  Cc: Linux regressions mailing list, Brian Gix, linux-bluetooth,
	marcel, Jan Čermák

Hi Luiz,

On 2023-08-30 19:28, Luiz Augusto von Dentz wrote:
> Hi Stefan,
> 
> On Tue, Aug 29, 2023 at 1:42 PM Luiz Augusto von Dentz
> <luiz.dentz@gmail.com> wrote:
>>
>> Hi Stefan, Brian,
>>
>> On Tue, Aug 29, 2023 at 6:27 AM Stefan Agner <stefan@agner.ch> wrote:
>> >
>> > Hi Thorsten,
>> >
>> > No, this hasn't been addressed so far. I am also not sure how we can
>> > help solving that particular issue.
>> >
>> > Besides this, we have other Bluetooth issues which seem to be Kernel
>> > regressions (where downgrading to Linux 5.15 also helps), folks see
>> > "hci0: unexpected event for opcode" on Intel but also other systems. We
>> > haven't bisected that issue yet. But it seems that the Bluetooth stack
>> > is really somewhat unstable in recent releases.
>>
>>
>> I suspect the following change shall make it behave as before, the use
>> of hci_cmd_sync_queue is not equivalent to hci_req_sync:
>>
>> https://gist.github.com/Vudentz/b78f34e3775c8cd2db55b868e5c8ef42
>>
>> That said, I'm considering removing the whole custom handling for
>> HCI_QUIRK_STRICT_DUPLICATE_FILTER and just disable duplicate filtering
>> when this flag is set.
> 
> Any chance to test the following changes:
> 
> https://patchwork.kernel.org/project/bluetooth/patch/20230829205936.766544-1-luiz.dentz@gmail.com/

I've tested this with my SPAM test device, and I can confirm that this
indeed fixes the problem we are seeing: The BLE advertisements continue
to come in just fine with the patch applied!

Thanks for the fix!

--
Stefan

> 
>> > --
>> > Stefan
>> >
>> >
>> > On 2023-08-29 13:22, Linux regression tracking (Thorsten Leemhuis)
>> > wrote:
>> > > Hi, Thorsten here, the Linux kernel's regression tracker. Top-posting
>> > > for once, to make this easily accessible to everyone.
>> > >
>> > > Stefan, was this regression ever addressed? Doesn't look like it from
>> > > here, but maybe I'm missing something.
>> > >
>> > > Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
>> > > --
>> > > Everything you wanna know about Linux kernel regression tracking:
>> > > https://linux-regtracking.leemhuis.info/about/#tldr
>> > > If I did something stupid, please tell me, as explained on that page.
>> > >
>> > > #regzbot poke
>> > >
>> > > On 30.06.23 12:59, Stefan Agner wrote:
>> > >> Hi Brian,
>> > >>
>> > >> Gentle ping on the issue below.
>> > >>
>> > >> On 2023-06-20 16:41, Stefan Agner wrote:
>> > >>> On 2023-06-16 03:22, Brian Gix wrote:
>> > >>>
>> > >>>> On Thu, Jun 15, 2023 at 11:28 AM Luiz Augusto von Dentz <luiz.dentz@gmail.com> wrote:
>> > >>>>
>> > >>>>> +Brian Gix
>> > >>>>>
>> > >>>>> On Thu, Jun 15, 2023 at 10:27 AM Luiz Augusto von Dentz
>> > >>>>> <luiz.dentz@gmail.com> wrote:
>> > >>>>>>
>> > >>>>>> Hi Stefan,
>> > >>>>>>
>> > >>>>>> On Thu, Jun 15, 2023 at 5:06 AM Stefan Agner <stefan@agner.ch> wrote:
>> > >>>>>>>
>> > >>>>>>> Hi Brian, hi all,
>> > >>>>>>>
>> > >>>>>>> We experienced quite some Bluetooth issues after moving from Linux 5.15
>> > >>>>>>> to 6.1 on Home Assistant OS, especially on Intel NUC type systems (which
>> > >>>>>>> is a popular choice in our community, so it might just be that). When
>> > >>>>>>> continuously scanning/listening for BLE packets, the packet flow
>> > >>>>>>> suddenly ends. Depending on which and how many devices (possibly also
>> > >>>>>>> other factors) within minutes or hours.
>> > >>>>>>>
>> > >>>>>>> Jan (in cc) was able to bisect the issue, and was able to pinpoint the
>> > >>>>>>> problem to this change.
>> > >>>>>>>
>> > >>>>>>> Meanwhile I was able to confirm, that reverting this single commit on
>> > >>>>>>> the latest 6.1.34 seems to resolve the issue.
>> > >>>>>>>
>> > >>>>>>> I've reviewed the change and surrounding code, and one thing I've
>> > >>>>>>> noticed is that the if statement to set cp.filter_dup in
>> > >>>>>>> hci_le_set_ext_scan_enable_sync and hci_le_set_scan_enable_sync are
>> > >>>>>>> different. Not sure if that needs to be the way it is, but my outside
>> > >>>>>>> gut feeling says hci_le_set_ext_scan_enable_sync should use "if (val &&
>> > >>>>>>> hci_dev_test_flag(hdev, HCI_MESH))" as well.
>> > >>>>>>>
>> > >>>>>>> However, that did not fix the problem (but maybe it is wrong
>> > >>>>>>> nonetheless?).
>> > >>>>>>>
>> > >>>>>>> Does anyone have an idea what could be the problem here?
>> > >>>>>>
>> > >>>>>> Are there any logs of the problem? Does any HCI command fail or
>> > >>>>>> anything so that we can track down what could be wrong?
>> > >>>
>> > >>> No HCI command fails, there is also no issue reported in the kernel log.
>> > >>> BlueZ just stops receiving BLE packets, at least from certain devices.
>> > >>>
>> > >>>>>
>> > >>>>> @Brian Gix perhaps you have a better idea what is going wrong here?
>> > >>>>
>> > >>>> It seems unlikely that this is Mesh related. Mesh does need for filtering to
>> > >>>> be FALSE, and Mesh does not use extended scanning in any case.
>> > >>>>
>> > >>>> But this was part of the final rewrite to retire the hci_req mechanism in
>> > >>>> favor of the hci_sync mechanism. So my best guess off the top of my head is
>> > >>>> that there was an unintended race condition that worked better than the
>> > >>>> synchronous single-threading mechanism?  Filtering (or not) should not
>> > >>>
>> > >>> After reviewing the code I concluded the same. What is a bit surprising to
>> > >>> me is that it is so well reproducible. I guess it is nicer to have a
>> > >>> reproducible one than a hard to reproduce one :)
>> > >>>
>> > >>>> prevent advertising packets from permanently wedging.  Does anyone have an
>> > >>>> HCI flow log with and without the offending patch?  Ideally they should be
>> > >>>> identical...  If they are not then I obviously did something wrong. As this
>> > >>>> was not specifically Mesh related, I may have missed some non-mesh corner
>> > >>>> cases.
>> > >>>
>> > >>>
>> > >>> I've taken two btmon captures, I created them using:
>> > >>> btmon -i hci0 -w /config/hcidump-hci-req-working.log
>> > >>>
>> > >>> You can find them at:
>> > >>> https://os-builds.home-assistant.io/hcidump-hci-req-working.log
>> > >>> https://os-builds.home-assistant.io/hcidump-hci-sync-non-working.log
>> > >>
>> > >> Could you gain any insights from these logs?
>> > >>
>> > >> --
>> > >> Stefan
>> > >>
>> > >>
>> > >>>
>> > >>> This is while running our user space software (Home Assistant with
>> > >>> Bluetooth integration). Besides some BLE devices (e.g. Xiaomi Mi
>> > >>> Temperature & Humidity sensor) I have an ESP32 running which sends SPAM
>> > >>> advertisements every 100ms (this accelerates the issue). In the
>> > >>> non-working case you'll see that the system doesn't receive any SPAM
>> > >>> advertisements after around 27 seconds. The working log shows that it
>> > >>> continuously receives the same packets (capture 120s).
>> > >>>
>> > >>> Hope this helps.
>> > >>>
>> > >>> --
>> > >>> Stefan
>> > >>>
>> > >>>
>> > >>
>> > >>
>>
>>
>>
>> --
>> Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2023-08-30 20:31 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-27 13:58 [PATCH v4 0/4] Clean-up stale/unused hci_request.c code Brian Gix
2022-07-27 13:58 ` [PATCH v4 1/4] Bluetooth: Convert le_scan_disable timeout to hci_sync Brian Gix
2022-07-27 14:30   ` Clean-up stale/unused hci_request.c code bluez.test.bot
2022-07-27 13:58 ` [PATCH v4 2/4] Bluetooth: Rework le_scan_restart for hci_sync Brian Gix
2023-06-15 12:06   ` Stefan Agner
2023-06-15 12:47     ` Linux regression tracking #adding (Thorsten Leemhuis)
2023-06-15 14:47       ` Jan Čermák
2023-06-15 17:27     ` Luiz Augusto von Dentz
2023-06-15 18:28       ` Luiz Augusto von Dentz
     [not found]         ` <CABUQxGxBdAFncJ6YVb7a9gnU-_YZDGFDmpHJTtm5K1tDGEGRDQ@mail.gmail.com>
2023-06-20 14:41           ` Stefan Agner
2023-06-30 10:59             ` Stefan Agner
2023-08-29 11:22               ` Linux regression tracking (Thorsten Leemhuis)
2023-08-29 13:27                 ` Stefan Agner
2023-08-29 14:34                   ` Linux regression tracking (Thorsten Leemhuis)
2023-08-29 20:42                   ` Luiz Augusto von Dentz
2023-08-30 17:28                     ` Luiz Augusto von Dentz
2023-08-30 20:31                       ` Stefan Agner
2022-07-27 13:58 ` [PATCH v4 3/4] Bluetooth: Delete unused hci_req_stop_discovery() Brian Gix
2022-07-27 13:58 ` [PATCH v4 4/4] Bluetooth: Convert SCO configure_datapath to hci_sync Brian Gix

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.