All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 0/4] Roam based on packet loss event
@ 2022-08-09 23:04 James Prestwood
  2022-08-09 23:04 ` [RFC 1/4] station: react to (new) netdev " James Prestwood
                   ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: James Prestwood @ 2022-08-09 23:04 UTC (permalink / raw)
  To: iwd; +Cc: James Prestwood, mjohnson459

A few weeks ago a user (CC'ed) reported that his driver was not roaming
as expected, and would get disconnected before any CQM RSSI events got
to userspace. He did see that the kernel was sending packet loss events
which should also be a good reason to roam.

These patches do just that, roam based on packet loss events.

Sending as an RFC so Michael can (hopefully) give this a trial run. I
can simulate packet loss events in a virtual environment, but testing
on the driver in question would be best.

James Prestwood (4):
  station: react to (new) netdev packet loss event
  netdev: handle packet loss notification
  auto-t: add generic tx_packet function
  auto-t: add packet loss test to testPSK-roam

 autotests/testPSK-roam/connection_test.py | 46 ++++++++++++++++++++++-
 autotests/util/testutil.py                |  7 ++++
 src/netdev.c                              |  8 ++--
 src/netdev.h                              |  1 +
 src/station.c                             | 39 ++++++++++++++++---
 5 files changed, 90 insertions(+), 11 deletions(-)

-- 
2.34.3


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC 1/4] station: react to (new) netdev packet loss event
  2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
@ 2022-08-09 23:04 ` James Prestwood
  2022-08-09 23:04 ` [RFC 2/4] netdev: handle packet loss notification James Prestwood
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: James Prestwood @ 2022-08-09 23:04 UTC (permalink / raw)
  To: iwd; +Cc: James Prestwood, mjohnson459

This adds a new netdev event for packet loss notifications from
the kernel. Depending on the scenario a station may see packet
loss events without any other indications like low RSSI. In these
cases IWD should still roam since there is no data flowing.
---
 src/netdev.h  |  1 +
 src/station.c | 39 +++++++++++++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/netdev.h b/src/netdev.h
index f093c499..d13bc5e6 100644
--- a/src/netdev.h
+++ b/src/netdev.h
@@ -49,6 +49,7 @@ enum netdev_event {
 	NETDEV_EVENT_RSSI_THRESHOLD_LOW,
 	NETDEV_EVENT_RSSI_THRESHOLD_HIGH,
 	NETDEV_EVENT_RSSI_LEVEL_NOTIFY,
+	NETDEV_EVENT_PACKET_LOSS_NOTIFY,
 };
 
 enum netdev_watch_event {
diff --git a/src/station.c b/src/station.c
index b9038431..458c6ee2 100644
--- a/src/station.c
+++ b/src/station.c
@@ -2535,15 +2535,10 @@ static void station_neighbor_report_cb(struct netdev *netdev, int err,
 		station_roam_failed(station);
 }
 
-static void station_roam_trigger_cb(struct l_timeout *timeout, void *user_data)
+static void station_start_roam(struct station *station)
 {
-	struct station *station = user_data;
 	int r;
 
-	l_debug("%u", netdev_get_ifindex(station->netdev));
-
-	l_timeout_remove(station->roam_trigger_timeout);
-	station->roam_trigger_timeout = NULL;
 	station->preparing_roam = true;
 
 	/*
@@ -2577,6 +2572,18 @@ static void station_roam_trigger_cb(struct l_timeout *timeout, void *user_data)
 		station_roam_failed(station);
 }
 
+static void station_roam_trigger_cb(struct l_timeout *timeout, void *user_data)
+{
+	struct station *station = user_data;
+
+	l_debug("%u", netdev_get_ifindex(station->netdev));
+
+	l_timeout_remove(station->roam_trigger_timeout);
+	station->roam_trigger_timeout = NULL;
+
+	station_start_roam(station);
+}
+
 static void station_roam_timeout_rearm(struct station *station, int seconds)
 {
 	struct timespec now, min_timeout;
@@ -3057,6 +3064,23 @@ static void station_disconnect_event(struct station *station, void *event_data)
 	l_warn("Unexpected disconnect event");
 }
 
+#define STATION_PKT_LOSS_THRESHOLD 10
+
+static void station_packets_lost(struct station *station, uint32_t num_pkts)
+{
+	l_debug("Packets lost event: %u", num_pkts);
+
+	if (num_pkts < STATION_PKT_LOSS_THRESHOLD)
+		return;
+
+	if (station_cannot_roam(station))
+		return;
+
+	station_debug_event(station, "packet-loss-roam");
+
+	station_start_roam(station);
+}
+
 static void station_netdev_event(struct netdev *netdev, enum netdev_event event,
 					void *event_data, void *user_data)
 {
@@ -3092,6 +3116,9 @@ static void station_netdev_event(struct netdev *netdev, enum netdev_event event,
 	case NETDEV_EVENT_CHANNEL_SWITCHED:
 		station_event_channel_switched(station, l_get_u32(event_data));
 		break;
+	case NETDEV_EVENT_PACKET_LOSS_NOTIFY:
+		station_packets_lost(station, l_get_u32(event_data));
+		break;
 	}
 }
 
-- 
2.34.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 2/4] netdev: handle packet loss notification
  2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
  2022-08-09 23:04 ` [RFC 1/4] station: react to (new) netdev " James Prestwood
@ 2022-08-09 23:04 ` James Prestwood
  2022-08-09 23:04 ` [RFC 3/4] auto-t: add generic tx_packet function James Prestwood
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: James Prestwood @ 2022-08-09 23:04 UTC (permalink / raw)
  To: iwd; +Cc: James Prestwood, mjohnson459

This attribute was already handled and simply printed. Now a
netdev event will be sent to notify any listeners.
---
 src/netdev.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/netdev.c b/src/netdev.c
index 47e076e2..0d48631c 100644
--- a/src/netdev.c
+++ b/src/netdev.c
@@ -1073,6 +1073,7 @@ static void netdev_cqm_event(struct l_genl_msg *msg, struct netdev *netdev)
 	const void *data;
 	uint32_t *rssi_event = NULL;
 	int32_t *rssi_val = NULL;
+	uint32_t *pkt_event = NULL;
 
 	if (!l_genl_attr_init(&attr, msg))
 		return;
@@ -1096,8 +1097,7 @@ static void netdev_cqm_event(struct l_genl_msg *msg, struct netdev *netdev)
 					if (len != 4)
 						continue;
 
-					l_debug("Packets lost event: %d",
-							*(uint32_t *) data);
+					pkt_event = (uint32_t *) data;
 					break;
 
 				case NL80211_ATTR_CQM_BEACON_LOSS_EVENT:
@@ -1128,7 +1128,9 @@ static void netdev_cqm_event(struct l_genl_msg *msg, struct netdev *netdev)
 			l_debug("Signal change event (above=%d)", *rssi_event);
 			netdev_cqm_event_rssi_threshold(netdev, *rssi_event);
 		}
-	}
+	} else if (pkt_event && netdev->event_filter)
+		netdev->event_filter(netdev, NETDEV_EVENT_PACKET_LOSS_NOTIFY,
+					pkt_event, netdev->user_data);
 }
 
 static void netdev_rekey_offload_event(struct l_genl_msg *msg,
-- 
2.34.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 3/4] auto-t: add generic tx_packet function
  2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
  2022-08-09 23:04 ` [RFC 1/4] station: react to (new) netdev " James Prestwood
  2022-08-09 23:04 ` [RFC 2/4] netdev: handle packet loss notification James Prestwood
@ 2022-08-09 23:04 ` James Prestwood
  2022-08-09 23:04 ` [RFC 4/4] auto-t: add packet loss test to testPSK-roam James Prestwood
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: James Prestwood @ 2022-08-09 23:04 UTC (permalink / raw)
  To: iwd; +Cc: James Prestwood, mjohnson459

This sends data over the raw sockets similar to test_ifaces_connected
---
 autotests/util/testutil.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/autotests/util/testutil.py b/autotests/util/testutil.py
index eae4dd89..37de49a2 100644
--- a/autotests/util/testutil.py
+++ b/autotests/util/testutil.py
@@ -53,6 +53,13 @@ def tx(fromsock, tosock, src, dst):
 
     return (frame, fromsock, tosock, src, dst)
 
+def tx_packets(if0, if1, num):
+    sock0, addr0 = raw_if_socket(if0)
+    sock1, addr1 = raw_if_socket(if1)
+
+    for i in range(num):
+        tx(sock0, sock1, addr0, addr1)
+
 def test_connected(if0=None, if1=None, group=True, expect_fail=False):
     if expect_fail:
         timeout = 0
-- 
2.34.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 4/4] auto-t: add packet loss test to testPSK-roam
  2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
                   ` (2 preceding siblings ...)
  2022-08-09 23:04 ` [RFC 3/4] auto-t: add generic tx_packet function James Prestwood
@ 2022-08-09 23:04 ` James Prestwood
  2022-08-15 14:17 ` [RFC 0/4] Roam based on packet loss event Michael Johnson
  2022-08-16 20:31 ` Denis Kenzior
  5 siblings, 0 replies; 8+ messages in thread
From: James Prestwood @ 2022-08-09 23:04 UTC (permalink / raw)
  To: iwd; +Cc: James Prestwood, mjohnson459

A new test which blocks all data frames once connected, then tries
to send 100 packets. This should result in the kernel sending a
packet loss event to userspace, which IWD should react to and roam.
---
 autotests/testPSK-roam/connection_test.py | 46 ++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/autotests/testPSK-roam/connection_test.py b/autotests/testPSK-roam/connection_test.py
index f1b43130..c926e255 100644
--- a/autotests/testPSK-roam/connection_test.py
+++ b/autotests/testPSK-roam/connection_test.py
@@ -13,7 +13,7 @@ from hostapd import HostapdCLI
 import testutil
 
 class Test(unittest.TestCase):
-    def validate_connection(self, wd, over_ds=False):
+    def validate_connection(self, wd, over_ds=False, pkt_loss=False):
         device = wd.list_devices(1)[0]
 
         ordered_network = device.get_ordered_network('TestFT', full_scan=True)
@@ -41,11 +41,24 @@ class Test(unittest.TestCase):
         if over_ds:
             self.rule0.enabled = True
 
-        device.roam(self.bss_hostapd[1].bssid)
+        if pkt_loss:
+            # Drop all data frames
+            self.rule1.enabled = True
+            # Set the current BSS signal lower so we have roam candidates
+            self.rule2.enabled = True
+            # Send 100 packets (to be dropped), should trigger beacon loss
+            testutil.tx_packets(device.name, self.bss_hostapd[0].ifname, 100)
+            device.wait_for_event('packet-loss-roam', timeout=30)
+        else:
+            device.roam(self.bss_hostapd[1].bssid)
 
         condition = 'obj.state == DeviceState.roaming'
         wd.wait_for_object_condition(device, condition)
 
+        if pkt_loss:
+            self.rule1.enabled = False
+            self.rule2.enabled = False
+
         # Check that iwd is on BSS 1 once out of roaming state and doesn't
         # go through 'disconnected', 'autoconnect', 'connecting' in between
         from_condition = 'obj.state == DeviceState.roaming'
@@ -132,6 +145,23 @@ class Test(unittest.TestCase):
 
         self.validate_connection(wd)
 
+    def test_roam_packet_loss(self):
+        wd = IWD(True)
+
+        self.bss_hostapd[0].set_value('wpa_key_mgmt', 'FT-PSK')
+        self.bss_hostapd[0].set_value('ft_over_ds', '0')
+        self.bss_hostapd[0].set_value('ocv', '1')
+        self.bss_hostapd[0].reload()
+        self.bss_hostapd[0].wait_for_event("AP-ENABLED")
+
+        self.bss_hostapd[1].set_value('wpa_key_mgmt', 'FT-PSK')
+        self.bss_hostapd[1].set_value('ft_over_ds', '0')
+        self.bss_hostapd[0].set_value('ocv', '1')
+        self.bss_hostapd[1].reload()
+        self.bss_hostapd[1].wait_for_event("AP-ENABLED")
+
+        self.validate_connection(wd, pkt_loss=True)
+
     def tearDown(self):
         os.system('ip link set "' + self.bss_hostapd[0].ifname + '" down')
         os.system('ip link set "' + self.bss_hostapd[1].ifname + '" down')
@@ -139,6 +169,8 @@ class Test(unittest.TestCase):
         os.system('ip link set "' + self.bss_hostapd[1].ifname + '" up')
 
         self.rule0.enabled = False
+        self.rule1.enabled = False
+        self.rule2.enabled = False
 
     @classmethod
     def setUpClass(cls):
@@ -148,6 +180,7 @@ class Test(unittest.TestCase):
 
         cls.bss_hostapd = [ HostapdCLI(config='ft-psk-ccmp-1.conf'),
                             HostapdCLI(config='ft-psk-ccmp-2.conf') ]
+        rad0 = hwsim.get_radio('rad0')
         rad2 = hwsim.get_radio('rad2')
 
         cls.rule0 = hwsim.rules.create()
@@ -157,6 +190,15 @@ class Test(unittest.TestCase):
         cls.rule0.prefix = 'b0'
         cls.rule0.drop = True
 
+        cls.rule1 = hwsim.rules.create()
+        cls.rule1.source = rad2.addresses[0]
+        cls.rule1.prefix = '08'
+        cls.rule1.drop = True
+
+        cls.rule2 = hwsim.rules.create()
+        cls.rule2.source = rad0.addresses[0]
+        cls.rule2.signal = -4000
+
         # Set interface addresses to those expected by hostapd config files
         os.system('ip link set dev "' + cls.bss_hostapd[0].ifname + '" down')
         os.system('ip link set dev "' + cls.bss_hostapd[0].ifname + '" addr 12:00:00:00:00:01 up')
-- 
2.34.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC 0/4] Roam based on packet loss event
  2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
                   ` (3 preceding siblings ...)
  2022-08-09 23:04 ` [RFC 4/4] auto-t: add packet loss test to testPSK-roam James Prestwood
@ 2022-08-15 14:17 ` Michael Johnson
  2022-08-15 16:25   ` James Prestwood
  2022-08-16 20:31 ` Denis Kenzior
  5 siblings, 1 reply; 8+ messages in thread
From: Michael Johnson @ 2022-08-15 14:17 UTC (permalink / raw)
  To: James Prestwood; +Cc: iwd

Hi James,

I've been testing this today and it does seem to do what is intended.
When we get a packet loss event we start a scan.

```
Aug 15 12:07:17 p3-1337 iwd[447]: src/netdev.c:netdev_mlme_notify()
MLME notification Notify CQM(64)
Aug 15 12:07:17 p3-1337 iwd[447]: src/netdev.c:netdev_cqm_event()
Signal change event (above=1 signal=-60)
Aug 15 12:08:20 p3-1337 iwd[447]: src/netdev.c:netdev_mlme_notify()
MLME notification Notify CQM(64)
Aug 15 12:08:20 p3-1337 iwd[447]: src/station.c:station_packets_lost()
Packets lost event: 10
Aug 15 12:08:20 p3-1337 iwd[447]: src/station.c:station_roam_scan() ifindex: 5
Aug 15 12:08:20 p3-1337 iwd[447]:
src/wiphy.c:wiphy_radio_work_insert() Inserting work item 61
Aug 15 12:08:20 p3-1337 iwd[447]: src/wiphy.c:wiphy_radio_work_next()
Starting work item 61
Aug 15 12:08:20 p3-1337 iwd[447]: src/station.c:station_start_roam()
Using cached neighbor report for roam
Aug 15 12:08:20 p3-1337 iwd[447]: src/scan.c:scan_notify() Scan
notification Trigger Scan(33)
```

In most cases, this does result in a better BSSID being found and a
roam occuring. Occasionally a better BSSID isn't found and the roam
fails, presumably when the packet loss event is not directly caused by
a low RSSI. Either way the new behaviour does add more opportunities
for fixing itself so seems an improvement, thanks!

Regards,
Michael

On Wed, 10 Aug 2022 at 00:04, James Prestwood <prestwoj@gmail.com> wrote:
>
> A few weeks ago a user (CC'ed) reported that his driver was not roaming
> as expected, and would get disconnected before any CQM RSSI events got
> to userspace. He did see that the kernel was sending packet loss events
> which should also be a good reason to roam.
>
> These patches do just that, roam based on packet loss events.
>
> Sending as an RFC so Michael can (hopefully) give this a trial run. I
> can simulate packet loss events in a virtual environment, but testing
> on the driver in question would be best.
>
> James Prestwood (4):
>   station: react to (new) netdev packet loss event
>   netdev: handle packet loss notification
>   auto-t: add generic tx_packet function
>   auto-t: add packet loss test to testPSK-roam
>
>  autotests/testPSK-roam/connection_test.py | 46 ++++++++++++++++++++++-
>  autotests/util/testutil.py                |  7 ++++
>  src/netdev.c                              |  8 ++--
>  src/netdev.h                              |  1 +
>  src/station.c                             | 39 ++++++++++++++++---
>  5 files changed, 90 insertions(+), 11 deletions(-)
>
> --
> 2.34.3
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC 0/4] Roam based on packet loss event
  2022-08-15 14:17 ` [RFC 0/4] Roam based on packet loss event Michael Johnson
@ 2022-08-15 16:25   ` James Prestwood
  0 siblings, 0 replies; 8+ messages in thread
From: James Prestwood @ 2022-08-15 16:25 UTC (permalink / raw)
  To: Michael Johnson; +Cc: iwd

On Mon, 2022-08-15 at 15:17 +0100, Michael Johnson wrote:
> Hi James,
> 
> I've been testing this today and it does seem to do what is intended.
> When we get a packet loss event we start a scan.
> 
> ```
> Aug 15 12:07:17 p3-1337 iwd[447]: src/netdev.c:netdev_mlme_notify()
> MLME notification Notify CQM(64)
> Aug 15 12:07:17 p3-1337 iwd[447]: src/netdev.c:netdev_cqm_event()
> Signal change event (above=1 signal=-60)
> Aug 15 12:08:20 p3-1337 iwd[447]: src/netdev.c:netdev_mlme_notify()
> MLME notification Notify CQM(64)
> Aug 15 12:08:20 p3-1337 iwd[447]:
> src/station.c:station_packets_lost()
> Packets lost event: 10
> Aug 15 12:08:20 p3-1337 iwd[447]: src/station.c:station_roam_scan()
> ifindex: 5
> Aug 15 12:08:20 p3-1337 iwd[447]:
> src/wiphy.c:wiphy_radio_work_insert() Inserting work item 61
> Aug 15 12:08:20 p3-1337 iwd[447]: src/wiphy.c:wiphy_radio_work_next()
> Starting work item 61
> Aug 15 12:08:20 p3-1337 iwd[447]: src/station.c:station_start_roam()
> Using cached neighbor report for roam
> Aug 15 12:08:20 p3-1337 iwd[447]: src/scan.c:scan_notify() Scan
> notification Trigger Scan(33)
> ```
> 
> In most cases, this does result in a better BSSID being found and a
> roam occuring. Occasionally a better BSSID isn't found and the roam
> fails, presumably when the packet loss event is not directly caused
> by
> a low RSSI. Either way the new behaviour does add more opportunities
> for fixing itself so seems an improvement, thanks!

Great, thanks for testing!

As far as not finding a better BSS things become a bit harder to handle
since we don't want to affect the existing roam behavior which has
worked well for the majority of users. We are still talking about ideas
to handle this one.

Anyways, at least its an improvement.

Thanks,
James

 

> 
> Regards,
> Michael
> 
> On Wed, 10 Aug 2022 at 00:04, James Prestwood <prestwoj@gmail.com>
> wrote:
> > 
> > A few weeks ago a user (CC'ed) reported that his driver was not
> > roaming
> > as expected, and would get disconnected before any CQM RSSI events
> > got
> > to userspace. He did see that the kernel was sending packet loss
> > events
> > which should also be a good reason to roam.
> > 
> > These patches do just that, roam based on packet loss events.
> > 
> > Sending as an RFC so Michael can (hopefully) give this a trial run.
> > I
> > can simulate packet loss events in a virtual environment, but
> > testing
> > on the driver in question would be best.
> > 
> > James Prestwood (4):
> >   station: react to (new) netdev packet loss event
> >   netdev: handle packet loss notification
> >   auto-t: add generic tx_packet function
> >   auto-t: add packet loss test to testPSK-roam
> > 
> >  autotests/testPSK-roam/connection_test.py | 46
> > ++++++++++++++++++++++-
> >  autotests/util/testutil.py                |  7 ++++
> >  src/netdev.c                              |  8 ++--
> >  src/netdev.h                              |  1 +
> >  src/station.c                             | 39 ++++++++++++++++---
> >  5 files changed, 90 insertions(+), 11 deletions(-)
> > 
> > --
> > 2.34.3
> > 



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC 0/4] Roam based on packet loss event
  2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
                   ` (4 preceding siblings ...)
  2022-08-15 14:17 ` [RFC 0/4] Roam based on packet loss event Michael Johnson
@ 2022-08-16 20:31 ` Denis Kenzior
  5 siblings, 0 replies; 8+ messages in thread
From: Denis Kenzior @ 2022-08-16 20:31 UTC (permalink / raw)
  To: James Prestwood, iwd; +Cc: mjohnson459

Hi James,

On 8/9/22 18:04, James Prestwood wrote:
> A few weeks ago a user (CC'ed) reported that his driver was not roaming
> as expected, and would get disconnected before any CQM RSSI events got
> to userspace. He did see that the kernel was sending packet loss events
> which should also be a good reason to roam.
> 
> These patches do just that, roam based on packet loss events.
> 
> Sending as an RFC so Michael can (hopefully) give this a trial run. I
> can simulate packet loss events in a virtual environment, but testing
> on the driver in question would be best.
> 

I went ahead and applied this series.  Hopefully the default packet loss 
threshold is good enough.  Worst case we might have to make it configurable or 
disable it on certain drivers.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-08-16 20:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-09 23:04 [RFC 0/4] Roam based on packet loss event James Prestwood
2022-08-09 23:04 ` [RFC 1/4] station: react to (new) netdev " James Prestwood
2022-08-09 23:04 ` [RFC 2/4] netdev: handle packet loss notification James Prestwood
2022-08-09 23:04 ` [RFC 3/4] auto-t: add generic tx_packet function James Prestwood
2022-08-09 23:04 ` [RFC 4/4] auto-t: add packet loss test to testPSK-roam James Prestwood
2022-08-15 14:17 ` [RFC 0/4] Roam based on packet loss event Michael Johnson
2022-08-15 16:25   ` James Prestwood
2022-08-16 20:31 ` Denis Kenzior

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.