All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] anqp: refactor to use frame-xchg
@ 2020-05-27 20:45 James Prestwood
  2020-05-27 20:45 ` [PATCH 2/2] auto-t: work around race between ANQP and dbus Connect() James Prestwood
  2020-05-28 15:42 ` [PATCH 1/2] anqp: refactor to use frame-xchg Denis Kenzior
  0 siblings, 2 replies; 8+ messages in thread
From: James Prestwood @ 2020-05-27 20:45 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 18621 bytes --]

The new frame-xchg module now handles a lot of what ANQP used to do. ANQP
now does not need to depend on nl80211/netdev for building and sending
frames. It also no longer needs any of the request lookups or frame
watches because frame-xchg filters this for us. The request queue did
need to remain in ANQP because station fires off ANQP requests all in
a row, and frame-xchg cannot handle this. Now ANQP just sends out the
first request, and queue's the rest. As the responses come in it will
send out the next frame in the queue until there are no more left.

The anqp_request API was changed to only return a bool, as well as
take the wdev_id rather than ifindex. Station was updated to remove
the 'pending' member since it was not actually used, as well as fix
a bug on the timeout path where the request queue would get popped
twice.

There is somewhat of a TODO that was overlooked initially with ANQP
and was realized when testing these patches. Since the network_info
depends on ANQP completing there is a race condition between scanning
finishing and being ready to connect to networks which require ANQP.
The autoconnect case does not have this issue, but explicit Dbus
connects do. In practice this may not matter since the user would have
to issue a connect at just the right time, but it could still happen.
This may need to be fixed in network where we see if there are any
pending ANQP requests before we return NotConfigured. If there are
pending requests we should wait for those, then attempt to connect.
---
 src/anqp.c    | 438 +++++++++++++-------------------------------------
 src/anqp.h    |   2 +-
 src/station.c |  16 +-
 3 files changed, 114 insertions(+), 342 deletions(-)

diff --git a/src/anqp.c b/src/anqp.c
index 6627bd81..504b5bdf 100644
--- a/src/anqp.c
+++ b/src/anqp.c
@@ -31,105 +31,63 @@
 #include "src/module.h"
 #include "src/anqp.h"
 #include "src/util.h"
-#include "src/eap-private.h"
 #include "src/ie.h"
-#include "src/nl80211util.h"
-#include "src/nl80211cmd.h"
 #include "src/scan.h"
-#include "src/netdev.h"
 #include "src/iwd.h"
 #include "src/mpdu.h"
-#include "src/wiphy.h"
+#include "src/frame-xchg.h"
 
 #include "linux/nl80211.h"
 
+#define ANQP_GROUP	1
+
 struct anqp_request {
-	uint32_t ifindex;
+	uint64_t wdev_id;
 	anqp_response_func_t anqp_cb;
 	anqp_destroy_func_t anqp_destroy;
 	void *anqp_data;
-	uint64_t anqp_cookie;
 	uint8_t anqp_token;
+	uint32_t frequency;
+	uint8_t *frame;
+	size_t frame_len;
 };
 
-static struct l_genl_family *nl80211 = NULL;
-
 static struct l_queue *anqp_requests;
 static uint8_t anqp_token = 0;
 
-static uint32_t netdev_watch;
-static uint32_t unicast_watch;
-
 static void anqp_destroy(void *user_data)
 {
 	struct anqp_request *request = user_data;
 
 	if (request->anqp_destroy)
 		request->anqp_destroy(request->anqp_data);
-}
-
-static void netdev_gas_request_cb(struct l_genl_msg *msg, void *user_data)
-{
-	struct anqp_request *request = user_data;
-
-	if (l_genl_msg_get_error(msg) != 0)
-		goto error;
-
-	if (nl80211_parse_attrs(msg, NL80211_ATTR_COOKIE, &request->anqp_cookie,
-					NL80211_ATTR_UNSPEC) < 0)
-		goto error;
-
-	return;
-
-error:
-	l_debug("Error sending CMD_FRAME (%d)", l_genl_msg_get_error(msg));
-
-	if (request->anqp_cb)
-		request->anqp_cb(ANQP_FAILED, NULL, 0, request->anqp_data);
-
-	if (request->anqp_destroy)
-		request->anqp_destroy(request->anqp_data);
 
+	l_free(request->frame);
 	l_free(request);
 }
 
-static bool match_token(const void *a, const void *b)
-{
-	const struct anqp_request *request = a;
-	const struct token_match {
-		uint32_t ifindex;
-		uint8_t token;
-
-	} *match = b;
+static void anqp_request_next(void);
 
-	if (request->ifindex != match->ifindex)
-		return false;
-
-	if (request->anqp_token != match->token)
-		return false;
-
-	return true;
-}
-
-static void anqp_response_frame_event(uint32_t ifindex,
-					const struct mmpdu_header *hdr,
-					const void *body, size_t body_len)
+/*
+ * By using frame-xchg we should get called back here for any frame matching our
+ * prefix until the duration expires. If frame-xchg is never signalled 'done'
+ * (returning true) we should get a timeout in anqp_frame_timeout. This is
+ * why we drop any improperly formatted frames without cleaning up the request.
+ */
+static bool anqp_response_frame_event(const struct mmpdu_header *hdr,
+					const void *body, size_t body_len,
+					int rssi, void *user_data)
 {
-	struct anqp_request *request;
+	struct anqp_request *request = user_data;
 	const uint8_t *ptr = body;
 	uint16_t status_code;
 	uint16_t delay;
 	uint16_t qrlen;
 	uint8_t adv_proto_len;
 	uint8_t token;
-	struct token_match {
-		uint32_t ifindex;
-		uint8_t token;
-
-	} match;
 
 	if (body_len < 9)
-		return;
+		return false;
 
 	/* Skip past category/action since this frame was prefixed matched */
 	ptr += 2;
@@ -138,12 +96,8 @@ static void anqp_response_frame_event(uint32_t ifindex,
 	/* dialog token */
 	token = *ptr++;
 
-	match.ifindex = ifindex;
-	match.token = token;
-
-	request = l_queue_find(anqp_requests, match_token, &match);
-	if (!request)
-		return;
+	if (request->anqp_token != token)
+		return false;
 
 	status_code = l_get_le16(ptr);
 	ptr += 2;
@@ -151,7 +105,7 @@ static void anqp_response_frame_event(uint32_t ifindex,
 
 	if (status_code != 0) {
 		l_error("Bad status code on GAS response %u", status_code);
-		return;
+		return false;
 	}
 
 	delay = l_get_le16(ptr);
@@ -166,12 +120,12 @@ static void anqp_response_frame_event(uint32_t ifindex,
 	 */
 	if (delay != 0) {
 		l_error("GAS comeback delay was not zero");
-		return;
+		return false;
 	}
 
 	if (*ptr != IE_TYPE_ADVERTISEMENT_PROTOCOL) {
 		l_error("GAS request not advertisement protocol");
-		return;
+		return false;
 	}
 
 	ptr++;
@@ -181,339 +135,163 @@ static void anqp_response_frame_event(uint32_t ifindex,
 	body_len--;
 
 	if (body_len < adv_proto_len)
-		return;
+		return false;
 
 	ptr += adv_proto_len;
 	body_len -= adv_proto_len;
 
 	if (body_len < 2)
-		return;
+		return false;
 
 	qrlen = l_get_le16(ptr);
 	ptr += 2;
 
 	if (body_len < qrlen)
-		return;
-
-	l_queue_remove(anqp_requests, request);
+		return false;
 
 	l_debug("ANQP response received from "MAC, MAC_STR(hdr->address_2));
 
 	if (request->anqp_cb)
-		request->anqp_cb(ANQP_SUCCESS, ptr, qrlen,
-					request->anqp_data);
+		request->anqp_cb(ANQP_SUCCESS, ptr, qrlen, request->anqp_data);
 
-	if (request->anqp_destroy)
-		request->anqp_destroy(request->anqp_data);
+	l_queue_remove(anqp_requests, request);
+	anqp_destroy(request);
 
-	l_free(request);
+	if (l_queue_isempty(anqp_requests))
+		return true;
 
-	return;
+	anqp_request_next();
+
+	return false;
 }
 
-static void netdev_gas_timeout_cb(void *user_data)
+static const struct frame_xchg_prefix anqp_frame_prefix = {
+	.data = (uint8_t []) {
+		0x04, 0x0b,
+	},
+	.len = 2,
+};
+
+static void anqp_frame_timeout(int error, void *user_data)
 {
 	struct anqp_request *request = user_data;
+	enum anqp_result result = ANQP_TIMEOUT;
 
-	l_debug("GAS request timed out");
+	if (error < 0) {
+		result = ANQP_FAILED;
+		l_error("Sending ANQP request failed: %s (%i)",
+			strerror(-error), -error);
+	}
 
 	if (request->anqp_cb)
-		request->anqp_cb(ANQP_TIMEOUT, NULL, 0,
-					request->anqp_data);
+		request->anqp_cb(result, NULL, 0, request->anqp_data);
 
-	/* allows anqp_request to be re-entrant */
 	if (request->anqp_destroy)
 		request->anqp_destroy(request->anqp_data);
 
 	l_queue_remove(anqp_requests, request);
-	l_free(request);
-}
-
-static bool match_cookie(const void *a, const void *b)
-{
-	const struct anqp_request *request = a;
-	const struct cookie_match {
-		uint64_t cookie;
-		uint32_t ifindex;
-	} *match = b;
-
-	if (match->ifindex != request->ifindex)
-		return false;
-
-	if (match->cookie != request->anqp_cookie)
-		return false;
+	anqp_destroy(request);
 
-	return true;
-}
-
-static void anqp_frame_wait_cancel_event(struct l_genl_msg *msg,
-						uint32_t ifindex)
-{
-	uint64_t cookie;
-	struct anqp_request *request;
-	struct cookie_match {
-		uint64_t cookie;
-		uint32_t ifindex;
-	} match;
-
-	l_debug("");
-
-	if (nl80211_parse_attrs(msg, NL80211_ATTR_COOKIE, &cookie,
-					NL80211_ATTR_UNSPEC) < 0)
-		return;
-
-	match.cookie = cookie;
-	match.ifindex = ifindex;
-
-	request = l_queue_find(anqp_requests, match_cookie, &match);
-	if (!request)
-		return;
-
-	if (cookie != request->anqp_cookie)
+	if (l_queue_isempty(anqp_requests))
 		return;
 
-	netdev_gas_timeout_cb(request);
+	anqp_request_next();
 }
 
-uint32_t anqp_request(uint32_t ifindex, const uint8_t *addr,
-				struct scan_bss *bss, const uint8_t *anqp,
-				size_t len, anqp_response_func_t cb,
-				void *user_data, anqp_destroy_func_t destroy)
+static void anqp_request_next(void)
 {
 	struct anqp_request *request;
-	uint8_t frame[512];
-	struct l_genl_msg *msg;
 	struct iovec iov[2];
-	uint32_t id;
-	uint32_t duration = 300;
-	struct netdev *netdev = netdev_find(ifindex);
-
-	if (!netdev)
-		return 0;
-
-	/*
-	 * TODO: Netdev dependencies will eventually be removed so we need
-	 * another way to figure out wiphy capabilities.
-	 */
-	if (!wiphy_can_offchannel_tx(netdev_get_wiphy(netdev))) {
-		l_error("ANQP failed, driver does not support offchannel TX");
-		return 0;
-	}
-
-	frame[0] = 0x04;		/* Category: Public */
-	frame[1] = 0x0a;		/* Action: GAS initial Request */
-	frame[2] = anqp_token;		/* Dialog Token */
-	frame[3] = IE_TYPE_ADVERTISEMENT_PROTOCOL;
-	frame[4] = 2;
-	frame[5] = 0x7f;
-	frame[6] = IE_ADVERTISEMENT_ANQP;
-	l_put_le16(len, frame + 7);
-
-	iov[0].iov_base = frame;
-	iov[0].iov_len = 9;
-	iov[1].iov_base = (void *)anqp;
-	iov[1].iov_len = len;
-
-	request = l_new(struct anqp_request, 1);
-
-	request->ifindex = ifindex;
-	request->anqp_cb = cb;
-	request->anqp_destroy = destroy;
-	request->anqp_token = anqp_token++;
-	request->anqp_data = user_data;
-
-	msg = nl80211_build_cmd_frame(ifindex, addr, bss->addr,
-					bss->frequency, iov, 2);
 
-	l_genl_msg_append_attr(msg, NL80211_ATTR_OFFCHANNEL_TX_OK, 0, "");
-	l_genl_msg_append_attr(msg, NL80211_ATTR_DURATION, 4, &duration);
+	if (l_queue_isempty(anqp_requests))
+		return;
 
-	id = l_genl_family_send(nl80211, msg, netdev_gas_request_cb,
-					request, NULL);
+	request = l_queue_peek_head(anqp_requests);
 
-	if (!id) {
-		l_debug("Failed to send ANQP request");
-		l_genl_msg_unref(msg);
-		l_free(request);
-		return 0;
-	}
+	iov[0].iov_base = request->frame;
+	iov[0].iov_len = request->frame_len;
+	iov[1].iov_base = NULL;
 
-	l_debug("ANQP request sent to "MAC, MAC_STR(bss->addr));
-
-	l_queue_push_head(anqp_requests, request);
+	l_debug("Sending ANQP request\n");
 
-	return id;
+	frame_xchg_start(request->wdev_id, iov, request->frequency, 0, 300, 0,
+				ANQP_GROUP, anqp_frame_timeout, request,
+				&anqp_frame_prefix, anqp_response_frame_event,
+				NULL);
 }
 
-static void netdev_frame_cb(struct l_genl_msg *msg, void *user_data)
+static uint8_t *anqp_build_frame(const uint8_t *addr, struct scan_bss *bss,
+					const uint8_t *anqp, size_t len,
+					size_t *len_out)
 {
-	if (l_genl_msg_get_error(msg) < 0)
-		l_error("Could not register frame watch type %04x: %i",
-			L_PTR_TO_UINT(user_data), l_genl_msg_get_error(msg));
-}
-
-static void anqp_register_frame(uint32_t ifindex)
-{
-	struct l_genl_msg *msg;
-	uint16_t frame_type = 0x00d0;
-	uint8_t prefix[] = { 0x04, 0x0b };
-
-	msg = l_genl_msg_new_sized(NL80211_CMD_REGISTER_FRAME, 34);
+	uint8_t *frame = l_malloc(len + 33);
+	uint8_t *ptr;
 
-	l_genl_msg_append_attr(msg, NL80211_ATTR_IFINDEX, 4, &ifindex);
-	l_genl_msg_append_attr(msg, NL80211_ATTR_FRAME_TYPE, 2, &frame_type);
-	l_genl_msg_append_attr(msg, NL80211_ATTR_FRAME_MATCH,
-					sizeof(prefix), prefix);
+	memset(frame, 0, len + 33);
 
-	l_genl_family_send(nl80211, msg, netdev_frame_cb,
-			L_UINT_TO_PTR(frame_type), NULL);
-}
+	l_put_le16(0x00d0, frame + 0);
+	memcpy(frame + 4, bss->addr, 6);
+	memcpy(frame + 10, addr, 6);
+	memcpy(frame + 16, bss->addr, 6);
 
-static void anqp_netdev_watch(struct netdev *netdev,
-				enum netdev_watch_event event, void *user_data)
-{
-	switch (event) {
-	case NETDEV_WATCH_EVENT_NEW:
-		if (netdev_get_iftype(netdev) == NETDEV_IFTYPE_STATION)
-			anqp_register_frame(netdev_get_ifindex(netdev));
+	ptr = frame + 24;
 
-		return;
-	default:
-		break;
-	}
-}
+	*ptr++ = 0x04;			/* Category: Public */
+	*ptr++ = 0x0a;			/* Action: GAS initial Request */
+	*ptr++ = anqp_token++;		/* Dialog Token */
+	*ptr++ = IE_TYPE_ADVERTISEMENT_PROTOCOL;
+	*ptr++ = 2;
 
-static void anqp_unicast_notify(struct l_genl_msg *msg, void *user_data)
-{
-	const struct mmpdu_header *mpdu = NULL;
-	const uint8_t *body;
-	struct l_genl_attr attr;
-	uint16_t type, len;
-	uint16_t frame_len = 0;
-	const void *data;
-	uint8_t cmd;
-	uint32_t ifindex = 0;
-
-	if (l_queue_isempty(anqp_requests))
-		return;
-
-	cmd = l_genl_msg_get_command(msg);
-	if (!cmd)
-		return;
-
-	if (!l_genl_attr_init(&attr, msg))
-		return;
-
-	while (l_genl_attr_next(&attr, &type, &len, &data)) {
-		switch (type) {
-		case NL80211_ATTR_IFINDEX:
-			if (len != sizeof(uint32_t)) {
-				l_warn("Invalid interface index attribute");
-				return;
-			}
-
-			ifindex = *((uint32_t *) data);
-
-			break;
-		case NL80211_ATTR_FRAME:
-			if (mpdu)
-				return;
-
-			mpdu = mpdu_validate(data, len);
-			if (!mpdu)
-				l_error("Frame didn't validate as MMPDU");
-
-			frame_len = len;
-			break;
-		}
-	}
+	*ptr++ = 0x7f;
+	*ptr++ = IE_ADVERTISEMENT_ANQP;
+	l_put_le16(len, ptr);
+	ptr += 2;
 
-	if (!ifindex || !mpdu)
-		return;
+	memcpy(ptr, anqp, len);
+	ptr += len;
 
-	body = mmpdu_body(mpdu);
+	*len_out = ptr - frame;
 
-	anqp_response_frame_event(ifindex, mpdu, body,
-				(const uint8_t *) mpdu + frame_len - body);
+	return frame;
 }
 
-static void anqp_mlme_notify(struct l_genl_msg *msg, void *user_data)
+bool anqp_request(uint64_t wdev_id, const uint8_t *addr,
+			struct scan_bss *bss, const uint8_t *anqp,
+			size_t len, anqp_response_func_t cb,
+			void *user_data, anqp_destroy_func_t destroy)
 {
-	struct l_genl_attr attr;
-	uint16_t type, len;
-	const void *data;
-	uint8_t cmd;
-	uint32_t ifindex = 0;
-
-	if (l_queue_isempty(anqp_requests))
-		return;
+	struct anqp_request *request;
 
-	cmd = l_genl_msg_get_command(msg);
+	request = l_new(struct anqp_request, 1);
 
-	l_debug("MLME notification %s(%u)", nl80211cmd_to_string(cmd), cmd);
+	request->wdev_id = wdev_id;
+	request->frequency = bss->frequency;
+	request->anqp_cb = cb;
+	request->anqp_destroy = destroy;
+	request->anqp_token = anqp_token;
+	request->anqp_data = user_data;
 
-	if (!l_genl_attr_init(&attr, msg))
-		return;
+	request->frame = anqp_build_frame(addr, bss, anqp, len,
+						&request->frame_len);
 
-	while (l_genl_attr_next(&attr, &type, &len, &data)) {
-		switch (type) {
-		case NL80211_ATTR_IFINDEX:
-			if (len != sizeof(uint32_t)) {
-				l_warn("Invalid interface index attribute");
-				return;
-			}
-
-			ifindex = *((uint32_t *) data);
-			break;
-		}
-	}
+	l_queue_push_head(anqp_requests, request);
 
-	if (!ifindex) {
-		l_warn("MLME notification is missing ifindex attribute");
-		return;
-	}
+	if (l_queue_length(anqp_requests) == 1)
+		anqp_request_next();
 
-	switch (cmd) {
-	case NL80211_CMD_FRAME_WAIT_CANCEL:
-		anqp_frame_wait_cancel_event(msg, ifindex);
-		break;
-	}
+	return true;
 }
 
 static int anqp_init(void)
 {
-	struct l_genl *genl = iwd_get_genl();
-
-	nl80211 = l_genl_family_new(genl, NL80211_GENL_NAME);
-
 	anqp_requests = l_queue_new();
 
-	netdev_watch =  netdev_watch_add(anqp_netdev_watch, NULL, NULL);
-
-	unicast_watch = l_genl_add_unicast_watch(genl, NL80211_GENL_NAME,
-						anqp_unicast_notify,
-						NULL, NULL);
-
-	if (!l_genl_family_register(nl80211, "mlme", anqp_mlme_notify,
-								NULL, NULL))
-		l_error("Registering for MLME notification failed");
-
 	return 0;
 }
 
 static void anqp_exit(void)
 {
-	struct l_genl *genl = iwd_get_genl();
-
-	l_genl_family_free(nl80211);
-	nl80211 = NULL;
-
 	l_queue_destroy(anqp_requests, anqp_destroy);
-
-	netdev_watch_remove(netdev_watch);
-
-	l_genl_remove_unicast_watch(genl, unicast_watch);
 }
 
 IWD_MODULE(anqp, anqp_init, anqp_exit);
-IWD_MODULE_DEPENDS(anqp, netdev);
diff --git a/src/anqp.h b/src/anqp.h
index 998277dd..99af76d5 100644
--- a/src/anqp.h
+++ b/src/anqp.h
@@ -34,7 +34,7 @@ typedef void (*anqp_response_func_t)(enum anqp_result result,
 					const void *anqp, size_t len,
 					void *user_data);
 
-uint32_t anqp_request(uint32_t ifindex, const uint8_t *addr,
+bool anqp_request(uint64_t wdev_id, const uint8_t *addr,
 			struct scan_bss *bss, const uint8_t *anqp, size_t len,
 			anqp_response_func_t cb, void *user_data,
 			anqp_destroy_func_t destroy);
diff --git a/src/station.c b/src/station.c
index c532319a..29f94e3b 100644
--- a/src/station.c
+++ b/src/station.c
@@ -104,7 +104,6 @@ struct station {
 struct anqp_entry {
 	struct station *station;
 	struct network *network;
-	uint32_t pending;
 };
 
 struct wiphy *station_get_wiphy(struct station *station)
@@ -447,12 +446,9 @@ static void station_anqp_response_cb(enum anqp_result result,
 	char **realms = NULL;
 	struct nai_search search;
 
-	entry->pending = 0;
-
 	l_debug("");
 
-	if (result == ANQP_TIMEOUT) {
-		l_queue_remove(station->anqp_pending, entry);
+	if (result != ANQP_SUCCESS) {
 		/* TODO: try next BSS */
 		goto request_done;
 	}
@@ -551,12 +547,10 @@ static bool station_start_anqp(struct station *station, struct network *network,
 	 * these are checked in hs20_find_settings_file.
 	 */
 
-	entry->pending = anqp_request(netdev_get_ifindex(station->netdev),
-					netdev_get_address(station->netdev),
-					bss, anqp, ptr - anqp,
-					station_anqp_response_cb,
-					entry, l_free);
-	if (!entry->pending) {
+	if (!anqp_request(netdev_get_wdev_id(station->netdev),
+				netdev_get_address(station->netdev), bss, anqp,
+				ptr - anqp, station_anqp_response_cb,
+				entry, l_free)) {
 		l_free(entry);
 		return false;
 	}
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] auto-t: work around race between ANQP and dbus Connect()
  2020-05-27 20:45 [PATCH 1/2] anqp: refactor to use frame-xchg James Prestwood
@ 2020-05-27 20:45 ` James Prestwood
  2020-05-28 15:53   ` Denis Kenzior
  2020-05-28 15:42 ` [PATCH 1/2] anqp: refactor to use frame-xchg Denis Kenzior
  1 sibling, 1 reply; 8+ messages in thread
From: James Prestwood @ 2020-05-27 20:45 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 1302 bytes --]

ANQP is performed as the scan results come in but scanning is still
signaled as done regardless of if ANQP has finished. Autoconnect
will wait for ANQP to complete, but as far as an explicit DBus call
we are just assuming ANQP finished. For now, we can work around this
in the autotest and wait a bit before issuing Connect(). The real
fix would either be to delay setting scanning as done, or have
network/station wait for ANQP to complete before checking the
network settings.
---
 autotests/testHotspot/hotspot_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/autotests/testHotspot/hotspot_test.py b/autotests/testHotspot/hotspot_test.py
index 0f18fae1..389038fd 100644
--- a/autotests/testHotspot/hotspot_test.py
+++ b/autotests/testHotspot/hotspot_test.py
@@ -11,6 +11,7 @@ from iwd import PSKAgent
 from iwd import NetworkType
 from hostapd import HostapdCLI
 import testutil
+from time import sleep
 
 class Test(unittest.TestCase):
 
@@ -33,6 +34,8 @@ class Test(unittest.TestCase):
         condition = 'not obj.scanning'
         wd.wait_for_object_condition(device, condition)
 
+        sleep(1)
+
         ordered_network = device.get_ordered_network('Hotspot')
 
         self.assertEqual(ordered_network.type, NetworkType.eap)
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] anqp: refactor to use frame-xchg
  2020-05-27 20:45 [PATCH 1/2] anqp: refactor to use frame-xchg James Prestwood
  2020-05-27 20:45 ` [PATCH 2/2] auto-t: work around race between ANQP and dbus Connect() James Prestwood
@ 2020-05-28 15:42 ` Denis Kenzior
  2020-05-28 17:01   ` James Prestwood
  1 sibling, 1 reply; 8+ messages in thread
From: Denis Kenzior @ 2020-05-28 15:42 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 3930 bytes --]

Hi James,

On 5/27/20 3:45 PM, James Prestwood wrote:
> The new frame-xchg module now handles a lot of what ANQP used to do. ANQP
> now does not need to depend on nl80211/netdev for building and sending
> frames. It also no longer needs any of the request lookups or frame
> watches because frame-xchg filters this for us. The request queue did
> need to remain in ANQP because station fires off ANQP requests all in
> a row, and frame-xchg cannot handle this. Now ANQP just sends out the
> first request, and queue's the rest. As the responses come in it will
> send out the next frame in the queue until there are no more left.
> 
> The anqp_request API was changed to only return a bool, as well as
> take the wdev_id rather than ifindex. Station was updated to remove
> the 'pending' member since it was not actually used, as well as fix
> a bug on the timeout path where the request queue would get popped
> twice.
> 
> There is somewhat of a TODO that was overlooked initially with ANQP
> and was realized when testing these patches. Since the network_info
> depends on ANQP completing there is a race condition between scanning
> finishing and being ready to connect to networks which require ANQP.
> The autoconnect case does not have this issue, but explicit Dbus
> connects do. In practice this may not matter since the user would have
> to issue a connect at just the right time, but it could still happen.
> This may need to be fixed in network where we see if there are any
> pending ANQP requests before we return NotConfigured. If there are
> pending requests we should wait for those, then attempt to connect.
> ---
>   src/anqp.c    | 438 +++++++++++++-------------------------------------
>   src/anqp.h    |   2 +-
>   src/station.c |  16 +-
>   3 files changed, 114 insertions(+), 342 deletions(-)
> 

<snip>

> +#define ANQP_GROUP	1
> +

Maybe a question for Andrew, but can't we fix frame-xchg to run this 
over group 0?

<snip>

>   
> -static uint32_t netdev_watch;

So how do you handle device removal while ANQP queue is running?

Right now I think the kernel ends up sending us FRAME_WAIT_CANCEL for 
all pending remain-on-channel requests if the device is removed.  So it 
would seem that we get lucky and this all works in the current 
implementation.

It doesn't look like frame-xchg does this though and depends on its own 
timers...

So either station/network has to cancel any pending anqp requests, or 
anqp has to somehow clean this up after itself.

<snip>

> diff --git a/src/anqp.h b/src/anqp.h
> index 998277dd..99af76d5 100644
> --- a/src/anqp.h
> +++ b/src/anqp.h
> @@ -34,7 +34,7 @@ typedef void (*anqp_response_func_t)(enum anqp_result result,
>   					const void *anqp, size_t len,
>   					void *user_data);
>   
> -uint32_t anqp_request(uint32_t ifindex, const uint8_t *addr,
> +bool anqp_request(uint64_t wdev_id, const uint8_t *addr,

See above for the likely reason why we had this return a uint32_t 
originally.

>   			struct scan_bss *bss, const uint8_t *anqp, size_t len,
>   			anqp_response_func_t cb, void *user_data,
>   			anqp_destroy_func_t destroy);
> diff --git a/src/station.c b/src/station.c
> index c532319a..29f94e3b 100644
> --- a/src/station.c
> +++ b/src/station.c
> @@ -104,7 +104,6 @@ struct station {
>   struct anqp_entry {
>   	struct station *station;
>   	struct network *network;
> -	uint32_t pending;
>   };

Do we currently send a single ANQP request per network?  If so, maybe we 
can drop this structure completely, by:

1. Adding network_get_station() getter if necessary
2. Using the network object itself as the user_data for 
station_anqp_response_cb
3. Maybe storing pending inside network itself?

The above would also make it easier to solve the issue of the user 
triggering Connect just when ANQP hasn't been done yet...

Regards,
-Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] auto-t: work around race between ANQP and dbus Connect()
  2020-05-27 20:45 ` [PATCH 2/2] auto-t: work around race between ANQP and dbus Connect() James Prestwood
@ 2020-05-28 15:53   ` Denis Kenzior
  0 siblings, 0 replies; 8+ messages in thread
From: Denis Kenzior @ 2020-05-28 15:53 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 1840 bytes --]

Hi James,

On 5/27/20 3:45 PM, James Prestwood wrote:
> ANQP is performed as the scan results come in but scanning is still
> signaled as done regardless of if ANQP has finished. Autoconnect
> will wait for ANQP to complete, but as far as an explicit DBus call
> we are just assuming ANQP finished. For now, we can work around this
> in the autotest and wait a bit before issuing Connect(). The real
> fix would either be to delay setting scanning as done, or have
> network/station wait for ANQP to complete before checking the
> network settings.
> ---
>   autotests/testHotspot/hotspot_test.py | 3 +++
>   1 file changed, 3 insertions(+)
> 
> diff --git a/autotests/testHotspot/hotspot_test.py b/autotests/testHotspot/hotspot_test.py
> index 0f18fae1..389038fd 100644
> --- a/autotests/testHotspot/hotspot_test.py
> +++ b/autotests/testHotspot/hotspot_test.py
> @@ -11,6 +11,7 @@ from iwd import PSKAgent
>   from iwd import NetworkType
>   from hostapd import HostapdCLI
>   import testutil
> +from time import sleep
>   
>   class Test(unittest.TestCase):
>   
> @@ -33,6 +34,8 @@ class Test(unittest.TestCase):
>           condition = 'not obj.scanning'
>           wd.wait_for_object_condition(device, condition)
>   
> +        sleep(1)
> +

So you know this, but the issue with sleep is that you're completely at 
the mercy of the scheduler and the speed of qemu.  This might work for 
you most of the time, but then it will occasionally fail.  Or it might 
fail reliably on a slower machine running qemu.

In general I think we need to go in and eliminate all sleeps in the test 
scripts.  Or at least as many as we can.

>           ordered_network = device.get_ordered_network('Hotspot')
>   
>           self.assertEqual(ordered_network.type, NetworkType.eap)
> 

Regards,
-Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] anqp: refactor to use frame-xchg
  2020-05-28 15:42 ` [PATCH 1/2] anqp: refactor to use frame-xchg Denis Kenzior
@ 2020-05-28 17:01   ` James Prestwood
  2020-05-28 17:40     ` Denis Kenzior
  0 siblings, 1 reply; 8+ messages in thread
From: James Prestwood @ 2020-05-28 17:01 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 5055 bytes --]

On Tue, 2020-05-26 at 18:39 -0500, Denis Kenzior wrote:
> Hi James,
> 
> On 5/27/20 3:45 PM, James Prestwood wrote:
> > The new frame-xchg module now handles a lot of what ANQP used to
> > do. ANQP
> > now does not need to depend on nl80211/netdev for building and
> > sending
> > frames. It also no longer needs any of the request lookups or frame
> > watches because frame-xchg filters this for us. The request queue
> > did
> > need to remain in ANQP because station fires off ANQP requests all
> > in
> > a row, and frame-xchg cannot handle this. Now ANQP just sends out
> > the
> > first request, and queue's the rest. As the responses come in it
> > will
> > send out the next frame in the queue until there are no more left.
> > 
> > The anqp_request API was changed to only return a bool, as well as
> > take the wdev_id rather than ifindex. Station was updated to remove
> > the 'pending' member since it was not actually used, as well as fix
> > a bug on the timeout path where the request queue would get popped
> > twice.
> > 
> > There is somewhat of a TODO that was overlooked initially with ANQP
> > and was realized when testing these patches. Since the network_info
> > depends on ANQP completing there is a race condition between
> > scanning
> > finishing and being ready to connect to networks which require
> > ANQP.
> > The autoconnect case does not have this issue, but explicit Dbus
> > connects do. In practice this may not matter since the user would
> > have
> > to issue a connect at just the right time, but it could still
> > happen.
> > This may need to be fixed in network where we see if there are any
> > pending ANQP requests before we return NotConfigured. If there are
> > pending requests we should wait for those, then attempt to connect.
> > ---
> >   src/anqp.c    | 438 +++++++++++++------------------------------
> > -------
> >   src/anqp.h    |   2 +-
> >   src/station.c |  16 +-
> >   3 files changed, 114 insertions(+), 342 deletions(-)
> > 
> 
> <snip>
> 
> > +#define ANQP_GROUP	1
> > +
> 
> Maybe a question for Andrew, but can't we fix frame-xchg to run this 
> over group 0?
> 
> <snip>
> 
> >   
> > -static uint32_t netdev_watch;
> 
> So how do you handle device removal while ANQP queue is running?
> 
> Right now I think the kernel ends up sending us FRAME_WAIT_CANCEL
> for 
> all pending remain-on-channel requests if the device is removed.  So
> it 
> would seem that we get lucky and this all works in the current 
> implementation.
> 
> It doesn't look like frame-xchg does this though and depends on its
> own 
> timers...
> 
> So either station/network has to cancel any pending anqp requests,
> or 
> anqp has to somehow clean this up after itself.

I think doing this in station/network is the right choice since this
would allow us to remove the netdev dependency on ANQP.

> 
> <snip>
> 
> > diff --git a/src/anqp.h b/src/anqp.h
> > index 998277dd..99af76d5 100644
> > --- a/src/anqp.h
> > +++ b/src/anqp.h
> > @@ -34,7 +34,7 @@ typedef void (*anqp_response_func_t)(enum
> > anqp_result result,
> >   					const void *anqp, size_t len,
> >   					void *user_data);
> >   
> > -uint32_t anqp_request(uint32_t ifindex, const uint8_t *addr,
> > +bool anqp_request(uint64_t wdev_id, const uint8_t *addr,
> 
> See above for the likely reason why we had this return a uint32_t 
> originally.

I am kinda thinking this should be implemented in frame-xchg since
cancellation doesn't even exist at this point. Then we can just give
that ID directly to station and ANQP doesnt need to track it.

> 
> >   			struct scan_bss *bss, const uint8_t *anqp,
> > size_t len,
> >   			anqp_response_func_t cb, void *user_data,
> >   			anqp_destroy_func_t destroy);
> > diff --git a/src/station.c b/src/station.c
> > index c532319a..29f94e3b 100644
> > --- a/src/station.c
> > +++ b/src/station.c
> > @@ -104,7 +104,6 @@ struct station {
> >   struct anqp_entry {
> >   	struct station *station;
> >   	struct network *network;
> > -	uint32_t pending;
> >   };
> 
> Do we currently send a single ANQP request per network?  If so, maybe
> we 
> can drop this structure completely, by:
> 
> 1. Adding network_get_station() getter if necessary
> 2. Using the network object itself as the user_data for 
> station_anqp_response_cb
> 3. Maybe storing pending inside network itself?
> 
> The above would also make it easier to solve the issue of the user 
> triggering Connect just when ANQP hasn't been done yet...

The tricky part is that ANQP is directly tied to the scan results, so
putting anything in network is difficult. We could have
network_set_anqp_pending (or a better name), then if that was set and
Connect() came in we would know to wait. But that would get ugly as
station would need to detect that a Connect() came in (so another flag
in network + getter) while waiting for ANQP and to re-issue the
Connect() once ANQP completed.

> 
> Regards,
> -Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] anqp: refactor to use frame-xchg
  2020-05-28 17:01   ` James Prestwood
@ 2020-05-28 17:40     ` Denis Kenzior
  2020-05-29 16:19       ` James Prestwood
  0 siblings, 1 reply; 8+ messages in thread
From: Denis Kenzior @ 2020-05-28 17:40 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 1973 bytes --]

Hi James,

<snip>

>>>    
>>> -uint32_t anqp_request(uint32_t ifindex, const uint8_t *addr,
>>> +bool anqp_request(uint64_t wdev_id, const uint8_t *addr,
>>
>> See above for the likely reason why we had this return a uint32_t
>> originally.
> 
> I am kinda thinking this should be implemented in frame-xchg since
> cancellation doesn't even exist at this point. Then we can just give
> that ID directly to station and ANQP doesnt need to track it.
> 

I'm fine either way.  You could in theory do this in anqp.c for now and 
just use frame_xchg_stop for the ongoing operation.  But adding this 
directly too frame-xchg would be fine as well.

>> Do we currently send a single ANQP request per network?  If so, maybe
>> we
>> can drop this structure completely, by:
>>
>> 1. Adding network_get_station() getter if necessary
>> 2. Using the network object itself as the user_data for
>> station_anqp_response_cb
>> 3. Maybe storing pending inside network itself?
>>
>> The above would also make it easier to solve the issue of the user
>> triggering Connect just when ANQP hasn't been done yet...
> 
> The tricky part is that ANQP is directly tied to the scan results, so

Hm, how so?

> putting anything in network is difficult. We could have
> network_set_anqp_pending (or a better name), then if that was set and
> Connect() came in we would know to wait. But that would get ugly as
> station would need to detect that a Connect() came in (so another flag
> in network + getter) while waiting for ANQP and to re-issue the
> Connect() once ANQP completed.

Well, can't we just check if anqp is pending for the network inside 
network_connect() or so, and if it is, defer taking any action until 
anqp succeeds / fails?

If that is too complicated, then I guess we have to document that 
.Connect shouldn't be called until KnownNetwork property has been 
signaled and make our auto tests do this as well.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] anqp: refactor to use frame-xchg
  2020-05-29 16:19       ` James Prestwood
@ 2020-05-29 16:13         ` Denis Kenzior
  0 siblings, 0 replies; 8+ messages in thread
From: Denis Kenzior @ 2020-05-29 16:13 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 1465 bytes --]

Hi James,

>> Well, can't we just check if anqp is pending for the network inside
>> network_connect() or so, and if it is, defer taking any action until
>> anqp succeeds / fails?
> 
> Checking for pending is easy, defering is the hard part. We could set a
> flag in network if ANQP is pending, then if/when the network info gets
> set by station we could attempt a connect then (basically from inside
> network_set_info). But I have a feeling you wont like this, it just
> feels wrong.

Well we have to communicate between the two modules somehow.

> 
> Then a further issue is how do we ultimately decide when to fail with
> NotConfigured if ANQP fails or there is an error? Unless we add some
> other mechanism for station to signal that ANQP failed like
> network_set_info_failed(). Again doesn't really feel right.

So one thing we could do is to have network actually drive the anqp 
queries.  Might be what we want to do anyway since we actually should be 
doing multiple queries for grabbing icons, etc.  The scanning suspension 
and resumption of auto-connect is the only tricky part.  Perhaps we just 
have a reference count or something on the anqp request and station can 
kick off autoconnect when that reaches 0.

> 
> Maybe a per-station watch for ANQP that network can subscribe to?
> Network could get notified both if ANQP is being performed and when it
> finished.

Or something like this, yes.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] anqp: refactor to use frame-xchg
  2020-05-28 17:40     ` Denis Kenzior
@ 2020-05-29 16:19       ` James Prestwood
  2020-05-29 16:13         ` Denis Kenzior
  0 siblings, 1 reply; 8+ messages in thread
From: James Prestwood @ 2020-05-29 16:19 UTC (permalink / raw)
  To: iwd

[-- Attachment #1: Type: text/plain, Size: 3134 bytes --]

On Tue, 2020-05-26 at 19:10 -0500, Denis Kenzior wrote:
> Hi James,
> 
> <snip>
> 
> > > >    
> > > > -uint32_t anqp_request(uint32_t ifindex, const uint8_t *addr,
> > > > +bool anqp_request(uint64_t wdev_id, const uint8_t *addr,
> > > 
> > > See above for the likely reason why we had this return a uint32_t
> > > originally.
> > 
> > I am kinda thinking this should be implemented in frame-xchg since
> > cancellation doesn't even exist at this point. Then we can just
> > give
> > that ID directly to station and ANQP doesnt need to track it.
> > 
> 
> I'm fine either way.  You could in theory do this in anqp.c for now
> and 
> just use frame_xchg_stop for the ongoing operation.  But adding this 
> directly too frame-xchg would be fine as well.

Ok, I put it back in (properly, with a cancel API) to ANQP. I didn't
think about the fact that frame-xchg only handles single requests at a
time, so there isn't a point keeping IDs for requests in frame-xchg.

> 
> > > Do we currently send a single ANQP request per network?  If so,
> > > maybe
> > > we
> > > can drop this structure completely, by:
> > > 
> > > 1. Adding network_get_station() getter if necessary
> > > 2. Using the network object itself as the user_data for
> > > station_anqp_response_cb
> > > 3. Maybe storing pending inside network itself?
> > > 
> > > The above would also make it easier to solve the issue of the
> > > user
> > > triggering Connect just when ANQP hasn't been done yet...
> > 
> > The tricky part is that ANQP is directly tied to the scan results,
> > so
> 
> Hm, how so?
> 
> > putting anything in network is difficult. We could have
> > network_set_anqp_pending (or a better name), then if that was set
> > and
> > Connect() came in we would know to wait. But that would get ugly as
> > station would need to detect that a Connect() came in (so another
> > flag
> > in network + getter) while waiting for ANQP and to re-issue the
> > Connect() once ANQP completed.
> 
> Well, can't we just check if anqp is pending for the network inside 
> network_connect() or so, and if it is, defer taking any action until 
> anqp succeeds / fails?

Checking for pending is easy, defering is the hard part. We could set a
flag in network if ANQP is pending, then if/when the network info gets
set by station we could attempt a connect then (basically from inside
network_set_info). But I have a feeling you wont like this, it just
feels wrong.

Then a further issue is how do we ultimately decide when to fail with
NotConfigured if ANQP fails or there is an error? Unless we add some
other mechanism for station to signal that ANQP failed like
network_set_info_failed(). Again doesn't really feel right.

Maybe a per-station watch for ANQP that network can subscribe to?
Network could get notified both if ANQP is being performed and when it
finished.

> 
> If that is too complicated, then I guess we have to document that 
> .Connect shouldn't be called until KnownNetwork property has been 
> signaled and make our auto tests do this as well.
> 
> Regards,
> -Denis

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-05-29 16:19 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-27 20:45 [PATCH 1/2] anqp: refactor to use frame-xchg James Prestwood
2020-05-27 20:45 ` [PATCH 2/2] auto-t: work around race between ANQP and dbus Connect() James Prestwood
2020-05-28 15:53   ` Denis Kenzior
2020-05-28 15:42 ` [PATCH 1/2] anqp: refactor to use frame-xchg Denis Kenzior
2020-05-28 17:01   ` James Prestwood
2020-05-28 17:40     ` Denis Kenzior
2020-05-29 16:19       ` James Prestwood
2020-05-29 16:13         ` Denis Kenzior

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.