All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/11] mac80211: add TX fastpath
@ 2015-04-17 15:15 Johannes Berg
  2015-04-17 15:15 ` [PATCH 02/11] mac80211_hwsim: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
                   ` (11 more replies)
  0 siblings, 12 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

In order to speed up mac80211's TX path, add the "fast-xmit" cache,
which caches the data frame's 802.11 header and other data so that
the frame can be built more quickly. This cache is rebuilt whenever
external triggers imply changes, but in return many of the checks
done per packet today are reduced to the check for the cache.

There's also a more detailed description in the code.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |   6 +-
 net/mac80211/cfg.c         |   9 +-
 net/mac80211/chan.c        |   6 +
 net/mac80211/ieee80211_i.h |   5 +
 net/mac80211/key.c         |   2 +
 net/mac80211/rx.c          |   2 +
 net/mac80211/sta_info.c    |   4 +
 net/mac80211/sta_info.h    |  27 +++
 net/mac80211/tx.c          | 404 ++++++++++++++++++++++++++++++++++++++++++++-
 9 files changed, 462 insertions(+), 3 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 38a5fd790366..9001bd685b1e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1796,6 +1796,10 @@ struct ieee80211_txq {
  *	the driver returns 1. This also forces the driver to advertise its
  *	supported cipher suites.
  *
+ * @IEEE80211_HW_SUPPORT_FAST_XMIT: The driver/hardware supports fast-xmit,
+ *	this currently requires only the ability to calculate the duration
+ *	for frames.
+ *
  * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface
  *	queue mapping in order to use different queues (not just one per AC)
  *	for different virtual interfaces. See the doc section on HW queue
@@ -1844,7 +1848,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_WANT_MONITOR_VIF			= 1<<14,
 	IEEE80211_HW_NO_AUTO_VIF			= 1<<15,
 	IEEE80211_HW_SW_CRYPTO_CONTROL			= 1<<16,
-	/* free slots */
+	IEEE80211_HW_SUPPORT_FAST_XMIT			= 1<<17,
 	IEEE80211_HW_REPORTS_TX_ACK_STATUS		= 1<<18,
 	IEEE80211_HW_CONNECTION_MONITOR			= 1<<19,
 	IEEE80211_HW_QUEUE_CONTROL			= 1<<20,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 265e42721a66..4aa5e893cbaa 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	sdata->noack_map = noack_map;
+
+	ieee80211_check_fast_xmit_iface(sdata);
+
 	return 0;
 }
 
@@ -2099,10 +2102,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 	int err;
 
 	if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
+		ieee80211_check_fast_xmit_all(local);
+
 		err = drv_set_frag_threshold(local, wiphy->frag_threshold);
 
-		if (err)
+		if (err) {
+			ieee80211_check_fast_xmit_all(local);
 			return err;
+		}
 	}
 
 	if ((changed & WIPHY_PARAM_COVERAGE_CLASS) ||
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5bcd4e5589d3..7e9b62475400 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -664,6 +664,8 @@ out:
 		ieee80211_bss_info_change_notify(sdata,
 						 BSS_CHANGED_IDLE);
 
+	ieee80211_check_fast_xmit_iface(sdata);
+
 	return ret;
 }
 
@@ -1030,6 +1032,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
 	if (sdata->vif.type == NL80211_IFTYPE_AP)
 		__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
 
+	ieee80211_check_fast_xmit_iface(sdata);
+
 	if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
 		ieee80211_free_chanctx(local, old_ctx);
 
@@ -1376,6 +1380,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
 				__ieee80211_vif_copy_chanctx_to_vlans(sdata,
 								      false);
 
+			ieee80211_check_fast_xmit_iface(sdata);
+
 			sdata->radar_required = sdata->reserved_radar_required;
 
 			if (sdata->vif.bss_conf.chandef.width !=
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ab46ab4a7249..09a15a855c5a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1651,6 +1651,11 @@ struct sk_buff *
 ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
 			      struct sk_buff *skb, u32 info_flags);
 
+void ieee80211_check_fast_xmit(struct sta_info *sta, gfp_t gfp);
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
+void ieee80211_clear_fast_xmit(struct sta_info *sta);
+
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 2291cd730091..ac7eb9201ac1 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -229,6 +229,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
 
 	if (uni) {
 		rcu_assign_pointer(sdata->default_unicast_key, key);
+		ieee80211_check_fast_xmit_iface(sdata);
 		drv_set_default_unicast_key(sdata->local, sdata, idx);
 	}
 
@@ -298,6 +299,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 		if (pairwise) {
 			rcu_assign_pointer(sta->ptk[idx], new);
 			sta->ptk_idx = idx;
+			ieee80211_check_fast_xmit(sta, GFP_KERNEL);
 		} else {
 			rcu_assign_pointer(sta->gtk[idx], new);
 			sta->gtk_idx = idx;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index bc59c8a20a39..3d890643af2e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1199,6 +1199,8 @@ static void sta_ps_start(struct sta_info *sta)
 	ps_dbg(sdata, "STA %pM aid %d enters power save mode\n",
 	       sta->sta.addr, sta->sta.aid);
 
+	ieee80211_clear_fast_xmit(sta);
+
 	if (!sta->sta.txq[0])
 		return;
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 0800e02cce05..21462e3e68c9 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1201,6 +1201,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	ps_dbg(sdata,
 	       "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n",
 	       sta->sta.addr, sta->sta.aid, filtered, buffered);
+
+	ieee80211_check_fast_xmit(sta, GFP_ATOMIC);
 }
 
 static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
@@ -1720,6 +1722,7 @@ int sta_info_move_state(struct sta_info *sta,
 			     !sta->sdata->u.vlan.sta))
 				atomic_dec(&sta->sdata->bss->num_mcast_sta);
 			clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+			ieee80211_clear_fast_xmit(sta);
 		}
 		break;
 	case IEEE80211_STA_AUTHORIZED:
@@ -1729,6 +1732,7 @@ int sta_info_move_state(struct sta_info *sta,
 			     !sta->sdata->u.vlan.sta))
 				atomic_inc(&sta->sdata->bss->num_mcast_sta);
 			set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+			ieee80211_check_fast_xmit(sta, GFP_KERNEL);
 		}
 		break;
 	default:
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index a875b92c7acf..e365f6213702 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -240,6 +240,30 @@ struct sta_ampdu_mlme {
 #define IEEE80211_TID_UNRESERVED	0xff
 
 /**
+ * struct ieee80211_fast_tx - TX fastpath information
+ * @key: key to use for hw crypto
+ * @hdr: the 802.11 header to put with the frame
+ * @hdr_len: actual 802.11 header length
+ * @sa_offs: offset of the SA
+ * @da_offs: offset of the DA
+ * @pn_offs: offset where to put PN for crypto (or 0 if not needed)
+ * @band: band this will be transmitted on, for tx_info
+ * @rcu_head: RCU head to free this struct
+ *
+ * Try to keep this struct small so it fits into a single cacheline.
+ */
+struct ieee80211_fast_tx {
+	struct ieee80211_key *key;
+	u8 hdr[30 + 2 + IEEE80211_CCMP_HDR_LEN +
+	       sizeof(rfc1042_header)];
+	u8 hdr_len;
+	u8 sa_offs, da_offs, pn_offs;
+	u8 band;
+
+	struct rcu_head rcu_head;
+};
+
+/**
  * struct sta_info - STA information
  *
  * This structure collects information about a station that
@@ -337,6 +361,7 @@ struct sta_ampdu_mlme {
  *	using IEEE80211_NUM_TID entry for non-QoS frames
  * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID
  *	entry for non-QoS frames
+ * @fast_tx: TX fastpath information
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -354,6 +379,8 @@ struct sta_info {
 	spinlock_t rate_ctrl_lock;
 	spinlock_t lock;
 
+	struct ieee80211_fast_tx __rcu *fast_tx;
+
 	struct work_struct drv_deliver_wk;
 
 	u16 listen_interval;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 667111ee6a20..d5bfa6c4afd0 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1600,7 +1600,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
 	if (skb_cloned(skb) &&
 	    (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) ||
 	     !skb_clone_writable(skb, ETH_HLEN) ||
-	     sdata->crypto_tx_tailroom_needed_cnt))
+	     (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
 		I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
 	else if (head_need || tail_need)
 		I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -2387,6 +2387,398 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	return ERR_PTR(ret);
 }
 
+/*
+ * fast-xmit overview
+ *
+ * The core idea of this fast-xmit is to remove per-packet checks by checking
+ * them out of band. ieee80211_check_fast_xmit() implements the out-of-band
+ * checks that are needed to get the sta->fast_tx pointer assigned, after which
+ * much less work can be done per packet. For example, fragmentation must be
+ * disabled or the fast_tx pointer will not be set. All the conditions are seen
+ * in the code here.
+ *
+ * Once assigned, the fast_tx data structure also caches the per-packet 802.11
+ * header and other data to aid packet processing in ieee80211_xmit_fast().
+ *
+ * The most difficult part of this is that when any of these assumptions
+ * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(),
+ * ieee80211_check_fast_xmit() or friends) is required to reset the data,
+ * since the per-packet code no longer checks the conditions. This is reflected
+ * by the calls to these functions throughout the rest of the code, and must be
+ * maintained if any of the TX path checks change.
+ */
+
+void ieee80211_check_fast_xmit(struct sta_info *sta, gfp_t gfp)
+{
+	struct ieee80211_fast_tx build = {}, *fast_tx, *old;
+	struct ieee80211_local *local = sta->local;
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_hdr *hdr = (void *)build.hdr;
+	struct ieee80211_chanctx_conf *chanctx_conf;
+	__le16 fc;
+
+	if (!(local->hw.flags & IEEE80211_HW_SUPPORT_FAST_XMIT))
+		return;
+
+	ieee80211_clear_fast_xmit(sta);
+
+	if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS &&
+	    !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) &&
+	    sdata->vif.type == NL80211_IFTYPE_STATION)
+		return;
+
+	if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+		return;
+
+	if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
+	    test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
+	    test_sta_flag(sta, WLAN_STA_PS_DELIVER))
+		return;
+
+	if (sdata->noack_map)
+		return;
+
+	/* fast-xmit doesn't handle fragmentation at all */
+	if (local->hw.wiphy->frag_threshold != (u32)-1)
+		return;
+
+	rcu_read_lock();
+	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+	if (!chanctx_conf) {
+		rcu_read_unlock();
+		return;
+	}
+	build.band = chanctx_conf->def.chan->band;
+	rcu_read_unlock();
+
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_STATION:
+		if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+			/* DA SA BSSID */
+			build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+			build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+			memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN);
+			build.hdr_len = 24;
+			break;
+		}
+
+		if (sdata->u.mgd.use_4addr) {
+			/* non-regular ethertype cannot use the fastpath */
+			fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
+					  IEEE80211_FCTL_TODS);
+			/* RA TA DA SA */
+			memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+			memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+			build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+			build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+			build.hdr_len = 30;
+			break;
+		}
+		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
+		/* BSSID SA DA */
+		memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+		build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+		build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+		build.hdr_len = 24;
+		break;
+	case NL80211_IFTYPE_AP_VLAN:
+		if (sdata->wdev.use_4addr) {
+			fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
+					  IEEE80211_FCTL_TODS);
+			/* RA TA DA SA */
+			memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
+			memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+			build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+			build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+			build.hdr_len = 30;
+			break;
+		}
+		/* fall through */
+	case NL80211_IFTYPE_AP:
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
+		/* DA BSSID SA */
+		build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		build.sa_offs = offsetof(struct ieee80211_hdr, addr3);
+		build.hdr_len = 24;
+		break;
+	default:
+		/* not handled on fast-xmit */
+		return;
+	}
+
+	if (sta->sta.wme) {
+		build.hdr_len += 2;
+		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
+	}
+
+	build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
+	if (!build.key)
+		build.key = rcu_access_pointer(sdata->default_unicast_key);
+	if (build.key) {
+		bool gen_iv, iv_spc;
+
+		gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
+		iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
+
+		/* don't handle software crypto */
+		if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+			return;
+
+		switch (build.key->conf.cipher) {
+		case WLAN_CIPHER_SUITE_CCMP:
+		case WLAN_CIPHER_SUITE_CCMP_256:
+			/* add fixed key ID */
+			if (gen_iv) {
+				(build.hdr + build.hdr_len)[3] =
+					0x20 | (build.key->conf.keyidx << 6);
+				build.pn_offs = build.hdr_len;
+			}
+			if (gen_iv || iv_spc)
+				build.hdr_len += IEEE80211_CCMP_HDR_LEN;
+			break;
+		case WLAN_CIPHER_SUITE_GCMP:
+		case WLAN_CIPHER_SUITE_GCMP_256:
+			/* add fixed key ID */
+			if (gen_iv) {
+				(build.hdr + build.hdr_len)[3] =
+					0x20 | (build.key->conf.keyidx << 6);
+				build.pn_offs = build.hdr_len;
+			}
+			if (gen_iv || iv_spc)
+				build.hdr_len += IEEE80211_GCMP_HDR_LEN;
+			break;
+		default:
+			/* don't do fast-xmit for these ciphers (yet) */
+			return;
+		}
+
+		fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+	}
+
+	hdr->frame_control = fc;
+
+	memcpy(build.hdr + build.hdr_len,
+	       rfc1042_header,  sizeof(rfc1042_header));
+	build.hdr_len += sizeof(rfc1042_header);
+
+	fast_tx = kmemdup(&build, sizeof(build), gfp);
+	/* if the kmemdup fails, continue w/o fast_tx */
+	if (!fast_tx)
+		return;
+
+	spin_lock_bh(&sta->lock);
+	/* we might have raced against another call to this function */
+	old = rcu_dereference_protected(sta->fast_tx,
+					lockdep_is_held(&sta->lock));
+	rcu_assign_pointer(sta->fast_tx, fast_tx);
+	spin_unlock_bh(&sta->lock);
+	if (old)
+		kfree_rcu(old, rcu_head);
+}
+
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local)
+{
+	struct sta_info *sta;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sta, &local->sta_list, list)
+		ieee80211_check_fast_xmit(sta, GFP_ATOMIC);
+	rcu_read_unlock();
+}
+
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(sta, &local->sta_list, list) {
+		if (sdata != sta->sdata &&
+		    (!sta->sdata->bss || sta->sdata->bss != sdata->bss))
+			continue;
+		ieee80211_check_fast_xmit(sta, GFP_ATOMIC);
+	}
+
+	rcu_read_unlock();
+}
+
+void ieee80211_clear_fast_xmit(struct sta_info *sta)
+{
+	struct ieee80211_fast_tx *fast_tx;
+
+	spin_lock_bh(&sta->lock);
+	fast_tx = rcu_dereference_protected(sta->fast_tx,
+					    lockdep_is_held(&sta->lock));
+	RCU_INIT_POINTER(sta->fast_tx, NULL);
+	spin_unlock_bh(&sta->lock);
+
+	if (fast_tx)
+		kfree_rcu(fast_tx, rcu_head);
+}
+
+static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
+				struct net_device *dev, struct sta_info *sta,
+				struct ieee80211_fast_tx *fast_tx,
+				struct sk_buff *skb)
+{
+	struct ieee80211_local *local = sdata->local;
+	u16 ethertype = (skb->data[12] << 8) | skb->data[13];
+	int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
+	int hw_headroom = sdata->local->hw.extra_tx_headroom;
+	struct ethhdr eth;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
+	struct ieee80211_tx_data tx;
+	ieee80211_tx_result r;
+	struct tid_ampdu_tx *tid_tx = NULL;
+	u8 tid = IEEE80211_NUM_TIDS;
+
+	/* control port protocol needs a lot of special handling */
+	if (cpu_to_be16(ethertype) == sdata->control_port_protocol)
+		return false;
+
+	/* only RFC 1042 SNAP */
+	if (ethertype < ETH_P_802_3_MIN)
+		return false;
+
+	/* don't handle TX status request here either */
+	if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
+		return false;
+
+	if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+		tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
+		if (tid_tx &&
+		    !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
+			return false;
+	}
+
+	/* after this point (skb is modified) we cannot return false */
+
+	if (skb_shared(skb)) {
+		struct sk_buff *tmp_skb = skb;
+
+		skb = skb_clone(skb, GFP_ATOMIC);
+		kfree_skb(tmp_skb);
+
+		if (!skb)
+			return true;
+	}
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len + extra_head;
+	dev->trans_start = jiffies;
+
+	/* will not be crypto-handled beyond what we do here, so use false
+	 * as the may-encrypt argument for the resize to not account for
+	 * more room than we already have in 'extra_head'
+	 */
+	if (unlikely(ieee80211_skb_resize(sdata, skb,
+					  max_t(int, extra_head + hw_headroom -
+						     skb_headroom(skb), 0),
+					  false))) {
+		kfree_skb(skb);
+		return true;
+	}
+
+	memcpy(&eth, skb->data, ETH_HLEN - 2);
+	hdr = (void *)skb_push(skb, extra_head);
+	memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len);
+	memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
+	memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
+
+	memset(info, 0, sizeof(*info));
+	info->band = fast_tx->band;
+	info->control.vif = &sdata->vif;
+	info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
+		      IEEE80211_TX_CTL_DONTFRAG |
+		      (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
+
+	if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+		*ieee80211_get_qos_ctl(hdr) = tid;
+		hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+	} else {
+		info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+		hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+		sdata->sequence_number += 0x10;
+	}
+
+	sta->tx_msdu[tid]++;
+
+	info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+	__skb_queue_head_init(&tx.skbs);
+
+	tx.flags = IEEE80211_TX_UNICAST;
+	tx.local = local;
+	tx.sdata = sdata;
+	tx.sta = sta;
+	tx.key = fast_tx->key;
+
+	if (fast_tx->key)
+		info->control.hw_key = &fast_tx->key->conf;
+
+	if (!(local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) {
+		tx.skb = skb;
+		r = ieee80211_tx_h_rate_ctrl(&tx);
+		skb = tx.skb;
+		tx.skb = NULL;
+
+		if (r != TX_CONTINUE) {
+			if (r != TX_QUEUED)
+				kfree_skb(skb);
+			return true;
+		}
+	}
+
+	/* statistics normally done by ieee80211_tx_h_stats (but that
+	 * has to consider fragmentation, so is more complex)
+	 */
+	sta->tx_fragments++;
+	sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len;
+	sta->tx_packets[skb_get_queue_mapping(skb)]++;
+
+	if (fast_tx->pn_offs) {
+		u64 pn;
+		u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
+
+		switch (fast_tx->key->conf.cipher) {
+		case WLAN_CIPHER_SUITE_CCMP:
+		case WLAN_CIPHER_SUITE_CCMP_256:
+			pn = atomic64_inc_return(&fast_tx->key->u.ccmp.tx_pn);
+			crypto_hdr[0] = pn;
+			crypto_hdr[1] = pn >> 8;
+			crypto_hdr[4] = pn >> 16;
+			crypto_hdr[5] = pn >> 24;
+			crypto_hdr[6] = pn >> 32;
+			crypto_hdr[7] = pn >> 40;
+			break;
+		case WLAN_CIPHER_SUITE_GCMP:
+		case WLAN_CIPHER_SUITE_GCMP_256:
+			pn = atomic64_inc_return(&fast_tx->key->u.gcmp.tx_pn);
+			crypto_hdr[0] = pn;
+			crypto_hdr[1] = pn >> 8;
+			crypto_hdr[4] = pn >> 16;
+			crypto_hdr[5] = pn >> 24;
+			crypto_hdr[6] = pn >> 32;
+			crypto_hdr[7] = pn >> 40;
+			break;
+		}
+	}
+
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		sdata = container_of(sdata->bss,
+				     struct ieee80211_sub_if_data, u.ap);
+
+	__skb_queue_tail(&tx.skbs, skb);
+	ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+	return true;
+}
+
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags)
@@ -2406,6 +2798,16 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 		goto out;
 	}
 
+	if (!IS_ERR_OR_NULL(sta)) {
+		struct ieee80211_fast_tx *fast_tx;
+
+		fast_tx = rcu_dereference(sta->fast_tx);
+
+		if (fast_tx &&
+		    ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+			goto out;
+	}
+
 	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
 	if (IS_ERR(skb))
 		goto out;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 02/11] mac80211_hwsim: enable IEEE80211_HW_SUPPORT_FAST_XMIT
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 03/11] mac80211: extend fast-xmit to driver fragmentation Johannes Berg
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

For hwsim, the duration field in frames is already not valid for
the common case of HT/VHT MCSes, so there's little point in trying
to keep it accurate for the legacy rates. Enable the fast-xmit code
so that it can be tested with hwsim, although, given its dependency
on hardware crypto, it will only be enabled in open network
configurations.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index d5c0a1af08b9..07626cc21d6e 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2399,7 +2399,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 		    IEEE80211_HW_WANT_MONITOR_VIF |
 		    IEEE80211_HW_QUEUE_CONTROL |
 		    IEEE80211_HW_SUPPORTS_HT_CCK_RATES |
-		    IEEE80211_HW_CHANCTX_STA_CSA;
+		    IEEE80211_HW_CHANCTX_STA_CSA |
+		    IEEE80211_HW_SUPPORT_FAST_XMIT;
 	if (rctbl)
 		hw->flags |= IEEE80211_HW_SUPPORTS_RC_TABLE;
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 03/11] mac80211: extend fast-xmit to driver fragmentation
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
  2015-04-17 15:15 ` [PATCH 02/11] mac80211_hwsim: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 04/11] mac80211: extend fast-xmit for more ciphers Johannes Berg
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

If the driver handles fragmentation, then it isn't done in
software, so we can still use the fast-xmit path in that
case.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d5bfa6c4afd0..86d64bd11c01 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2439,7 +2439,8 @@ void ieee80211_check_fast_xmit(struct sta_info *sta, gfp_t gfp)
 		return;
 
 	/* fast-xmit doesn't handle fragmentation at all */
-	if (local->hw.wiphy->frag_threshold != (u32)-1)
+	if (local->hw.wiphy->frag_threshold != (u32)-1 &&
+	    !local->ops->set_frag_threshold)
 		return;
 
 	rcu_read_lock();
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 04/11] mac80211: extend fast-xmit for more ciphers
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
  2015-04-17 15:15 ` [PATCH 02/11] mac80211_hwsim: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
  2015-04-17 15:15 ` [PATCH 03/11] mac80211: extend fast-xmit to driver fragmentation Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 05/11] mac80211: extend fast-xmit to cover IBSS Johannes Berg
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

When crypto is offloaded then in some cases it's all handled
by the device, and in others only some space for the IV must
be reserved in the frame. Handle both of these cases in the
fast-xmit path, up to a limit of 18 bytes of space for IVs.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.h | 10 +++++++---
 net/mac80211/tx.c       | 40 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e365f6213702..1d4c73818c54 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -239,6 +239,8 @@ struct sta_ampdu_mlme {
 /* Value to indicate no TID reservation */
 #define IEEE80211_TID_UNRESERVED	0xff
 
+#define IEEE80211_FAST_XMIT_MAX_IV	18
+
 /**
  * struct ieee80211_fast_tx - TX fastpath information
  * @key: key to use for hw crypto
@@ -250,15 +252,17 @@ struct sta_ampdu_mlme {
  * @band: band this will be transmitted on, for tx_info
  * @rcu_head: RCU head to free this struct
  *
- * Try to keep this struct small so it fits into a single cacheline.
+ * This struct is small enough so that the common case (maximum crypto
+ * header length of 8 like for CCMP/GCMP) fits into a single 64-byte
+ * cache line.
  */
 struct ieee80211_fast_tx {
 	struct ieee80211_key *key;
-	u8 hdr[30 + 2 + IEEE80211_CCMP_HDR_LEN +
-	       sizeof(rfc1042_header)];
 	u8 hdr_len;
 	u8 sa_offs, da_offs, pn_offs;
 	u8 band;
+	u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
+	       sizeof(rfc1042_header)];
 
 	struct rcu_head rcu_head;
 };
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 86d64bd11c01..02aafe1cc121 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2519,10 +2519,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta, gfp_t gfp)
 	if (!build.key)
 		build.key = rcu_access_pointer(sdata->default_unicast_key);
 	if (build.key) {
-		bool gen_iv, iv_spc;
+		bool gen_iv, iv_spc, mmic;
 
 		gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
 		iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
+		mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC;
 
 		/* don't handle software crypto */
 		if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
@@ -2551,9 +2552,42 @@ void ieee80211_check_fast_xmit(struct sta_info *sta, gfp_t gfp)
 			if (gen_iv || iv_spc)
 				build.hdr_len += IEEE80211_GCMP_HDR_LEN;
 			break;
-		default:
-			/* don't do fast-xmit for these ciphers (yet) */
+		case WLAN_CIPHER_SUITE_TKIP:
+			/* cannot handle MMIC or IV generation in xmit-fast */
+			if (mmic || gen_iv)
+				return;
+			if (iv_spc)
+				build.hdr_len += IEEE80211_TKIP_IV_LEN;
+			break;
+		case WLAN_CIPHER_SUITE_WEP40:
+		case WLAN_CIPHER_SUITE_WEP104:
+			/* cannot handle IV generation in fast-xmit */
+			if (gen_iv)
+				return;
+			if (iv_spc)
+				build.hdr_len += IEEE80211_WEP_IV_LEN;
+			break;
+		case WLAN_CIPHER_SUITE_AES_CMAC:
+		case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+		case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+		case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+			WARN(1,
+			     "management cipher suite 0x%x enabled for data\n",
+			     build.key->conf.cipher);
 			return;
+		default:
+			/* we don't know how to generate IVs for this at all */
+			if (WARN_ON(gen_iv))
+				return;
+			/* pure hardware keys are OK, of course */
+			if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME))
+				break;
+			/* cipher scheme might require space allocation */
+			if (iv_spc &&
+			    build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV)
+				return;
+			if (iv_spc)
+				build.hdr_len += build.key->conf.iv_len;
 		}
 
 		fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 05/11] mac80211: extend fast-xmit to cover IBSS
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (2 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 04/11] mac80211: extend fast-xmit for more ciphers Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 06/11] wlcore: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

IBSS can be supported very easily since it uses the standard station
authorization state etc. so it just needs to be covered by the header
building switch statement.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 02aafe1cc121..0f18ee11f097 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2455,6 +2455,13 @@ void ieee80211_check_fast_xmit(struct sta_info *sta, gfp_t gfp)
 	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
 
 	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_ADHOC:
+		/* DA SA BSSID */
+		build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+		build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+		memcpy(hdr->addr3, sdata->u.ibss.bssid, ETH_ALEN);
+		build.hdr_len = 24;
+		break;
 	case NL80211_IFTYPE_STATION:
 		if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
 			/* DA SA BSSID */
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 06/11] wlcore: enable IEEE80211_HW_SUPPORT_FAST_XMIT
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (3 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 05/11] mac80211: extend fast-xmit to cover IBSS Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 07/11] ath10k: " Johannes Berg
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

The driver can clearly enable fast-xmit since it does rate
control in the device and thus must do duration calculation
there as well.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ti/wlcore/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 1e136993580f..34cef10aefc5 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -6077,7 +6077,8 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
 		IEEE80211_HW_AMPDU_AGGREGATION |
 		IEEE80211_HW_TX_AMPDU_SETUP_IN_HW |
 		IEEE80211_HW_QUEUE_CONTROL |
-		IEEE80211_HW_CHANCTX_STA_CSA;
+		IEEE80211_HW_CHANCTX_STA_CSA |
+		IEEE80211_HW_SUPPORT_FAST_XMIT;
 
 	wl->hw->wiphy->cipher_suites = cipher_suites;
 	wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 07/11] ath10k: enable IEEE80211_HW_SUPPORT_FAST_XMIT
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (4 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 06/11] wlcore: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-22  9:00   ` Kalle Valo
  2015-04-17 15:15 ` [PATCH 08/11] mac80211: allow checksum offload only in fast-xmit Johannes Berg
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

The driver can clearly enable fast-xmit since it does rate
control in the device and thus must do duration calculation
there as well.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/mac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 6fd7189b7b01..9c0c0a12a85a 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -5500,7 +5500,8 @@ int ath10k_mac_register(struct ath10k *ar)
 			IEEE80211_HW_HAS_RATE_CONTROL |
 			IEEE80211_HW_AP_LINK_PS |
 			IEEE80211_HW_SPECTRUM_MGMT |
-			IEEE80211_HW_SW_CRYPTO_CONTROL;
+			IEEE80211_HW_SW_CRYPTO_CONTROL |
+			IEEE80211_HW_SUPPORT_FAST_XMIT;
 
 	ar->hw->wiphy->features |= NL80211_FEATURE_STATIC_SMPS;
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 08/11] mac80211: allow checksum offload only in fast-xmit
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (5 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 07/11] ath10k: " Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 09/11] mac80211: enable changing netdev features with ethtool Johannes Berg
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

When we go through the complete TX processing, there are a number
of things like fragmentation and software crypto that require the
checksum to be calculated already.

In favour of maintainability, instead of adding the necessary call
to skb_checksum_help() in all the places that need it, just do it
once before the regular TX processing.

Right now this only affects the TI wlcore and QCA ath10k drivers
since they're the only ones using checksum offload. The previous
commits enabled fast-xmit for them in almost all cases.

For wlcore this even fixes a corner case: when a key fails to be
programmed to hardware, software encryption gets used, encrypting
frames with a bad checksum.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0f18ee11f097..e76f3e96eb84 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2835,10 +2835,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 
 	rcu_read_lock();
 
-	if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (ieee80211_lookup_ra_sta(sdata, skb, &sta))
+		goto out_free;
 
 	if (!IS_ERR_OR_NULL(sta)) {
 		struct ieee80211_fast_tx *fast_tx;
@@ -2850,6 +2848,21 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 			goto out;
 	}
 
+	/* the frame could be fragmented, software-encrypted, and other things
+	 * so we cannot really handle checksum offload with it - fix it up in
+	 * software before we handle anything else.
+	 */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb->encapsulation)
+			skb_set_inner_transport_header(skb,
+						       skb_checksum_start_offset(skb));
+		else
+			skb_set_transport_header(skb,
+						 skb_checksum_start_offset(skb));
+		if (skb_checksum_help(skb))
+			goto out_free;
+	}
+
 	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
 	if (IS_ERR(skb))
 		goto out;
@@ -2859,6 +2872,9 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 	dev->trans_start = jiffies;
 
 	ieee80211_xmit(sdata, sta, skb);
+	goto out;
+ out_free:
+	kfree_skb(skb);
  out:
 	rcu_read_unlock();
 }
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 09/11] mac80211: enable changing netdev features with ethtool
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (6 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 08/11] mac80211: allow checksum offload only in fast-xmit Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 10/11] mac80211: allow drivers to support S/G Johannes Berg
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Avri Altman

From: Avri Altman <avri.altman@intel.com>

For drivers that have offloading features, these are currently
forced and cannot be modified in any way. Enable that so users
can change the features if needed, but of course this requires
driver interaction.

For AP_VLAN don't allow direct changes but propagate them from
the AP interface.

To make this even possible, set dev->hw_features initially so
that there's something that can be modified.

Additionally, add NETIF_F_RXCSUM to the whitelist. If a driver
has this capability, it doesn't have to advertise this but can
just set the skb->checksum field accordingly, but advertising
it will allow it to be changed with ethtool.

Signed-off-by: Avri Altman <avri.altman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  6 ++++++
 net/mac80211/driver-ops.h  | 22 ++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  5 +++++
 net/mac80211/iface.c       | 39 +++++++++++++++++++++++++++++++++++++++
 net/mac80211/main.c        |  6 +-----
 net/mac80211/trace.h       | 28 ++++++++++++++++++++++++++++
 6 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9001bd685b1e..0f7fbdb31318 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2998,6 +2998,8 @@ enum ieee80211_reconfig_type {
  *
  * @get_ringparam: Get tx and rx ring current and maximum sizes.
  *
+ * @set_features: change netdev features for the given virtual interface
+ *
  * @tx_frames_pending: Check if there is any pending frame in the hardware
  *	queues before entering power save.
  *
@@ -3301,6 +3303,10 @@ struct ieee80211_ops {
 	int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx);
 	void (*get_ringparam)(struct ieee80211_hw *hw,
 			      u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max);
+	int (*set_features)(struct ieee80211_hw *hw,
+			    struct ieee80211_vif *vif,
+			    netdev_features_t features,
+			    netdev_features_t changed);
 	bool (*tx_frames_pending)(struct ieee80211_hw *hw);
 	int (*set_bitrate_mask)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 				const struct cfg80211_bitrate_mask *mask);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 26e1ca8a474a..a0cd94a9a0b1 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -894,6 +894,28 @@ static inline void drv_get_ringparam(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline int drv_set_features(struct ieee80211_local *local,
+				   struct ieee80211_sub_if_data *sdata,
+				   netdev_features_t features,
+				   netdev_features_t changed)
+{
+	int ret = -EOPNOTSUPP;
+
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
+
+	trace_drv_set_features(local, sdata, features, changed);
+	if (local->ops->set_features)
+		ret = local->ops->set_features(&local->hw,
+					       &sdata->vif,
+					       features, changed);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
 static inline bool drv_tx_frames_pending(struct ieee80211_local *local)
 {
 	bool ret = false;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 09a15a855c5a..3f837fea05ae 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -83,6 +83,11 @@ struct ieee80211_local;
 
 #define IEEE80211_DEAUTH_FRAME_LEN	(24 /* hdr */ + 2 /* reason */)
 
+/* Only these features can be passed through mac80211 */
+#define IEEE80211_SUPPORTED_NETDEV_FEATURES	\
+	(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |	\
+	 NETIF_F_HW_CSUM | NETIF_F_RXCSUM)
+
 struct ieee80211_fragment_entry {
 	unsigned long first_frag_time;
 	unsigned int seq;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ac596a7cb7..5cdac668d532 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1101,6 +1101,43 @@ static void ieee80211_uninit(struct net_device *dev)
 	ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev));
 }
 
+static int ieee80211_netdev_set_features(struct net_device *dev,
+					 netdev_features_t features)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	netdev_features_t changed = dev->features ^ features;
+	int ret;
+
+	if (!(changed & IEEE80211_SUPPORTED_NETDEV_FEATURES))
+		return 0;
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_AP_VLAN:
+		/* these aren't known to the driver */
+		return -EOPNOTSUPP;
+	default:
+		break;
+	}
+
+	ret = drv_set_features(local, sdata, features, changed);
+	if (ret)
+		return ret;
+
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
+		struct ieee80211_sub_if_data *vlan;
+
+		/* propagate to VLANs as they're dependent */
+		list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
+			vlan->dev->features = features;
+			netdev_features_change(vlan->dev);
+		}
+	}
+
+	return 0;
+}
+
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
 					 void *accel_priv,
@@ -1118,6 +1155,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 	.ndo_change_mtu 	= ieee80211_change_mtu,
 	.ndo_set_mac_address 	= ieee80211_change_mac,
 	.ndo_select_queue	= ieee80211_netdev_select_queue,
+	.ndo_set_features	= ieee80211_netdev_set_features,
 };
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
@@ -1781,6 +1819,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		}
 
 		ndev->features |= local->hw.netdev_features;
+		ndev->hw_features |= local->hw.netdev_features;
 
 		netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index df3051d96aff..3ffae3714f36 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -789,7 +789,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	enum ieee80211_band band;
 	int channels, max_bitrates;
 	bool supp_ht, supp_vht;
-	netdev_features_t feature_whitelist;
 	struct cfg80211_chan_def dflt_chandef = {};
 
 	if (hw->flags & IEEE80211_HW_QUEUE_CONTROL &&
@@ -838,10 +837,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
-	/* Only HW csum features are currently compatible with mac80211 */
-	feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-			    NETIF_F_HW_CSUM;
-	if (WARN_ON(hw->netdev_features & ~feature_whitelist))
+	if (WARN_ON(hw->netdev_features & ~IEEE80211_SUPPORTED_NETDEV_FEATURES))
 		return -EINVAL;
 
 	if (hw->max_report_rates == 0)
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 790bd45081c4..4439a0ddabba 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1191,6 +1191,34 @@ TRACE_EVENT(drv_get_ringparam,
 	)
 );
 
+TRACE_EVENT(drv_set_features,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 netdev_features_t features,
+		 netdev_features_t changed),
+
+	TP_ARGS(local, sdata, features, changed),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(netdev_features_t, features)
+		__field(netdev_features_t, changed)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->features = features;
+		__entry->changed = changed;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT VIF_PR_FMT " features=0x%llx, changed=0x%llx",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->features, __entry->changed
+	)
+);
+
 DEFINE_EVENT(local_only_evt, drv_tx_frames_pending,
 	TP_PROTO(struct ieee80211_local *local),
 	TP_ARGS(local)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 10/11] mac80211: allow drivers to support S/G
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (7 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 09/11] mac80211: enable changing netdev features with ethtool Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-17 15:15 ` [PATCH 11/11] mac80211: allow segmentation offloads Johannes Berg
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

If drivers want to support S/G (really just gather DMA on TX) then
we can now easily support this on the fast-xmit path since it just
needs to write to the ethernet header (and already has a check for
that being possible.)

However, disallow this on the regular TX path (which has to handle
fragmentation, software crypto, etc.) by calling skb_linearize().

Also allow the related HIGHDMA since that's not interesting to the
code in mac80211 at all anyway.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 3 ++-
 net/mac80211/tx.c          | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3f837fea05ae..25a456c48043 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -86,7 +86,8 @@ struct ieee80211_local;
 /* Only these features can be passed through mac80211 */
 #define IEEE80211_SUPPORTED_NETDEV_FEATURES	\
 	(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |	\
-	 NETIF_F_HW_CSUM | NETIF_F_RXCSUM)
+	 NETIF_F_HW_CSUM | NETIF_F_RXCSUM |	\
+	 NETIF_F_SG | NETIF_F_HIGHDMA)
 
 struct ieee80211_fragment_entry {
 	unsigned long first_frag_time;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e76f3e96eb84..53a16257dfc1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2848,6 +2848,12 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 			goto out;
 	}
 
+	/* we cannot process non-linear frames on this path */
+	if (skb_linearize(skb)) {
+		kfree_skb(skb);
+		goto out;
+	}
+
 	/* the frame could be fragmented, software-encrypted, and other things
 	 * so we cannot really handle checksum offload with it - fix it up in
 	 * software before we handle anything else.
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 11/11] mac80211: allow segmentation offloads
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (8 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 10/11] mac80211: allow drivers to support S/G Johannes Berg
@ 2015-04-17 15:15 ` Johannes Berg
  2015-04-20  8:25 ` [PATCH 01/11] mac80211: add TX fastpath Eliad Peller
  2015-04-22 11:00 ` Johannes Berg
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-17 15:15 UTC (permalink / raw)
  To: linux-wireless; +Cc: Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

Implement the necessary software segmentation on the normal
TX path so that fast-xmit can use segmentation offload if
the hardware (or driver) supports it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  3 +-
 net/mac80211/tx.c          | 70 ++++++++++++++++++++++++++++++----------------
 2 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 25a456c48043..d912e614f53b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -87,7 +87,8 @@ struct ieee80211_local;
 #define IEEE80211_SUPPORTED_NETDEV_FEATURES	\
 	(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |	\
 	 NETIF_F_HW_CSUM | NETIF_F_RXCSUM |	\
-	 NETIF_F_SG | NETIF_F_HIGHDMA)
+	 NETIF_F_SG | NETIF_F_HIGHDMA |		\
+	 NETIF_F_GSO_SOFTWARE)
 
 struct ieee80211_fragment_entry {
 	unsigned long first_frag_time;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 53a16257dfc1..24b082f65a20 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2827,6 +2827,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sta_info *sta;
+	struct sk_buff *next;
 
 	if (unlikely(skb->len < ETH_HLEN)) {
 		kfree_skb(skb);
@@ -2848,36 +2849,57 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 			goto out;
 	}
 
-	/* we cannot process non-linear frames on this path */
-	if (skb_linearize(skb)) {
-		kfree_skb(skb);
-		goto out;
-	}
+	if (netif_needs_gso(dev, skb, 0)) {
+		struct sk_buff *segs;
 
-	/* the frame could be fragmented, software-encrypted, and other things
-	 * so we cannot really handle checksum offload with it - fix it up in
-	 * software before we handle anything else.
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		if (skb->encapsulation)
-			skb_set_inner_transport_header(skb,
-						       skb_checksum_start_offset(skb));
-		else
-			skb_set_transport_header(skb,
-						 skb_checksum_start_offset(skb));
-		if (skb_checksum_help(skb))
+		segs = skb_gso_segment(skb, 0);
+		if (IS_ERR(segs)) {
 			goto out_free;
+		} else if (segs) {
+			consume_skb(skb);
+			skb = segs;
+		}
+	} else {
+		/* we cannot process non-linear frames on this path */
+		if (skb_linearize(skb)) {
+			kfree_skb(skb);
+			goto out;
+		}
+
+		/* the frame could be fragmented, software-encrypted, and other
+		 * things so we cannot really handle checksum offload with it -
+		 * fix it up in software before we handle anything else.
+		 */
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			if (skb->encapsulation)
+				skb_set_inner_transport_header(skb,
+							       skb_checksum_start_offset(skb));
+			else
+				skb_set_transport_header(skb,
+							 skb_checksum_start_offset(skb));
+			if (skb_checksum_help(skb))
+				goto out_free;
+		}
 	}
 
-	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
-	if (IS_ERR(skb))
-		goto out;
+	next = skb;
+	while (next) {
+		skb = next;
+		next = skb->next;
 
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
-	dev->trans_start = jiffies;
+		skb->prev = NULL;
+		skb->next = NULL;
+
+		skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
+		if (IS_ERR(skb))
+			goto out;
 
-	ieee80211_xmit(sdata, sta, skb);
+		dev->stats.tx_packets++;
+		dev->stats.tx_bytes += skb->len;
+		dev->trans_start = jiffies;
+
+		ieee80211_xmit(sdata, sta, skb);
+	}
 	goto out;
  out_free:
 	kfree_skb(skb);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 01/11] mac80211: add TX fastpath
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (9 preceding siblings ...)
  2015-04-17 15:15 ` [PATCH 11/11] mac80211: allow segmentation offloads Johannes Berg
@ 2015-04-20  8:25 ` Eliad Peller
  2015-04-20  8:47   ` Johannes Berg
  2015-04-22 11:00 ` Johannes Berg
  11 siblings, 1 reply; 17+ messages in thread
From: Eliad Peller @ 2015-04-20  8:25 UTC (permalink / raw)
  To: Johannes Berg; +Cc: linux-wireless, Johannes Berg

On Fri, Apr 17, 2015 at 6:15 PM, Johannes Berg
<johannes@sipsolutions.net> wrote:
> From: Johannes Berg <johannes.berg@intel.com>
>
> In order to speed up mac80211's TX path, add the "fast-xmit" cache
> that will cache the data frame 802.11 header and other data to be
> able to build the frame more quickly. This cache is rebuilt when
> external triggers imply changes, but a lot of the checks done per
> packet today are simplified away to the check for the cache.
>
> There's also a more detailed description in the code.
>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> ---
[...]

> +       build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
> +       if (!build.key)
> +               build.key = rcu_access_pointer(sdata->default_unicast_key);
don't you need rcu_dereference here? (and you don't seem to be inside
rcu section here)

Eliad.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 01/11] mac80211: add TX fastpath
  2015-04-20  8:25 ` [PATCH 01/11] mac80211: add TX fastpath Eliad Peller
@ 2015-04-20  8:47   ` Johannes Berg
  2015-04-21  6:54     ` Eliad Peller
  0 siblings, 1 reply; 17+ messages in thread
From: Johannes Berg @ 2015-04-20  8:47 UTC (permalink / raw)
  To: Eliad Peller; +Cc: linux-wireless

Hey, somebody is reviewing my patches :-)

> > +       build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
> > +       if (!build.key)
> > +               build.key = rcu_access_pointer(sdata->default_unicast_key);
> don't you need rcu_dereference here? (and you don't seem to be inside
> rcu section here)

It's a bit complicated.

I used to think I don't need it, but perhaps I do to avoid accessing bad
memory in this function.

The thing is that this function is going to be called immediately
whenever those pointers change, so that the RCU handling of the fast_tx
struct itself should prevent the TX path from accessing a bad key
pointer.

However, it seems possible that we go into this function on another CPU
for unrelated reasons, and if that CPU then stalls after getting the key
pointer but before assigning the fast_tx pointer, then it might
overwrite the assignment or clearing from the CPU processing the key
change.

So indeed it looks like this isn't safe as is right now.

To fix that, I think I can hold the lock longer, so that the lifetime of
the key and the fast_tx pointer are more closely correlated. If I
acquire the spinlock before checking for the key, then the CPU that
invalidates the key pointer cannot race in this way with another caller,
since the key pointer would (for this purpose) be protected by the lock.
Then either the CPU that deleted the key will have to wait (while the
key is still pretty much valid) and then will overwrite the fast_tx w/o
the key, or the other CPU will have to wait and will find the key
pointer changed/NULL already.

Right? what do you think?

johannes


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 01/11] mac80211: add TX fastpath
  2015-04-20  8:47   ` Johannes Berg
@ 2015-04-21  6:54     ` Eliad Peller
  2015-04-21  7:06       ` Johannes Berg
  0 siblings, 1 reply; 17+ messages in thread
From: Eliad Peller @ 2015-04-21  6:54 UTC (permalink / raw)
  To: Johannes Berg; +Cc: linux-wireless

On Mon, Apr 20, 2015 at 11:47 AM, Johannes Berg
<johannes@sipsolutions.net> wrote:
> Hey, somebody is reviewing my patches :-)
>
i didn't delve into them too much, but generally they look good :)

>> > +       build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
>> > +       if (!build.key)
>> > +               build.key = rcu_access_pointer(sdata->default_unicast_key);
>> don't you need rcu_dereference here? (and you don't seem to be inside
>> rcu section here)
>
> It's a bit complicated.
>
> I used to think I don't need it, but perhaps I do to avoid accessing bad
> memory in this function.
>
> The thing is that this function is going to be called immediately
> whenever those pointers change, so that the RCU handling of the fast_tx
> struct itself should prevent the TX path from accessing a bad key
> pointer.
>
> However, it seems possible that we go into this function on another CPU
> for unrelated reasons, and if that CPU then stalls after getting the key
> pointer but before assigning the fast_tx pointer, then it might
> overwrite the assignment or clearing from the CPU processing the key
> change.
>
> So indeed it looks like this isn't safe as is right now.
>
> To fix that, I think I can hold the lock longer, so that the lifetime of
> the key and the fast_tx pointer are more closely correlated. If I
> acquire the spinlock before checking for the key, then the CPU that
> invalidates the key pointer cannot race in this way with another caller,
> since the key pointer would (for this purpose) be protected by the lock.
> Then either the CPU that deleted the key will have to wait (while the
> key is still pretty much valid) and then will overwrite the fast_tx w/o
> the key, or the other CPU will have to wait and will find the key
> pointer changed/NULL already.
>
> Right? what do you think?

sounds correct.
i guess taking rcu_lock is a valid option as well (for about the same
reasons). so either one of them should be good.

Eliad.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 01/11] mac80211: add TX fastpath
  2015-04-21  6:54     ` Eliad Peller
@ 2015-04-21  7:06       ` Johannes Berg
  0 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-21  7:06 UTC (permalink / raw)
  To: Eliad Peller; +Cc: linux-wireless

On Tue, 2015-04-21 at 09:54 +0300, Eliad Peller wrote:

> > Hey, somebody is reviewing my patches :-)
> >
> i didn't delve into them too much, but generally they look good :)

:)

> > To fix that, I think I can hold the lock longer, so that the lifetime of
> > the key and the fast_tx pointer are more closely correlated. If I
> > acquire the spinlock before checking for the key, then the CPU that
> > invalidates the key pointer cannot race in this way with another caller,
> > since the key pointer would (for this purpose) be protected by the lock.
> > Then either the CPU that deleted the key will have to wait (while the
> > key is still pretty much valid) and then will overwrite the fast_tx w/o
> > the key, or the other CPU will have to wait and will find the key
> > pointer changed/NULL already.
> >
> > Right? what do you think?
> 
> sounds correct.
> i guess taking rcu_lock is a valid option as well (for about the same
> reasons). so either one of them should be good.

I don't think that would be correct - that just prevents the key from
being freed while we hold it, whereas here we actually need to prevent
the key from being accessed.

Anyway - I've done a bit more testing on this and will likely merge it
(the fixed version of course) in the next couple of days.

johannes


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 07/11] ath10k: enable IEEE80211_HW_SUPPORT_FAST_XMIT
  2015-04-17 15:15 ` [PATCH 07/11] ath10k: " Johannes Berg
@ 2015-04-22  9:00   ` Kalle Valo
  0 siblings, 0 replies; 17+ messages in thread
From: Kalle Valo @ 2015-04-22  9:00 UTC (permalink / raw)
  To: Johannes Berg; +Cc: linux-wireless, Johannes Berg

Johannes Berg <johannes@sipsolutions.net> writes:

> From: Johannes Berg <johannes.berg@intel.com>
>
> The driver can clearly enable fast-xmit since it does rate
> control in the device and thus must do duration calculation
> there as well.
>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>

I quickly tested mac80211-next/fast-xmit (commit cf9f262fdd3f) with
ath10k and didn't see any problems so:

Acked-by: Kalle Valo <kvalo@qca.qualcomm.com>

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 01/11] mac80211: add TX fastpath
  2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
                   ` (10 preceding siblings ...)
  2015-04-20  8:25 ` [PATCH 01/11] mac80211: add TX fastpath Eliad Peller
@ 2015-04-22 11:00 ` Johannes Berg
  11 siblings, 0 replies; 17+ messages in thread
From: Johannes Berg @ 2015-04-22 11:00 UTC (permalink / raw)
  To: linux-wireless

Ok so I'm going to merge this now, with the exception of patch 9 which
I'm not entirely happy with as posted.

johannes


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2015-04-22 11:00 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-17 15:15 [PATCH 01/11] mac80211: add TX fastpath Johannes Berg
2015-04-17 15:15 ` [PATCH 02/11] mac80211_hwsim: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
2015-04-17 15:15 ` [PATCH 03/11] mac80211: extend fast-xmit to driver fragmentation Johannes Berg
2015-04-17 15:15 ` [PATCH 04/11] mac80211: extend fast-xmit for more ciphers Johannes Berg
2015-04-17 15:15 ` [PATCH 05/11] mac80211: extend fast-xmit to cover IBSS Johannes Berg
2015-04-17 15:15 ` [PATCH 06/11] wlcore: enable IEEE80211_HW_SUPPORT_FAST_XMIT Johannes Berg
2015-04-17 15:15 ` [PATCH 07/11] ath10k: " Johannes Berg
2015-04-22  9:00   ` Kalle Valo
2015-04-17 15:15 ` [PATCH 08/11] mac80211: allow checksum offload only in fast-xmit Johannes Berg
2015-04-17 15:15 ` [PATCH 09/11] mac80211: enable changing netdev features with ethtool Johannes Berg
2015-04-17 15:15 ` [PATCH 10/11] mac80211: allow drivers to support S/G Johannes Berg
2015-04-17 15:15 ` [PATCH 11/11] mac80211: allow segmentation offloads Johannes Berg
2015-04-20  8:25 ` [PATCH 01/11] mac80211: add TX fastpath Eliad Peller
2015-04-20  8:47   ` Johannes Berg
2015-04-21  6:54     ` Eliad Peller
2015-04-21  7:06       ` Johannes Berg
2015-04-22 11:00 ` Johannes Berg

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.