All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
@ 2015-12-17  9:20 Janusz Dziedzic
  2015-12-17  9:20 ` [RFC/RFT 2/2] ath9k: request aligned skb Janusz Dziedzic
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Janusz Dziedzic @ 2015-12-17  9:20 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes, nbd, Janusz Dziedzic

A HW/driver should set the NEEDS_ALIGNED4_SKBS flag if it requires
skbs to be aligned to four-byte boundaries.

Previously we had to do a memmove() in the driver before
passing the frame to the HW, and a memmove() back in tx completion.
This patch saves CPU time by skipping those memmoves.
For each skb we called memmove() over ieee80211_hdrlen() bytes twice.

So far this has been tested with ath9k, with both hw and sw crypto for
TKIP/CCMP.
More testing is certainly required.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
---
 include/net/mac80211.h |  4 ++++
 net/mac80211/debugfs.c |  1 +
 net/mac80211/tkip.c    | 15 ++++++++++++---
 net/mac80211/tx.c      | 21 +++++++++++++++++++--
 net/mac80211/wep.c     |  6 ++++++
 net/mac80211/wpa.c     | 35 +++++++++++++++++++++++++++--------
 6 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7c30faf..0ea9b51 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1910,6 +1910,9 @@ struct ieee80211_txq {
  *	by just its MAC address; this prevents, for example, the same station
  *	from connecting to two virtual AP interfaces at the same time.
  *
+ * @IEEE80211_HW_NEEDS_ALIGNED4_SKBS: Driver needs skbs aligned to
+ *	four-byte boundaries. Padding will be added after ieee80211_hdr.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -1946,6 +1949,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_AMSDU_IN_AMPDU,
 	IEEE80211_HW_BEACON_TX_STATUS,
 	IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
+	IEEE80211_HW_NEEDS_ALIGNED4_SKBS,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index abbdff0..fd45830 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -126,6 +126,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
 	FLAG(SUPPORTS_AMSDU_IN_AMPDU),
 	FLAG(BEACON_TX_STATUS),
 	FLAG(NEEDS_UNIQUE_STA_ADDR),
+	FLAG(NEEDS_ALIGNED4_SKBS),
 
 	/* keep last for the build bug below */
 	(void *)0x1
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 0ae2077..26b2663 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -204,9 +204,18 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf,
 	const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
 	struct tkip_ctx *ctx = &key->u.tkip.tx;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control);
-	u32 iv32 = get_unaligned_le32(&data[4]);
-	u16 iv16 = data[2] | (data[0] << 8);
+	unsigned int hdrlen;
+	const u8 *data;
+	u32 iv32;
+	u16 iv16;
+
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&key->local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
+
+	data = (u8 *)hdr + hdrlen;
+	iv32 = get_unaligned_le32(&data[4]);
+	iv16 = data[2] | (data[0] << 8);
 
 	spin_lock(&key->u.tkip.txlock);
 	ieee80211_compute_tkip_p1k(key, iv32);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3311ce0..30ee9ad 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -937,6 +937,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 		return TX_DROP;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&tx->local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
 
 	/* internal error, why isn't DONTFRAG set? */
 	if (WARN_ON(skb->len + FCS_LEN <= frag_threshold))
@@ -1796,6 +1798,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 
 	hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr);
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
 
 	if (skb->len < len_rthdr + hdrlen)
 		goto fail;
@@ -2020,6 +2024,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	struct ieee80211_sub_if_data *ap_sdata;
 	enum ieee80211_band band;
+	int padsize = 0;
 	int ret;
 
 	if (IS_ERR(sta))
@@ -2237,6 +2242,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 		hdrlen += 2;
 	}
 
+	/* Check if HW require skb to be aligned */
+	if (ieee80211_hw_check(&sdata->local->hw, NEEDS_ALIGNED4_SKBS))
+		padsize = hdrlen & 3;
+
 	/*
 	 * Drop unicast frames to unauthorised stations unless they are
 	 * EAPOL frames from the local station.
@@ -2323,6 +2332,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	h_pos -= skip_header_bytes;
 
 	head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
+	head_need += padsize;
 
 	/*
 	 * So we need to modify the skb header and hence need a copy of
@@ -2361,6 +2371,9 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	}
 #endif
 
+	if (padsize)
+		skb_push(skb, padsize);
+
 	if (ieee80211_is_data_qos(fc)) {
 		__le16 *qos_control;
 
@@ -2374,8 +2387,8 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	} else
 		memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
 
-	nh_pos += hdrlen;
-	h_pos += hdrlen;
+	nh_pos += hdrlen + padsize;
+	h_pos += hdrlen + padsize;
 
 	/* Update skb pointers to various headers since this modified frame
 	 * is going to go through Linux networking code that may potentially
@@ -2544,6 +2557,10 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
 		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
 	}
 
+	/* Check if aligned skb required */
+	if (ieee80211_hw_check(&local->hw, NEEDS_ALIGNED4_SKBS))
+		build.hdr_len += build.hdr_len & 3;
+
 	/* We store the key here so there's no point in using rcu_dereference()
 	 * but that's fine because the code that changes the pointers will call
 	 * this function after doing so. For a single CPU that would be enough,
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index efa3f48..46c7c67 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -102,6 +102,9 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
 		return NULL;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
+
 	newhdr = skb_push(skb, IEEE80211_WEP_IV_LEN);
 	memmove(newhdr, newhdr + IEEE80211_WEP_IV_LEN, hdrlen);
 
@@ -123,6 +126,9 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local,
 	unsigned int hdrlen;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
+
 	memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen);
 	skb_pull(skb, IEEE80211_WEP_IV_LEN);
 }
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index d824c38..18110c8 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -43,6 +43,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 		return TX_CONTINUE;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&tx->local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
 	if (skb->len < hdrlen)
 		return TX_DROP;
 
@@ -201,6 +203,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	}
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&tx->local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
 	len = skb->len - hdrlen;
 
 	if (info->control.hw_key)
@@ -307,7 +311,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 }
 
 
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
+				unsigned int padsize)
 {
 	__le16 mask_fc;
 	int a4_included, mgmt;
@@ -329,7 +334,8 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
 	mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
-	len_a = hdrlen - 2;
+	hdrlen += padsize;
+	len_a = hdrlen - 2 - padsize;
 	a4_included = ieee80211_has_a4(hdr->frame_control);
 
 	if (ieee80211_is_data_qos(hdr->frame_control))
@@ -405,6 +411,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tail;
+	unsigned int padsize = 0;
 	u8 *pos;
 	u8 pn[6];
 	u64 pn64;
@@ -425,6 +432,9 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
 	}
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&tx->local->hw, NEEDS_ALIGNED4_SKBS))
+		padsize = hdrlen & 3;
+	hdrlen += padsize;
 	len = skb->len - hdrlen;
 
 	if (info->control.hw_key)
@@ -463,7 +473,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
 		return 0;
 
 	pos += IEEE80211_CCMP_HDR_LEN;
-	ccmp_special_blocks(skb, pn, b_0, aad);
+
+	ccmp_special_blocks(skb, pn, b_0, aad, padsize);
 	ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
 				  skb_put(skb, mic_len), mic_len);
 
@@ -534,7 +545,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
 			u8 aad[2 * AES_BLOCK_SIZE];
 			u8 b_0[AES_BLOCK_SIZE];
 			/* hardware didn't decrypt/verify MIC */
-			ccmp_special_blocks(skb, pn, b_0, aad);
+			ccmp_special_blocks(skb, pn, b_0, aad, 0);
 
 			if (ieee80211_aes_ccm_decrypt(
 				    key->u.ccmp.tfm, b_0, aad,
@@ -556,7 +567,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
 	return RX_CONTINUE;
 }
 
-static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
+static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad,
+				unsigned int padsize)
 {
 	__le16 mask_fc;
 	u8 qos_tid;
@@ -571,7 +583,8 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
 	/* AAD (extra authenticate-only data) / masked 802.11 header
 	 * FC | A1 | A2 | A3 | SC | [A4] | [QC]
 	 */
-	put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]);
+	put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2 - padsize,
+			   &aad[0]);
 	/* Mask FC: zero subtype b4 b5 b6 (if not mgmt)
 	 * Retry, PwrMgt, MoreData; set Protected
 	 */
@@ -633,6 +646,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	struct ieee80211_key *key = tx->key;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int hdrlen, len, tail;
+	unsigned int padsize = 0;
 	u8 *pos;
 	u8 pn[6];
 	u64 pn64;
@@ -652,6 +666,9 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 	}
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&tx->local->hw, NEEDS_ALIGNED4_SKBS))
+		padsize = hdrlen & 3;
+	hdrlen += padsize;
 	len = skb->len - hdrlen;
 
 	if (info->control.hw_key)
@@ -692,7 +709,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 		return 0;
 
 	pos += IEEE80211_GCMP_HDR_LEN;
-	gcmp_special_blocks(skb, pn, j_0, aad);
+	gcmp_special_blocks(skb, pn, j_0, aad, padsize);
 	ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
 				  skb_put(skb, IEEE80211_GCMP_MIC_LEN));
 
@@ -760,7 +777,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
 			u8 aad[2 * AES_BLOCK_SIZE];
 			u8 j_0[AES_BLOCK_SIZE];
 			/* hardware didn't decrypt/verify MIC */
-			gcmp_special_blocks(skb, pn, j_0, aad);
+			gcmp_special_blocks(skb, pn, j_0, aad, 0);
 
 			if (ieee80211_aes_gcm_decrypt(
 				    key->u.gcmp.tfm, j_0, aad,
@@ -804,6 +821,8 @@ ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx,
 		return TX_DROP;
 
 	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (ieee80211_hw_check(&tx->local->hw, NEEDS_ALIGNED4_SKBS))
+		hdrlen += hdrlen & 3;
 
 	pos = skb_push(skb, iv_len);
 	memmove(pos, pos + iv_len, hdrlen);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC/RFT 2/2] ath9k: request aligned skb
  2015-12-17  9:20 [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Janusz Dziedzic
@ 2015-12-17  9:20 ` Janusz Dziedzic
  2015-12-21 18:53   ` Souptick Joarder
  2015-12-17  9:39 ` [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Felix Fietkau
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Janusz Dziedzic @ 2015-12-17  9:20 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes, nbd, Janusz Dziedzic

Set the NEEDS_ALIGNED4_SKBS hw flag.
This allows the driver to save CPU time by removing two memmove()
calls from the tx path.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
---
 drivers/net/wireless/ath/ath9k/init.c |  1 +
 drivers/net/wireless/ath/ath9k/xmit.c | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 6abace6..d578a00 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -831,6 +831,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, RX_INCLUDES_FCS);
 	ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING);
 	ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
+	ieee80211_hw_set(hw, NEEDS_ALIGNED4_SKBS);
 
 	if (ath9k_ps_enable)
 		ieee80211_hw_set(hw, SUPPORTS_PS);
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 82fc76f..c3bd1b1 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -2267,11 +2267,15 @@ static int ath_tx_prepare(struct ieee80211_hw *hw, struct sk_buff *skb,
 	padpos = ieee80211_hdrlen(hdr->frame_control);
 	padsize = padpos & 3;
 	if (padsize && skb->len > padpos) {
-		if (skb_headroom(skb) < padsize)
-			return -ENOMEM;
+		if (ieee80211_hw_check(hw, NEEDS_ALIGNED4_SKBS)) {
+			frmlen -= padsize;
+		} else {
+			if (skb_headroom(skb) < padsize)
+				return -ENOMEM;
 
-		skb_push(skb, padsize);
-		memmove(skb->data, skb->data + padsize, padpos);
+			skb_push(skb, padsize);
+			memmove(skb->data, skb->data + padsize, padpos);
+		}
 	}
 
 	setup_frame_info(hw, sta, skb, frmlen);
@@ -2494,7 +2498,8 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
 
 	padpos = ieee80211_hdrlen(hdr->frame_control);
 	padsize = padpos & 3;
-	if (padsize && skb->len>padpos+padsize) {
+	if (padsize && skb->len > padpos + padsize &&
+	    !ieee80211_hw_check(sc->hw, NEEDS_ALIGNED4_SKBS)) {
 		/*
 		 * Remove MAC header padding before giving the frame back to
 		 * mac80211.
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17  9:20 [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Janusz Dziedzic
  2015-12-17  9:20 ` [RFC/RFT 2/2] ath9k: request aligned skb Janusz Dziedzic
@ 2015-12-17  9:39 ` Felix Fietkau
  2015-12-17 10:04 ` Johannes Berg
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Felix Fietkau @ 2015-12-17  9:39 UTC (permalink / raw)
  To: Janusz Dziedzic, linux-wireless; +Cc: johannes

On 2015-12-17 10:20, Janusz Dziedzic wrote:
> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
> aligned skbs to four-byte boundaries.
> 
> Before we have to do memmove() in the driver before
> pass this to HW and memmove() back in tx completion.
> This patch allow to save CPU and skip such memmoves.
> For each skb we called memmove(ieee80211_hdrsize()) twice.
> 
> Currently this was tested with ath9k, both hw/sw crypt for
> tkip/ccmp.
> For sure more tests required.
> 
> Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Nice. By the way, this alignment requirement is not ath9k specific -
mt76 (currently out-of-tree), mt7601u and rt2x00 can use this as well.

- Felix

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17  9:20 [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Janusz Dziedzic
  2015-12-17  9:20 ` [RFC/RFT 2/2] ath9k: request aligned skb Janusz Dziedzic
  2015-12-17  9:39 ` [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Felix Fietkau
@ 2015-12-17 10:04 ` Johannes Berg
  2015-12-17 10:09   ` Felix Fietkau
  2015-12-17 10:12 ` Felix Fietkau
  2015-12-17 10:29 ` Johannes Berg
  4 siblings, 1 reply; 12+ messages in thread
From: Johannes Berg @ 2015-12-17 10:04 UTC (permalink / raw)
  To: Janusz Dziedzic, linux-wireless; +Cc: nbd

On Thu, 2015-12-17 at 10:20 +0100, Janusz Dziedzic wrote:
> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
> aligned skbs to four-byte boundaries.
> 
> Before we have to do memmove() in the driver before
> pass this to HW and memmove() back in tx completion.
> This patch allow to save CPU and skip such memmoves.
> For each skb we called memmove(ieee80211_hdrsize()) twice.

IMHO this is pretty awful from a code complexity POV. You also forgot
to update fast-xmit maximum header length.

Note that we (iwlwifi) also kinda need this, but essentially solve it
with the DMA engine. Can't ath9k do the same?

johannes

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17 10:04 ` Johannes Berg
@ 2015-12-17 10:09   ` Felix Fietkau
  0 siblings, 0 replies; 12+ messages in thread
From: Felix Fietkau @ 2015-12-17 10:09 UTC (permalink / raw)
  To: Johannes Berg, Janusz Dziedzic, linux-wireless

On 2015-12-17 11:04, Johannes Berg wrote:
> On Thu, 2015-12-17 at 10:20 +0100, Janusz Dziedzic wrote:
>> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
>> aligned skbs to four-byte boundaries.
>> 
>> Before we have to do memmove() in the driver before
>> pass this to HW and memmove() back in tx completion.
>> This patch allow to save CPU and skip such memmoves.
>> For each skb we called memmove(ieee80211_hdrsize()) twice.
> 
> IMHO this is pretty awful from a code complexity POV. You also forgot
> to update fast-xmit maximum header length.
> 
> Note that we (iwlwifi) also kinda need this, but essentially solve it
> with the DMA engine. Can't ath9k do the same?
I tried that approach a few years ago, but it turned out to make the
hardware unstable, causing random DMA lockups.

- Felix

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17  9:20 [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Janusz Dziedzic
                   ` (2 preceding siblings ...)
  2015-12-17 10:04 ` Johannes Berg
@ 2015-12-17 10:12 ` Felix Fietkau
  2015-12-18  8:43   ` Janusz Dziedzic
  2015-12-17 10:29 ` Johannes Berg
  4 siblings, 1 reply; 12+ messages in thread
From: Felix Fietkau @ 2015-12-17 10:12 UTC (permalink / raw)
  To: Janusz Dziedzic, linux-wireless; +Cc: johannes

On 2015-12-17 10:20, Janusz Dziedzic wrote:
> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
> aligned skbs to four-byte boundaries.
> 
> Before we have to do memmove() in the driver before
> pass this to HW and memmove() back in tx completion.
> This patch allow to save CPU and skip such memmoves.
> For each skb we called memmove(ieee80211_hdrsize()) twice.
> 
> Currently this was tested with ath9k, both hw/sw crypt for
> tkip/ccmp.
> For sure more tests required.
> 
> Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
> ---
>  include/net/mac80211.h |  4 ++++
>  net/mac80211/debugfs.c |  1 +
>  net/mac80211/tkip.c    | 15 ++++++++++++---
>  net/mac80211/tx.c      | 21 +++++++++++++++++++--
>  net/mac80211/wep.c     |  6 ++++++
>  net/mac80211/wpa.c     | 35 +++++++++++++++++++++++++++--------
>  6 files changed, 69 insertions(+), 13 deletions(-)
> 
> diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
> index 0ae2077..26b2663 100644
> --- a/net/mac80211/tkip.c
> +++ b/net/mac80211/tkip.c
> @@ -204,9 +204,18 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf,
>  	const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
>  	struct tkip_ctx *ctx = &key->u.tkip.tx;
>  	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
> -	const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control);
> -	u32 iv32 = get_unaligned_le32(&data[4]);
> -	u16 iv16 = data[2] | (data[0] << 8);
> +	unsigned int hdrlen;
> +	const u8 *data;
> +	u32 iv32;
> +	u16 iv16;
> +
> +	hdrlen = ieee80211_hdrlen(hdr->frame_control);
> +	if (ieee80211_hw_check(&key->local->hw, NEEDS_ALIGNED4_SKBS))
> +		hdrlen += hdrlen & 3;
I think this check is duplicated way too often, maybe you should
implement a wrapper for ieee80211_hdrlen and convert all relevant call
sites. Makes it easier to spot places where this was forgotten.

- Felix

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17  9:20 [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Janusz Dziedzic
                   ` (3 preceding siblings ...)
  2015-12-17 10:12 ` Felix Fietkau
@ 2015-12-17 10:29 ` Johannes Berg
  2015-12-17 10:35   ` Felix Fietkau
  4 siblings, 1 reply; 12+ messages in thread
From: Johannes Berg @ 2015-12-17 10:29 UTC (permalink / raw)
  To: Janusz Dziedzic, linux-wireless; +Cc: nbd

On Thu, 2015-12-17 at 10:20 +0100, Janusz Dziedzic wrote:
> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
> aligned skbs to four-byte boundaries.
> 
> Before we have to do memmove() in the driver before
> pass this to HW and memmove() back in tx completion.
> This patch allow to save CPU and skip such memmoves.

Can you quantify that btw? It shouldn't be that expensive since it's
all in the cache for read, so just has to be written out to RAM before
the DMA can happen... And on status it will be pulled into the cache
anyway, and you don't even need the write to happen before you can
continue.

johannes

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17 10:29 ` Johannes Berg
@ 2015-12-17 10:35   ` Felix Fietkau
  2015-12-17 10:45     ` Johannes Berg
  0 siblings, 1 reply; 12+ messages in thread
From: Felix Fietkau @ 2015-12-17 10:35 UTC (permalink / raw)
  To: Johannes Berg, Janusz Dziedzic, linux-wireless

On 2015-12-17 11:29, Johannes Berg wrote:
> On Thu, 2015-12-17 at 10:20 +0100, Janusz Dziedzic wrote:
>> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
>> aligned skbs to four-byte boundaries.
>> 
>> Before we have to do memmove() in the driver before
>> pass this to HW and memmove() back in tx completion.
>> This patch allow to save CPU and skip such memmoves.
> 
> Can you quantify that btw? It shouldn't be that expensive since it's
> all in the cache for read, so just has to be written out to RAM before
> the DMA can happen...
On many devices that I'm using, the data path is definitely too bloated
for the packet to still be in cache, so I do think this will help.
Remember, having only 32 KiB Dcache (no L2) is not uncommon on devices
running ath9k.

> And on status it will be pulled into the cache
> anyway, and you don't even need the write to happen before you can
> continue.
This doesn't make sense to me. The write needs to happen before the
device can do DMA...

- Felix

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17 10:35   ` Felix Fietkau
@ 2015-12-17 10:45     ` Johannes Berg
  0 siblings, 0 replies; 12+ messages in thread
From: Johannes Berg @ 2015-12-17 10:45 UTC (permalink / raw)
  To: Felix Fietkau, Janusz Dziedzic, linux-wireless

On Thu, 2015-12-17 at 11:35 +0100, Felix Fietkau wrote:
> 
> On many devices that I'm using, the data path is definitely too
> bloated
> for the packet to still be in cache, so I do think this will help.
> Remember, having only 32 KiB Dcache (no L2) is not uncommon on
> devices
> running ath9k.

Hm, ok :)

> > And on status it will be pulled into the cache
> > anyway, and you don't even need the write to happen before you can
> > continue.
> This doesn't make sense to me. The write needs to happen before the
> device can do DMA...
> 

On *status* :)

johannes

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag
  2015-12-17 10:12 ` Felix Fietkau
@ 2015-12-18  8:43   ` Janusz Dziedzic
  0 siblings, 0 replies; 12+ messages in thread
From: Janusz Dziedzic @ 2015-12-18  8:43 UTC (permalink / raw)
  To: Felix Fietkau; +Cc: linux-wireless, Johannes Berg

On 17 December 2015 at 11:12, Felix Fietkau <nbd@openwrt.org> wrote:
> On 2015-12-17 10:20, Janusz Dziedzic wrote:
>> HW/driver should set NEED_ALIGNED4_SKBS flag in case require
>> aligned skbs to four-byte boundaries.
>>
>> Before we have to do memmove() in the driver before
>> pass this to HW and memmove() back in tx completion.
>> This patch allow to save CPU and skip such memmoves.
>> For each skb we called memmove(ieee80211_hdrsize()) twice.
>>
>> Currently this was tested with ath9k, both hw/sw crypt for
>> tkip/ccmp.
>> For sure more tests required.
>>
>> Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
>> ---
>>  include/net/mac80211.h |  4 ++++
>>  net/mac80211/debugfs.c |  1 +
>>  net/mac80211/tkip.c    | 15 ++++++++++++---
>>  net/mac80211/tx.c      | 21 +++++++++++++++++++--
>>  net/mac80211/wep.c     |  6 ++++++
>>  net/mac80211/wpa.c     | 35 +++++++++++++++++++++++++++--------
>>  6 files changed, 69 insertions(+), 13 deletions(-)
>>
>> diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
>> index 0ae2077..26b2663 100644
>> --- a/net/mac80211/tkip.c
>> +++ b/net/mac80211/tkip.c
>> @@ -204,9 +204,18 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf,
>>       const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
>>       struct tkip_ctx *ctx = &key->u.tkip.tx;
>>       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
>> -     const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control);
>> -     u32 iv32 = get_unaligned_le32(&data[4]);
>> -     u16 iv16 = data[2] | (data[0] << 8);
>> +     unsigned int hdrlen;
>> +     const u8 *data;
>> +     u32 iv32;
>> +     u16 iv16;
>> +
>> +     hdrlen = ieee80211_hdrlen(hdr->frame_control);
>> +     if (ieee80211_hw_check(&key->local->hw, NEEDS_ALIGNED4_SKBS))
>> +             hdrlen += hdrlen & 3;
> I think this check is duplicated way too often, maybe you should
> implement a wrapper for ieee80211_hdrlen and convert all relevant call
> sites. Makes it easier to spot places where this was forgotten.
>
Another option is to add this to ieee80211_tx_data, since we pass this
param to most of the encrypt functions ...
On the other hand, I see the ieee80211_tx_data skbs list could be used -
seems only for fragmentation?

@Johannes - would it be safe to add the "real" hdrlen to ieee80211_tx_data?

The other options I see are ieee80211_tx_info or, as Felix suggests, a new
hdrlen() function.

BR
Janusz

> - Felix

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 2/2] ath9k: request aligned skb
  2015-12-17  9:20 ` [RFC/RFT 2/2] ath9k: request aligned skb Janusz Dziedzic
@ 2015-12-21 18:53   ` Souptick Joarder
  2015-12-21 22:48     ` Julian Calaby
  0 siblings, 1 reply; 12+ messages in thread
From: Souptick Joarder @ 2015-12-21 18:53 UTC (permalink / raw)
  To: Janusz Dziedzic; +Cc: linux-wireless, johannes, nbd

On Thu, Dec 17, 2015 at 2:50 PM, Janusz Dziedzic
<janusz.dziedzic@tieto.com> wrote:
>
> Set NEEDS_ALIGNED4_SKB hw flag.
> This allow driver to save CPU and remove two memmove
> from tx path.
>
> Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
> ---
>  drivers/net/wireless/ath/ath9k/init.c |  1 +
>  drivers/net/wireless/ath/ath9k/xmit.c | 15 ++++++++++-----
>  2 files changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
> index 6abace6..d578a00 100644
> --- a/drivers/net/wireless/ath/ath9k/init.c
> +++ b/drivers/net/wireless/ath/ath9k/init.c
> @@ -831,6 +831,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
>         ieee80211_hw_set(hw, RX_INCLUDES_FCS);
>         ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING);
>         ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
> +       ieee80211_hw_set(hw, NEEDS_ALIGNED4_SKBS);
>
>         if (ath9k_ps_enable)
>                 ieee80211_hw_set(hw, SUPPORTS_PS);
> diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
> index 82fc76f..c3bd1b1 100644
> --- a/drivers/net/wireless/ath/ath9k/xmit.c
> +++ b/drivers/net/wireless/ath/ath9k/xmit.c
> @@ -2267,11 +2267,15 @@ static int ath_tx_prepare(struct ieee80211_hw *hw, struct sk_buff *skb,
>         padpos = ieee80211_hdrlen(hdr->frame_control);
>         padsize = padpos & 3;
>         if (padsize && skb->len > padpos) {
> -               if (skb_headroom(skb) < padsize)
> -                       return -ENOMEM;
> +               if (ieee80211_hw_check(hw, NEEDS_ALIGNED4_SKBS)) {
> +                       frmlen -= padsize;
> +               } else {
> +                       if (skb_headroom(skb) < padsize)
> +                               return -ENOMEM;
>
> -               skb_push(skb, padsize);
> -               memmove(skb->data, skb->data + padsize, padpos);

                  Why have the same lines been removed and added again?
> +                       skb_push(skb, padsize);
> +                       memmove(skb->data, skb->data + padsize, padpos);
> +               }
>         }



>
>
>         setup_frame_info(hw, sta, skb, frmlen);
> @@ -2494,7 +2498,8 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
>
>         padpos = ieee80211_hdrlen(hdr->frame_control);
>         padsize = padpos & 3;
> -       if (padsize && skb->len>padpos+padsize) {
> +       if (padsize && skb->len > padpos + padsize &&
> +           !ieee80211_hw_check(sc->hw, NEEDS_ALIGNED4_SKBS)) {
>                 /*
>                  * Remove MAC header padding before giving the frame back to
>                  * mac80211.
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Regards
Souptick

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/RFT 2/2] ath9k: request aligned skb
  2015-12-21 18:53   ` Souptick Joarder
@ 2015-12-21 22:48     ` Julian Calaby
  0 siblings, 0 replies; 12+ messages in thread
From: Julian Calaby @ 2015-12-21 22:48 UTC (permalink / raw)
  To: Souptick Joarder
  Cc: Janusz Dziedzic, linux-wireless, Johannes Berg, Felix Fietkau

Hi,

On Tue, Dec 22, 2015 at 5:53 AM, Souptick Joarder <jrdr.linux@gmail.com> wrote:
> On Thu, Dec 17, 2015 at 2:50 PM, Janusz Dziedzic
> <janusz.dziedzic@tieto.com> wrote:
>>
>> Set NEEDS_ALIGNED4_SKB hw flag.
>> This allow driver to save CPU and remove two memmove
>> from tx path.
>>
>> Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
>> ---
>>  drivers/net/wireless/ath/ath9k/init.c |  1 +
>>  drivers/net/wireless/ath/ath9k/xmit.c | 15 ++++++++++-----
>>  2 files changed, 11 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
>> index 6abace6..d578a00 100644
>> --- a/drivers/net/wireless/ath/ath9k/init.c
>> +++ b/drivers/net/wireless/ath/ath9k/init.c
>> @@ -831,6 +831,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
>>         ieee80211_hw_set(hw, RX_INCLUDES_FCS);
>>         ieee80211_hw_set(hw, HOST_BROADCAST_PS_BUFFERING);
>>         ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
>> +       ieee80211_hw_set(hw, NEEDS_ALIGNED4_SKBS);
>>
>>         if (ath9k_ps_enable)
>>                 ieee80211_hw_set(hw, SUPPORTS_PS);
>> diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
>> index 82fc76f..c3bd1b1 100644
>> --- a/drivers/net/wireless/ath/ath9k/xmit.c
>> +++ b/drivers/net/wireless/ath/ath9k/xmit.c
>> @@ -2267,11 +2267,15 @@ static int ath_tx_prepare(struct ieee80211_hw *hw, struct sk_buff *skb,
>>         padpos = ieee80211_hdrlen(hdr->frame_control);
>>         padsize = padpos & 3;
>>         if (padsize && skb->len > padpos) {
>> -               if (skb_headroom(skb) < padsize)
>> -                       return -ENOMEM;
>> +               if (ieee80211_hw_check(hw, NEEDS_ALIGNED4_SKBS)) {
>> +                       frmlen -= padsize;
>> +               } else {
>> +                       if (skb_headroom(skb) < padsize)
>> +                               return -ENOMEM;
>>
>> -               skb_push(skb, padsize);
>> -               memmove(skb->data, skb->data + padsize, padpos);
>
>                   why the same lines has been removed and added again ?

The indentation is different as they're now in another if statement
under the if (padsize... statement.

>> +                       skb_push(skb, padsize);
>> +                       memmove(skb->data, skb->data + padsize, padpos);
>> +               }
>>         }
>
>
>
>>
>>
>>         setup_frame_info(hw, sta, skb, frmlen);
>> @@ -2494,7 +2498,8 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
>>
>>         padpos = ieee80211_hdrlen(hdr->frame_control);
>>         padsize = padpos & 3;
>> -       if (padsize && skb->len>padpos+padsize) {
>> +       if (padsize && skb->len > padpos + padsize &&
>> +           !ieee80211_hw_check(sc->hw, NEEDS_ALIGNED4_SKBS)) {
>>                 /*
>>                  * Remove MAC header padding before giving the frame back to
>>                  * mac80211.
>> --
>> 1.9.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> Regards
> Souptick
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.calaby@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-12-21 22:48 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-17  9:20 [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Janusz Dziedzic
2015-12-17  9:20 ` [RFC/RFT 2/2] ath9k: request aligned skb Janusz Dziedzic
2015-12-21 18:53   ` Souptick Joarder
2015-12-21 22:48     ` Julian Calaby
2015-12-17  9:39 ` [RFC/RFT 1/2] mac80211: Add NEED_ALIGNED4_SKBS hw flag Felix Fietkau
2015-12-17 10:04 ` Johannes Berg
2015-12-17 10:09   ` Felix Fietkau
2015-12-17 10:12 ` Felix Fietkau
2015-12-18  8:43   ` Janusz Dziedzic
2015-12-17 10:29 ` Johannes Berg
2015-12-17 10:35   ` Felix Fietkau
2015-12-17 10:45     ` Johannes Berg

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.