netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
To: netdev@vger.kernel.org, edumazet@google.com
Cc: Paolo Abeni <pabeni@redhat.com>,
	cpaasch@apple.com, fw@strlen.de, peter.krystad@linux.intel.com,
	dcaratti@redhat.com, matthieu.baerts@tessares.net
Subject: [RFC PATCH v2 39/45] mptcp: rework mptcp_sendmsg_frag to accept optional dfrag
Date: Wed,  2 Oct 2019 16:36:49 -0700	[thread overview]
Message-ID: <20191002233655.24323-40-mathew.j.martineau@linux.intel.com> (raw)
In-Reply-To: <20191002233655.24323-1-mathew.j.martineau@linux.intel.com>

From: Paolo Abeni <pabeni@redhat.com>

This will simplify mptcp-level retransmission implementation
in the next patch. If dfrag is provided by the caller, skip
kernel space memory allocation and use data and metadata
provided by the dfrag itself.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 133 +++++++++++++++++++++++++------------------
 1 file changed, 78 insertions(+), 55 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8319ee807481..6366dfd65aa8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -95,7 +95,7 @@ static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
 	return NULL;
 }
 
-static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
+static inline bool mptcp_skb_can_collapse_to(u64 write_seq,
 					     const struct sk_buff *skb,
 					     const struct mptcp_ext *mpext)
 {
@@ -103,7 +103,7 @@ static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
 		return false;
 
 	/* can collapse only if MPTCP level sequence is in order */
-	return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
+	return mpext && mpext->data_seq + mpext->data_len == write_seq;
 }
 
 static inline bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
@@ -168,31 +168,44 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
 }
 
 static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
-			      struct msghdr *msg, long *timeo, int *pmss_now,
+			      struct msghdr *msg, struct mptcp_data_frag *dfrag,
+			      long *timeo, int *pmss_now,
 			      int *ps_goal)
 {
 	int mss_now, avail_size, size_goal, offset, ret, frag_truesize = 0;
 	bool dfrag_collapsed, collapsed, can_collapse = false;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_ext *mpext = NULL;
-	struct mptcp_data_frag *dfrag;
+	bool retransmission = !!dfrag;
 	struct page_frag *pfrag;
 	struct sk_buff *skb;
+	struct page *page;
+	u64 *write_seq;
 	size_t psize;
+	int *poffset;
 
 	/* use the mptcp page cache so that we can easily move the data
 	 * from one substream to another, but do per subflow memory accounting
+	 * Note: pfrag is used only !retransmission, but the compiler if
+	 * fooled into a warning if we don't init here
 	 */
 	pfrag = sk_page_frag(sk);
-	while (!mptcp_page_frag_refill(ssk, pfrag)) {
-		ret = sk_stream_wait_memory(ssk, timeo);
-		if (ret)
-			return ret;
-
-		/* id sk_stream_wait_memory() sleeps snd_una can change
-		 * significantly, refresh the rtx queue
-		 */
-		mptcp_clean_una(sk);
+	if (!retransmission) {
+		while (!mptcp_page_frag_refill(ssk, pfrag)) {
+			ret = sk_stream_wait_memory(ssk, timeo);
+			if (ret)
+				return ret;
+
+			/* id sk_stream_wait_memory() sleeps snd_una can change
+			 * significantly, refresh the rtx queue
+			 */
+			mptcp_clean_una(sk);
+		}
+		write_seq = &msk->write_seq;
+		page = pfrag->page;
+	} else {
+		write_seq = &dfrag->data_seq;
+		page = dfrag->page;
 	}
 
 	/* compute copy limit */
@@ -211,63 +224,73 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 * SSN association set here
 		 */
 		can_collapse = (size_goal - skb->len > 0) &&
-			      mptcp_skb_can_collapse_to(msk, skb, mpext);
+			      mptcp_skb_can_collapse_to(*write_seq, skb, mpext);
 		if (!can_collapse)
 			TCP_SKB_CB(skb)->eor = 1;
 		else
 			avail_size = size_goal - skb->len;
 	}
 
-	/* reuse tail pfrag, if possible, or carve a new one from the page
-	 * allocator
-	 */
-	dfrag = mptcp_rtx_tail(sk);
-	offset = pfrag->offset;
-	dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
-	if (!dfrag_collapsed) {
-		dfrag = mptcp_carve_data_frag(msk, pfrag, offset);
+	if (!retransmission) {
+		/* reuse tail pfrag, if possible, or carve a new one from the
+		 * page allocator
+		 */
+		dfrag = mptcp_rtx_tail(sk);
+		offset = pfrag->offset;
+		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
+		if (!dfrag_collapsed) {
+			dfrag = mptcp_carve_data_frag(msk, pfrag, offset);
+			offset = dfrag->offset;
+			frag_truesize = dfrag->overhead;
+		}
+		poffset = &pfrag->offset;
+		psize = min_t(size_t, pfrag->size - offset, avail_size);
+
+		/* Copy to page */
+		pr_debug("left=%zu", msg_data_left(msg));
+		psize = copy_page_from_iter(pfrag->page, offset,
+					    min_t(size_t, msg_data_left(msg),
+						  psize),
+					    &msg->msg_iter);
+		pr_debug("left=%zu", msg_data_left(msg));
+		if (!psize)
+			return -EINVAL;
+
+		if (!sk_wmem_schedule(sk, psize + dfrag->overhead))
+			return -ENOMEM;
+	} else {
 		offset = dfrag->offset;
-		frag_truesize = dfrag->overhead;
+		poffset = &dfrag->offset;
+		psize = min_t(size_t, dfrag->data_len, avail_size);
 	}
-	psize = min_t(size_t, pfrag->size - offset, avail_size);
-
-	/* Copy to page */
-	pr_debug("left=%zu", msg_data_left(msg));
-	psize = copy_page_from_iter(pfrag->page, offset,
-				    min_t(size_t, msg_data_left(msg), psize),
-				    &msg->msg_iter);
-	pr_debug("left=%zu", msg_data_left(msg));
-	if (!psize)
-		return -EINVAL;
-
-	if (!sk_wmem_schedule(sk, psize + dfrag->overhead))
-		return -ENOMEM;
 
 	/* tell the TCP stack to delay the push so that we can safely
 	 * access the skb after the sendpages call
 	 */
-	ret = do_tcp_sendpages(ssk, pfrag->page, offset, psize,
+	ret = do_tcp_sendpages(ssk, page, offset, psize,
 			       msg->msg_flags | MSG_SENDPAGE_NOTLAST);
 	if (ret <= 0)
 		return ret;
 
 	frag_truesize += ret;
-	if (unlikely(ret < psize))
-		iov_iter_revert(&msg->msg_iter, psize - ret);
+	if (!retransmission) {
+		if (unlikely(ret < psize))
+			iov_iter_revert(&msg->msg_iter, psize - ret);
 
-	/* send successful, keep track of sent data for mptcp-level
-	 * retransmission
-	 */
-	dfrag->data_len += ret;
-	if (!dfrag_collapsed) {
-		get_page(dfrag->page);
-		list_add_tail(&dfrag->list, &msk->rtx_queue);
-	}
+		/* send successful, keep track of sent data for mptcp-level
+		 * retransmission
+		 */
+		dfrag->data_len += ret;
+		if (!dfrag_collapsed) {
+			get_page(dfrag->page);
+			list_add_tail(&dfrag->list, &msk->rtx_queue);
+		}
 
-	/* charge data on mptcp rtx queue to the master socket
-	 * Note: we charge such data both to sk and ssk
-	 */
-	sk->sk_forward_alloc -= frag_truesize;
+		/* charge data on mptcp rtx queue to the master socket
+		 * Note: we charge such data both to sk and ssk
+		 */
+		sk->sk_forward_alloc -= frag_truesize;
+	}
 
 	collapsed = skb == tcp_write_queue_tail(ssk);
 	if (collapsed) {
@@ -281,7 +304,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
 	if (mpext) {
 		memset(mpext, 0, sizeof(*mpext));
-		mpext->data_seq = msk->write_seq;
+		mpext->data_seq = *write_seq;
 		mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
 		mpext->data_len = ret;
 		mpext->checksum = 0xbeef;
@@ -297,8 +320,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	 */
 
 out:
-	pfrag->offset += frag_truesize;
-	msk->write_seq += ret;
+	*poffset += frag_truesize;
+	*write_seq += ret;
 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
 
 	return ret;
@@ -347,7 +370,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	mptcp_clean_una(sk);
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	while (msg_data_left(msg)) {
-		ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
+		ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &timeo, &mss_now,
 					 &size_goal);
 		if (ret < 0)
 			break;
-- 
2.23.0


  parent reply	other threads:[~2019-10-02 23:37 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-02 23:36 [RFC PATCH v2 00/45] Multipath TCP Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 01/45] tcp: Add MPTCP option number Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 02/45] net: Make sock protocol value checks more specific Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 03/45] sock: Make sk_protocol a 16-bit value Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 04/45] tcp: Define IPPROTO_MPTCP Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 05/45] mptcp: Add MPTCP socket stubs Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 06/45] mptcp: Handle MPTCP TCP options Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 07/45] mptcp: Associate MPTCP context with TCP socket Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 08/45] tcp: Expose tcp struct and routine for MPTCP Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 09/45] mptcp: Handle MP_CAPABLE options for outgoing connections Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 10/45] mptcp: add mptcp_poll Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 11/45] tcp, ulp: Add clone operation to tcp_ulp_ops Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 12/45] mptcp: Create SUBFLOW socket for incoming connections Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 13/45] mptcp: Add key generation and token tree Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 14/45] mptcp: Add shutdown() socket operation Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 15/45] mptcp: Add setsockopt()/getsockopt() socket operations Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 16/45] tcp: clean ext on tx recycle Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 17/45] mptcp: Add MPTCP to skb extensions Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 18/45] tcp: Prevent coalesce/collapse when skb has MPTCP extensions Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 19/45] tcp: Export low-level TCP functions Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 20/45] mptcp: Write MPTCP DSS headers to outgoing data packets Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 21/45] mptcp: Implement MPTCP receive path Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 22/45] mptcp: use sk_page_frag() in sendmsg Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 23/45] mptcp: sendmsg() do spool all the provided data Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 24/45] mptcp: allow collapsing consecutive sendpages on the same substream Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 25/45] tcp: Check for filled TCP option space before SACK Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 26/45] mptcp: Add path manager interface Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 27/45] mptcp: Add ADD_ADDR handling Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 28/45] mptcp: Add handling of incoming MP_JOIN requests Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 29/45] mptcp: harmonize locking on all socket operations Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 30/45] mptcp: new sysctl to control the activation per NS Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 31/45] mptcp: add basic kselftest for mptcp Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 32/45] mptcp: Add handling of outgoing MP_JOIN requests Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 33/45] mptcp: Implement path manager interface commands Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 34/45] mptcp: Make MPTCP socket block/wakeup ignore sk_receive_queue Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 35/45] mptcp: update per unacked sequence on pkt reception Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 36/45] mptcp: queue data for mptcp level retransmission Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 37/45] mptcp: introduce MPTCP retransmission timer Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 38/45] mptcp: implement memory accounting for mptcp rtx queue Mat Martineau
2019-10-02 23:36 ` Mat Martineau [this message]
2019-10-02 23:36 ` [RFC PATCH v2 40/45] mptcp: implement and use MPTCP-level retransmission Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 41/45] selftests: mptcp: make tc delays random Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 42/45] selftests: mptcp: extend mptcp_connect tool for ipv6 family Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 43/45] selftests: mptcp: add accept/getpeer checks Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 44/45] selftests: mptcp: add ipv6 connectivity Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 45/45] selftests: mptcp: random ethtool tweaking Mat Martineau
2019-10-02 23:53 ` [RFC PATCH v2 00/45] Multipath TCP Mat Martineau
2019-10-03  0:12 ` David Miller
2019-10-03  0:27   ` Mat Martineau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191002233655.24323-40-mathew.j.martineau@linux.intel.com \
    --to=mathew.j.martineau@linux.intel.com \
    --cc=cpaasch@apple.com \
    --cc=dcaratti@redhat.com \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=matthieu.baerts@tessares.net \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=peter.krystad@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).