From: Mat Martineau <mathew.j.martineau@linux.intel.com>
To: netdev@vger.kernel.org, edumazet@google.com
Cc: Paolo Abeni <pabeni@redhat.com>,
cpaasch@apple.com, fw@strlen.de, peter.krystad@linux.intel.com,
dcaratti@redhat.com, matthieu.baerts@tessares.net
Subject: [RFC PATCH v2 24/45] mptcp: allow collapsing consecutive sendpages on the same substream
Date: Wed, 2 Oct 2019 16:36:34 -0700 [thread overview]
Message-ID: <20191002233655.24323-25-mathew.j.martineau@linux.intel.com> (raw)
In-Reply-To: <20191002233655.24323-1-mathew.j.martineau@linux.intel.com>
From: Paolo Abeni <pabeni@redhat.com>
If the current sendmsg() lands on the same subflow we used last, we
can try to collapse the data.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/mptcp/protocol.c | 74 +++++++++++++++++++++++++++++++++++---------
1 file changed, 59 insertions(+), 15 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 758369256f9b..6b31c0e460d9 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -58,11 +58,24 @@ static struct sock *mptcp_subflow_get_ref(const struct mptcp_sock *msk)
return NULL;
}
+static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
+ const struct sk_buff *skb,
+ const struct mptcp_ext *mpext)
+{
+ if (!tcp_skb_can_collapse_to(skb))
+ return false;
+
+ /* can collapse only if MPTCP level sequence is in order */
+ return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
- struct msghdr *msg, long *timeo)
+ struct msghdr *msg, long *timeo, int *pmss_now,
+ int *ps_goal)
{
- int mss_now = 0, size_goal = 0, ret = 0;
+ int mss_now, avail_size, size_goal, ret;
struct mptcp_sock *msk = mptcp_sk(sk);
+ bool collapsed, can_collapse = false;
struct mptcp_ext *mpext = NULL;
struct page_frag *pfrag;
struct sk_buff *skb;
@@ -80,8 +93,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
/* compute copy limit */
mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
- psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
+ *pmss_now = mss_now;
+ *ps_goal = size_goal;
+ avail_size = size_goal;
+ skb = tcp_write_queue_tail(ssk);
+ if (skb) {
+ mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+
+ /* Limit the write to the size available in the
+ * current skb, if any, so that we create at most a new skb.
+ * Explicitly tells TCP internals to avoid collapsing on later
+ * queue management operation, to avoid breaking the ext <->
+ * SSN association set here
+ */
+ can_collapse = (size_goal - skb->len > 0) &&
+ mptcp_skb_can_collapse_to(msk, skb, mpext);
+ if (!can_collapse)
+ TCP_SKB_CB(skb)->eor = 1;
+ else
+ avail_size = size_goal - skb->len;
+ }
+ psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);
+ /* Copy to page */
pr_debug("left=%zu", msg_data_left(msg));
psize = copy_page_from_iter(pfrag->page, pfrag->offset,
min_t(size_t, msg_data_left(msg), psize),
@@ -90,14 +124,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (!psize)
return -EINVAL;
- /* Mark the end of the previous write so the beginning of the
- * next write (with its own mptcp skb extension data) is not
- * collapsed.
+ /* tell the TCP stack to delay the push so that we can safely
+ * access the skb after the sendpages call
*/
- skb = tcp_write_queue_tail(ssk);
- if (skb)
- TCP_SKB_CB(skb)->eor = 1;
-
ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
@@ -105,6 +134,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (unlikely(ret < psize))
iov_iter_revert(&msg->msg_iter, psize - ret);
+ collapsed = skb == tcp_write_queue_tail(ssk);
+ if (collapsed) {
+ WARN_ON_ONCE(!can_collapse);
+ /* when collapsing mpext always exists */
+ mpext->data_len += ret;
+ goto out;
+ }
+
skb = tcp_write_queue_tail(ssk);
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (mpext) {
@@ -119,23 +156,26 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u checksum=%u, dsn64=%d",
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
mpext->checksum, mpext->dsn64);
- } /* TODO: else fallback */
+ }
+ /* TODO: else fallback; allocation can fail, but we can't easily retire
+ * skbs from the write_queue, as we need to roll-back TCP status
+ */
+out:
pfrag->offset += ret;
msk->write_seq += ret;
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
- tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
return ret;
}
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
- int ret = 0;
long timeo;
lock_sock(sk);
@@ -170,15 +210,19 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(ssk);
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
while (msg_data_left(msg)) {
- ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
+ ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
+ &size_goal);
if (ret < 0)
break;
copied += ret;
}
- if (copied > 0)
+ if (copied) {
ret = copied;
+ tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
+ size_goal);
+ }
release_sock(ssk);
--
2.23.0
next prev parent reply other threads:[~2019-10-02 23:38 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-02 23:36 [RFC PATCH v2 00/45] Multipath TCP Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 01/45] tcp: Add MPTCP option number Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 02/45] net: Make sock protocol value checks more specific Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 03/45] sock: Make sk_protocol a 16-bit value Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 04/45] tcp: Define IPPROTO_MPTCP Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 05/45] mptcp: Add MPTCP socket stubs Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 06/45] mptcp: Handle MPTCP TCP options Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 07/45] mptcp: Associate MPTCP context with TCP socket Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 08/45] tcp: Expose tcp struct and routine for MPTCP Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 09/45] mptcp: Handle MP_CAPABLE options for outgoing connections Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 10/45] mptcp: add mptcp_poll Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 11/45] tcp, ulp: Add clone operation to tcp_ulp_ops Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 12/45] mptcp: Create SUBFLOW socket for incoming connections Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 13/45] mptcp: Add key generation and token tree Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 14/45] mptcp: Add shutdown() socket operation Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 15/45] mptcp: Add setsockopt()/getsockopt() socket operations Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 16/45] tcp: clean ext on tx recycle Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 17/45] mptcp: Add MPTCP to skb extensions Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 18/45] tcp: Prevent coalesce/collapse when skb has MPTCP extensions Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 19/45] tcp: Export low-level TCP functions Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 20/45] mptcp: Write MPTCP DSS headers to outgoing data packets Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 21/45] mptcp: Implement MPTCP receive path Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 22/45] mptcp: use sk_page_frag() in sendmsg Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 23/45] mptcp: sendmsg() do spool all the provided data Mat Martineau
2019-10-02 23:36 ` Mat Martineau [this message]
2019-10-02 23:36 ` [RFC PATCH v2 25/45] tcp: Check for filled TCP option space before SACK Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 26/45] mptcp: Add path manager interface Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 27/45] mptcp: Add ADD_ADDR handling Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 28/45] mptcp: Add handling of incoming MP_JOIN requests Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 29/45] mptcp: harmonize locking on all socket operations Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 30/45] mptcp: new sysctl to control the activation per NS Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 31/45] mptcp: add basic kselftest for mptcp Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 32/45] mptcp: Add handling of outgoing MP_JOIN requests Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 33/45] mptcp: Implement path manager interface commands Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 34/45] mptcp: Make MPTCP socket block/wakeup ignore sk_receive_queue Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 35/45] mptcp: update per unacked sequence on pkt reception Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 36/45] mptcp: queue data for mptcp level retransmission Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 37/45] mptcp: introduce MPTCP retransmission timer Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 38/45] mptcp: implement memory accounting for mptcp rtx queue Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 39/45] mptcp: rework mptcp_sendmsg_frag to accept optional dfrag Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 40/45] mptcp: implement and use MPTCP-level retransmission Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 41/45] selftests: mptcp: make tc delays random Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 42/45] selftests: mptcp: extend mptcp_connect tool for ipv6 family Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 43/45] selftests: mptcp: add accept/getpeer checks Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 44/45] selftests: mptcp: add ipv6 connectivity Mat Martineau
2019-10-02 23:36 ` [RFC PATCH v2 45/45] selftests: mptcp: random ethtool tweaking Mat Martineau
2019-10-02 23:53 ` [RFC PATCH v2 00/45] Multipath TCP Mat Martineau
2019-10-03 0:12 ` David Miller
2019-10-03 0:27 ` Mat Martineau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191002233655.24323-25-mathew.j.martineau@linux.intel.com \
--to=mathew.j.martineau@linux.intel.com \
--cc=cpaasch@apple.com \
--cc=dcaratti@redhat.com \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=matthieu.baerts@tessares.net \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=peter.krystad@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).