From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Willem de Bruijn <willemb@google.com>,
Peter Oskolkov <posk@google.com>,
Eric Dumazet <edumazet@google.com>,
Florian Westphal <fw@strlen.de>,
"David S. Miller" <davem@davemloft.net>,
Mao Wenan <maowenan@huawei.com>,
Ben Hutchings <ben.hutchings@codethink.co.uk>
Subject: [PATCH 4.4 27/34] ip: add helpers to process in-order fragments faster.
Date: Thu, 7 Feb 2019 12:42:09 +0100 [thread overview]
Message-ID: <20190207113026.617948238@linuxfoundation.org> (raw)
In-Reply-To: <20190207113025.552605181@linuxfoundation.org>
4.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Peter Oskolkov <posk@google.com>
commit 353c9cb360874e737fb000545f783df756c06f9a upstream.
This patch introduces several helper functions/macros that will be
used in the follow-up patch. No runtime changes yet.
The new logic (fully implemented in the second patch) is as follows:
* Nodes in the rb-tree will now contain not single fragments, but lists
of consecutive fragments ("runs").
* At each point in time, the current "active" run at the tail is
maintained/tracked. Fragments that arrive in-order, adjacent
to the previous tail fragment, are added to this tail run without
triggering the re-balancing of the rb-tree.
* If a fragment arrives out of order with the offset _before_ the tail run,
it is inserted into the rb-tree as a single fragment.
* If a fragment arrives after the current tail fragment (with a gap),
it starts a new "tail" run, and is inserted into the rb-tree
at the end as the head of the new run.
skb->cb is used to store additional information
needed here (suggested by Eric Dumazet).
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/inet_frag.h | 6 +++
net/ipv4/ip_fragment.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 79 insertions(+)
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -55,7 +55,9 @@ struct frag_v6_compare_key {
* @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
* @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
@@ -76,6 +78,7 @@ struct inet_frag_queue {
struct sk_buff *fragments; /* Used in IPv6. */
struct rb_root rb_fragments; /* Used in IPv4. */
struct sk_buff *fragments_tail;
+ struct sk_buff *last_run_head;
ktime_t stamp;
int len;
int meat;
@@ -112,6 +115,9 @@ void inet_frag_kill(struct inet_frag_que
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+/* Free all skbs in the rb-tree, runs included; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -58,6 +58,57 @@
static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+ struct inet_skb_parm h; /* NOTE(review): presumably kept first so the existing IP control-block layout in skb->cb stays valid - confirm */
+ struct sk_buff *next_frag; /* next fragment in the same run; NULL at the run's tail */
+ int frag_run_len; /* meaningful on a run head: sum of skb->len over the run */
+};
+
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) /* skb->cb viewed as struct ipfrag_skb_cb */
+/* Initialise skb's control block as a run made of skb alone. */
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); /* the overlay must fit inside skb->cb */
+
+ FRAG_CB(skb)->next_frag = NULL; /* single fragment: it is also the run's tail */
+ FRAG_CB(skb)->frag_run_len = skb->len; /* run length starts as this fragment's length */
+}
+
+/* Append skb to the last "run"; skb becomes the new queue tail and is not linked into the rb-tree. */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode); /* appended fragments are chained via next_frag, not tree nodes */
+ FRAG_CB(skb)->next_frag = NULL; /* invariant: next_frag is NULL at the tail of a run */
+ /* q->last_run_head and q->fragments_tail are dereferenced unchecked: a run must already exist. */
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len; /* the head carries the run's total length */
+ FRAG_CB(q->fragments_tail)->next_frag = skb; /* chain skb after the current tail */
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb: link it into the rb-tree and make it the last run and queue tail. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ if (q->last_run_head) /* a run exists: link skb as the right child of its head */
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right); /* NOTE(review): assumes that right-child slot is empty - confirm against callers */
+ else /* no run yet: skb is linked at the tree root */
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments); /* complete the insertion started by rb_link_node() */
+ /* skb starts as a one-fragment run and is now both the last run's head and the queue tail. */
+ ip4_frag_init_run(skb);
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}
+
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -658,6 +709,28 @@ struct sk_buff *ip_check_defrag(struct n
}
EXPORT_SYMBOL(ip_check_defrag);
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0; /* running total of truesize over every skb freed */
+ /* Erase each tree node and free the whole run chained off it; the total truesize is returned. */
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); /* tree nodes are embedded in skbs (run heads) */
+
+ p = rb_next(p); /* fetch the successor first: skb is erased and freed below */
+ rb_erase(&skb->rbnode, root);
+ while (skb) { /* walk the run through next_frag until its NULL tail */
+ struct sk_buff *next = FRAG_CB(skb)->next_frag; /* read before kfree_skb() releases skb */
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
#ifdef CONFIG_SYSCTL
static int dist_min;
next prev parent reply other threads:[~2019-02-07 11:43 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-07 11:41 [PATCH 4.4 00/34] 4.4.174-stable review Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 01/34] inet: frags: change inet_frags_init_net() return value Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 02/34] inet: frags: add a pointer to struct netns_frags Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 03/34] inet: frags: refactor ipfrag_init() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 04/34] inet: frags: refactor ipv6_frag_init() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 05/34] inet: frags: refactor lowpan_net_frag_init() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 06/34] rhashtable: add rhashtable_lookup_get_insert_key() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 07/34] rhashtable: Add rhashtable_lookup() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 08/34] rhashtable: add schedule points Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 09/34] inet: frags: use rhashtables for reassembly units Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 10/34] net: ieee802154: 6lowpan: fix frag reassembly Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 11/34] ipfrag: really prevent allocation on netns exit Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 12/34] inet: frags: remove some helpers Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 13/34] inet: frags: get rif of inet_frag_evicting() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 14/34] inet: frags: remove inet_frag_maybe_warn_overflow() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 15/34] inet: frags: break the 2GB limit for frags storage Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 16/34] inet: frags: do not clone skb in ip_expire() Greg Kroah-Hartman
2019-02-07 11:41 ` [PATCH 4.4 17/34] ipv6: frags: rewrite ip6_expire_frag_queue() Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 18/34] rhashtable: reorganize struct rhashtable layout Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 19/34] inet: frags: reorganize struct netns_frags Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 20/34] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 21/34] inet: frags: fix ip6frag_low_thresh boundary Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 22/34] ip: discard IPv4 datagrams with overlapping segments Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 23/34] net: modify skb_rbtree_purge to return the truesize of all purged skbs Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 24/34] ipv6: defrag: drop non-last frags smaller than min mtu Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 25/34] net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 26/34] ip: use rb trees for IP frag queue Greg Kroah-Hartman
2019-02-07 11:42 ` Greg Kroah-Hartman [this message]
2019-02-07 11:42 ` [PATCH 4.4 28/34] ip: process in-order fragments efficiently Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 29/34] ip: frags: fix crash in ip_do_fragment() Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 30/34] ipv4: frags: precedence bug in ip_expire() Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 31/34] inet: frags: better deal with smp races Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 32/34] net: fix pskb_trim_rcsum_slow() with odd trim offset Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 33/34] net: ipv4: do not handle duplicate fragments as overlapping Greg Kroah-Hartman
2019-02-07 11:42 ` [PATCH 4.4 34/34] rcu: Force boolean subscript for expedited stall warnings Greg Kroah-Hartman
2019-02-07 14:20 ` [PATCH 4.4 00/34] 4.4.174-stable review Guenter Roeck
2019-02-07 14:41 ` Guenter Roeck
2019-02-07 15:46 ` Greg Kroah-Hartman
2019-02-07 18:57 ` Guenter Roeck
2019-02-07 15:47 ` Greg Kroah-Hartman
2019-02-07 19:16 ` Guenter Roeck
2019-02-07 18:18 ` kernelci.org bot
2019-02-08 6:13 ` Naresh Kamboju
2019-02-08 6:46 ` Greg Kroah-Hartman
2019-02-08 10:03 ` Jon Hunter
2019-02-08 10:28 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190207113026.617948238@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=ben.hutchings@codethink.co.uk \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=linux-kernel@vger.kernel.org \
--cc=maowenan@huawei.com \
--cc=posk@google.com \
--cc=stable@vger.kernel.org \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).