All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 08/31] netfilter: xt_cgroup: shrink size of v2 path
Date: Tue,  9 Oct 2018 01:01:02 +0200	[thread overview]
Message-ID: <20181008230125.2330-9-pablo@netfilter.org> (raw)
In-Reply-To: <20181008230125.2330-1-pablo@netfilter.org>

The cgroup v2 path field is PATH_MAX bytes, which is too large. This puts
too much pressure on memory allocation for people with many rules doing
cgroup v1 classid matching; side effects of this are bug reports like:

https://bugzilla.kernel.org/show_bug.cgi?id=200639

This patch registers a new revision that shrinks the cgroup path to 512
bytes, which is the same approach we follow in similar extensions that
have a path field.

Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Tejun Heo <tj@kernel.org>
---
 include/uapi/linux/netfilter/xt_cgroup.h | 16 +++++++
 net/netfilter/xt_cgroup.c                | 72 ++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h
index e96dfa1b34f7..b74e370d6133 100644
--- a/include/uapi/linux/netfilter/xt_cgroup.h
+++ b/include/uapi/linux/netfilter/xt_cgroup.h
@@ -22,4 +22,20 @@ struct xt_cgroup_info_v1 {
 	void		*priv __attribute__((aligned(8)));
 };
 
+#define XT_CGROUP_PATH_MAX	512	/* size of path[] below, in bytes */
+
+struct xt_cgroup_info_v2 {
+	__u8		has_path;	/* nonzero: rule supplies path */
+	__u8		has_classid;	/* nonzero: rule supplies classid */
+	__u8		invert_path;	/* 0 or 1: negate the path match */
+	__u8		invert_classid;	/* 0 or 1: negate the classid match */
+	union {		/* rules must set exactly one of has_path/has_classid */
+		char	path[XT_CGROUP_PATH_MAX];	/* cgroup path to look up */
+		__u32	classid;	/* compared with the socket's classid */
+	};
+
+	/* kernel internal data: cgroup looked up from path, else NULL */
+	void		*priv __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_CGROUP_H */
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 5d92e1781980..5cb1ecb29ea4 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -68,6 +68,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
 	return 0;
 }
 
+static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
+{
+	struct xt_cgroup_info_v2 *info = par->matchinfo;
+	struct cgroup *cgrp;
+
+	if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+		return -EINVAL;	/* invert flags must be exactly 0 or 1 */
+
+	if (!info->has_path && !info->has_classid) {
+		pr_info_ratelimited("no path or classid specified\n");
+		return -EINVAL;
+	}
+
+	if (info->has_path && info->has_classid) {
+		pr_info_ratelimited("path and classid specified\n");
+		return -EINVAL;
+	}
+
+	info->priv = NULL;	/* stays NULL for classid-only rules */
+	if (info->has_path) {
+		cgrp = cgroup_get_from_path(info->path);
+		if (IS_ERR(cgrp)) {
+			pr_info_ratelimited("invalid path, errno=%ld\n",
+					    PTR_ERR(cgrp));
+			return -EINVAL;	/* lookup error is logged, not returned */
+		}
+		info->priv = cgrp;	/* put again in cgroup_mt_destroy_v2() */
+	}
+
+	return 0;
+}
+
 static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
@@ -99,6 +131,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 			info->invert_classid;
 }
 
+static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cgroup_info_v2 *info = par->matchinfo;
+	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;	/* not read until sk is checked */
+	struct cgroup *ancestor = info->priv;	/* non-NULL only for path rules */
+	struct sock *sk = skb->sk;
+
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+		return false;	/* such packets never match, inverted or not */
+
+	if (ancestor)	/* path rule: test the socket's cgroup against ancestor */
+		return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+			info->invert_path;
+	else		/* classid rule: compare with the socket's classid */
+		return (info->classid == sock_cgroup_classid(skcd)) ^
+			info->invert_classid;
+}
+
 static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
 {
 	struct xt_cgroup_info_v1 *info = par->matchinfo;
@@ -107,6 +157,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
 		cgroup_put(info->priv);
 }
 
+static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+	struct xt_cgroup_info_v2 *info = par->matchinfo;
+
+	if (info->priv)	/* NULL when the rule had no path to look up */
+		cgroup_put(info->priv);
+}
+
 static struct xt_match cgroup_mt_reg[] __read_mostly = {
 	{
 		.name		= "cgroup",
@@ -134,6 +192,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = {
 				  (1 << NF_INET_POST_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
 	},
+	{
+		.name		= "cgroup",
+		.revision	= 2,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= cgroup_mt_check_v2,
+		.match		= cgroup_mt_v2,
+		.matchsize	= sizeof(struct xt_cgroup_info_v2),
+		.usersize	= offsetof(struct xt_cgroup_info_v2, priv),
+		.destroy	= cgroup_mt_destroy_v2,
+		.me		= THIS_MODULE,
+		.hooks		= (1 << NF_INET_LOCAL_OUT) |
+				  (1 << NF_INET_POST_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+	},
 };
 
 static int __init cgroup_mt_init(void)
-- 
2.11.0

  parent reply	other threads:[~2018-10-09  6:15 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-08 23:00 [PATCH 00/31] Netfilter updates for net-next Pablo Neira Ayuso
2018-10-08 23:00 ` [PATCH 01/31] netfilter: nf_tables: rt: allow checking if dst has xfrm attached Pablo Neira Ayuso
2018-10-08 23:00 ` [PATCH 02/31] netfilter: nf_tables: split set destruction in deactivate and destroy phase Pablo Neira Ayuso
2018-10-08 23:00 ` [PATCH 03/31] netfilter: nf_tables: warn when expr implements only one of activate/deactivate Pablo Neira Ayuso
2018-10-08 23:00 ` [PATCH 04/31] netfilter: nf_tables: asynchronous release Pablo Neira Ayuso
2018-10-08 23:00 ` [PATCH 05/31] netfilter: remove obsolete need_conntrack stub Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 06/31] netfilter: nf_tables: add xfrm expression Pablo Neira Ayuso
2018-10-10 11:39   ` Eyal Birger
2018-10-10 12:53     ` Florian Westphal
2018-10-08 23:01 ` [PATCH 07/31] netfilter: ctnetlink: Support L3 protocol-filter on flush Pablo Neira Ayuso
2019-04-25 10:07   ` Nicolas Dichtel
2019-04-25 15:41     ` Nicolas Dichtel
2019-04-26 19:25       ` Pablo Neira Ayuso
2019-04-29 14:53         ` Nicolas Dichtel
2019-04-29 15:23           ` Pablo Neira Ayuso
2019-04-29 15:39             ` Nicolas Dichtel
2019-05-01  8:47     ` Kristian Evensen
2019-05-02  7:28       ` Nicolas Dichtel
2019-05-02  7:46         ` Florian Westphal
2019-05-02  8:09           ` Kristian Evensen
2019-05-02  8:27           ` Nicolas Dichtel
2019-05-02 11:31           ` Pablo Neira Ayuso
2019-05-02 12:56             ` Nicolas Dichtel
2019-05-02 15:06               ` Pablo Neira Ayuso
2019-05-03  7:02                 ` Nicolas Dichtel
2019-05-03  7:14                   ` Kristian Evensen
2018-10-08 23:01 ` Pablo Neira Ayuso [this message]
2018-10-08 23:01 ` [PATCH 09/31] netfilter: nf_tables: avoid BUG_ON usage Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 10/31] netfilter: xtables: avoid BUG_ON Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 11/31] netfilter: nf_nat_ipv4: remove obsolete EXPORT_SYMBOL Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 12/31] netfilter: cttimeout: remove superfluous check on layer 4 netlink functions Pablo Neira Ayuso
2018-11-01 14:57   ` Eric Dumazet
2018-11-01 23:26     ` Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 13/31] netfilter: nat: remove unnecessary rcu_read_lock in nf_nat_redirect_ipv{4/6} Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 14/31] netfilter: conntrack: pass nf_hook_state to packet and error handlers Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 15/31] netfilter: conntrack: remove the l4proto->new() function Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 16/31] netfilter: conntrack: deconstify packet callback skb pointer Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 17/31] netfilter: conntrack: avoid using ->error callback if possible Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 18/31] netfilter: conntrack: remove error callback and handle icmp from core Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 19/31] netfilter: conntrack: remove unused proto arg from netns init functions Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 20/31] netfilter: conntrack: remove l3->l4 mapping information Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 21/31] netfilter: conntrack: clamp l4proto array size at largers supported protocol Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 22/31] netfilter: nat: remove duplicate skb_is_nonlinear() in __nf_nat_mangle_tcp_packet() Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 23/31] netfilter: nf_tables: use rhashtable_walk_enter instead of rhashtable_walk_init Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 24/31] netfilter: ctnetlink: must check mark attributes vs NULL Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 25/31] netfilter: masquerade: don't flush all conntracks if only one address deleted on device Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 26/31] netfilter: nf_tables: add SECMARK support Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 27/31] netfilter: nf_tables: add requirements for connsecmark support Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 28/31] netfilter: nf_flow_table: remove unnecessary nat flag check code Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 29/31] netfilter: nf_tables: use rhashtable_lookup() instead of rhashtable_lookup_fast() Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 30/31] netfilter: xt_quota: fix the behavior of xt_quota module Pablo Neira Ayuso
2018-10-08 23:01 ` [PATCH 31/31] netfilter: xt_quota: Don't use aligned attribute in sizeof Pablo Neira Ayuso
2018-10-09  4:29 ` [PATCH 00/31] Netfilter updates for net-next David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181008230125.2330-9-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.