linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] memcg: enable accounting for nft objects
       [not found] <20220228122429.GC26547@breakpoint.cc>
@ 2022-03-21  5:02 ` Vasily Averin
  2022-03-22 10:25   ` Florian Westphal
  2022-03-24 14:19   ` Pablo Neira Ayuso
  2022-03-24 18:05 ` [PATCH v2 RESEND] " Vasily Averin
  1 sibling, 2 replies; 17+ messages in thread
From: Vasily Averin @ 2022-03-21  5:02 UTC (permalink / raw)
  To: Florian Westphal, Jozsef Kadlecsik, Pablo Neira Ayuso
  Cc: linux-kernel, netfilter-devel, kernel

nftables replaces iptables, but it lacks memcg accounting.

This patch account most of the memory allocation associated with nft
and should protect the host from misusing nft inside a memcg restricted
container.

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
  net/netfilter/core.c          |  2 +-
  net/netfilter/nf_tables_api.c | 44 +++++++++++++++++------------------
  2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 8a77a3fd69bc..77ae3e8d344c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -58,7 +58,7 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
  	if (num == 0)
  		return NULL;
  
-	e = kvzalloc(alloc, GFP_KERNEL);
+	e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
  	if (e)
  		e->num_hook_entries = num;
  	return e;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d71a33ae39b3..04be94236a34 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1113,16 +1113,16 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
  	}
  
  	err = -ENOMEM;
-	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT);
  	if (table == NULL)
  		goto err_kzalloc;
  
-	table->name = nla_strdup(attr, GFP_KERNEL);
+	table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
  	if (table->name == NULL)
  		goto err_strdup;
  
  	if (nla[NFTA_TABLE_USERDATA]) {
-		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
+		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT);
  		if (table->udata == NULL)
  			goto err_table_udata;
  
@@ -1803,7 +1803,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
  	struct nft_hook *hook;
  	int err;
  
-	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
  	if (!hook) {
  		err = -ENOMEM;
  		goto err_hook_alloc;
@@ -2026,7 +2026,7 @@ static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
  	if (size > INT_MAX)
  		return NULL;
  
-	blob = kvmalloc(size, GFP_KERNEL);
+	blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);
  	if (!blob)
  		return NULL;
  
@@ -2126,7 +2126,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
  		if (err < 0)
  			return err;
  
-		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT);
  		if (basechain == NULL) {
  			nft_chain_release_hook(&hook);
  			return -ENOMEM;
@@ -2156,7 +2156,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
  		if (flags & NFT_CHAIN_HW_OFFLOAD)
  			return -EOPNOTSUPP;
  
-		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT);
  		if (chain == NULL)
  			return -ENOMEM;
  
@@ -2169,7 +2169,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
  	chain->table = table;
  
  	if (nla[NFTA_CHAIN_NAME]) {
-		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
  	} else {
  		if (!(flags & NFT_CHAIN_BINDING)) {
  			err = -EINVAL;
@@ -2177,7 +2177,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
  		}
  
  		snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
-		chain->name = kstrdup(name, GFP_KERNEL);
+		chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
  	}
  
  	if (!chain->name) {
@@ -2186,7 +2186,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
  	}
  
  	if (nla[NFTA_CHAIN_USERDATA]) {
-		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
+		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT);
  		if (chain->udata == NULL) {
  			err = -ENOMEM;
  			goto err_destroy_chain;
@@ -2349,7 +2349,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
  		char *name;
  
  		err = -ENOMEM;
-		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
  		if (!name)
  			goto err;
  
@@ -2797,7 +2797,7 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
  		goto err1;
  
  	err = -ENOMEM;
-	expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
+	expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
  	if (expr == NULL)
  		goto err2;
  
@@ -3405,7 +3405,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
  	}
  
  	err = -ENOMEM;
-	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT);
  	if (rule == NULL)
  		goto err_release_expr;
  
@@ -3818,7 +3818,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
  		free_page((unsigned long)inuse);
  	}
  
-	set->name = kasprintf(GFP_KERNEL, name, min + n);
+	set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n);
  	if (!set->name)
  		return -ENOMEM;
  
@@ -4382,11 +4382,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
  	alloc_size = sizeof(*set) + size + udlen;
  	if (alloc_size < size || alloc_size > INT_MAX)
  		return -ENOMEM;
-	set = kvzalloc(alloc_size, GFP_KERNEL);
+	set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
  	if (!set)
  		return -ENOMEM;
  
-	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
  	if (!name) {
  		err = -ENOMEM;
  		goto err_set_name;
@@ -5921,7 +5921,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
  	err = -ENOMEM;
  	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
  				      elem.key_end.val.data, elem.data.val.data,
-				      timeout, expiration, GFP_KERNEL);
+				      timeout, expiration, GFP_KERNEL_ACCOUNT);
  	if (elem.priv == NULL)
  		goto err_parse_data;
  
@@ -6165,7 +6165,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
  	err = -ENOMEM;
  	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
  				      elem.key_end.val.data, NULL, 0, 0,
-				      GFP_KERNEL);
+				      GFP_KERNEL_ACCOUNT);
  	if (elem.priv == NULL)
  		goto fail_elem;
  
@@ -6477,7 +6477,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
  	}
  
  	err = -ENOMEM;
-	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
+	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT);
  	if (!obj)
  		goto err2;
  
@@ -6643,7 +6643,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
  	obj->key.table = table;
  	obj->handle = nf_tables_alloc_handle(table);
  
-	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT);
  	if (!obj->key.name) {
  		err = -ENOMEM;
  		goto err_strdup;
@@ -7404,7 +7404,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
  
  	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
  
-	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
  	if (!flowtable)
  		return -ENOMEM;
  
@@ -7412,7 +7412,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
  	flowtable->handle = nf_tables_alloc_handle(table);
  	INIT_LIST_HEAD(&flowtable->hook_list);
  
-	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT);
  	if (!flowtable->name) {
  		err = -ENOMEM;
  		goto err1;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] memcg: enable accounting for nft objects
  2022-03-21  5:02 ` [PATCH v2] memcg: enable accounting for nft objects Vasily Averin
@ 2022-03-22 10:25   ` Florian Westphal
  2022-03-24 14:19   ` Pablo Neira Ayuso
  1 sibling, 0 replies; 17+ messages in thread
From: Florian Westphal @ 2022-03-22 10:25 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Florian Westphal, Jozsef Kadlecsik, Pablo Neira Ayuso,
	linux-kernel, netfilter-devel, kernel

Vasily Averin <vasily.averin@linux.dev> wrote:
> nftables replaces iptables, but it lacks memcg accounting.
> 
> This patch account most of the memory allocation associated with nft
> and should protect the host from misusing nft inside a memcg restricted
> container.

LGTM.

Acked-by: Florian Westphal <fw@strlen.de>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] memcg: enable accounting for nft objects
  2022-03-21  5:02 ` [PATCH v2] memcg: enable accounting for nft objects Vasily Averin
  2022-03-22 10:25   ` Florian Westphal
@ 2022-03-24 14:19   ` Pablo Neira Ayuso
  2022-03-24 17:23     ` Vasily Averin
  1 sibling, 1 reply; 17+ messages in thread
From: Pablo Neira Ayuso @ 2022-03-24 14:19 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Florian Westphal, Jozsef Kadlecsik, linux-kernel,
	netfilter-devel, kernel

Hm. Patch does not apply for some reason. git am complains. And
manually applying this also fails.

patch -p1 < vasily.averin.txt
patching file net/netfilter/core.c
Hunk #1 FAILED at 58.
1 out of 1 hunk FAILED -- saving rejects to file net/netfilter/core.c.rej
patching file net/netfilter/nf_tables_api.c
Hunk #1 FAILED at 1113.
Hunk #2 FAILED at 1803.
Hunk #3 FAILED at 2026.
Hunk #4 FAILED at 2126.
Hunk #5 FAILED at 2156.
Hunk #6 FAILED at 2169.
Hunk #7 FAILED at 2177.
Hunk #8 FAILED at 2186.
Hunk #9 FAILED at 2349.
Hunk #10 FAILED at 2797.
Hunk #11 FAILED at 3405.
Hunk #12 FAILED at 3818.
Hunk #13 FAILED at 4382.
Hunk #14 FAILED at 5921.
Hunk #15 FAILED at 6165.
Hunk #16 FAILED at 6477.
Hunk #17 FAILED at 6643.
Hunk #18 FAILED at 7404.
Hunk #19 FAILED at 7412.
19 out of 19 hunks FAILED -- saving rejects to file net/netfilter/nf_tables_api.c.rej

On Mon, Mar 21, 2022 at 08:02:22AM +0300, Vasily Averin wrote:
> nftables replaces iptables, but it lacks memcg accounting.
> 
> This patch account most of the memory allocation associated with nft
> and should protect the host from misusing nft inside a memcg restricted
> container.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
>  net/netfilter/core.c          |  2 +-
>  net/netfilter/nf_tables_api.c | 44 +++++++++++++++++------------------
>  2 files changed, 23 insertions(+), 23 deletions(-)
> 
> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> index 8a77a3fd69bc..77ae3e8d344c 100644
> --- a/net/netfilter/core.c
> +++ b/net/netfilter/core.c
> @@ -58,7 +58,7 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
>  	if (num == 0)
>  		return NULL;
> -	e = kvzalloc(alloc, GFP_KERNEL);
> +	e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
>  	if (e)
>  		e->num_hook_entries = num;
>  	return e;
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index d71a33ae39b3..04be94236a34 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -1113,16 +1113,16 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
>  	}
>  	err = -ENOMEM;
> -	table = kzalloc(sizeof(*table), GFP_KERNEL);
> +	table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT);
>  	if (table == NULL)
>  		goto err_kzalloc;
> -	table->name = nla_strdup(attr, GFP_KERNEL);
> +	table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
>  	if (table->name == NULL)
>  		goto err_strdup;
>  	if (nla[NFTA_TABLE_USERDATA]) {
> -		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
> +		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT);
>  		if (table->udata == NULL)
>  			goto err_table_udata;
> @@ -1803,7 +1803,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
>  	struct nft_hook *hook;
>  	int err;
> -	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
> +	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
>  	if (!hook) {
>  		err = -ENOMEM;
>  		goto err_hook_alloc;
> @@ -2026,7 +2026,7 @@ static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
>  	if (size > INT_MAX)
>  		return NULL;
> -	blob = kvmalloc(size, GFP_KERNEL);
> +	blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);
>  	if (!blob)
>  		return NULL;
> @@ -2126,7 +2126,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>  		if (err < 0)
>  			return err;
> -		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
> +		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT);
>  		if (basechain == NULL) {
>  			nft_chain_release_hook(&hook);
>  			return -ENOMEM;
> @@ -2156,7 +2156,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>  		if (flags & NFT_CHAIN_HW_OFFLOAD)
>  			return -EOPNOTSUPP;
> -		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
> +		chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT);
>  		if (chain == NULL)
>  			return -ENOMEM;
> @@ -2169,7 +2169,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>  	chain->table = table;
>  	if (nla[NFTA_CHAIN_NAME]) {
> -		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
> +		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
>  	} else {
>  		if (!(flags & NFT_CHAIN_BINDING)) {
>  			err = -EINVAL;
> @@ -2177,7 +2177,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>  		}
>  		snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
> -		chain->name = kstrdup(name, GFP_KERNEL);
> +		chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
>  	}
>  	if (!chain->name) {
> @@ -2186,7 +2186,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>  	}
>  	if (nla[NFTA_CHAIN_USERDATA]) {
> -		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
> +		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT);
>  		if (chain->udata == NULL) {
>  			err = -ENOMEM;
>  			goto err_destroy_chain;
> @@ -2349,7 +2349,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
>  		char *name;
>  		err = -ENOMEM;
> -		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
> +		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
>  		if (!name)
>  			goto err;
> @@ -2797,7 +2797,7 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
>  		goto err1;
>  	err = -ENOMEM;
> -	expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
> +	expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
>  	if (expr == NULL)
>  		goto err2;
> @@ -3405,7 +3405,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
>  	}
>  	err = -ENOMEM;
> -	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
> +	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT);
>  	if (rule == NULL)
>  		goto err_release_expr;
> @@ -3818,7 +3818,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
>  		free_page((unsigned long)inuse);
>  	}
> -	set->name = kasprintf(GFP_KERNEL, name, min + n);
> +	set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n);
>  	if (!set->name)
>  		return -ENOMEM;
> @@ -4382,11 +4382,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
>  	alloc_size = sizeof(*set) + size + udlen;
>  	if (alloc_size < size || alloc_size > INT_MAX)
>  		return -ENOMEM;
> -	set = kvzalloc(alloc_size, GFP_KERNEL);
> +	set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
>  	if (!set)
>  		return -ENOMEM;
> -	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
> +	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
>  	if (!name) {
>  		err = -ENOMEM;
>  		goto err_set_name;
> @@ -5921,7 +5921,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
>  	err = -ENOMEM;
>  	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
>  				      elem.key_end.val.data, elem.data.val.data,
> -				      timeout, expiration, GFP_KERNEL);
> +				      timeout, expiration, GFP_KERNEL_ACCOUNT);
>  	if (elem.priv == NULL)
>  		goto err_parse_data;
> @@ -6165,7 +6165,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
>  	err = -ENOMEM;
>  	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
>  				      elem.key_end.val.data, NULL, 0, 0,
> -				      GFP_KERNEL);
> +				      GFP_KERNEL_ACCOUNT);
>  	if (elem.priv == NULL)
>  		goto fail_elem;
> @@ -6477,7 +6477,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
>  	}
>  	err = -ENOMEM;
> -	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
> +	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT);
>  	if (!obj)
>  		goto err2;
> @@ -6643,7 +6643,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
>  	obj->key.table = table;
>  	obj->handle = nf_tables_alloc_handle(table);
> -	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
> +	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT);
>  	if (!obj->key.name) {
>  		err = -ENOMEM;
>  		goto err_strdup;
> @@ -7404,7 +7404,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
>  	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
> -	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
> +	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
>  	if (!flowtable)
>  		return -ENOMEM;
> @@ -7412,7 +7412,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
>  	flowtable->handle = nf_tables_alloc_handle(table);
>  	INIT_LIST_HEAD(&flowtable->hook_list);
> -	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
> +	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT);
>  	if (!flowtable->name) {
>  		err = -ENOMEM;
>  		goto err1;
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] memcg: enable accounting for nft objects
  2022-03-24 14:19   ` Pablo Neira Ayuso
@ 2022-03-24 17:23     ` Vasily Averin
  0 siblings, 0 replies; 17+ messages in thread
From: Vasily Averin @ 2022-03-24 17:23 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Florian Westphal, Jozsef Kadlecsik, linux-kernel,
	netfilter-devel, kernel

I'm sorry,
perhaps it was casued by Thunderbird settigns on my new work place,
I'll double check it and then send fixed patch version.

Thank you,
	Vasily Averin


On 3/24/22 17:19, Pablo Neira Ayuso wrote:
> Hm. Patch does not apply for some reason. git am complains. And
> manually applying this also fails.
> 
> patch -p1 < vasily.averin.txt
> patching file net/netfilter/core.c
> Hunk #1 FAILED at 58.
> 1 out of 1 hunk FAILED -- saving rejects to file net/netfilter/core.c.rej
> patching file net/netfilter/nf_tables_api.c
> Hunk #1 FAILED at 1113.
> Hunk #2 FAILED at 1803.
> Hunk #3 FAILED at 2026.
> Hunk #4 FAILED at 2126.
> Hunk #5 FAILED at 2156.
> Hunk #6 FAILED at 2169.
> Hunk #7 FAILED at 2177.
> Hunk #8 FAILED at 2186.
> Hunk #9 FAILED at 2349.
> Hunk #10 FAILED at 2797.
> Hunk #11 FAILED at 3405.
> Hunk #12 FAILED at 3818.
> Hunk #13 FAILED at 4382.
> Hunk #14 FAILED at 5921.
> Hunk #15 FAILED at 6165.
> Hunk #16 FAILED at 6477.
> Hunk #17 FAILED at 6643.
> Hunk #18 FAILED at 7404.
> Hunk #19 FAILED at 7412.
> 19 out of 19 hunks FAILED -- saving rejects to file net/netfilter/nf_tables_api.c.rej
> 
> On Mon, Mar 21, 2022 at 08:02:22AM +0300, Vasily Averin wrote:
>> nftables replaces iptables, but it lacks memcg accounting.
>>
>> This patch account most of the memory allocation associated with nft
>> and should protect the host from misusing nft inside a memcg restricted
>> container.
>>
>> Signed-off-by: Vasily Averin <vvs@openvz.org>
>> ---
>>  net/netfilter/core.c          |  2 +-
>>  net/netfilter/nf_tables_api.c | 44 +++++++++++++++++------------------
>>  2 files changed, 23 insertions(+), 23 deletions(-)
>>
>> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
>> index 8a77a3fd69bc..77ae3e8d344c 100644
>> --- a/net/netfilter/core.c
>> +++ b/net/netfilter/core.c
>> @@ -58,7 +58,7 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
>>  	if (num == 0)
>>  		return NULL;
>> -	e = kvzalloc(alloc, GFP_KERNEL);
>> +	e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
>>  	if (e)
>>  		e->num_hook_entries = num;
>>  	return e;
>> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
>> index d71a33ae39b3..04be94236a34 100644
>> --- a/net/netfilter/nf_tables_api.c
>> +++ b/net/netfilter/nf_tables_api.c
>> @@ -1113,16 +1113,16 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
>>  	}
>>  	err = -ENOMEM;
>> -	table = kzalloc(sizeof(*table), GFP_KERNEL);
>> +	table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT);
>>  	if (table == NULL)
>>  		goto err_kzalloc;
>> -	table->name = nla_strdup(attr, GFP_KERNEL);
>> +	table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
>>  	if (table->name == NULL)
>>  		goto err_strdup;
>>  	if (nla[NFTA_TABLE_USERDATA]) {
>> -		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
>> +		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT);
>>  		if (table->udata == NULL)
>>  			goto err_table_udata;
>> @@ -1803,7 +1803,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
>>  	struct nft_hook *hook;
>>  	int err;
>> -	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
>> +	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
>>  	if (!hook) {
>>  		err = -ENOMEM;
>>  		goto err_hook_alloc;
>> @@ -2026,7 +2026,7 @@ static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
>>  	if (size > INT_MAX)
>>  		return NULL;
>> -	blob = kvmalloc(size, GFP_KERNEL);
>> +	blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);
>>  	if (!blob)
>>  		return NULL;
>> @@ -2126,7 +2126,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>>  		if (err < 0)
>>  			return err;
>> -		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
>> +		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT);
>>  		if (basechain == NULL) {
>>  			nft_chain_release_hook(&hook);
>>  			return -ENOMEM;
>> @@ -2156,7 +2156,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>>  		if (flags & NFT_CHAIN_HW_OFFLOAD)
>>  			return -EOPNOTSUPP;
>> -		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
>> +		chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT);
>>  		if (chain == NULL)
>>  			return -ENOMEM;
>> @@ -2169,7 +2169,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>>  	chain->table = table;
>>  	if (nla[NFTA_CHAIN_NAME]) {
>> -		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
>> +		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
>>  	} else {
>>  		if (!(flags & NFT_CHAIN_BINDING)) {
>>  			err = -EINVAL;
>> @@ -2177,7 +2177,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>>  		}
>>  		snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
>> -		chain->name = kstrdup(name, GFP_KERNEL);
>> +		chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
>>  	}
>>  	if (!chain->name) {
>> @@ -2186,7 +2186,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
>>  	}
>>  	if (nla[NFTA_CHAIN_USERDATA]) {
>> -		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
>> +		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT);
>>  		if (chain->udata == NULL) {
>>  			err = -ENOMEM;
>>  			goto err_destroy_chain;
>> @@ -2349,7 +2349,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
>>  		char *name;
>>  		err = -ENOMEM;
>> -		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
>> +		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
>>  		if (!name)
>>  			goto err;
>> @@ -2797,7 +2797,7 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
>>  		goto err1;
>>  	err = -ENOMEM;
>> -	expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
>> +	expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
>>  	if (expr == NULL)
>>  		goto err2;
>> @@ -3405,7 +3405,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
>>  	}
>>  	err = -ENOMEM;
>> -	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
>> +	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT);
>>  	if (rule == NULL)
>>  		goto err_release_expr;
>> @@ -3818,7 +3818,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
>>  		free_page((unsigned long)inuse);
>>  	}
>> -	set->name = kasprintf(GFP_KERNEL, name, min + n);
>> +	set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n);
>>  	if (!set->name)
>>  		return -ENOMEM;
>> @@ -4382,11 +4382,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
>>  	alloc_size = sizeof(*set) + size + udlen;
>>  	if (alloc_size < size || alloc_size > INT_MAX)
>>  		return -ENOMEM;
>> -	set = kvzalloc(alloc_size, GFP_KERNEL);
>> +	set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
>>  	if (!set)
>>  		return -ENOMEM;
>> -	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
>> +	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
>>  	if (!name) {
>>  		err = -ENOMEM;
>>  		goto err_set_name;
>> @@ -5921,7 +5921,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
>>  	err = -ENOMEM;
>>  	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
>>  				      elem.key_end.val.data, elem.data.val.data,
>> -				      timeout, expiration, GFP_KERNEL);
>> +				      timeout, expiration, GFP_KERNEL_ACCOUNT);
>>  	if (elem.priv == NULL)
>>  		goto err_parse_data;
>> @@ -6165,7 +6165,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
>>  	err = -ENOMEM;
>>  	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
>>  				      elem.key_end.val.data, NULL, 0, 0,
>> -				      GFP_KERNEL);
>> +				      GFP_KERNEL_ACCOUNT);
>>  	if (elem.priv == NULL)
>>  		goto fail_elem;
>> @@ -6477,7 +6477,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
>>  	}
>>  	err = -ENOMEM;
>> -	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
>> +	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT);
>>  	if (!obj)
>>  		goto err2;
>> @@ -6643,7 +6643,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
>>  	obj->key.table = table;
>>  	obj->handle = nf_tables_alloc_handle(table);
>> -	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
>> +	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT);
>>  	if (!obj->key.name) {
>>  		err = -ENOMEM;
>>  		goto err_strdup;
>> @@ -7404,7 +7404,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
>>  	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
>> -	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
>> +	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
>>  	if (!flowtable)
>>  		return -ENOMEM;
>> @@ -7412,7 +7412,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
>>  	flowtable->handle = nf_tables_alloc_handle(table);
>>  	INIT_LIST_HEAD(&flowtable->hook_list);
>> -	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
>> +	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT);
>>  	if (!flowtable->name) {
>>  		err = -ENOMEM;
>>  		goto err1;
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 RESEND] memcg: enable accounting for nft objects
       [not found] <20220228122429.GC26547@breakpoint.cc>
  2022-03-21  5:02 ` [PATCH v2] memcg: enable accounting for nft objects Vasily Averin
@ 2022-03-24 18:05 ` Vasily Averin
  2022-03-28  8:15   ` Pablo Neira Ayuso
  1 sibling, 1 reply; 17+ messages in thread
From: Vasily Averin @ 2022-03-24 18:05 UTC (permalink / raw)
  To: Florian Westphal, Jozsef Kadlecsik, Pablo Neira Ayuso
  Cc: linux-kernel, netfilter-devel, kernel

nftables replaces iptables, but it lacks memcg accounting.

This patch account most of the memory allocation associated with nft
and should protect the host from misusing nft inside a memcg restricted
container.

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
 net/netfilter/core.c          |  2 +-
 net/netfilter/nf_tables_api.c | 44 +++++++++++++++++------------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 8a77a3fd69bc..77ae3e8d344c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -58,7 +58,7 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 	if (num == 0)
 		return NULL;
 
-	e = kvzalloc(alloc, GFP_KERNEL);
+	e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
 	if (e)
 		e->num_hook_entries = num;
 	return e;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d71a33ae39b3..04be94236a34 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1113,16 +1113,16 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
 	}
 
 	err = -ENOMEM;
-	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT);
 	if (table == NULL)
 		goto err_kzalloc;
 
-	table->name = nla_strdup(attr, GFP_KERNEL);
+	table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
 	if (table->name == NULL)
 		goto err_strdup;
 
 	if (nla[NFTA_TABLE_USERDATA]) {
-		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
+		table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT);
 		if (table->udata == NULL)
 			goto err_table_udata;
 
@@ -1803,7 +1803,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
 	struct nft_hook *hook;
 	int err;
 
-	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+	hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
 	if (!hook) {
 		err = -ENOMEM;
 		goto err_hook_alloc;
@@ -2026,7 +2026,7 @@ static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
 	if (size > INT_MAX)
 		return NULL;
 
-	blob = kvmalloc(size, GFP_KERNEL);
+	blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);
 	if (!blob)
 		return NULL;
 
@@ -2126,7 +2126,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		if (err < 0)
 			return err;
 
-		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT);
 		if (basechain == NULL) {
 			nft_chain_release_hook(&hook);
 			return -ENOMEM;
@@ -2156,7 +2156,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		if (flags & NFT_CHAIN_HW_OFFLOAD)
 			return -EOPNOTSUPP;
 
-		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT);
 		if (chain == NULL)
 			return -ENOMEM;
 
@@ -2169,7 +2169,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	chain->table = table;
 
 	if (nla[NFTA_CHAIN_NAME]) {
-		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+		chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
 	} else {
 		if (!(flags & NFT_CHAIN_BINDING)) {
 			err = -EINVAL;
@@ -2177,7 +2177,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		}
 
 		snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
-		chain->name = kstrdup(name, GFP_KERNEL);
+		chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
 	}
 
 	if (!chain->name) {
@@ -2186,7 +2186,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	}
 
 	if (nla[NFTA_CHAIN_USERDATA]) {
-		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
+		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT);
 		if (chain->udata == NULL) {
 			err = -ENOMEM;
 			goto err_destroy_chain;
@@ -2349,7 +2349,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 		char *name;
 
 		err = -ENOMEM;
-		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
 		if (!name)
 			goto err;
 
@@ -2797,7 +2797,7 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 		goto err1;
 
 	err = -ENOMEM;
-	expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
+	expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
 	if (expr == NULL)
 		goto err2;
 
@@ -3405,7 +3405,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 	}
 
 	err = -ENOMEM;
-	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT);
 	if (rule == NULL)
 		goto err_release_expr;
 
@@ -3818,7 +3818,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 		free_page((unsigned long)inuse);
 	}
 
-	set->name = kasprintf(GFP_KERNEL, name, min + n);
+	set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n);
 	if (!set->name)
 		return -ENOMEM;
 
@@ -4382,11 +4382,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 	alloc_size = sizeof(*set) + size + udlen;
 	if (alloc_size < size || alloc_size > INT_MAX)
 		return -ENOMEM;
-	set = kvzalloc(alloc_size, GFP_KERNEL);
+	set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
 	if (!set)
 		return -ENOMEM;
 
-	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
 	if (!name) {
 		err = -ENOMEM;
 		goto err_set_name;
@@ -5921,7 +5921,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	err = -ENOMEM;
 	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
 				      elem.key_end.val.data, elem.data.val.data,
-				      timeout, expiration, GFP_KERNEL);
+				      timeout, expiration, GFP_KERNEL_ACCOUNT);
 	if (elem.priv == NULL)
 		goto err_parse_data;
 
@@ -6165,7 +6165,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	err = -ENOMEM;
 	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
 				      elem.key_end.val.data, NULL, 0, 0,
-				      GFP_KERNEL);
+				      GFP_KERNEL_ACCOUNT);
 	if (elem.priv == NULL)
 		goto fail_elem;
 
@@ -6477,7 +6477,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 	}
 
 	err = -ENOMEM;
-	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
+	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT);
 	if (!obj)
 		goto err2;
 
@@ -6643,7 +6643,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
 	obj->key.table = table;
 	obj->handle = nf_tables_alloc_handle(table);
 
-	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT);
 	if (!obj->key.name) {
 		err = -ENOMEM;
 		goto err_strdup;
@@ -7404,7 +7404,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
 
 	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
-	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
 	if (!flowtable)
 		return -ENOMEM;
 
@@ -7412,7 +7412,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
 	flowtable->handle = nf_tables_alloc_handle(table);
 	INIT_LIST_HEAD(&flowtable->hook_list);
 
-	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT);
 	if (!flowtable->name) {
 		err = -ENOMEM;
 		goto err1;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 RESEND] memcg: enable accounting for nft objects
  2022-03-24 18:05 ` [PATCH v2 RESEND] " Vasily Averin
@ 2022-03-28  8:15   ` Pablo Neira Ayuso
  2022-03-28  9:23     ` Vasily Averin
  0 siblings, 1 reply; 17+ messages in thread
From: Pablo Neira Ayuso @ 2022-03-28  8:15 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Florian Westphal, Jozsef Kadlecsik, linux-kernel,
	netfilter-devel, kernel

On Thu, Mar 24, 2022 at 09:05:50PM +0300, Vasily Averin wrote:
> nftables replaces iptables, but it lacks memcg accounting.
> 
> This patch account most of the memory allocation associated with nft
> and should protect the host from misusing nft inside a memcg restricted
> container.

Applied to nf, thanks

I think nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
use this _ACCOUNT flag variant for objects that are dinamically
allocated from the packet path.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 RESEND] memcg: enable accounting for nft objects
  2022-03-28  8:15   ` Pablo Neira Ayuso
@ 2022-03-28  9:23     ` Vasily Averin
  2022-03-31  8:40       ` [PATCH nft] nft: memcg accounting for dynamically allocated objects Vasily Averin
  0 siblings, 1 reply; 17+ messages in thread
From: Vasily Averin @ 2022-03-28  9:23 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Florian Westphal, Jozsef Kadlecsik, linux-kernel,
	netfilter-devel, kernel

On 3/28/22 11:15, Pablo Neira Ayuso wrote:
> I think nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
> use this _ACCOUNT flag variant for objects that are dinamically
> allocated from the packet path.

Thank you for the hint.
I think you're right in general, such objects should be accounted too.
Though this requires additional research, because it is not clear for me,
where proper memcg can be found.
In case of nft it was quite easy, memcg was taken from current task.
However, the objects you specified seem can be allocated in other contexts.

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-03-28  9:23     ` Vasily Averin
@ 2022-03-31  8:40       ` Vasily Averin
  2022-03-31 18:45         ` Roman Gushchin
  2022-04-01 12:03         ` Florian Westphal
  0 siblings, 2 replies; 17+ messages in thread
From: Vasily Averin @ 2022-03-31  8:40 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kernel, Jozsef Kadlecsik, Florian Westphal, netfilter-devel,
	linux-kernel, Roman Gushchin

nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
use __GFP_ACCOUNT flag for objects that are dynamically
allocated from the packet path.

Such objects are allocated inside .init() or .clone() callbacks
of struct nft_expr_ops executed in task context while processing
netlink messages.

In addition, this patch adds accounting to nft_set_elem_expr_clone()
used for the same purposes.

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 net/netfilter/nft_connlimit.c | 4 ++--
 net/netfilter/nft_counter.c   | 4 ++--
 net/netfilter/nft_last.c      | 4 ++--
 net/netfilter/nft_limit.c     | 4 ++--
 net/netfilter/nft_quota.c     | 4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 04be94236a34..e01241151ef7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
 	int err, i, k;
 
 	for (i = 0; i < set->num_exprs; i++) {
-		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
+		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
 		if (!expr)
 			goto err_expr;
 
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 3362417ebfdb..9c2146aac59e 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
 			invert = true;
 	}
 
-	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
 	if (!priv->list)
 		return -ENOMEM;
 
@@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
 	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
 	struct nft_connlimit *priv_src = nft_expr_priv(src);
 
-	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);
 	if (!priv_dst->list)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f179e8c3b0ca..040a697d96b3 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
 	struct nft_counter __percpu *cpu_stats;
 	struct nft_counter *this_cpu;
 
-	cpu_stats = alloc_percpu(struct nft_counter);
+	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
 	if (cpu_stats == NULL)
 		return -ENOMEM;
 
@@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
 
 	nft_counter_fetch(priv, &total);
 
-	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
 	if (cpu_stats == NULL)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 4f745a409d34..4cf4f6349855 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	u64 last_jiffies;
 	int err;
 
-	last = kzalloc(sizeof(*last), GFP_KERNEL);
+	last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
 	if (!last)
 		return -ENOMEM;
 
@@ -105,7 +105,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
 {
 	struct nft_last_priv *priv_dst = nft_expr_priv(dst);
 
-	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC | __GFP_ACCOUNT);
 	if (!priv_dst->last)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a726b623963d..d7779d5f3cbb 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
 				 priv->rate);
 	}
 
-	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
 	if (!priv->limit)
 		return -ENOMEM;
 
@@ -144,7 +144,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
 	priv_dst->burst = priv_src->burst;
 	priv_dst->invert = priv_src->invert;
 
-	priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+	priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC | __GFP_ACCOUNT);
 	if (!priv_dst->limit)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index f394a0b562f6..2def4d92a170 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 			return -EOPNOTSUPP;
 	}
 
-	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
 	if (!priv->consumed)
 		return -ENOMEM;
 
@@ -236,7 +236,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
 {
 	struct nft_quota *priv_dst = nft_expr_priv(dst);
 
-	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC | __GFP_ACCOUNT);
 	if (!priv_dst->consumed)
 		return -ENOMEM;
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-03-31  8:40       ` [PATCH nft] nft: memcg accounting for dynamically allocated objects Vasily Averin
@ 2022-03-31 18:45         ` Roman Gushchin
  2022-04-01 12:03         ` Florian Westphal
  1 sibling, 0 replies; 17+ messages in thread
From: Roman Gushchin @ 2022-03-31 18:45 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Pablo Neira Ayuso, kernel, Jozsef Kadlecsik, Florian Westphal,
	netfilter-devel, linux-kernel

On Thu, Mar 31, 2022 at 11:40:23AM +0300, Vasily Averin wrote:
> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
> use __GFP_ACCOUNT flag for objects that are dynamically
> allocated from the packet path.
> 
> Such objects are allocated inside .init() or .clone() callbacks
> of struct nft_expr_ops executed in task context while processing
> netlink messages.
> 
> In addition, this patch adds accounting to nft_set_elem_expr_clone()
> used for the same purposes.

The patch looks good to me. The only concern I have is a potential
performance regression. Are any of these allocations happening on really
hot paths? From a very brief look it seems like the answer is no,
but I'm not well familiar with the netfilter code. Would be nice to
have a confirmation from somebody working on the netfilter.
We are roughly talking about x2 slower memory allocations, which
is usually not a problem at all, given that allocations are still
very cheap.

Please, feel free to add my
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!

> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
>  net/netfilter/nf_tables_api.c | 2 +-
>  net/netfilter/nft_connlimit.c | 4 ++--
>  net/netfilter/nft_counter.c   | 4 ++--
>  net/netfilter/nft_last.c      | 4 ++--
>  net/netfilter/nft_limit.c     | 4 ++--
>  net/netfilter/nft_quota.c     | 4 ++--
>  6 files changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 04be94236a34..e01241151ef7 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
>  	int err, i, k;
>  
>  	for (i = 0; i < set->num_exprs; i++) {
> -		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
> +		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
>  		if (!expr)
>  			goto err_expr;
>  
> diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
> index 3362417ebfdb..9c2146aac59e 100644
> --- a/net/netfilter/nft_connlimit.c
> +++ b/net/netfilter/nft_connlimit.c
> @@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
>  			invert = true;
>  	}
>  
> -	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
> +	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
>  	if (!priv->list)
>  		return -ENOMEM;
>  
> @@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
>  	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
>  	struct nft_connlimit *priv_src = nft_expr_priv(src);
>  
> -	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
> +	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);
>  	if (!priv_dst->list)
>  		return -ENOMEM;
>  
> diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
> index f179e8c3b0ca..040a697d96b3 100644
> --- a/net/netfilter/nft_counter.c
> +++ b/net/netfilter/nft_counter.c
> @@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
>  	struct nft_counter __percpu *cpu_stats;
>  	struct nft_counter *this_cpu;
>  
> -	cpu_stats = alloc_percpu(struct nft_counter);
> +	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
>  	if (cpu_stats == NULL)
>  		return -ENOMEM;
>  
> @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
>  
>  	nft_counter_fetch(priv, &total);
>  
> -	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
> +	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
>  	if (cpu_stats == NULL)
>  		return -ENOMEM;
>  
> diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
> index 4f745a409d34..4cf4f6349855 100644
> --- a/net/netfilter/nft_last.c
> +++ b/net/netfilter/nft_last.c
> @@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
>  	u64 last_jiffies;
>  	int err;
>  
> -	last = kzalloc(sizeof(*last), GFP_KERNEL);
> +	last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
>  	if (!last)
>  		return -ENOMEM;
>  
> @@ -105,7 +105,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
>  {
>  	struct nft_last_priv *priv_dst = nft_expr_priv(dst);
>  
> -	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
> +	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC | __GFP_ACCOUNT);
>  	if (!priv_dst->last)
>  		return -ENOMEM;
>  
> diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
> index a726b623963d..d7779d5f3cbb 100644
> --- a/net/netfilter/nft_limit.c
> +++ b/net/netfilter/nft_limit.c
> @@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
>  				 priv->rate);
>  	}
>  
> -	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
> +	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
>  	if (!priv->limit)
>  		return -ENOMEM;
>  
> @@ -144,7 +144,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
>  	priv_dst->burst = priv_src->burst;
>  	priv_dst->invert = priv_src->invert;
>  
> -	priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
> +	priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC | __GFP_ACCOUNT);
>  	if (!priv_dst->limit)
>  		return -ENOMEM;
>  
> diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
> index f394a0b562f6..2def4d92a170 100644
> --- a/net/netfilter/nft_quota.c
> +++ b/net/netfilter/nft_quota.c
> @@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
>  			return -EOPNOTSUPP;
>  	}
>  
> -	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
> +	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
>  	if (!priv->consumed)
>  		return -ENOMEM;
>  
> @@ -236,7 +236,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
>  {
>  	struct nft_quota *priv_dst = nft_expr_priv(dst);
>  
> -	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
> +	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC | __GFP_ACCOUNT);
>  	if (!priv_dst->consumed)
>  		return -ENOMEM;
>  
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-03-31  8:40       ` [PATCH nft] nft: memcg accounting for dynamically allocated objects Vasily Averin
  2022-03-31 18:45         ` Roman Gushchin
@ 2022-04-01 12:03         ` Florian Westphal
  2022-04-01 18:56           ` Vasily Averin
  1 sibling, 1 reply; 17+ messages in thread
From: Florian Westphal @ 2022-04-01 12:03 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Pablo Neira Ayuso, kernel, Jozsef Kadlecsik, Florian Westphal,
	netfilter-devel, linux-kernel, Roman Gushchin

Vasily Averin <vasily.averin@linux.dev> wrote:
> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
> use __GFP_ACCOUNT flag for objects that are dynamically
> allocated from the packet path.
> 
> Such objects are allocated inside .init() or .clone() callbacks
> of struct nft_expr_ops executed in task context while processing
> netlink messages.

They can also be called from packet path.

> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 04be94236a34..e01241151ef7 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
>  	int err, i, k;
>  
>  	for (i = 0; i < set->num_exprs; i++) {
> -		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
> +		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
>  		if (!expr)
>  			goto err_expr;

This is ok.

> diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
> index 3362417ebfdb..9c2146aac59e 100644
> --- a/net/netfilter/nft_connlimit.c
> +++ b/net/netfilter/nft_connlimit.c
> @@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
>  			invert = true;
>  	}
>  
> -	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
> +	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
>  	if (!priv->list)
>  		return -ENOMEM;

Same.

> @@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
>  	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
>  	struct nft_connlimit *priv_src = nft_expr_priv(src);
>  
> -	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
> +	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);

This can be called from packet path, via nft_dynset.c.

nft_do_chain -> nft_dynset_eval -> nft_dynset_new ->
nft_dynset_expr_setup -> nft_expr_clone -> src->ops->clone()

> diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
> index f179e8c3b0ca..040a697d96b3 100644
> --- a/net/netfilter/nft_counter.c
> +++ b/net/netfilter/nft_counter.c
> @@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
>  	struct nft_counter __percpu *cpu_stats;
>  	struct nft_counter *this_cpu;
>  
> -	cpu_stats = alloc_percpu(struct nft_counter);
> +	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);

This is ok.

> @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
>  
>  	nft_counter_fetch(priv, &total);
>  
> -	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
> +	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
>  	if (cpu_stats == NULL)
>  		return -ENOMEM;

Same problem as connlimit, can be called from packet path.
Basically all GFP_ATOMIC are suspicious.

Not sure how to resolve this, similar mechanics in iptables world (e.g.
connlimit or SET target) don't use memcg accounting.

Perhaps for now resend with only the GFP_KERNEL parts converted?
Those are safe.

Insertion from packet path is limited by set->size (element count) only
at this time.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-04-01 12:03         ` Florian Westphal
@ 2022-04-01 18:56           ` Vasily Averin
  2022-04-01 19:31             ` Florian Westphal
  0 siblings, 1 reply; 17+ messages in thread
From: Vasily Averin @ 2022-04-01 18:56 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Pablo Neira Ayuso, kernel, Jozsef Kadlecsik, netfilter-devel,
	linux-kernel, Roman Gushchin

On 4/1/22 15:03, Florian Westphal wrote:
> Vasily Averin <vasily.averin@linux.dev> wrote:
>> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
>> use __GFP_ACCOUNT flag for objects that are dynamically
>> allocated from the packet path.
>>
>> Such objects are allocated inside .init() or .clone() callbacks
>> of struct nft_expr_ops executed in task context while processing
>> netlink messages.
> 
> They can also be called from packet path.

>> @@ -214,7 +214,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
>>  	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
>>  	struct nft_connlimit *priv_src = nft_expr_priv(src);
>>  
>> -	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
>> +	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC | __GFP_ACCOUNT);
> 
> This can be called from packet path, via nft_dynset.c.
> 
> nft_do_chain -> nft_dynset_eval -> nft_dynset_new ->
> nft_dynset_expr_setup -> nft_expr_clone -> src->ops->clone()
> 

Thank you, I noticed this case but did not understand that it is related to packet path.

>> @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
>>  
>>  	nft_counter_fetch(priv, &total);
>>  
>> -	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
>> +	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC | __GFP_ACCOUNT);
>>  	if (cpu_stats == NULL)
>>  		return -ENOMEM;
> 
> Same problem as connlimit, can be called from packet path.
> Basically all GFP_ATOMIC are suspicious.
> 
> Not sure how to resolve this, similar mechanics in iptables world (e.g.
> connlimit or SET target) don't use memcg accounting.
> 
> Perhaps for now resend with only the GFP_KERNEL parts converted?
> Those are safe.

It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg
in case of "!in_task()" context.
On the other hand any additional checks on such path will affect performance.

Could you please estimate how often is this code used in the case of nft vs packet path?

If packet path is rare case I think we can keep current code as is.
If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check. 

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-04-01 18:56           ` Vasily Averin
@ 2022-04-01 19:31             ` Florian Westphal
  2022-04-01 21:14               ` Roman Gushchin
  2022-04-02  8:55               ` Vasily Averin
  0 siblings, 2 replies; 17+ messages in thread
From: Florian Westphal @ 2022-04-01 19:31 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Florian Westphal, Pablo Neira Ayuso, kernel, Jozsef Kadlecsik,
	netfilter-devel, linux-kernel, Roman Gushchin

Vasily Averin <vasily.averin@linux.dev> wrote:
> > Same problem as connlimit, can be called from packet path.
> > Basically all GFP_ATOMIC are suspicious.
> > 
> > Not sure how to resolve this, similar mechanics in iptables world (e.g.
> > connlimit or SET target) don't use memcg accounting.
> > 
> > Perhaps for now resend with only the GFP_KERNEL parts converted?
> > Those are safe.
> 
> It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg
> in case of "!in_task()" context.
> On the other hand any additional checks on such path will affect performance.

I'm not sure this works with ksoftirqd serving network stack?

> Could you please estimate how often is this code used in the case of nft vs packet path?

It depends on user configuration.
Update from packet path is used for things like port knocking or other
dyanamic filter lists, or somehing like Limiting connections to x-per-address/subnet and so on.

> If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check.

But what task/memcg is used for the accounting in that case?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-04-01 19:31             ` Florian Westphal
@ 2022-04-01 21:14               ` Roman Gushchin
  2022-04-01 23:01                 ` Florian Westphal
  2022-04-02  8:55               ` Vasily Averin
  1 sibling, 1 reply; 17+ messages in thread
From: Roman Gushchin @ 2022-04-01 21:14 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Vasily Averin, Pablo Neira Ayuso, kernel, Jozsef Kadlecsik,
	netfilter-devel, linux-kernel

On Fri, Apr 01, 2022 at 09:31:59PM +0200, Florian Westphal wrote:
> Vasily Averin <vasily.averin@linux.dev> wrote:
> > > Same problem as connlimit, can be called from packet path.
> > > Basically all GFP_ATOMIC are suspicious.
> > > 
> > > Not sure how to resolve this, similar mechanics in iptables world (e.g.
> > > connlimit or SET target) don't use memcg accounting.
> > > 
> > > Perhaps for now resend with only the GFP_KERNEL parts converted?
> > > Those are safe.
> > 
> > It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg
> > in case of "!in_task()" context.
> > On the other hand any additional checks on such path will affect performance.
> 
> I'm not sure this works with ksoftirqd serving network stack?
> 
> > Could you please estimate how often is this code used in the case of nft vs packet path?
> 
> It depends on user configuration.
> Update from packet path is used for things like port knocking or other
> dyanamic filter lists, or somehing like Limiting connections to x-per-address/subnet and so on.
> 
> > If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check.
> 
> But what task/memcg is used for the accounting in that case?

Root memcg/no accounting, which is the same.

There is a way to account for a specific memcg in such cases, it's used for
bpf maps, for example. We save a pointer to the memcg which created the map and
charge it for all allocations from a !in_task context. But the performance can
be affected, so let's not do without regression tests and a serious need.

Thanks!

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-04-01 21:14               ` Roman Gushchin
@ 2022-04-01 23:01                 ` Florian Westphal
  0 siblings, 0 replies; 17+ messages in thread
From: Florian Westphal @ 2022-04-01 23:01 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Florian Westphal, Vasily Averin, Pablo Neira Ayuso, kernel,
	Jozsef Kadlecsik, netfilter-devel, linux-kernel

Roman Gushchin <roman.gushchin@linux.dev> wrote:
> On Fri, Apr 01, 2022 at 09:31:59PM +0200, Florian Westphal wrote:
> > But what task/memcg is used for the accounting in that case?
> 
> Root memcg/no accounting, which is the same.
> 
> There is a way to account for a specific memcg in such cases, it's used for
> bpf maps, for example. We save a pointer to the memcg which created the map and
> charge it for all allocations from a !in_task context.

Great, so we could use same scheme later on if its required for some
use case.

>  so let's not do without regression tests and a serious need.

Sounds good. Thanks.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH nft] nft: memcg accounting for dynamically allocated objects
  2022-04-01 19:31             ` Florian Westphal
  2022-04-01 21:14               ` Roman Gushchin
@ 2022-04-02  8:55               ` Vasily Averin
  2022-04-02  9:50                 ` [PATCH v2] " Vasily Averin
  1 sibling, 1 reply; 17+ messages in thread
From: Vasily Averin @ 2022-04-02  8:55 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Pablo Neira Ayuso, kernel, Jozsef Kadlecsik, netfilter-devel,
	linux-kernel, Roman Gushchin

On 4/1/22 22:31, Florian Westphal wrote:
> Vasily Averin <vasily.averin@linux.dev> wrote:
>>> Same problem as connlimit, can be called from packet path.
>>> Basically all GFP_ATOMIC are suspicious.
>>>
>>> Not sure how to resolve this, similar mechanics in iptables world (e.g.
>>> connlimit or SET target) don't use memcg accounting.
>>>
>>> Perhaps for now resend with only the GFP_KERNEL parts converted?
>>> Those are safe.
>>
>> It is safe for packet path too, _ACCOUNT allocation will not be able to find memcg
>> in case of "!in_task()" context.
>> On the other hand any additional checks on such path will affect performance.
> 
> I'm not sure this works with ksoftirqd serving network stack?

Please take look at memcg_kmem_bypass() called from 
memcg_slab_pre_alloc_hook -> get_obj_cgroup_from_current

By default memcg accounting does not work for any kernel threads.
If required thread can use set_active_memcg() but at present it is not widely used.

>> Could you please estimate how often is this code used in the case of nft vs packet path?
> 
> It depends on user configuration.
> Update from packet path is used for things like port knocking or other
> dyanamic filter lists, or somehing like Limiting connections to x-per-address/subnet and so on.

Ok, I think we can skip accounting in such cases at the moment.
I doubt it can be misused and consume significant piece of host memory.
So I'm going to resend the patch w/o accounting in all .clone callbacks. 

>> If the opposite is the case, then I can add __GFP_ACCOUNT flag depending on in_task() check.
> 
> But what task/memcg is used for the accounting in that case?

Thanks to Roman for the explanation in concurrent thread.

Thank you,
	Vasily Averin

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2] nft: memcg accounting for dynamically allocated objects
  2022-04-02  8:55               ` Vasily Averin
@ 2022-04-02  9:50                 ` Vasily Averin
  2022-04-05  9:58                   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 17+ messages in thread
From: Vasily Averin @ 2022-04-02  9:50 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Florian Westphal
  Cc: kernel, Jozsef Kadlecsik, netfilter-devel, linux-kernel, Roman Gushchin

nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
use __GFP_ACCOUNT flag for objects that are dynamically
allocated from the packet path.

Such objects are allocated inside nft_expr_ops->init() callbacks
executed in task context while processing netlink messages.

In addition, this patch adds accounting to nft_set_elem_expr_clone()
used for the same purposes.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
---
v2: removed accounting in nft_expr_ops->clone() callbacks
---
 net/netfilter/nf_tables_api.c | 2 +-
 net/netfilter/nft_connlimit.c | 2 +-
 net/netfilter/nft_counter.c   | 2 +-
 net/netfilter/nft_last.c      | 2 +-
 net/netfilter/nft_limit.c     | 2 +-
 net/netfilter/nft_quota.c     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 04be94236a34..e01241151ef7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5447,7 +5447,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
 	int err, i, k;
 
 	for (i = 0; i < set->num_exprs; i++) {
-		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
+		expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
 		if (!expr)
 			goto err_expr;
 
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 3362417ebfdb..f5df535bcbd0 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
 			invert = true;
 	}
 
-	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
 	if (!priv->list)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f179e8c3b0ca..7691e42f6bbe 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
 	struct nft_counter __percpu *cpu_stats;
 	struct nft_counter *this_cpu;
 
-	cpu_stats = alloc_percpu(struct nft_counter);
+	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
 	if (cpu_stats == NULL)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 4f745a409d34..97d0d09d48d3 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	u64 last_jiffies;
 	int err;
 
-	last = kzalloc(sizeof(*last), GFP_KERNEL);
+	last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
 	if (!last)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a726b623963d..c1f7e8bb33e6 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
 				 priv->rate);
 	}
 
-	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
 	if (!priv->limit)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index f394a0b562f6..0d2f55900f7b 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 			return -EOPNOTSUPP;
 	}
 
-	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
 	if (!priv->consumed)
 		return -ENOMEM;
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] nft: memcg accounting for dynamically allocated objects
  2022-04-02  9:50                 ` [PATCH v2] " Vasily Averin
@ 2022-04-05  9:58                   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 17+ messages in thread
From: Pablo Neira Ayuso @ 2022-04-05  9:58 UTC (permalink / raw)
  To: Vasily Averin
  Cc: Florian Westphal, kernel, Jozsef Kadlecsik, netfilter-devel,
	linux-kernel, Roman Gushchin

On Sat, Apr 02, 2022 at 12:50:37PM +0300, Vasily Averin wrote:
> nft_*.c files whose NFT_EXPR_STATEFUL flag is set on need to
> use __GFP_ACCOUNT flag for objects that are dynamically
> allocated from the packet path.
> 
> Such objects are allocated inside nft_expr_ops->init() callbacks
> executed in task context while processing netlink messages.
> 
> In addition, this patch adds accounting to nft_set_elem_expr_clone()
> used for the same purposes.

Applied to nf, thanks

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2022-04-06  0:27 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20220228122429.GC26547@breakpoint.cc>
2022-03-21  5:02 ` [PATCH v2] memcg: enable accounting for nft objects Vasily Averin
2022-03-22 10:25   ` Florian Westphal
2022-03-24 14:19   ` Pablo Neira Ayuso
2022-03-24 17:23     ` Vasily Averin
2022-03-24 18:05 ` [PATCH v2 RESEND] " Vasily Averin
2022-03-28  8:15   ` Pablo Neira Ayuso
2022-03-28  9:23     ` Vasily Averin
2022-03-31  8:40       ` [PATCH nft] nft: memcg accounting for dynamically allocated objects Vasily Averin
2022-03-31 18:45         ` Roman Gushchin
2022-04-01 12:03         ` Florian Westphal
2022-04-01 18:56           ` Vasily Averin
2022-04-01 19:31             ` Florian Westphal
2022-04-01 21:14               ` Roman Gushchin
2022-04-01 23:01                 ` Florian Westphal
2022-04-02  8:55               ` Vasily Averin
2022-04-02  9:50                 ` [PATCH v2] " Vasily Averin
2022-04-05  9:58                   ` Pablo Neira Ayuso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).