Netfilter-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH nftables 0/4] un-break nftables on big-endian arches
@ 2019-08-13 18:44 Florian Westphal
  2019-08-13 18:44 ` [PATCH nftables 1/4] src: fix jumps on bigendian arches Florian Westphal
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 18:44 UTC (permalink / raw)
  To: netfilter-devel

nftables 0.9.1 fails on s390x.  Breakage includes:

- "jump foo" is rejected
- chain policies are treated as drop
- chain priority is always set to 0 (same bug though).

Also, last test case reveals problems with receive buffer sizes,
these have been fixed as well.

After these changes, all test cases work, except one.
The failure in that test case is caused by timeout rounding
errors due to CONFIG_HZ=100, I'll leave that for the time being
given HZ=100 is rare noways and the "fix" is to adjust the test case.

Florian Westphal (4):
      src: fix jumps on bigendian arches
      src: parser: fix parsing of chain priority and policy on bigendian
      src: mnl: fix setting rcvbuffer size
      src: mnl: retry when we hit -ENOBUFS

 datatype.c     |   26 +++++++++++++++++---------
 mnl.c          |   16 +++++++++++-----
 netlink.c      |   16 +++++++++++++---
 parser_bison.y |    7 +++++--
 4 files changed, 46 insertions(+), 19 deletions(-)



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH nftables 1/4] src: fix jumps on bigendian arches
  2019-08-13 18:44 [PATCH nftables 0/4] un-break nftables on big-endian arches Florian Westphal
@ 2019-08-13 18:44 ` Florian Westphal
  2019-08-13 19:20   ` Pablo Neira Ayuso
  2019-08-13 18:44 ` [PATCH nftables 2/4] src: parser: fix parsing of chain priority and policy on bigendian Florian Westphal
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 18:44 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

table bla {
  chain foo { }
  chain bar { jump foo }
 }
}

Fails to restore on big-endian platforms:
jump.nft:5:2-9: Error: Could not process rule: No such file or directory
 jump foo

nft passes a 0-length name to the kernel.

This is because when we export the value (the string), we provide
the size of the destination buffer.

In earlier versions, the parser allocated the name with the same
fixed size and all was fine.

After the fix, the export places the name in the wrong location
in the destination buffer.

This makes tests/shell/testcases/chains/0001jumps_0 work on s390x.

Fixes: 142350f154c78 ("src: invalid read when importing chain name")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 src/datatype.c | 26 +++++++++++++++++---------
 src/netlink.c  | 16 +++++++++++++---
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/datatype.c b/src/datatype.c
index 28f726f4e84c..6908bc22d783 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -244,10 +244,24 @@ const struct datatype invalid_type = {
 	.print		= invalid_type_print,
 };
 
-static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
+static void verdict_jump_chain_print(const char *what, const struct expr *e,
+				     struct output_ctx *octx)
 {
 	char chain[NFT_CHAIN_MAXNAMELEN];
+	unsigned int len;
+
+	memset(chain, 0, sizeof(chain));
 
+	len = e->len / BITS_PER_BYTE;
+	if (len >= sizeof(chain))
+		len = sizeof(chain) - 1;
+
+	mpz_export_data(chain, e->value, BYTEORDER_HOST_ENDIAN, len);
+	nft_print(octx, "%s %s", what, chain);
+}
+
+static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
+{
 	switch (expr->verdict) {
 	case NFT_CONTINUE:
 		nft_print(octx, "continue");
@@ -257,10 +271,7 @@ static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
 		break;
 	case NFT_JUMP:
 		if (expr->chain->etype == EXPR_VALUE) {
-			mpz_export_data(chain, expr->chain->value,
-					BYTEORDER_HOST_ENDIAN,
-					NFT_CHAIN_MAXNAMELEN);
-			nft_print(octx, "jump %s", chain);
+			verdict_jump_chain_print("jump", expr->chain, octx);
 		} else {
 			nft_print(octx, "jump ");
 			expr_print(expr->chain, octx);
@@ -268,10 +279,7 @@ static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
 		break;
 	case NFT_GOTO:
 		if (expr->chain->etype == EXPR_VALUE) {
-			mpz_export_data(chain, expr->chain->value,
-					BYTEORDER_HOST_ENDIAN,
-					NFT_CHAIN_MAXNAMELEN);
-			nft_print(octx, "goto %s", chain);
+			verdict_jump_chain_print("goto", expr->chain, octx);
 		} else {
 			nft_print(octx, "goto ");
 			expr_print(expr->chain, octx);
diff --git a/src/netlink.c b/src/netlink.c
index aeeb12eaca93..f8e1120447d9 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -222,17 +222,27 @@ static void netlink_gen_verdict(const struct expr *expr,
 				struct nft_data_linearize *data)
 {
 	char chain[NFT_CHAIN_MAXNAMELEN];
+	unsigned int len;
 
 	data->verdict = expr->verdict;
 
 	switch (expr->verdict) {
 	case NFT_JUMP:
 	case NFT_GOTO:
+		len = expr->chain->len / BITS_PER_BYTE;
+
+		if (!len)
+			BUG("chain length is 0");
+
+		if (len > sizeof(chain))
+			BUG("chain is too large (%u, %u max)",
+			    len, (unsigned int)sizeof(chain));
+
+		memset(chain, 0, sizeof(chain));
+
 		mpz_export_data(chain, expr->chain->value,
-				BYTEORDER_HOST_ENDIAN,
-				NFT_CHAIN_MAXNAMELEN);
+				BYTEORDER_HOST_ENDIAN, len);
 		snprintf(data->chain, NFT_CHAIN_MAXNAMELEN, "%s", chain);
-		data->chain[NFT_CHAIN_MAXNAMELEN-1] = '\0';
 		break;
 	}
 }
-- 
2.21.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH nftables 2/4] src: parser: fix parsing of chain priority and policy on bigendian
  2019-08-13 18:44 [PATCH nftables 0/4] un-break nftables on big-endian arches Florian Westphal
  2019-08-13 18:44 ` [PATCH nftables 1/4] src: fix jumps on bigendian arches Florian Westphal
@ 2019-08-13 18:44 ` Florian Westphal
  2019-08-13 19:26   ` Pablo Neira Ayuso
  2019-08-13 18:44 ` [PATCH nftables 3/4] src: mnl: fix setting rcvbuffer size Florian Westphal
  2019-08-13 18:44 ` [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS Florian Westphal
  3 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 18:44 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

tests/shell/testcases/flowtable/0001flowtable_0
tests/shell/testcases/nft-f/0008split_tables_0
fail the 'dump compare' on s390x.
The priority (10) turns to 0, and accept turned to drop.

Problem is that '$1' is a 64bit value -- then we pass the address
and import 'int' -- we then get the upper all zero bits.

Use an intermediate value instead.

Fixes: 627c451b2351 ("src: allow variables in the chain priority specification")
Fixes: dba4a9b4b5fe ("src: allow variable in chain policy")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 src/parser_bison.y | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/parser_bison.y b/src/parser_bison.y
index 939b9a8db6d7..406cf54bdeb8 100644
--- a/src/parser_bison.y
+++ b/src/parser_bison.y
@@ -1972,11 +1972,12 @@ extended_prio_name	:	OUT
 extended_prio_spec	:	int_num
 			{
 				struct prio_spec spec = {0};
+				int value = (int)$1;
 
 				spec.expr = constant_expr_alloc(&@$, &integer_type,
 								BYTEORDER_HOST_ENDIAN,
 								sizeof(int) *
-								BITS_PER_BYTE, &$1);
+								BITS_PER_BYTE, &value);
 				$$ = spec;
 			}
 			|	variable_expr
@@ -2052,10 +2053,12 @@ policy_expr		:	variable_expr
 			}
 			|	chain_policy
 			{
+				int value = (int)$1;
+
 				$$ = constant_expr_alloc(&@$, &integer_type,
 							 BYTEORDER_HOST_ENDIAN,
 							 sizeof(int) *
-							 BITS_PER_BYTE, &$1);
+							 BITS_PER_BYTE, &value);
 			}
 			;
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH nftables 3/4] src: mnl: fix setting rcvbuffer size
  2019-08-13 18:44 [PATCH nftables 0/4] un-break nftables on big-endian arches Florian Westphal
  2019-08-13 18:44 ` [PATCH nftables 1/4] src: fix jumps on bigendian arches Florian Westphal
  2019-08-13 18:44 ` [PATCH nftables 2/4] src: parser: fix parsing of chain priority and policy on bigendian Florian Westphal
@ 2019-08-13 18:44 ` Florian Westphal
  2019-08-13 19:26   ` Pablo Neira Ayuso
  2019-08-13 18:44 ` [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS Florian Westphal
  3 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 18:44 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

Kernel expects socklen_t (int).
Using size_t causes kernel to read upper 0-bits.

This caused tests/shell/testcases/transactions/0049huge_0
to fail on s390x -- it uses 'echo' mode and will quickly
overrun the tiny buffer size set due to this bug.

Fixes: 89c82c261bb5 ("mnl: estimate receiver buffer size")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 src/mnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mnl.c b/src/mnl.c
index f24d2ce0c56a..97a2e0765189 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -240,7 +240,7 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl,
 
 static unsigned int nlsndbufsiz;
 
-static int mnl_set_rcvbuffer(const struct mnl_socket *nl, size_t bufsiz)
+static int mnl_set_rcvbuffer(const struct mnl_socket *nl, socklen_t bufsiz)
 {
 	socklen_t len = sizeof(nlsndbufsiz);
 	int ret;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS
  2019-08-13 18:44 [PATCH nftables 0/4] un-break nftables on big-endian arches Florian Westphal
                   ` (2 preceding siblings ...)
  2019-08-13 18:44 ` [PATCH nftables 3/4] src: mnl: fix setting rcvbuffer size Florian Westphal
@ 2019-08-13 18:44 ` Florian Westphal
  2019-08-13 19:34   ` Pablo Neira Ayuso
  3 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 18:44 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

tests/shell/testcases/transactions/0049huge_0

still fails with ENOBUFS error after endian fix done in
previous patch.  Its enough to increase the scale factor (4)
on s390x, but rather than continue with these "guess the proper
size" game, just increase the buffer size and retry up to 3 times.

This makes above test work on s390x.

So, implement what Pablo suggested in the earlier commit:
    We could also explore increasing the buffer and retry if
    mnl_nft_socket_sendmsg() hits ENOBUFS if we ever hit this problem again.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 src/mnl.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mnl.c b/src/mnl.c
index 97a2e0765189..0c7a4c1fa63f 100644
--- a/src/mnl.c
+++ b/src/mnl.c
@@ -311,6 +311,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
 	int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl);
 	uint32_t iov_len = nftnl_batch_iovec_len(ctx->batch);
 	char rcv_buf[MNL_SOCKET_BUFFER_SIZE];
+	unsigned int enobuf_restarts = 0;
 	size_t avg_msg_size, batch_size;
 	const struct sockaddr_nl snl = {
 		.nl_family = AF_NETLINK
@@ -320,6 +321,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
 		.tv_usec	= 0
 	};
 	struct iovec iov[iov_len];
+	unsigned int scale = 4;
 	struct msghdr msg = {};
 	fd_set readfds;
 
@@ -327,9 +329,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
 
 	batch_size = mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len);
 	avg_msg_size = div_round_up(batch_size, num_cmds);
-
-	mnl_set_rcvbuffer(ctx->nft->nf_sock, num_cmds * avg_msg_size * 4);
-
+restart:
 	ret = mnl_nft_socket_sendmsg(ctx, &msg);
 	if (ret == -1)
 		return -1;
@@ -347,8 +347,14 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
 			break;
 
 		ret = mnl_socket_recvfrom(nl, rcv_buf, sizeof(rcv_buf));
-		if (ret == -1)
+		if (ret == -1) {
+			if (errno == ENOBUFS && enobuf_restarts++ < 3) {
+				mnl_set_rcvbuffer(ctx->nft->nf_sock, num_cmds * avg_msg_size * scale);
+				scale *= 2;
+				goto restart;
+			}
 			return -1;
+		}
 
 		ret = mnl_cb_run(rcv_buf, ret, 0, portid, &netlink_echo_callback, ctx);
 		/* Continue on error, make sure we get all acknowledgments */
-- 
2.21.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 1/4] src: fix jumps on bigendian arches
  2019-08-13 18:44 ` [PATCH nftables 1/4] src: fix jumps on bigendian arches Florian Westphal
@ 2019-08-13 19:20   ` Pablo Neira Ayuso
  2019-08-13 19:34     ` Florian Westphal
  0 siblings, 1 reply; 13+ messages in thread
From: Pablo Neira Ayuso @ 2019-08-13 19:20 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, Aug 13, 2019 at 08:44:06PM +0200, Florian Westphal wrote:
> table bla {
>   chain foo { }
>   chain bar { jump foo }
>  }
> }
> 
> Fails to restore on big-endian platforms:
> jump.nft:5:2-9: Error: Could not process rule: No such file or directory
>  jump foo
> 
> nft passes a 0-length name to the kernel.
> 
> This is because when we export the value (the string), we provide
> the size of the destination buffer.
> 
> In earlier versions, the parser allocated the name with the same
> fixed size and all was fine.
> 
> After the fix, the export places the name in the wrong location
> in the destination buffer.
> 
> This makes tests/shell/testcases/chains/0001jumps_0 work on s390x.
> 
> Fixes: 142350f154c78 ("src: invalid read when importing chain name")
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  src/datatype.c | 26 +++++++++++++++++---------
>  src/netlink.c  | 16 +++++++++++++---
>  2 files changed, 30 insertions(+), 12 deletions(-)
> 
> diff --git a/src/datatype.c b/src/datatype.c
> index 28f726f4e84c..6908bc22d783 100644
> --- a/src/datatype.c
> +++ b/src/datatype.c
> @@ -244,10 +244,24 @@ const struct datatype invalid_type = {
>  	.print		= invalid_type_print,
>  };
>  
> -static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
> +static void verdict_jump_chain_print(const char *what, const struct expr *e,
> +				     struct output_ctx *octx)
>  {
>  	char chain[NFT_CHAIN_MAXNAMELEN];

Probably:

        chat chain[NFT_CHAIN_MAXNAMELEN + 1] = {};

to ensure space for \0.

> +	unsigned int len;
> +
> +	memset(chain, 0, sizeof(chain));

remove this memset then.

> +	len = e->len / BITS_PER_BYTE;

        div_round_up() ?

> +	if (len >= sizeof(chain))
> +		len = sizeof(chain) - 1;

Probably BUG() here instead if e->len > NFT_CHAIN_MAXNAMELEN? This
should not happen.

> +
> +	mpz_export_data(chain, e->value, BYTEORDER_HOST_ENDIAN, len);
> +	nft_print(octx, "%s %s", what, chain);
> +}
> +
> +static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
> +{
>  	switch (expr->verdict) {
>  	case NFT_CONTINUE:
>  		nft_print(octx, "continue");
> @@ -257,10 +271,7 @@ static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
>  		break;
>  	case NFT_JUMP:
>  		if (expr->chain->etype == EXPR_VALUE) {
> -			mpz_export_data(chain, expr->chain->value,
> -					BYTEORDER_HOST_ENDIAN,
> -					NFT_CHAIN_MAXNAMELEN);
> -			nft_print(octx, "jump %s", chain);
> +			verdict_jump_chain_print("jump", expr->chain, octx);
>  		} else {
>  			nft_print(octx, "jump ");
>  			expr_print(expr->chain, octx);
> @@ -268,10 +279,7 @@ static void verdict_type_print(const struct expr *expr, struct output_ctx *octx)
>  		break;
>  	case NFT_GOTO:
>  		if (expr->chain->etype == EXPR_VALUE) {
> -			mpz_export_data(chain, expr->chain->value,
> -					BYTEORDER_HOST_ENDIAN,
> -					NFT_CHAIN_MAXNAMELEN);
> -			nft_print(octx, "goto %s", chain);
> +			verdict_jump_chain_print("goto", expr->chain, octx);
>  		} else {
>  			nft_print(octx, "goto ");
>  			expr_print(expr->chain, octx);
> diff --git a/src/netlink.c b/src/netlink.c
> index aeeb12eaca93..f8e1120447d9 100644
> --- a/src/netlink.c
> +++ b/src/netlink.c
> @@ -222,17 +222,27 @@ static void netlink_gen_verdict(const struct expr *expr,
>  				struct nft_data_linearize *data)
>  {
>  	char chain[NFT_CHAIN_MAXNAMELEN];

        ...[NFT_CHAIN_MAXNAMELEN + 1] = {};

> +	unsigned int len;
>  
>  	data->verdict = expr->verdict;
>  
>  	switch (expr->verdict) {
>  	case NFT_JUMP:
>  	case NFT_GOTO:
> +		len = expr->chain->len / BITS_PER_BYTE;

                div_round_up()

> +
> +		if (!len)
> +			BUG("chain length is 0");
> +
> +		if (len > sizeof(chain))
> +			BUG("chain is too large (%u, %u max)",
> +			    len, (unsigned int)sizeof(chain));
> +
> +		memset(chain, 0, sizeof(chain));
> +
>  		mpz_export_data(chain, expr->chain->value,
> -				BYTEORDER_HOST_ENDIAN,
> -				NFT_CHAIN_MAXNAMELEN);
> +				BYTEORDER_HOST_ENDIAN, len);
>  		snprintf(data->chain, NFT_CHAIN_MAXNAMELEN, "%s", chain);
> -		data->chain[NFT_CHAIN_MAXNAMELEN-1] = '\0';
>  		break;
>  	}
>  }
> -- 
> 2.21.0
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 2/4] src: parser: fix parsing of chain priority and policy on bigendian
  2019-08-13 18:44 ` [PATCH nftables 2/4] src: parser: fix parsing of chain priority and policy on bigendian Florian Westphal
@ 2019-08-13 19:26   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 13+ messages in thread
From: Pablo Neira Ayuso @ 2019-08-13 19:26 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, Aug 13, 2019 at 08:44:07PM +0200, Florian Westphal wrote:
> tests/shell/testcases/flowtable/0001flowtable_0
> tests/shell/testcases/nft-f/0008split_tables_0
> fail the 'dump compare' on s390x.
> The priority (10) turns to 0, and accept turned to drop.
> 
> Problem is that '$1' is a 64bit value -- then we pass the address
> and import 'int' -- we then get the upper all zero bits.
> 
> Use an intermediate value instead.

Probably add this:

%union {
        uint64_t                val;
        uint32_t                val32;

then update this to use it?

%type <val32>                   int_num chain_policy

We can get someone to review all these typing, maybe some more folks
need <val32>.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 3/4] src: mnl: fix setting rcvbuffer size
  2019-08-13 18:44 ` [PATCH nftables 3/4] src: mnl: fix setting rcvbuffer size Florian Westphal
@ 2019-08-13 19:26   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 13+ messages in thread
From: Pablo Neira Ayuso @ 2019-08-13 19:26 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, Aug 13, 2019 at 08:44:08PM +0200, Florian Westphal wrote:
> Kernel expects socklen_t (int).
> Using size_t causes kernel to read upper 0-bits.
> 
> This caused tests/shell/testcases/transactions/0049huge_0
> to fail on s390x -- it uses 'echo' mode and will quickly
> overrun the tiny buffer size set due to this bug.
> 
> Fixes: 89c82c261bb5 ("mnl: estimate receiver buffer size")
> Signed-off-by: Florian Westphal <fw@strlen.de>

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 1/4] src: fix jumps on bigendian arches
  2019-08-13 19:20   ` Pablo Neira Ayuso
@ 2019-08-13 19:34     ` Florian Westphal
  2019-08-13 19:35       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 19:34 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, netfilter-devel

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> >  	char chain[NFT_CHAIN_MAXNAMELEN];
> 
> Probably:
> 
>         chat chain[NFT_CHAIN_MAXNAMELEN + 1] = {};


> to ensure space for \0.

Not sure thats needed, the policy is:

[NFTA_CHAIN_NAME] = { .type = NLA_STRING,
		      .len = NFT_CHAIN_MAXNAMELEN - 1 },

> > +	unsigned int len;
> > +
> > +	memset(chain, 0, sizeof(chain));
> 
> remove this memset then.
> 
> > +	len = e->len / BITS_PER_BYTE;
> 
>         div_round_up() ?

Do we have strings that are not divisible by BITS_PER_BYTE?

> > +	if (len >= sizeof(chain))
> > +		len = sizeof(chain) - 1;
> 
> Probably BUG() here instead if e->len > NFT_CHAIN_MAXNAMELEN? This
> should not happen.

Yes, I can change this.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS
  2019-08-13 18:44 ` [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS Florian Westphal
@ 2019-08-13 19:34   ` Pablo Neira Ayuso
  2019-08-13 19:36     ` Florian Westphal
  0 siblings, 1 reply; 13+ messages in thread
From: Pablo Neira Ayuso @ 2019-08-13 19:34 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, Aug 13, 2019 at 08:44:09PM +0200, Florian Westphal wrote:
> tests/shell/testcases/transactions/0049huge_0
> 
> still fails with ENOBUFS error after endian fix done in
> previous patch.  Its enough to increase the scale factor (4)
> on s390x, but rather than continue with these "guess the proper
> size" game, just increase the buffer size and retry up to 3 times.
> 
> This makes above test work on s390x.
> 
> So, implement what Pablo suggested in the earlier commit:
>     We could also explore increasing the buffer and retry if
>     mnl_nft_socket_sendmsg() hits ENOBUFS if we ever hit this problem again.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  src/mnl.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mnl.c b/src/mnl.c
> index 97a2e0765189..0c7a4c1fa63f 100644
> --- a/src/mnl.c
> +++ b/src/mnl.c
> @@ -311,6 +311,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
>  	int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl);
>  	uint32_t iov_len = nftnl_batch_iovec_len(ctx->batch);
>  	char rcv_buf[MNL_SOCKET_BUFFER_SIZE];
> +	unsigned int enobuf_restarts = 0;
>  	size_t avg_msg_size, batch_size;
>  	const struct sockaddr_nl snl = {
>  		.nl_family = AF_NETLINK
> @@ -320,6 +321,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
>  		.tv_usec	= 0
>  	};
>  	struct iovec iov[iov_len];
> +	unsigned int scale = 4;
>  	struct msghdr msg = {};
>  	fd_set readfds;
>  
> @@ -327,9 +329,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
>  
>  	batch_size = mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len);
>  	avg_msg_size = div_round_up(batch_size, num_cmds);
> -
> -	mnl_set_rcvbuffer(ctx->nft->nf_sock, num_cmds * avg_msg_size * 4);

Leaving this in place does not harm, right? This would speed up things
for x86_64.

It looks like s390 allocates larger page there to accomodate each
netlink event.

All this probing and guess games could be fixed if there is a
getsockopt() to fetch sk->sk_rmem_alloc, this is already exposed in
netlink via /proc. Later :-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 1/4] src: fix jumps on bigendian arches
  2019-08-13 19:34     ` Florian Westphal
@ 2019-08-13 19:35       ` Pablo Neira Ayuso
  0 siblings, 0 replies; 13+ messages in thread
From: Pablo Neira Ayuso @ 2019-08-13 19:35 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, Aug 13, 2019 at 09:34:39PM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > >  	char chain[NFT_CHAIN_MAXNAMELEN];
> > 
> > Probably:
> > 
> >         chat chain[NFT_CHAIN_MAXNAMELEN + 1] = {};
> 
> 
> > to ensure space for \0.
> 
> Not sure thats needed, the policy is:
> 
> [NFTA_CHAIN_NAME] = { .type = NLA_STRING,
> 		      .len = NFT_CHAIN_MAXNAMELEN - 1 },

Right.

> > > +	unsigned int len;
> > > +
> > > +	memset(chain, 0, sizeof(chain));
> > 
> > remove this memset then.
> > 
> > > +	len = e->len / BITS_PER_BYTE;
> > 
> >         div_round_up() ?
> 
> Do we have strings that are not divisible by BITS_PER_BYTE?

Nope.

> > > +	if (len >= sizeof(chain))
> > > +		len = sizeof(chain) - 1;
> > 
> > Probably BUG() here instead if e->len > NFT_CHAIN_MAXNAMELEN? This
> > should not happen.
> 
> Yes, I can change this.

Thanks.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS
  2019-08-13 19:34   ` Pablo Neira Ayuso
@ 2019-08-13 19:36     ` Florian Westphal
  2019-08-13 19:39       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2019-08-13 19:36 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, netfilter-devel

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > diff --git a/src/mnl.c b/src/mnl.c
> > index 97a2e0765189..0c7a4c1fa63f 100644
> > --- a/src/mnl.c
> > +++ b/src/mnl.c
> > @@ -311,6 +311,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
> >  	int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl);
> >  	uint32_t iov_len = nftnl_batch_iovec_len(ctx->batch);
> >  	char rcv_buf[MNL_SOCKET_BUFFER_SIZE];
> > +	unsigned int enobuf_restarts = 0;
> >  	size_t avg_msg_size, batch_size;
> >  	const struct sockaddr_nl snl = {
> >  		.nl_family = AF_NETLINK
> > @@ -320,6 +321,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
> >  		.tv_usec	= 0
> >  	};
> >  	struct iovec iov[iov_len];
> > +	unsigned int scale = 4;
> >  	struct msghdr msg = {};
> >  	fd_set readfds;
> >  
> > @@ -327,9 +329,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
> >  
> >  	batch_size = mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len);
> >  	avg_msg_size = div_round_up(batch_size, num_cmds);
> > -
> > -	mnl_set_rcvbuffer(ctx->nft->nf_sock, num_cmds * avg_msg_size * 4);
> 
> Leaving this in place does not harm, right? This would speed up things
> for x86_64.

ok, I can keep it.

> It looks like s390 allocates larger page there to accomodate each
> netlink event.
> 
> All this probing and guess games could be fixed if there is a
> getsockopt() to fetch sk->sk_rmem_alloc, this is already exposed in
> netlink via /proc. Later :-)

How? The error occurs because sk_rmem_alloc is not large enough to
store all the netlink acks in the socket backlog.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS
  2019-08-13 19:36     ` Florian Westphal
@ 2019-08-13 19:39       ` Pablo Neira Ayuso
  0 siblings, 0 replies; 13+ messages in thread
From: Pablo Neira Ayuso @ 2019-08-13 19:39 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Tue, Aug 13, 2019 at 09:36:29PM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > diff --git a/src/mnl.c b/src/mnl.c
> > > index 97a2e0765189..0c7a4c1fa63f 100644
> > > --- a/src/mnl.c
> > > +++ b/src/mnl.c
> > > @@ -311,6 +311,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
> > >  	int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl);
> > >  	uint32_t iov_len = nftnl_batch_iovec_len(ctx->batch);
> > >  	char rcv_buf[MNL_SOCKET_BUFFER_SIZE];
> > > +	unsigned int enobuf_restarts = 0;
> > >  	size_t avg_msg_size, batch_size;
> > >  	const struct sockaddr_nl snl = {
> > >  		.nl_family = AF_NETLINK
> > > @@ -320,6 +321,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
> > >  		.tv_usec	= 0
> > >  	};
> > >  	struct iovec iov[iov_len];
> > > +	unsigned int scale = 4;
> > >  	struct msghdr msg = {};
> > >  	fd_set readfds;
> > >  
> > > @@ -327,9 +329,7 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct list_head *err_list,
> > >  
> > >  	batch_size = mnl_nft_batch_to_msg(ctx, &msg, &snl, iov, iov_len);
> > >  	avg_msg_size = div_round_up(batch_size, num_cmds);
> > > -
> > > -	mnl_set_rcvbuffer(ctx->nft->nf_sock, num_cmds * avg_msg_size * 4);
> > 
> > Leaving this in place does not harm, right? This would speed up things
> > for x86_64.
> 
> ok, I can keep it.
> 
> > It looks like s390 allocates larger page there to accomodate each
> > netlink event.
> > 
> > All this probing and guess games could be fixed if there is a
> > getsockopt() to fetch sk->sk_rmem_alloc, this is already exposed in
> > netlink via /proc. Later :-)
> 
> How? The error occurs because sk_rmem_alloc is not large enough to
> store all the netlink acks in the socket backlog.

Oh, indeed. We'll need this probing in place as we cannot know what
page size of the netlink event is used on every arch.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, back to index

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-13 18:44 [PATCH nftables 0/4] un-break nftables on big-endian arches Florian Westphal
2019-08-13 18:44 ` [PATCH nftables 1/4] src: fix jumps on bigendian arches Florian Westphal
2019-08-13 19:20   ` Pablo Neira Ayuso
2019-08-13 19:34     ` Florian Westphal
2019-08-13 19:35       ` Pablo Neira Ayuso
2019-08-13 18:44 ` [PATCH nftables 2/4] src: parser: fix parsing of chain priority and policy on bigendian Florian Westphal
2019-08-13 19:26   ` Pablo Neira Ayuso
2019-08-13 18:44 ` [PATCH nftables 3/4] src: mnl: fix setting rcvbuffer size Florian Westphal
2019-08-13 19:26   ` Pablo Neira Ayuso
2019-08-13 18:44 ` [PATCH nftables 4/4] src: mnl: retry when we hit -ENOBUFS Florian Westphal
2019-08-13 19:34   ` Pablo Neira Ayuso
2019-08-13 19:36     ` Florian Westphal
2019-08-13 19:39       ` Pablo Neira Ayuso

Netfilter-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netfilter-devel/0 netfilter-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netfilter-devel netfilter-devel/ https://lore.kernel.org/netfilter-devel \
		netfilter-devel@vger.kernel.org netfilter-devel@archiver.kernel.org
	public-inbox-index netfilter-devel


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netfilter-devel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox