Re: [PATCH net-next 3/4] net: flow_offload: mangle action at byte level

From: Vlad Buslov <vladbu@mellanox.com>
To: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: "netfilter-devel@vger.kernel.org"
	<netfilter-devel@vger.kernel.org>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"vishal@chelsio.com" <vishal@chelsio.com>,
	"jakub.kicinski@netronome.com" <jakub.kicinski@netronome.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	"jiri@resnulli.us" <jiri@resnulli.us>
Subject: Re: [PATCH net-next 3/4] net: flow_offload: mangle action at byte level
Date: Fri, 30 Aug 2019 15:28:13 +0000	[thread overview]
Message-ID: <vbfzhjqu0zf.fsf@mellanox.com> (raw)
In-Reply-To: <20190830005336.23604-4-pablo@netfilter.org>

On Fri 30 Aug 2019 at 03:53, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> The flow mangle action is originally modeled after the tc pedit action,
> this has a number of shortcomings:
>
> 1) The tc pedit offset must be set on the 32-bits boundaries. Many
>    protocol header field offsets are not aligned to 32-bits, eg. port
>    destination, port source and ethernet destination. This patch adjusts
>    the offset accordingly and trim off length in these case, so drivers get
>    an exact offset and length to the header fields.
>
> 2) The maximum mangle length is one word of 32-bits, hence you need to
>    up to four actions to mangle an IPv6 address. This patch coalesces
>    consecutive tc pedit actions into one single action so drivers can
>    configure the IPv6 mangling in one go. Ethernet address fields now
>    require one single action instead of two too.
>
> The following drivers have been updated accordingly to use this new
> mangle action layout:
>
> 1) The cxgb4 driver does not need to split protocol field matching
>    larger than one 32-bit words into multiple definitions. Instead one
>    single definition per protocol field is enough. Checking for
>    transport protocol ports is also simplified.
>
> 2) The mlx5 driver logic to disallow IPv4 ttl and IPv6 hoplimit fields
>    becomes more simple too.
>
> 3) The nfp driver uses the nfp_fl_set_helper() function to configure the
>    payload mangling. The memchr_inv() function is used to check for
>    proper initialization of the value and mask. The driver has been
>    updated to refer to the exact protocol header offsets too.
>
> As a result, this patch reduces code complexity on the driver side at
> the cost of adding ~100 LOC at the core to perform offset and length
> adjustment; and to coalesce consecutive actions.
>
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> ---

[...]

> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index e30a151d8527..e8827fa8263a 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -3289,11 +3289,128 @@ void tc_cleanup_flow_action(struct flow_action *flow_action)
>  }
>  EXPORT_SYMBOL(tc_cleanup_flow_action);
>
> +static unsigned int flow_action_mangle_type(enum pedit_cmd cmd)
> +{
> +	switch (cmd) {
> +	case TCA_PEDIT_KEY_EX_CMD_SET:
> +		return FLOW_ACTION_MANGLE;
> +	case TCA_PEDIT_KEY_EX_CMD_ADD:
> +		return FLOW_ACTION_ADD;
> +	default:
> +		WARN_ON_ONCE(1);
> +	}
> +	return 0;
> +}
> +
> +struct flow_action_mangle_ctx {
> +	u8	cmd;
> +	u8	offset;
> +	u8	htype;
> +	u8	idx;
> +	u8	val[FLOW_ACTION_MANGLE_MAXLEN];
> +	u8	mask[FLOW_ACTION_MANGLE_MAXLEN];
> +	u32	first_word;
> +	u32	last_word;
> +};
> +
> +static void flow_action_mangle_entry(struct flow_action_entry *entry,
> +				     const struct flow_action_mangle_ctx *ctx)
> +{
> +	u32 delta;
> +
> +	entry->id = ctx->cmd;
> +	entry->mangle.htype = ctx->htype;
> +	entry->mangle.offset = ctx->offset;
> +	entry->mangle.len = ctx->idx;
> +
> +	/* Adjust offset. */
> +	delta = sizeof(u32) - (fls(ctx->first_word) / BITS_PER_BYTE);
> +	entry->mangle.offset += delta;
> +
> +	/* Adjust length. */
> +	entry->mangle.len -= ((ffs(ctx->last_word) / BITS_PER_BYTE) + delta);
> +
> +	/* Copy value and mask from offset using the adjusted length. */
> +	memcpy(entry->mangle.val, &ctx->val[delta], entry->mangle.len);
> +	memcpy(entry->mangle.mask, &ctx->mask[delta], entry->mangle.len);
> +}
> +
> +static void flow_action_mangle_ctx_update(struct flow_action_mangle_ctx *ctx,
> +					  const struct tc_action *act, int k)
> +{
> +	u32 val, mask;
> +
> +	val = tcf_pedit_val(act, k);
> +	mask = ~tcf_pedit_mask(act, k);
> +
> +	memcpy(&ctx->val[ctx->idx], &val, sizeof(u32));
> +	memcpy(&ctx->mask[ctx->idx], &mask, sizeof(u32));
> +	ctx->idx += sizeof(u32);
> +}
> +
> +static void flow_action_mangle_ctx_init(struct flow_action_mangle_ctx *ctx,
> +                                        const struct tc_action *act, int k)
> +{
> +	ctx->cmd = flow_action_mangle_type(tcf_pedit_cmd(act, k));
> +	ctx->offset = tcf_pedit_offset(act, k);
> +	ctx->htype = tcf_pedit_htype(act, k);
> +	ctx->idx = 0;
> +
> +	ctx->first_word = ntohl(~tcf_pedit_mask(act, k));
> +	ctx->last_word = ctx->first_word;
> +
> +	flow_action_mangle_ctx_update(ctx, act, k);
> +}
> +
> +static int flow_action_mangle(struct flow_action *flow_action,
> +			      struct flow_action_entry *entry,
> +			      const struct tc_action *act, int j)
> +{
> +	struct flow_action_mangle_ctx ctx;
> +	int k;
> +
> +	if (tcf_pedit_cmd(act, 0) > TCA_PEDIT_KEY_EX_CMD_ADD)
> +		return -1;
> +
> +	flow_action_mangle_ctx_init(&ctx, act, 0);
> +
> +	/* Special case: one single 32-bits word. */
> +	if (tcf_pedit_nkeys(act) == 1) {
> +		flow_action_mangle_entry(entry, &ctx);
> +		return j;
> +	}
> +
> +	for (k = 1; k < tcf_pedit_nkeys(act); k++) {
> +		if (tcf_pedit_cmd(act, k) > TCA_PEDIT_KEY_EX_CMD_ADD)
> +			return -1;
> +
> +		/* Offset is contiguous and type is the same, coalesce. */
> +		if (ctx.idx < FLOW_ACTION_MANGLE_MAXLEN &&
> +		    ctx.offset + ctx.idx == tcf_pedit_offset(act, k) &&
> +		    ctx.cmd == flow_action_mangle_type(tcf_pedit_cmd(act, k))) {
> +			flow_action_mangle_ctx_update(&ctx, act, k);
> +			continue;

Hi Pablo,

With this change you coalesce multiple pedits into single
flow_action_entry, which means that resulting rule->action.num_entries
is incorrect because number of filled flow actions entries will be less
than num_actions. With this, I get mlx5 rejecting such flow_rule with
"mlx5_core: The offload action is not supported." due to trailing
unfilled flow action(s) with id==0.

> +		}
> +		ctx.last_word = ntohl(~tcf_pedit_mask(act, k - 1));
> +
> +		/* Cannot coalesce, set up this entry. */
> +		flow_action_mangle_entry(entry, &ctx);
> +
> +		flow_action_mangle_ctx_init(&ctx, act, k);
> +		entry = &flow_action->entries[++j];
> +	}
> +
> +	ctx.last_word = ntohl(~tcf_pedit_mask(act, k - 1));
> +	flow_action_mangle_entry(entry, &ctx);
> +
> +	return j;
> +}
> +
>  int tc_setup_flow_action(struct flow_action *flow_action,
>  			 const struct tcf_exts *exts, bool rtnl_held)
>  {
>  	const struct tc_action *act;
> -	int i, j, k, err = 0;
> +	int i, j, err = 0;
>
>  	if (!exts)
>  		return 0;
> @@ -3366,25 +3483,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>  		} else if (is_tcf_tunnel_release(act)) {
>  			entry->id = FLOW_ACTION_TUNNEL_DECAP;
>  		} else if (is_tcf_pedit(act)) {
> -			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
> -				switch (tcf_pedit_cmd(act, k)) {
> -				case TCA_PEDIT_KEY_EX_CMD_SET:
> -					entry->id = FLOW_ACTION_MANGLE;
> -					break;
> -				case TCA_PEDIT_KEY_EX_CMD_ADD:
> -					entry->id = FLOW_ACTION_ADD;
> -					break;
> -				default:
> -					err = -EOPNOTSUPP;
> -					goto err_out;
> -				}
> -				entry->mangle.htype = tcf_pedit_htype(act, k);
> -				entry->mangle.mask = ~tcf_pedit_mask(act, k);
> -				entry->mangle.val = tcf_pedit_val(act, k) &
> -							entry->mangle.mask;
> -				entry->mangle.offset = tcf_pedit_offset(act, k);
> -				entry = &flow_action->entries[++j];
> -			}
> +			j = flow_action_mangle(flow_action, entry, act, j);
> +			if (j < 0)
> +				goto err_out;
>  		} else if (is_tcf_csum(act)) {
>  			entry->id = FLOW_ACTION_CSUM;
>  			entry->csum_flags = tcf_csum_update_flags(act);
> @@ -3439,8 +3540,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>  			goto err_out;
>  		}
>
> -		if (!is_tcf_pedit(act))
> -			j++;
> +		j++;
>  	}
>
>  err_out: