* [PATCH net-next v2] openvswitch: add TTL decrement action
@ 2019-12-17 15:51 Matteo Croce
2019-12-17 16:30 ` Nikolay Aleksandrov
2019-12-18 3:06 ` Pravin Shelar
0 siblings, 2 replies; 8+ messages in thread
From: Matteo Croce @ 2019-12-17 15:51 UTC (permalink / raw)
To: netdev, dev
Cc: linux-kernel, Pravin B Shelar, David S. Miller, Bindiya Kurle,
Simon Horman, Ben Pfaff
New action to decrement TTL instead of setting it to a fixed value.
This action will decrement the TTL and, if the TTL has expired, either drop the
packet or execute an action passed via a nested attribute.
The default TTL expired action is to drop the packet.
Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
Tested with a corresponding change in the userspace:
# ovs-dpctl dump-flows
in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
# ping -c1 192.168.0.2 -t 42
IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
# ping -c1 192.168.0.2 -t 120
IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
# ping -c1 192.168.0.2 -t 1
#
Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
include/uapi/linux/openvswitch.h | 22 +++++++
net/openvswitch/actions.c | 71 +++++++++++++++++++++
net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
3 files changed, 198 insertions(+)
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index a87b44cd5590..b6684bc04883 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -927,6 +927,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_METER, /* u32 meter ID. */
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
+ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
@@ -939,6 +940,23 @@ enum ovs_action_attr {
};
#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+enum ovs_dec_ttl_attr {
+ OVS_DEC_TTL_ATTR_UNSPEC,
+ OVS_DEC_TTL_ATTR_ACTION_TYPE, /* Action Type u32 */
+ OVS_DEC_TTL_ATTR_ACTION, /* nested action */
+ __OVS_DEC_TTL_ATTR_MAX,
+#ifdef __KERNEL__
+ OVS_DEC_TTL_ATTR_ARG /* struct sample_arg */
+#endif
+};
+
+#ifdef __KERNEL__
+struct dec_ttl_arg {
+ u32 action_type; /* dec_ttl action type.*/
+};
+#endif
+
+#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
/* Meters. */
#define OVS_METER_FAMILY "ovs_meter"
@@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
__OVS_CT_LIMIT_ATTR_MAX
};
+enum ovs_dec_ttl_action { /*Actions supported by dec_ttl */
+ OVS_DEC_TTL_ACTION_DROP,
+ OVS_DEC_TTL_ACTION_USER_SPACE
+};
#define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
#define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4c8395462303..5329668732b1 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
+static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *fk, const struct nlattr *attr, bool last)
+{
+ struct nlattr *actions;
+ struct nlattr *dec_ttl_arg;
+ int rem = nla_len(attr);
+ const struct dec_ttl_arg *arg;
+
+ /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
+ dec_ttl_arg = nla_data(attr);
+ arg = nla_data(dec_ttl_arg);
+ actions = nla_next(dec_ttl_arg, &rem);
+
+ switch (arg->action_type) {
+ case OVS_DEC_TTL_ACTION_DROP:
+ consume_skb(skb);
+ break;
+
+ case OVS_DEC_TTL_ACTION_USER_SPACE:
+ return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
+ }
+
+ return 0;
+}
+
/* When 'last' is true, sample() should always consume the 'skb'.
* Otherwise, sample() should keep 'skb' intact regardless what
* actions are executed within sample().
@@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
nla_len(actions), last, clone_flow_key);
}
+static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ int err;
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct ipv6hdr *nh = ipv6_hdr(skb);
+
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ sizeof(*nh));
+ if (unlikely(err))
+ return err;
+
+ if (nh->hop_limit <= 1)
+ return -EHOSTUNREACH;
+
+ key->ip.ttl = --nh->hop_limit;
+ } else {
+ struct iphdr *nh = ip_hdr(skb);
+ u8 old_ttl;
+
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ sizeof(*nh));
+ if (unlikely(err))
+ return err;
+
+ nh = ip_hdr(skb);
+ if (nh->ttl <= 1)
+ return -EHOSTUNREACH;
+
+ old_ttl = nh->ttl--;
+ csum_replace2(&nh->check, htons(old_ttl << 8),
+ htons(nh->ttl << 8));
+ key->ip.ttl = nh->ttl;
+ }
+
+ return 0;
+}
+
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
@@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
}
+
+ case OVS_ACTION_ATTR_DEC_TTL:
+ err = execute_dec_ttl(skb, key);
+ if (err == -EHOSTUNREACH) {
+ err = dec_ttl(dp, skb, key, a, true);
+ return err;
+ }
+ break;
}
if (unlikely(err)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 65c2e3458ff5..a9eea2ffb8b0 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_DEC_TTL:
break;
case OVS_ACTION_ATTR_CT:
@@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return 0;
}
+static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log)
+{
+ struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
+ const struct nlattr *action_type, *action;
+ struct nlattr *a;
+ int rem, start, err;
+ struct dec_ttl_arg arg;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
+ return -EINVAL;
+
+ attrs[type] = a;
+ }
+ if (rem)
+ return -EINVAL;
+
+ action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
+ if (!action_type || nla_len(action_type) != sizeof(u32))
+ return -EINVAL;
+
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
+ if (start < 0)
+ return start;
+
+ arg.action_type = nla_get_u32(action_type);
+ err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
+ &arg, sizeof(arg), log);
+ if (err)
+ return err;
+
+ if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
+ action = attrs[OVS_DEC_TTL_ATTR_ACTION];
+ if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
+ return -EINVAL;
+
+ err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
+ vlan_tci, mpls_label_count, log);
+ if (err)
+ return err;
+ }
+
+ add_nested_action_end(*sfa, start);
+
+ return 0;
+}
+
static int validate_and_copy_clone(struct net *net,
const struct nlattr *attr,
const struct sw_flow_key *key,
@@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_METER] = sizeof(u32),
[OVS_ACTION_ATTR_CLONE] = (u32)-1,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
+ [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
break;
}
+ case OVS_ACTION_ATTR_DEC_TTL:
+ err = validate_and_copy_dec_ttl(net, a, key, sfa,
+ eth_type, vlan_tci,
+ mpls_label_count, log);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
@@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
return err;
}
+static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
+{
+ struct nlattr *start, *ac_start = NULL, *dec_ttl;
+ int err = 0, rem = nla_len(att);
+ const struct dec_ttl_arg *arg;
+ struct nlattr *actions;
+
+ start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
+ if (!start)
+ return -EMSGSIZE;
+
+ dec_ttl = nla_data(att);
+ arg = nla_data(dec_ttl);
+ actions = nla_next(dec_ttl, &rem);
+
+ if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
+ nla_nest_cancel(skb, start);
+ return -EMSGSIZE;
+ }
+
+ if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
+ ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
+ if (!ac_start) {
+ nla_nest_cancel(skb, ac_start);
+ nla_nest_cancel(skb, start);
+ return -EMSGSIZE;
+ }
+ err = ovs_nla_put_actions(actions, rem, skb);
+ nla_nest_end(skb, ac_start);
+ }
+ nla_nest_end(skb, start);
+
+ return err;
+}
+
static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
const struct nlattr *ovs_key = nla_data(a);
@@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
return err;
break;
+ case OVS_ACTION_ATTR_DEC_TTL:
+ err = dec_ttl_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;
--
2.23.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH net-next v2] openvswitch: add TTL decrement action
2019-12-17 15:51 [PATCH net-next v2] openvswitch: add TTL decrement action Matteo Croce
@ 2019-12-17 16:30 ` Nikolay Aleksandrov
2019-12-20 12:36 ` Matteo Croce
2019-12-18 3:06 ` Pravin Shelar
1 sibling, 1 reply; 8+ messages in thread
From: Nikolay Aleksandrov @ 2019-12-17 16:30 UTC (permalink / raw)
To: Matteo Croce, netdev, dev
Cc: linux-kernel, Pravin B Shelar, David S. Miller, Bindiya Kurle,
Simon Horman, Ben Pfaff
On 17/12/2019 17:51, Matteo Croce wrote:
> New action to decrement TTL instead of setting it to a fixed value.
> This action will decrement the TTL and, in case of expired TTL, drop it
> or execute an action passed via a nested attribute.
> The default TTL expired action is to drop the packet.
>
> Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
>
> Tested with a corresponding change in the userspace:
>
> # ovs-dpctl dump-flows
> in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
>
> # ping -c1 192.168.0.2 -t 42
> IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> # ping -c1 192.168.0.2 -t 120
> IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> # ping -c1 192.168.0.2 -t 1
> #
>
> Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Matteo Croce <mcroce@redhat.com>
> ---
> include/uapi/linux/openvswitch.h | 22 +++++++
> net/openvswitch/actions.c | 71 +++++++++++++++++++++
> net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
> 3 files changed, 198 insertions(+)
>
Hi Matteo,
[snip]
> +}
> +
> /* When 'last' is true, sample() should always consume the 'skb'.
> * Otherwise, sample() should keep 'skb' intact regardless what
> * actions are executed within sample().
> @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> nla_len(actions), last, clone_flow_key);
> }
>
> +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> + int err;
> +
> + if (skb->protocol == htons(ETH_P_IPV6)) {
> + struct ipv6hdr *nh = ipv6_hdr(skb);
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + sizeof(*nh));
skb_ensure_writable() calls pskb_may_pull(), which may reallocate the skb data, so nh might become invalid.
The IPv4 version below seems OK, as the pointer is reloaded after the call.
One question, as I don't know OVS that well: can this action be called only with
skb->protocol == ETH_P_IP or ETH_P_IPV6? I.e., are we sure that if it's not v6, then it must be v4?
Thanks,
Nik
> + if (unlikely(err))
> + return err;
> +
> + if (nh->hop_limit <= 1)
> + return -EHOSTUNREACH;
> +
> + key->ip.ttl = --nh->hop_limit;
> + } else {
> + struct iphdr *nh = ip_hdr(skb);
> + u8 old_ttl;
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + sizeof(*nh));
> + if (unlikely(err))
> + return err;
> +
> + nh = ip_hdr(skb);
> + if (nh->ttl <= 1)
> + return -EHOSTUNREACH;
> +
> + old_ttl = nh->ttl--;
> + csum_replace2(&nh->check, htons(old_ttl << 8),
> + htons(nh->ttl << 8));
> + key->ip.ttl = nh->ttl;
> + }
> +
> + return 0;
> +}
> +
> /* Execute a list of actions against 'skb'. */
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> struct sw_flow_key *key,
> @@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>
> break;
> }
> +
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = execute_dec_ttl(skb, key);
> + if (err == -EHOSTUNREACH) {
> + err = dec_ttl(dp, skb, key, a, true);
> + return err;
> + }
> + break;
> }
>
> if (unlikely(err)) {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 65c2e3458ff5..a9eea2ffb8b0 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> case OVS_ACTION_ATTR_RECIRC:
> case OVS_ACTION_ATTR_TRUNC:
> case OVS_ACTION_ATTR_USERSPACE:
> + case OVS_ACTION_ATTR_DEC_TTL:
> break;
>
> case OVS_ACTION_ATTR_CT:
> @@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> return 0;
> }
>
> +static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
> + const struct sw_flow_key *key,
> + struct sw_flow_actions **sfa,
> + __be16 eth_type, __be16 vlan_tci,
> + u32 mpls_label_count, bool log)
> +{
> + struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
> + const struct nlattr *action_type, *action;
> + struct nlattr *a;
> + int rem, start, err;
> + struct dec_ttl_arg arg;
> +
> + nla_for_each_nested(a, attr, rem) {
> + int type = nla_type(a);
> +
> + if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
> + return -EINVAL;
> +
> + attrs[type] = a;
> + }
> + if (rem)
> + return -EINVAL;
> +
> + action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
> + if (!action_type || nla_len(action_type) != sizeof(u32))
> + return -EINVAL;
> +
> + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
> + if (start < 0)
> + return start;
> +
> + arg.action_type = nla_get_u32(action_type);
> + err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
> + &arg, sizeof(arg), log);
> + if (err)
> + return err;
> +
> + if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> + action = attrs[OVS_DEC_TTL_ATTR_ACTION];
> + if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
> + return -EINVAL;
> +
> + err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
> + vlan_tci, mpls_label_count, log);
> + if (err)
> + return err;
> + }
> +
> + add_nested_action_end(*sfa, start);
> +
> + return 0;
> +}
> +
> static int validate_and_copy_clone(struct net *net,
> const struct nlattr *attr,
> const struct sw_flow_key *key,
> @@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> [OVS_ACTION_ATTR_METER] = sizeof(u32),
> [OVS_ACTION_ATTR_CLONE] = (u32)-1,
> [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> + [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
> };
> const struct ovs_action_push_vlan *vlan;
> int type = nla_type(a);
> @@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> break;
> }
>
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = validate_and_copy_dec_ttl(net, a, key, sfa,
> + eth_type, vlan_tci,
> + mpls_label_count, log);
> + if (err)
> + return err;
> + skip_copy = true;
> + break;
> +
> default:
> OVS_NLERR(log, "Unknown Action type %d", type);
> return -EINVAL;
> @@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
> return err;
> }
>
> +static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
> +{
> + struct nlattr *start, *ac_start = NULL, *dec_ttl;
> + int err = 0, rem = nla_len(att);
> + const struct dec_ttl_arg *arg;
> + struct nlattr *actions;
> +
> + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
> + if (!start)
> + return -EMSGSIZE;
> +
> + dec_ttl = nla_data(att);
> + arg = nla_data(dec_ttl);
> + actions = nla_next(dec_ttl, &rem);
> +
> + if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
> + nla_nest_cancel(skb, start);
> + return -EMSGSIZE;
> + }
> +
> + if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> + ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
> + if (!ac_start) {
> + nla_nest_cancel(skb, ac_start);
> + nla_nest_cancel(skb, start);
> + return -EMSGSIZE;
> + }
> + err = ovs_nla_put_actions(actions, rem, skb);
> + nla_nest_end(skb, ac_start);
> + }
> + nla_nest_end(skb, start);
> +
> + return err;
> +}
> +
> static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
> {
> const struct nlattr *ovs_key = nla_data(a);
> @@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
> return err;
> break;
>
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = dec_ttl_action_to_attr(a, skb);
> + if (err)
> + return err;
> + break;
> +
> default:
> if (nla_put(skb, type, nla_len(a), nla_data(a)))
> return -EMSGSIZE;
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next v2] openvswitch: add TTL decrement action
2019-12-17 15:51 [PATCH net-next v2] openvswitch: add TTL decrement action Matteo Croce
2019-12-17 16:30 ` Nikolay Aleksandrov
@ 2019-12-18 3:06 ` Pravin Shelar
2019-12-19 16:36 ` Matteo Croce
1 sibling, 1 reply; 8+ messages in thread
From: Pravin Shelar @ 2019-12-18 3:06 UTC (permalink / raw)
To: Matteo Croce
Cc: Linux Kernel Network Developers, ovs dev, linux-kernel,
David S. Miller, Bindiya Kurle, Simon Horman, Ben Pfaff
On Tue, Dec 17, 2019 at 7:51 AM Matteo Croce <mcroce@redhat.com> wrote:
>
> New action to decrement TTL instead of setting it to a fixed value.
> This action will decrement the TTL and, in case of expired TTL, drop it
> or execute an action passed via a nested attribute.
> The default TTL expired action is to drop the packet.
>
> Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
>
> Tested with a corresponding change in the userspace:
>
> # ovs-dpctl dump-flows
> in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
>
> # ping -c1 192.168.0.2 -t 42
> IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> # ping -c1 192.168.0.2 -t 120
> IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> # ping -c1 192.168.0.2 -t 1
> #
>
> Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Matteo Croce <mcroce@redhat.com>
> ---
> include/uapi/linux/openvswitch.h | 22 +++++++
> net/openvswitch/actions.c | 71 +++++++++++++++++++++
> net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
> 3 files changed, 198 insertions(+)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index a87b44cd5590..b6684bc04883 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -927,6 +927,7 @@ enum ovs_action_attr {
> OVS_ACTION_ATTR_METER, /* u32 meter ID. */
> OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
> OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> + OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
>
> __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
> * from userspace. */
> @@ -939,6 +940,23 @@ enum ovs_action_attr {
> };
>
> #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
> +enum ovs_dec_ttl_attr {
> + OVS_DEC_TTL_ATTR_UNSPEC,
> + OVS_DEC_TTL_ATTR_ACTION_TYPE, /* Action Type u32 */
> + OVS_DEC_TTL_ATTR_ACTION, /* nested action */
> + __OVS_DEC_TTL_ATTR_MAX,
> +#ifdef __KERNEL__
> + OVS_DEC_TTL_ATTR_ARG /* struct sample_arg */
> +#endif
> +};
> +
I do not see the need for a type attribute or OVS_DEC_TTL_ACTION_DROP; if there are no
nested actions, the datapath can drop the packet.
> +#ifdef __KERNEL__
> +struct dec_ttl_arg {
> + u32 action_type; /* dec_ttl action type.*/
> +};
> +#endif
> +
> +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
>
> /* Meters. */
> #define OVS_METER_FAMILY "ovs_meter"
> @@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
> __OVS_CT_LIMIT_ATTR_MAX
> };
>
> +enum ovs_dec_ttl_action { /*Actions supported by dec_ttl */
> + OVS_DEC_TTL_ACTION_DROP,
> + OVS_DEC_TTL_ACTION_USER_SPACE
> +};
> #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
>
> #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 4c8395462303..5329668732b1 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
> return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
> }
>
> +static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
> + struct sw_flow_key *fk, const struct nlattr *attr, bool last)
> +{
> + struct nlattr *actions;
> + struct nlattr *dec_ttl_arg;
> + int rem = nla_len(attr);
> + const struct dec_ttl_arg *arg;
> +
> + /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
> + dec_ttl_arg = nla_data(attr);
> + arg = nla_data(dec_ttl_arg);
> + actions = nla_next(dec_ttl_arg, &rem);
> +
> + switch (arg->action_type) {
> + case OVS_DEC_TTL_ACTION_DROP:
> + consume_skb(skb);
> + break;
> +
> + case OVS_DEC_TTL_ACTION_USER_SPACE:
> + return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
> + }
> +
> + return 0;
> +}
> +
> /* When 'last' is true, sample() should always consume the 'skb'.
> * Otherwise, sample() should keep 'skb' intact regardless what
> * actions are executed within sample().
> @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> nla_len(actions), last, clone_flow_key);
> }
>
> +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> + int err;
> +
> + if (skb->protocol == htons(ETH_P_IPV6)) {
> + struct ipv6hdr *nh = ipv6_hdr(skb);
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + sizeof(*nh));
There is no need to initialize 'nh' here; just use sizeof(struct ipv6hdr) to get the size.
> + if (unlikely(err))
> + return err;
> +
> + if (nh->hop_limit <= 1)
> + return -EHOSTUNREACH;
> +
> + key->ip.ttl = --nh->hop_limit;
> + } else {
> + struct iphdr *nh = ip_hdr(skb);
> + u8 old_ttl;
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + sizeof(*nh));
Same as above: use sizeof(struct iphdr) rather than initializing 'nh' up front.
> + if (unlikely(err))
> + return err;
> +
> + nh = ip_hdr(skb);
> + if (nh->ttl <= 1)
> + return -EHOSTUNREACH;
> +
> + old_ttl = nh->ttl--;
> + csum_replace2(&nh->check, htons(old_ttl << 8),
> + htons(nh->ttl << 8));
> + key->ip.ttl = nh->ttl;
> + }
> +
> + return 0;
> +}
> +
> /* Execute a list of actions against 'skb'. */
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> struct sw_flow_key *key,
> @@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>
> break;
> }
> +
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = execute_dec_ttl(skb, key);
> + if (err == -EHOSTUNREACH) {
Can you use unlikely() here?
> + err = dec_ttl(dp, skb, key, a, true);
> + return err;
> + }
> + break;
> }
>
> if (unlikely(err)) {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 65c2e3458ff5..a9eea2ffb8b0 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> case OVS_ACTION_ATTR_RECIRC:
> case OVS_ACTION_ATTR_TRUNC:
> case OVS_ACTION_ATTR_USERSPACE:
> + case OVS_ACTION_ATTR_DEC_TTL:
> break;
>
> case OVS_ACTION_ATTR_CT:
> @@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> return 0;
> }
>
> +static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
> + const struct sw_flow_key *key,
> + struct sw_flow_actions **sfa,
> + __be16 eth_type, __be16 vlan_tci,
> + u32 mpls_label_count, bool log)
> +{
> + struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
> + const struct nlattr *action_type, *action;
> + struct nlattr *a;
> + int rem, start, err;
> + struct dec_ttl_arg arg;
> +
Here we need to validate that eth_type is IPv4 or IPv6.
> + nla_for_each_nested(a, attr, rem) {
> + int type = nla_type(a);
> +
> + if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
> + return -EINVAL;
> +
> + attrs[type] = a;
> + }
> + if (rem)
> + return -EINVAL;
> +
> + action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
> + if (!action_type || nla_len(action_type) != sizeof(u32))
> + return -EINVAL;
> +
> + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
> + if (start < 0)
> + return start;
> +
> + arg.action_type = nla_get_u32(action_type);
> + err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
> + &arg, sizeof(arg), log);
> + if (err)
> + return err;
> +
> + if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> + action = attrs[OVS_DEC_TTL_ATTR_ACTION];
> + if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
> + return -EINVAL;
> +
> + err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
> + vlan_tci, mpls_label_count, log);
> + if (err)
> + return err;
> + }
> +
> + add_nested_action_end(*sfa, start);
> +
> + return 0;
> +}
> +
> static int validate_and_copy_clone(struct net *net,
> const struct nlattr *attr,
> const struct sw_flow_key *key,
> @@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> [OVS_ACTION_ATTR_METER] = sizeof(u32),
> [OVS_ACTION_ATTR_CLONE] = (u32)-1,
> [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> + [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
> };
> const struct ovs_action_push_vlan *vlan;
> int type = nla_type(a);
> @@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> break;
> }
>
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = validate_and_copy_dec_ttl(net, a, key, sfa,
> + eth_type, vlan_tci,
> + mpls_label_count, log);
> + if (err)
> + return err;
> + skip_copy = true;
> + break;
> +
> default:
> OVS_NLERR(log, "Unknown Action type %d", type);
> return -EINVAL;
> @@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
> return err;
> }
>
> +static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
> +{
> + struct nlattr *start, *ac_start = NULL, *dec_ttl;
> + int err = 0, rem = nla_len(att);
> + const struct dec_ttl_arg *arg;
> + struct nlattr *actions;
> +
> + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
> + if (!start)
> + return -EMSGSIZE;
> +
> + dec_ttl = nla_data(att);
> + arg = nla_data(dec_ttl);
> + actions = nla_next(dec_ttl, &rem);
> +
> + if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
> + nla_nest_cancel(skb, start);
> + return -EMSGSIZE;
> + }
> +
> + if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> + ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
> + if (!ac_start) {
> + nla_nest_cancel(skb, ac_start);
> + nla_nest_cancel(skb, start);
> + return -EMSGSIZE;
> + }
> + err = ovs_nla_put_actions(actions, rem, skb);
> + nla_nest_end(skb, ac_start);
> + }
> + nla_nest_end(skb, start);
> +
> + return err;
> +}
> +
> static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
> {
> const struct nlattr *ovs_key = nla_data(a);
> @@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
> return err;
> break;
>
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = dec_ttl_action_to_attr(a, skb);
> + if (err)
> + return err;
> + break;
> +
> default:
> if (nla_put(skb, type, nla_len(a), nla_data(a)))
> return -EMSGSIZE;
> --
> 2.23.0
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next v2] openvswitch: add TTL decrement action
2019-12-18 3:06 ` Pravin Shelar
@ 2019-12-19 16:36 ` Matteo Croce
2019-12-20 1:05 ` Pravin Shelar
0 siblings, 1 reply; 8+ messages in thread
From: Matteo Croce @ 2019-12-19 16:36 UTC (permalink / raw)
To: Pravin Shelar
Cc: Linux Kernel Network Developers, ovs dev, LKML, David S. Miller,
Bindiya Kurle, Simon Horman, Ben Pfaff
On Wed, Dec 18, 2019 at 4:06 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Tue, Dec 17, 2019 at 7:51 AM Matteo Croce <mcroce@redhat.com> wrote:
> >
> > New action to decrement TTL instead of setting it to a fixed value.
> > This action will decrement the TTL and, in case of expired TTL, drop it
> > or execute an action passed via a nested attribute.
> > The default TTL expired action is to drop the packet.
> >
> > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> >
> > Tested with a corresponding change in the userspace:
> >
> > # ovs-dpctl dump-flows
> > in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> > in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> >
> > # ping -c1 192.168.0.2 -t 42
> > IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> > # ping -c1 192.168.0.2 -t 120
> > IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> > # ping -c1 192.168.0.2 -t 1
> > #
> >
> > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > ---
> > include/uapi/linux/openvswitch.h | 22 +++++++
> > net/openvswitch/actions.c | 71 +++++++++++++++++++++
> > net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
> > 3 files changed, 198 insertions(+)
> >
> > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> > index a87b44cd5590..b6684bc04883 100644
> > --- a/include/uapi/linux/openvswitch.h
> > +++ b/include/uapi/linux/openvswitch.h
> > @@ -927,6 +927,7 @@ enum ovs_action_attr {
> > OVS_ACTION_ATTR_METER, /* u32 meter ID. */
> > OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
> > OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> > + OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
> >
> > __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
> > * from userspace. */
> > @@ -939,6 +940,23 @@ enum ovs_action_attr {
> > };
> >
> > #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
> > +enum ovs_dec_ttl_attr {
> > + OVS_DEC_TTL_ATTR_UNSPEC,
> > + OVS_DEC_TTL_ATTR_ACTION_TYPE, /* Action Type u32 */
> > + OVS_DEC_TTL_ATTR_ACTION, /* nested action */
> > + __OVS_DEC_TTL_ATTR_MAX,
> > +#ifdef __KERNEL__
> > + OVS_DEC_TTL_ATTR_ARG /* struct sample_arg */
> > +#endif
> > +};
> > +
>
> I do not see need for type or OVS_DEC_TTL_ACTION_DROP, if there are no
> nested action the datapath can drop the packet.
>
> > +#ifdef __KERNEL__
> > +struct dec_ttl_arg {
> > + u32 action_type; /* dec_ttl action type.*/
> > +};
> > +#endif
> > +
> > +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
> >
> > /* Meters. */
> > #define OVS_METER_FAMILY "ovs_meter"
> > @@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
> > __OVS_CT_LIMIT_ATTR_MAX
> > };
> >
> > +enum ovs_dec_ttl_action { /*Actions supported by dec_ttl */
> > + OVS_DEC_TTL_ACTION_DROP,
> > + OVS_DEC_TTL_ACTION_USER_SPACE
> > +};
> > #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
> >
> > #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
> > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > index 4c8395462303..5329668732b1 100644
> > --- a/net/openvswitch/actions.c
> > +++ b/net/openvswitch/actions.c
> > @@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
> > return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
> > }
> >
> > +static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
> > + struct sw_flow_key *fk, const struct nlattr *attr, bool last)
> > +{
> > + struct nlattr *actions;
> > + struct nlattr *dec_ttl_arg;
> > + int rem = nla_len(attr);
> > + const struct dec_ttl_arg *arg;
> > +
> > + /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
> > + dec_ttl_arg = nla_data(attr);
> > + arg = nla_data(dec_ttl_arg);
> > + actions = nla_next(dec_ttl_arg, &rem);
> > +
> > + switch (arg->action_type) {
> > + case OVS_DEC_TTL_ACTION_DROP:
> > + consume_skb(skb);
> > + break;
> > +
> > + case OVS_DEC_TTL_ACTION_USER_SPACE:
> > + return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > /* When 'last' is true, sample() should always consume the 'skb'.
> > * Otherwise, sample() should keep 'skb' intact regardless what
> > * actions are executed within sample().
> > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> > nla_len(actions), last, clone_flow_key);
> > }
> >
> > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > +{
> > + int err;
> > +
> > + if (skb->protocol == htons(ETH_P_IPV6)) {
> > + struct ipv6hdr *nh = ipv6_hdr(skb);
> > +
> > + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > + sizeof(*nh));
> There is no need to initialize 'nh', just use 'struct ipv6hdr' to get the size.
But I have to set it later in order to read nh->hop_limit.
Do you mean to assign it before the skb_ensure_writable() check?
What is the difference between sizeof(*nh) and sizeof(struct ipv6hdr)?
The former will still work after a refactor.
> > + if (unlikely(err))
> > + return err;
> > +
> > + if (nh->hop_limit <= 1)
> > + return -EHOSTUNREACH;
> > +
> > + key->ip.ttl = --nh->hop_limit;
> > + } else {
> > + struct iphdr *nh = ip_hdr(skb);
> > + u8 old_ttl;
> > +
> > + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > + sizeof(*nh));
> same as above.
> > + if (unlikely(err))
> > + return err;
> > +
> > + nh = ip_hdr(skb);
> > + if (nh->ttl <= 1)
> > + return -EHOSTUNREACH;
> > +
> > + old_ttl = nh->ttl--;
> > + csum_replace2(&nh->check, htons(old_ttl << 8),
> > + htons(nh->ttl << 8));
> > + key->ip.ttl = nh->ttl;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > /* Execute a list of actions against 'skb'. */
> > static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> > struct sw_flow_key *key,
> > @@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> >
> > break;
> > }
> > +
> > + case OVS_ACTION_ATTR_DEC_TTL:
> > + err = execute_dec_ttl(skb, key);
> > + if (err == -EHOSTUNREACH) {
> Can you use unlikely().
>
sure
> > + err = dec_ttl(dp, skb, key, a, true);
> > + return err;
> > + }
> > + break;
> > }
> >
> > if (unlikely(err)) {
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index 65c2e3458ff5..a9eea2ffb8b0 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> > @@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> > case OVS_ACTION_ATTR_RECIRC:
> > case OVS_ACTION_ATTR_TRUNC:
> > case OVS_ACTION_ATTR_USERSPACE:
> > + case OVS_ACTION_ATTR_DEC_TTL:
> > break;
> >
> > case OVS_ACTION_ATTR_CT:
> > @@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> > return 0;
> > }
> >
> > +static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
> > + const struct sw_flow_key *key,
> > + struct sw_flow_actions **sfa,
> > + __be16 eth_type, __be16 vlan_tci,
> > + u32 mpls_label_count, bool log)
> > +{
> > + struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
> > + const struct nlattr *action_type, *action;
> > + struct nlattr *a;
> > + int rem, start, err;
> > + struct dec_ttl_arg arg;
> > +
> Here we need to validate if eth_type is IPv4 or IPv6.
>
>
Check for ETH_P_IP or ETH_P_IPV6, and return -EINVAL otherwise?
>
> > + nla_for_each_nested(a, attr, rem) {
> > + int type = nla_type(a);
> > +
> > + if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
> > + return -EINVAL;
> > +
> > + attrs[type] = a;
> > + }
> > + if (rem)
> > + return -EINVAL;
> > +
> > + action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
> > + if (!action_type || nla_len(action_type) != sizeof(u32))
> > + return -EINVAL;
> > +
> > + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
> > + if (start < 0)
> > + return start;
> > +
> > + arg.action_type = nla_get_u32(action_type);
> > + err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
> > + &arg, sizeof(arg), log);
> > + if (err)
> > + return err;
> > +
> > + if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> > + action = attrs[OVS_DEC_TTL_ATTR_ACTION];
> > + if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
> > + return -EINVAL;
> > +
> > + err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
> > + vlan_tci, mpls_label_count, log);
> > + if (err)
> > + return err;
> > + }
> > +
> > + add_nested_action_end(*sfa, start);
> > +
> > + return 0;
> > +}
> > +
> > static int validate_and_copy_clone(struct net *net,
> > const struct nlattr *attr,
> > const struct sw_flow_key *key,
> > @@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> > [OVS_ACTION_ATTR_METER] = sizeof(u32),
> > [OVS_ACTION_ATTR_CLONE] = (u32)-1,
> > [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> > + [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
> > };
> > const struct ovs_action_push_vlan *vlan;
> > int type = nla_type(a);
> > @@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> > break;
> > }
> >
> > + case OVS_ACTION_ATTR_DEC_TTL:
> > + err = validate_and_copy_dec_ttl(net, a, key, sfa,
> > + eth_type, vlan_tci,
> > + mpls_label_count, log);
> > + if (err)
> > + return err;
> > + skip_copy = true;
> > + break;
> > +
> > default:
> > OVS_NLERR(log, "Unknown Action type %d", type);
> > return -EINVAL;
> > @@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
> > return err;
> > }
> >
> > +static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
> > +{
> > + struct nlattr *start, *ac_start = NULL, *dec_ttl;
> > + int err = 0, rem = nla_len(att);
> > + const struct dec_ttl_arg *arg;
> > + struct nlattr *actions;
> > +
> > + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
> > + if (!start)
> > + return -EMSGSIZE;
> > +
> > + dec_ttl = nla_data(att);
> > + arg = nla_data(dec_ttl);
> > + actions = nla_next(dec_ttl, &rem);
> > +
> > + if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
> > + nla_nest_cancel(skb, start);
> > + return -EMSGSIZE;
> > + }
> > +
> > + if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> > + ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
> > + if (!ac_start) {
> > + nla_nest_cancel(skb, ac_start);
> > + nla_nest_cancel(skb, start);
> > + return -EMSGSIZE;
> > + }
> > + err = ovs_nla_put_actions(actions, rem, skb);
> > + nla_nest_end(skb, ac_start);
> > + }
> > + nla_nest_end(skb, start);
> > +
> > + return err;
> > +}
> > +
> > static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
> > {
> > const struct nlattr *ovs_key = nla_data(a);
> > @@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
> > return err;
> > break;
> >
> > + case OVS_ACTION_ATTR_DEC_TTL:
> > + err = dec_ttl_action_to_attr(a, skb);
> > + if (err)
> > + return err;
> > + break;
> > +
> > default:
> > if (nla_put(skb, type, nla_len(a), nla_data(a)))
> > return -EMSGSIZE;
> > --
> > 2.23.0
> >
>
--
Matteo Croce
per aspera ad upstream
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next v2] openvswitch: add TTL decrement action
2019-12-19 16:36 ` Matteo Croce
@ 2019-12-20 1:05 ` Pravin Shelar
0 siblings, 0 replies; 8+ messages in thread
From: Pravin Shelar @ 2019-12-20 1:05 UTC (permalink / raw)
To: Matteo Croce
Cc: Linux Kernel Network Developers, ovs dev, LKML, David S. Miller,
Bindiya Kurle, Simon Horman, Ben Pfaff
On Thu, Dec 19, 2019 at 8:36 AM Matteo Croce <mcroce@redhat.com> wrote:
>
> On Wed, Dec 18, 2019 at 4:06 AM Pravin Shelar <pshelar@ovn.org> wrote:
> >
> > On Tue, Dec 17, 2019 at 7:51 AM Matteo Croce <mcroce@redhat.com> wrote:
> > >
> > > New action to decrement TTL instead of setting it to a fixed value.
> > > This action will decrement the TTL and, in case of expired TTL, drop it
> > > or execute an action passed via a nested attribute.
> > > The default TTL expired action is to drop the packet.
> > >
> > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> > >
> > > Tested with a corresponding change in the userspace:
> > >
> > > # ovs-dpctl dump-flows
> > > in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > > in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > > in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> > > in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> > >
> > > # ping -c1 192.168.0.2 -t 42
> > > IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> > > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> > > # ping -c1 192.168.0.2 -t 120
> > > IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> > > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> > > # ping -c1 192.168.0.2 -t 1
> > > #
> > >
> > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > > ---
> > > include/uapi/linux/openvswitch.h | 22 +++++++
> > > net/openvswitch/actions.c | 71 +++++++++++++++++++++
> > > net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
> > > 3 files changed, 198 insertions(+)
> > >
> > > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> > > index a87b44cd5590..b6684bc04883 100644
> > > --- a/include/uapi/linux/openvswitch.h
> > > +++ b/include/uapi/linux/openvswitch.h
> > > @@ -927,6 +927,7 @@ enum ovs_action_attr {
> > > OVS_ACTION_ATTR_METER, /* u32 meter ID. */
> > > OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
> > > OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> > > + OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
> > >
> > > __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
> > > * from userspace. */
> > > @@ -939,6 +940,23 @@ enum ovs_action_attr {
> > > };
> > >
> > > #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
> > > +enum ovs_dec_ttl_attr {
> > > + OVS_DEC_TTL_ATTR_UNSPEC,
> > > + OVS_DEC_TTL_ATTR_ACTION_TYPE, /* Action Type u32 */
> > > + OVS_DEC_TTL_ATTR_ACTION, /* nested action */
> > > + __OVS_DEC_TTL_ATTR_MAX,
> > > +#ifdef __KERNEL__
> > > + OVS_DEC_TTL_ATTR_ARG /* struct sample_arg */
> > > +#endif
> > > +};
> > > +
> >
> > I do not see need for type or OVS_DEC_TTL_ACTION_DROP, if there are no
> > nested action the datapath can drop the packet.
> >
> > > +#ifdef __KERNEL__
> > > +struct dec_ttl_arg {
> > > + u32 action_type; /* dec_ttl action type.*/
> > > +};
> > > +#endif
> > > +
> > > +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
> > >
> > > /* Meters. */
> > > #define OVS_METER_FAMILY "ovs_meter"
> > > @@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
> > > __OVS_CT_LIMIT_ATTR_MAX
> > > };
> > >
> > > +enum ovs_dec_ttl_action { /*Actions supported by dec_ttl */
> > > + OVS_DEC_TTL_ACTION_DROP,
> > > + OVS_DEC_TTL_ACTION_USER_SPACE
> > > +};
> > > #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
> > >
> > > #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
> > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > > index 4c8395462303..5329668732b1 100644
> > > --- a/net/openvswitch/actions.c
> > > +++ b/net/openvswitch/actions.c
> > > @@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
> > > return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
> > > }
> > >
> > > +static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
> > > + struct sw_flow_key *fk, const struct nlattr *attr, bool last)
> > > +{
> > > + struct nlattr *actions;
> > > + struct nlattr *dec_ttl_arg;
> > > + int rem = nla_len(attr);
> > > + const struct dec_ttl_arg *arg;
> > > +
> > > + /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
> > > + dec_ttl_arg = nla_data(attr);
> > > + arg = nla_data(dec_ttl_arg);
> > > + actions = nla_next(dec_ttl_arg, &rem);
> > > +
> > > + switch (arg->action_type) {
> > > + case OVS_DEC_TTL_ACTION_DROP:
> > > + consume_skb(skb);
> > > + break;
> > > +
> > > + case OVS_DEC_TTL_ACTION_USER_SPACE:
> > > + return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > /* When 'last' is true, sample() should always consume the 'skb'.
> > > * Otherwise, sample() should keep 'skb' intact regardless what
> > > * actions are executed within sample().
> > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> > > nla_len(actions), last, clone_flow_key);
> > > }
> > >
> > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > > +{
> > > + int err;
> > > +
> > > + if (skb->protocol == htons(ETH_P_IPV6)) {
> > > + struct ipv6hdr *nh = ipv6_hdr(skb);
> > > +
> > > + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > > + sizeof(*nh));
> > There is no need to initialize 'nh', just use 'struct ipv6hdr' to get the size.
>
> But I have to set it later to have nh->hop_limit.
> Do you mean to assign it before the skb_ensure_writable check?
> What differs sizeof(*nh) and sizeof(struct ipv6hdr)? The former will
> work also after a refactor.
>
I meant you can initialize it after the skb_ensure_writable() call, to
avoid having to refresh the pointer after that call.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next v2] openvswitch: add TTL decrement action
2019-12-17 16:30 ` Nikolay Aleksandrov
@ 2019-12-20 12:36 ` Matteo Croce
2019-12-24 8:41 ` [ovs-dev] " Tonghao Zhang
0 siblings, 1 reply; 8+ messages in thread
From: Matteo Croce @ 2019-12-20 12:36 UTC (permalink / raw)
To: Nikolay Aleksandrov
Cc: netdev, ovs dev, LKML, Pravin B Shelar, David S. Miller,
Bindiya Kurle, Simon Horman, Ben Pfaff
On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
<nikolay@cumulusnetworks.com> wrote:
>
> On 17/12/2019 17:51, Matteo Croce wrote:
> > New action to decrement TTL instead of setting it to a fixed value.
> > This action will decrement the TTL and, in case of expired TTL, drop it
> > or execute an action passed via a nested attribute.
> > The default TTL expired action is to drop the packet.
> >
> > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> >
> > Tested with a corresponding change in the userspace:
> >
> > # ovs-dpctl dump-flows
> > in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> > in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> >
> > # ping -c1 192.168.0.2 -t 42
> > IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> > # ping -c1 192.168.0.2 -t 120
> > IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> > # ping -c1 192.168.0.2 -t 1
> > #
> >
> > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > ---
> > include/uapi/linux/openvswitch.h | 22 +++++++
> > net/openvswitch/actions.c | 71 +++++++++++++++++++++
> > net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
> > 3 files changed, 198 insertions(+)
> >
>
> Hi Matteo,
>
> [snip]
> > +}
> > +
> > /* When 'last' is true, sample() should always consume the 'skb'.
> > * Otherwise, sample() should keep 'skb' intact regardless what
> > * actions are executed within sample().
> > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> > nla_len(actions), last, clone_flow_key);
> > }
> >
> > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > +{
> > + int err;
> > +
> > + if (skb->protocol == htons(ETH_P_IPV6)) {
> > + struct ipv6hdr *nh = ipv6_hdr(skb);
> > +
> > + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > + sizeof(*nh));
>
> skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh might become invalid.
> It seems the IPv4 version below is ok as the ptr is reloaded.
>
Right
> One q as I don't know ovs that much - can this action be called only with
> skb->protocol == ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not v6, then it must be v4 ?
>
I'm adding a check in validate_and_copy_dec_ttl() so only IPv4/IPv6
packets will pass.
Thanks,
--
Matteo Croce
per aspera ad upstream
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [ovs-dev] [PATCH net-next v2] openvswitch: add TTL decrement action
2019-12-20 12:36 ` Matteo Croce
@ 2019-12-24 8:41 ` Tonghao Zhang
[not found] ` <CAEPJBmo5ju_2+XdmOEscb_bWL6+qZ72ewk1LTdmiHEgxeE5+VA@mail.gmail.com>
0 siblings, 1 reply; 8+ messages in thread
From: Tonghao Zhang @ 2019-12-24 8:41 UTC (permalink / raw)
To: Matteo Croce
Cc: Nikolay Aleksandrov, ovs dev, netdev, LKML, Bindiya Kurle,
Simon Horman, David S. Miller
Hi Matteo,
Do you have plans to implement the TTL decrement action in the userspace
datapath (with DPDK)?
I am doing some research on offloading the TTL decrement action, and
may send a patch for a TTL decrement offload action
using DPDK rte_flow.
On Fri, Dec 20, 2019 at 8:37 PM Matteo Croce <mcroce@redhat.com> wrote:
>
> On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
> <nikolay@cumulusnetworks.com> wrote:
> >
> > On 17/12/2019 17:51, Matteo Croce wrote:
> > > New action to decrement TTL instead of setting it to a fixed value.
> > > This action will decrement the TTL and, in case of expired TTL, drop it
> > > or execute an action passed via a nested attribute.
> > > The default TTL expired action is to drop the packet.
> > >
> > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> > >
> > > Tested with a corresponding change in the userspace:
> > >
> > > # ovs-dpctl dump-flows
> > > in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > > in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > > in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> > > in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> > >
> > > # ping -c1 192.168.0.2 -t 42
> > > IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> > > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> > > # ping -c1 192.168.0.2 -t 120
> > > IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> > > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> > > # ping -c1 192.168.0.2 -t 1
> > > #
> > >
> > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > > ---
> > > include/uapi/linux/openvswitch.h | 22 +++++++
> > > net/openvswitch/actions.c | 71 +++++++++++++++++++++
> > > net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
> > > 3 files changed, 198 insertions(+)
> > >
> >
> > Hi Matteo,
> >
> > [snip]
> > > +}
> > > +
> > > /* When 'last' is true, sample() should always consume the 'skb'.
> > > * Otherwise, sample() should keep 'skb' intact regardless what
> > > * actions are executed within sample().
> > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> > > nla_len(actions), last, clone_flow_key);
> > > }
> > >
> > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > > +{
> > > + int err;
> > > +
> > > + if (skb->protocol == htons(ETH_P_IPV6)) {
> > > + struct ipv6hdr *nh = ipv6_hdr(skb);
> > > +
> > > + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > > + sizeof(*nh));
> >
> > skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh might become invalid.
> > It seems the IPv4 version below is ok as the ptr is reloaded.
> >
>
> Right
>
> > One q as I don't know ovs that much - can this action be called only with
> > skb->protocol == ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not v6, then it must be v4 ?
> >
>
> I'm adding a check in validate_and_copy_dec_ttl() so only ipv4/ipv6
> packet will pass.
>
> Thanks,
>
> --
> Matteo Croce
> per aspera ad upstream
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [ovs-dev] [PATCH net-next v2] openvswitch: add TTL decrement action
[not found] ` <CAEPJBmo5ju_2+XdmOEscb_bWL6+qZ72ewk1LTdmiHEgxeE5+VA@mail.gmail.com>
@ 2019-12-25 3:26 ` Tonghao Zhang
0 siblings, 0 replies; 8+ messages in thread
From: Tonghao Zhang @ 2019-12-25 3:26 UTC (permalink / raw)
To: bindiya Kurle
Cc: Matteo Croce, Nikolay Aleksandrov, ovs dev, netdev, LKML,
Simon Horman, David S. Miller
On Wed, Dec 25, 2019 at 7:16 AM bindiya Kurle <bindiyakurle@gmail.com> wrote:
>
> Hi Tonghao,
> Once this kernel patch is integrated . I will be submitting patch to ovs-dpdk for implementing dec_ttl action on dpdk datapath.
Good, thanks
> Regards,
> Bindiya
>
> On Tue, Dec 24, 2019 at 2:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>>
>> Hi Matteo,
>> Did you have plan to implement the TTL decrement action in userspace
>> datapath(with dpdk),
>> I am doing some research offloading about TTL decrement action, and
>> may sent patch TTL decrement offload action,
>> using dpdk rte_flow.
>>
>> On Fri, Dec 20, 2019 at 8:37 PM Matteo Croce <mcroce@redhat.com> wrote:
>> >
>> > On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
>> > <nikolay@cumulusnetworks.com> wrote:
>> > >
>> > > On 17/12/2019 17:51, Matteo Croce wrote:
>> > > > New action to decrement TTL instead of setting it to a fixed value.
>> > > > This action will decrement the TTL and, in case of expired TTL, drop it
>> > > > or execute an action passed via a nested attribute.
>> > > > The default TTL expired action is to drop the packet.
>> > > >
>> > > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
>> > > >
>> > > > Tested with a corresponding change in the userspace:
>> > > >
>> > > > # ovs-dpctl dump-flows
>> > > > in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
>> > > > in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
>> > > > in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
>> > > > in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
>> > > >
>> > > > # ping -c1 192.168.0.2 -t 42
>> > > > IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
>> > > > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
>> > > > # ping -c1 192.168.0.2 -t 120
>> > > > IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
>> > > > 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
>> > > > # ping -c1 192.168.0.2 -t 1
>> > > > #
>> > > >
>> > > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
>> > > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
>> > > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
>> > > > ---
>> > > > include/uapi/linux/openvswitch.h | 22 +++++++
>> > > > net/openvswitch/actions.c | 71 +++++++++++++++++++++
>> > > > net/openvswitch/flow_netlink.c | 105 +++++++++++++++++++++++++++++++
>> > > > 3 files changed, 198 insertions(+)
>> > > >
>> > >
>> > > Hi Matteo,
>> > >
>> > > [snip]
>> > > > +}
>> > > > +
>> > > > /* When 'last' is true, sample() should always consume the 'skb'.
>> > > > * Otherwise, sample() should keep 'skb' intact regardless what
>> > > > * actions are executed within sample().
>> > > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
>> > > > nla_len(actions), last, clone_flow_key);
>> > > > }
>> > > >
>> > > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
>> > > > +{
>> > > > + int err;
>> > > > +
>> > > > + if (skb->protocol == htons(ETH_P_IPV6)) {
>> > > > + struct ipv6hdr *nh = ipv6_hdr(skb);
>> > > > +
>> > > > + err = skb_ensure_writable(skb, skb_network_offset(skb) +
>> > > > + sizeof(*nh));
>> > >
>> > > skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh might become invalid.
>> > > It seems the IPv4 version below is ok as the ptr is reloaded.
>> > >
>> >
>> > Right
>> >
>> > > One q as I don't know ovs that much - can this action be called only with
>> > > skb->protocol == ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not v6, then it must be v4 ?
>> > >
>> >
>> > I'm adding a check in validate_and_copy_dec_ttl() so only ipv4/ipv6
>> > packet will pass.
>> >
>> > Thanks,
>> >
>> > --
>> > Matteo Croce
>> > per aspera ad upstream
>> >
>> > _______________________________________________
>> > dev mailing list
>> > dev@openvswitch.org
>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2019-12-25 3:27 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-17 15:51 [PATCH net-next v2] openvswitch: add TTL decrement action Matteo Croce
2019-12-17 16:30 ` Nikolay Aleksandrov
2019-12-20 12:36 ` Matteo Croce
2019-12-24 8:41 ` [ovs-dev] " Tonghao Zhang
[not found] ` <CAEPJBmo5ju_2+XdmOEscb_bWL6+qZ72ewk1LTdmiHEgxeE5+VA@mail.gmail.com>
2019-12-25 3:26 ` Tonghao Zhang
2019-12-18 3:06 ` Pravin Shelar
2019-12-19 16:36 ` Matteo Croce
2019-12-20 1:05 ` Pravin Shelar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).