All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
@ 2016-09-06 13:37 Jamal Hadi Salim
  2016-09-06 13:54 ` Jamal Hadi Salim
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-06 13:37 UTC (permalink / raw)
  To: davem
  Cc: netdev, daniel, xiyou.wangcong, eric.dumazet, alexei.starovoitov,
	Jamal Hadi Salim

From: Jamal Hadi Salim <jhs@mojatatu.com>

This action is intended to be an upgrade from a usability perspective
from pedit (as well as operational debuggability).
Compare this:

sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action pedit munge offset -14 u8 set 0x02 \
munge offset -13 u8 set 0x15 \
munge offset -12 u8 set 0x15 \
munge offset -11 u8 set 0x15 \
munge offset -10 u16 set 0x1515 \
pipe

to:

sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action skbmod dmac 02:15:15:15:15:15

Also try to do a MAC address swap with pedit or worse
try to debug a policy with destination mac, source mac and
ethertype. Then make a few rules out of those and you'll get my point.

In the future common use cases on pedit can be migrated to this action
(as an example different fields in ip v4/6, transports like tcp/udp/sctp
etc). For this first cut, this allows modifying basic ethernet header.

The most important ethernet use case at the moment is when redirecting or
mirroring packets to a remote machine. The dst mac address needs a re-write
so that it doesn't get dropped or confuse an interconnecting (learning) switch
or dropped by a target machine (which looks at the dst mac). And at times
when flipping back the packet a swap of the MAC addresses is needed.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/net/tc_act/tc_skbmod.h        |  30 ++++
 include/uapi/linux/tc_act/tc_skbmod.h |  39 +++++
 net/sched/Kconfig                     |  11 ++
 net/sched/Makefile                    |   1 +
 net/sched/act_skbmod.c                | 295 ++++++++++++++++++++++++++++++++++
 5 files changed, 376 insertions(+)
 create mode 100644 include/net/tc_act/tc_skbmod.h
 create mode 100644 include/uapi/linux/tc_act/tc_skbmod.h
 create mode 100644 net/sched/act_skbmod.c

diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
new file mode 100644
index 0000000..f34cf13
--- /dev/null
+++ b/include/net/tc_act/tc_skbmod.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __NET_TC_SKBMOD_H
+#define __NET_TC_SKBMOD_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_skbmod.h>
+
+struct tcf_skbmod_params {
+	struct rcu_head	rcu;
+	u64	flags; /*up to 64 types of operations; extend if needed */
+	u8	eth_dst[ETH_ALEN];
+	u16	eth_type;
+	u8	eth_src[ETH_ALEN];
+};
+
+struct tcf_skbmod {
+	struct tc_action	common;
+	struct tcf_skbmod_params  *skbmod_p;
+};
+#define to_skbmod(a) ((struct tcf_skbmod *)a)
+
+#endif /* __NET_TC_SKBMOD_H */
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
new file mode 100644
index 0000000..10fc07d
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __LINUX_TC_SKBMOD_H
+#define __LINUX_TC_SKBMOD_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_SKBMOD 15
+
+#define SKBMOD_F_DMAC	0x1
+#define SKBMOD_F_SMAC	0x2
+#define SKBMOD_F_ETYPE	0x4
+#define SKBMOD_F_SWAPMAC 0x8
+
+struct tc_skbmod {
+	tc_gen;
+	__u64 flags;
+};
+
+enum {
+	TCA_SKBMOD_UNSPEC,
+	TCA_SKBMOD_TM,
+	TCA_SKBMOD_PARMS,
+	TCA_SKBMOD_DMAC,
+	TCA_SKBMOD_SMAC,
+	TCA_SKBMOD_ETYPE,
+	TCA_SKBMOD_PAD,
+	__TCA_SKBMOD_MAX
+};
+#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index ccf931b..34b556d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -749,6 +749,17 @@ config NET_ACT_CONNMARK
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_SKBMOD
+        tristate "skb data modification action"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to allow modification of skb data
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_skbmod.
+
 config NET_ACT_IFE
         tristate "Inter-FE action based on IETF ForCES InterFE LFB"
         depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ae088a5..e82eff8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644
index 0000000..6d784ac
--- /dev/null
+++ b/net/sched/act_skbmod.c
@@ -0,0 +1,295 @@
+/*
+ * net/sched/act_skbmod.c  skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK     15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	int action;
+	struct tcf_skbmod_params *p;
+	u64 flags;
+	int err;
+
+	bstats_update(&d->tcf_bstats, skb);
+
+	/* XXX: if you are going to edit more fields beyond ethernet header
+	 * (example when you add IP header replacement or vlan swap)
+	 * then MAX_EDIT_LEN needs to change appropriately
+	*/
+	err = skb_ensure_writable(skb, ETH_HLEN);
+	if (unlikely(err)) /* best policy is to drop on the floor */
+		action = TC_ACT_SHOT;
+
+	tcf_lastuse_update(&d->tcf_tm);
+
+	rcu_read_lock();
+	action = READ_ONCE(d->tcf_action);
+	if (unlikely(action == TC_ACT_SHOT)) {
+		d->tcf_qstats.drops++;
+		rcu_read_unlock();
+		return action;
+	}
+
+	p = rcu_dereference(d->skbmod_p);
+	flags = p->flags;
+	if (flags & SKBMOD_F_DMAC)
+		ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+	if (flags & SKBMOD_F_SMAC)
+		ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+	if (flags & SKBMOD_F_ETYPE)
+		eth_hdr(skb)->h_proto = p->eth_type;
+	rcu_read_unlock();
+
+	if (flags & SKBMOD_F_SWAPMAC) {
+		u8 tmpaddr[ETH_ALEN];
+		/*XXX: I am sure we can come up with more efficient swapping*/
+		ether_addr_copy(tmpaddr, eth_hdr(skb)->h_dest);
+		ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+		ether_addr_copy(eth_hdr(skb)->h_source, tmpaddr);
+	}
+
+	return action;
+}
+
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+	[TCA_SKBMOD_PARMS]		= { .len = sizeof(struct tc_skbmod) },
+	[TCA_SKBMOD_DMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_SMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_ETYPE]		= { .type = NLA_U16 },
+};
+
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+	struct tc_skbmod *parm;
+	struct tcf_skbmod *d;
+	struct tcf_skbmod_params *p, *p_old;
+	u32 lflags = 0;
+	u8 *daddr = NULL;
+	u8 *saddr = NULL;
+	u16 eth_type = 0;
+	bool exists = false;
+	int ret = 0, err;
+
+	if (nla == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_SKBMOD_PARMS])
+		return -EINVAL;
+
+	if (tb[TCA_SKBMOD_DMAC]) {
+		daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+		lflags |= SKBMOD_F_DMAC;
+	}
+
+	if (tb[TCA_SKBMOD_SMAC]) {
+		saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+		lflags |= SKBMOD_F_SMAC;
+	}
+
+	if (tb[TCA_SKBMOD_ETYPE]) {
+		eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+		lflags |= SKBMOD_F_ETYPE;
+	}
+
+	parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+
+	if (parm->flags & SKBMOD_F_SWAPMAC)
+		lflags = SKBMOD_F_SWAPMAC;
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!lflags)
+		return -EINVAL;
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_skbmod_ops, bind, false);
+		if (ret)
+			return ret;
+
+		d = to_skbmod(*a);
+		ret = ACT_P_CREATED;
+	} else {
+		d = to_skbmod(*a);
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	ASSERT_RTNL();
+	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+	if (unlikely (!p)) {
+		if (ovr)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	p->flags = lflags;
+	d->tcf_action = parm->action;
+
+	p_old = rtnl_dereference(d->skbmod_p);
+
+	if (ovr)
+		spin_lock_bh(&d->tcf_lock);
+
+	if (lflags & SKBMOD_F_DMAC)
+		ether_addr_copy(p->eth_dst, daddr);
+	if (lflags & SKBMOD_F_SMAC)
+		ether_addr_copy(p->eth_src, saddr);
+	if (lflags & SKBMOD_F_ETYPE)
+		p->eth_type = htons(eth_type);
+
+	rcu_assign_pointer(d->skbmod_p, p);
+	if (ovr)
+		spin_unlock_bh(&d->tcf_lock);
+
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+	return ret;
+}
+
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbmod *d = to_skbmod(a);
+	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
+	struct tc_skbmod opt = {
+		.index   = d->tcf_index,
+		.refcnt  = d->tcf_refcnt - ref,
+		.bindcnt = d->tcf_bindcnt - bind,
+		.action  = d->tcf_action,
+	};
+	struct tcf_t t;
+
+	rcu_read_lock();
+
+	opt.flags  = p->flags;
+	if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_DMAC) &&
+	    nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_SMAC) &&
+	    nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_ETYPE) &&
+	    nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &d->tcf_tm);
+	if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+		goto nla_put_failure;
+
+	rcu_read_unlock();
+
+	return skb->len;
+nla_put_failure:
+	rcu_read_unlock();
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_skbmod_ops = {
+	.kind		=	"skbmod",
+	.type		=	TCA_ACT_SKBMOD,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbmod_run,
+	.dump		=	tcf_skbmod_dump,
+	.init		=	tcf_skbmod_init,
+	.walk		=	tcf_skbmod_walker,
+	.lookup		=	tcf_skbmod_search,
+	.size		=	sizeof(struct tcf_skbmod),
+};
+
+static __net_init int skbmod_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbmod_net_ops = {
+	.init = skbmod_init_net,
+	.exit = skbmod_exit_net,
+	.id   = &skbmod_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+static int __init skbmod_init_module(void)
+{
+	return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+static void __exit skbmod_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-06 13:37 [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action Jamal Hadi Salim
@ 2016-09-06 13:54 ` Jamal Hadi Salim
  2016-09-06 23:35   ` David Miller
  2016-09-06 14:30 ` Eric Dumazet
  2016-09-07  3:29 ` kbuild test robot
  2 siblings, 1 reply; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-06 13:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, daniel, xiyou.wangcong, eric.dumazet, alexei.starovoitov

On 16-09-06 09:37 AM, Jamal Hadi Salim wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
>
> This action is intended to be an upgrade from a usability perspective
> from pedit (as well as operational debugability).
> Compare this:
>

Dave,
I will have to send some new version of this action - so please
dont apply.
As it stands right now, the per-cpu stats dont work; so code uses
global stats (which works).
Hoping to get some clarity from other people then some more testing.

cheers,
jamal

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-06 13:37 [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action Jamal Hadi Salim
  2016-09-06 13:54 ` Jamal Hadi Salim
@ 2016-09-06 14:30 ` Eric Dumazet
  2016-09-07  3:29 ` kbuild test robot
  2 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2016-09-06 14:30 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: davem, netdev, daniel, xiyou.wangcong, alexei.starovoitov

On Tue, 2016-09-06 at 09:37 -0400, Jamal Hadi Salim wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
> +
> +struct tcf_skbmod_params {
> +	struct rcu_head	rcu;
> +	u64	flags; /*up to 64 types of operations; extend if needed */
> +	u8	eth_dst[ETH_ALEN];
> +	u16	eth_type;
> +	u8	eth_src[ETH_ALEN];
> +};
> +
> +struct tcf_skbmod {
> +	struct tc_action	common;
> +	struct tcf_skbmod_params  *skbmod_p;

struct tcf_skbmod_params __rcu *skbmod_p;

> +};

Then add CONFIG_SPARSE_RCU_POINTER=y to your .config
And build/check

make C=2 M=net/sched

Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-06 13:54 ` Jamal Hadi Salim
@ 2016-09-06 23:35   ` David Miller
  0 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2016-09-06 23:35 UTC (permalink / raw)
  To: jhs; +Cc: netdev, daniel, xiyou.wangcong, eric.dumazet, alexei.starovoitov

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 6 Sep 2016 09:54:28 -0400

> On 16-09-06 09:37 AM, Jamal Hadi Salim wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>
>> This action is intended to be an upgrade from a usability perspective
>> from pedit (as well as operational debugability).
>> Compare this:
>>
> 
> Dave,
> I will have to send some new version of this action - so please
> dont apply.

Ok.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-06 13:37 [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action Jamal Hadi Salim
  2016-09-06 13:54 ` Jamal Hadi Salim
  2016-09-06 14:30 ` Eric Dumazet
@ 2016-09-07  3:29 ` kbuild test robot
  2 siblings, 0 replies; 16+ messages in thread
From: kbuild test robot @ 2016-09-07  3:29 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: kbuild-all, davem, netdev, daniel, xiyou.wangcong, eric.dumazet,
	alexei.starovoitov, Jamal Hadi Salim

Hi Jamal,

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jamal-Hadi-Salim/net_sched-Introduce-skbmod-action/20160907-095338
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/compiler.h:230:8: sparse: attribute 'no_sanitize_address': unknown attribute
>> net/sched/act_skbmod.c:58:13: sparse: incompatible types in comparison expression (different address spaces)
   net/sched/act_skbmod.c:165:17: sparse: incompatible types in comparison expression (different address spaces)
   net/sched/act_skbmod.c:194:40: sparse: incompatible types in comparison expression (different address spaces)

vim +58 net/sched/act_skbmod.c

    42		 * then MAX_EDIT_LEN needs to change appropriately
    43		*/
    44		err = skb_ensure_writable(skb, ETH_HLEN);
    45		if (unlikely(err)) /* best policy is to drop on the floor */
    46			action = TC_ACT_SHOT;
    47	
    48		tcf_lastuse_update(&d->tcf_tm);
    49	
    50		rcu_read_lock();
    51		action = READ_ONCE(d->tcf_action);
    52		if (unlikely(action == TC_ACT_SHOT)) {
    53			d->tcf_qstats.drops++;
    54			rcu_read_unlock();
    55			return action;
    56		}
    57	
  > 58		p = rcu_dereference(d->skbmod_p);
    59		flags = p->flags;
    60		if (flags & SKBMOD_F_DMAC)
    61			ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
    62		if (flags & SKBMOD_F_SMAC)
    63			ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
    64		if (flags & SKBMOD_F_ETYPE)
    65			eth_hdr(skb)->h_proto = p->eth_type;
    66		rcu_read_unlock();

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 23:20           ` Eric Dumazet
@ 2016-09-12 23:40             ` Jamal Hadi Salim
  0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-12 23:40 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On 16-09-12 07:20 PM, Eric Dumazet wrote:
> On Mon, 2016-09-12 at 16:10 -0700, Eric Dumazet wrote:
>
>>
>> diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
>> index 508e051304fb62627e61b5065b2325edd1b84f2e..dc9dd8ae7d5405f76c775278dac7689655b21041 100644
>> --- a/net/core/gen_stats.c
>> +++ b/net/core/gen_stats.c
>> @@ -142,10 +142,14 @@ __gnet_stats_copy_basic(const seqcount_t *running,
>>  		return;
>>  	}
>>  	do {
>> -		if (running)
>> +		if (running) {
>> +			local_bh_disable();
>>  			seq = read_seqcount_begin(running);
>> +		}
>>  		bstats->bytes = b->bytes;
>>  		bstats->packets = b->packets;
>> +		if (running)
>> +			local_bh_enable();
>>  	} while (running && read_seqcount_retry(running, seq));
>>  }
>
> Ah well, forget this patch, re-enabling bh right before
> read_seqcount_retry() is not going to help.
>

I have to say I have seen some odd issues once in a while reading
generic action stats.
I had a program that opened a netlink socket into the kernel.
Every X seconds it does a dump of all the actions to read the
stats.
There is a very reproducible behavior that the stats
are not in sync with the kernel. Given generic stats is lockless
I thought maybe rcu or per-cpu stats was the issue. I havent had time
to look closely.
The solution is instead of keeping the socket open all the time;
I open, read stats, close (repeat every x seconds).

If there is something you want me to try - I could do sometimes
this week. Your patch above may be useful!

cheers,
jamal

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 23:02       ` Jamal Hadi Salim
  2016-09-12 23:10         ` Eric Dumazet
@ 2016-09-12 23:26         ` Jamal Hadi Salim
  1 sibling, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-12 23:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On 16-09-12 07:02 PM, Jamal Hadi Salim wrote:

>> Looks like typical starvation caused by aggressive softirq.
>>
>
> Well, then it is strange that in one case a tc dump of the rule
> was immediate and in the other case it was consistent for 5-15
> seconds.
>

I may be going nuts because i cant reproduce that issue anymore.
VM is configured for SMP 4 vcpus.

I will remove that check and submit a new version.

cheers,
jamal

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 23:10         ` Eric Dumazet
@ 2016-09-12 23:20           ` Eric Dumazet
  2016-09-12 23:40             ` Jamal Hadi Salim
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-09-12 23:20 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On Mon, 2016-09-12 at 16:10 -0700, Eric Dumazet wrote:

> 
> diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
> index 508e051304fb62627e61b5065b2325edd1b84f2e..dc9dd8ae7d5405f76c775278dac7689655b21041 100644
> --- a/net/core/gen_stats.c
> +++ b/net/core/gen_stats.c
> @@ -142,10 +142,14 @@ __gnet_stats_copy_basic(const seqcount_t *running,
>  		return;
>  	}
>  	do {
> -		if (running)
> +		if (running) {
> +			local_bh_disable();
>  			seq = read_seqcount_begin(running);
> +		}
>  		bstats->bytes = b->bytes;
>  		bstats->packets = b->packets;
> +		if (running)
> +			local_bh_enable();
>  	} while (running && read_seqcount_retry(running, seq));
>  }

Ah well, forget this patch, re-enabling bh right before
read_seqcount_retry() is not going to help.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 23:02       ` Jamal Hadi Salim
@ 2016-09-12 23:10         ` Eric Dumazet
  2016-09-12 23:20           ` Eric Dumazet
  2016-09-12 23:26         ` Jamal Hadi Salim
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-09-12 23:10 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On Mon, 2016-09-12 at 19:02 -0400, Jamal Hadi Salim wrote:
> On 16-09-12 06:26 PM, Eric Dumazet wrote:
> > On Mon, 2016-09-12 at 18:14 -0400, Jamal Hadi Salim wrote:
> >
> >> I noticed some very weird issues when I took that out.
> >> Running sufficiently large amount of traffic (ping -f is sufficient)
> >> I saw that when i did a dump it took anywhere between 6-15 seconds.
> >> With the read_lock in place response was immediate.
> >> I can go back and run things to verify - but it was very odd.
> >
> > This was on uni processor ?
> >
> 
> It was a VM.
> 
> > Looks like typical starvation caused by aggressive softirq.
> >
> 
> Well, then it is strange that in one case a tc dump of the rule
> was immediate and in the other case it was consistent for 5-15
> seconds.
> 

This needs investigation ;)

One possible loop under high stress would be possible in
__gnet_stats_copy_basic(), since we might restart the loop if we are
really really unlucky, but this would have nothing with your patches.



diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 508e051304fb62627e61b5065b2325edd1b84f2e..dc9dd8ae7d5405f76c775278dac7689655b21041 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -142,10 +142,14 @@ __gnet_stats_copy_basic(const seqcount_t *running,
 		return;
 	}
 	do {
-		if (running)
+		if (running) {
+			local_bh_disable();
 			seq = read_seqcount_begin(running);
+		}
 		bstats->bytes = b->bytes;
 		bstats->packets = b->packets;
+		if (running)
+			local_bh_enable();
 	} while (running && read_seqcount_retry(running, seq));
 }
 EXPORT_SYMBOL(__gnet_stats_copy_basic);

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 22:26     ` Eric Dumazet
@ 2016-09-12 23:02       ` Jamal Hadi Salim
  2016-09-12 23:10         ` Eric Dumazet
  2016-09-12 23:26         ` Jamal Hadi Salim
  0 siblings, 2 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-12 23:02 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On 16-09-12 06:26 PM, Eric Dumazet wrote:
> On Mon, 2016-09-12 at 18:14 -0400, Jamal Hadi Salim wrote:
>
>> I noticed some very weird issues when I took that out.
>> Running sufficiently large amount of traffic (ping -f is sufficient)
>> I saw that when i did a dump it took anywhere between 6-15 seconds.
>> With the read_lock in place response was immediate.
>> I can go back and run things to verify - but it was very odd.
>
> This was on uni processor ?
>

It was a VM.

> Looks like typical starvation caused by aggressive softirq.
>

Well, then it is strange that in one case a tc dump of the rule
was immediate and in the other case it was consistent for 5-15
seconds.

> Anyway, I suspect your kernel build has rcu_read_lock() and
> rcu_read_unlock() as NOP ;)
>

Which doesnt give me a good feel if i tested this well ;->

I would like to try again with those two kernels just to
make sure i was not imagining this.

cheers,
jamal

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 22:14   ` Jamal Hadi Salim
@ 2016-09-12 22:26     ` Eric Dumazet
  2016-09-12 23:02       ` Jamal Hadi Salim
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-09-12 22:26 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On Mon, 2016-09-12 at 18:14 -0400, Jamal Hadi Salim wrote:

> I noticed some very weird issues when I took that out.
> Running sufficiently large amount of traffic (ping -f is sufficient)
> I saw that when i did a dump it took anywhere between 6-15 seconds.
> With the read_lock in place response was immediate.
> I can go back and run things to verify - but it was very odd.

This was on uni processor ?

Looks like typical starvation caused by aggressive softirq.

Anyway, I suspect your kernel build has rcu_read_lock() and
rcu_read_unlock() as NOP ;)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 22:01 ` Eric Dumazet
@ 2016-09-12 22:14   ` Jamal Hadi Salim
  2016-09-12 22:26     ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-12 22:14 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On 16-09-12 06:01 PM, Eric Dumazet wrote:
> On Mon, 2016-09-12 at 16:46 -0400, Jamal Hadi Salim wrote:
>
>> +
>> +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
>> +			   int bind, int ref)
>> +{
>> +	struct tcf_skbmod *d = to_skbmod(a);
>> +	unsigned char *b = skb_tail_pointer(skb);
>> +	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
>> +	struct tc_skbmod opt = {
>> +		.index   = d->tcf_index,
>> +		.refcnt  = d->tcf_refcnt - ref,
>> +		.bindcnt = d->tcf_bindcnt - bind,
>> +		.action  = d->tcf_action,
>> +	};
>> +	struct tcf_t t;
>> +
>> +	rcu_read_lock();
>
> You do not need rcu read lock protection here, RTNL is enough.

I noticed some very weird issues when I took that out.
Running sufficiently large amount of traffic (ping -f is sufficient)
I saw that when i did a dump it took anywhere between 6-15 seconds.
With the read_lock in place response was immediate.
I can go back and run things to verify - but it was very odd.

cheers,
jamal

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 21:58 ` Eric Dumazet
@ 2016-09-12 22:08   ` Jamal Hadi Salim
  0 siblings, 0 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-12 22:08 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On 16-09-12 05:58 PM, Eric Dumazet wrote:
> On Mon, 2016-09-12 at 16:46 -0400, Jamal Hadi Salim wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>> +
>> +	/* XXX: if you are going to edit more fields beyond ethernet header
>> +	 * (example when you add IP header replacement or vlan swap)
>> +	 * then MAX_EDIT_LEN needs to change appropriately
>> +	*/
>> +	err = skb_ensure_writable(skb, ETH_HLEN);
>> +	if (unlikely(err)) /* best policy is to drop on the floor */
>> +		action = TC_ACT_SHOT;
>> +
>> +	rcu_read_lock();
>> +	action = READ_ONCE(d->tcf_action);
>
> You are overwriting @action, while you might have put TC_ACT_SHOT in it
> 3 lines above.
>
> Maybe you meant :
> 	if (err)
> 		return TC_ACT_SHOT;
>

Thanks for catching that (leftover from when i used a lock).
Will resubmit.

cheers,
jamal

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 20:46 Jamal Hadi Salim
  2016-09-12 21:58 ` Eric Dumazet
@ 2016-09-12 22:01 ` Eric Dumazet
  2016-09-12 22:14   ` Jamal Hadi Salim
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-09-12 22:01 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On Mon, 2016-09-12 at 16:46 -0400, Jamal Hadi Salim wrote:

> +
> +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
> +			   int bind, int ref)
> +{
> +	struct tcf_skbmod *d = to_skbmod(a);
> +	unsigned char *b = skb_tail_pointer(skb);
> +	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
> +	struct tc_skbmod opt = {
> +		.index   = d->tcf_index,
> +		.refcnt  = d->tcf_refcnt - ref,
> +		.bindcnt = d->tcf_bindcnt - bind,
> +		.action  = d->tcf_action,
> +	};
> +	struct tcf_t t;
> +
> +	rcu_read_lock();

You do not need rcu read lock protection here, RTNL is enough.

> +
> +	opt.flags  = p->flags;
> +	if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
> +		goto nla_put_failure;
> +	if ((p->flags & SKBMOD_F_DMAC) &&
> +	    nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
> +		goto nla_put_failure;
> +	if ((p->flags & SKBMOD_F_SMAC) &&
> +	    nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
> +		goto nla_put_failure;
> +	if ((p->flags & SKBMOD_F_ETYPE) &&
> +	    nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
> +		goto nla_put_failure;
> +
> +	tcf_tm_dump(&t, &d->tcf_tm);
> +	if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
> +		goto nla_put_failure;
> +
> +	rcu_read_unlock();
> +
> +	return skb->len;
> +nla_put_failure:
> +	rcu_read_unlock();
> +	nlmsg_trim(skb, b);
> +	return -1;
> +}
> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
  2016-09-12 20:46 Jamal Hadi Salim
@ 2016-09-12 21:58 ` Eric Dumazet
  2016-09-12 22:08   ` Jamal Hadi Salim
  2016-09-12 22:01 ` Eric Dumazet
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2016-09-12 21:58 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, xiyou.wangcong, daniel, john.r.fastabend

On Mon, 2016-09-12 at 16:46 -0400, Jamal Hadi Salim wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>

> +
> +	/* XXX: if you are going to edit more fields beyond ethernet header
> +	 * (example when you add IP header replacement or vlan swap)
> +	 * then MAX_EDIT_LEN needs to change appropriately
> +	*/
> +	err = skb_ensure_writable(skb, ETH_HLEN);
> +	if (unlikely(err)) /* best policy is to drop on the floor */
> +		action = TC_ACT_SHOT;
> +
> +	rcu_read_lock();
> +	action = READ_ONCE(d->tcf_action);

You are overwriting @action, while you might have put TC_ACT_SHOT in it
3 lines above.

Maybe you meant :
	if (err)
		return TC_ACT_SHOT;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action
@ 2016-09-12 20:46 Jamal Hadi Salim
  2016-09-12 21:58 ` Eric Dumazet
  2016-09-12 22:01 ` Eric Dumazet
  0 siblings, 2 replies; 16+ messages in thread
From: Jamal Hadi Salim @ 2016-09-12 20:46 UTC (permalink / raw)
  To: davem
  Cc: netdev, xiyou.wangcong, daniel, eric.dumazet, john.r.fastabend,
	Jamal Hadi Salim

From: Jamal Hadi Salim <jhs@mojatatu.com>

This action is intended to be an upgrade from a usability perspective
from pedit (as well as operational debuggability).
Compare this:

sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action pedit munge offset -14 u8 set 0x02 \
munge offset -13 u8 set 0x15 \
munge offset -12 u8 set 0x15 \
munge offset -11 u8 set 0x15 \
munge offset -10 u16 set 0x1515 \
pipe

to:

sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action skbmod dmac 02:15:15:15:15:15

Also try to do a MAC address swap with pedit or, worse,
try to debug a policy with destination mac, source mac and
ethertype. Then make a few rules out of those and you'll get my point.

In the future common use cases on pedit can be migrated to this action
(as an example different fields in ip v4/6, transports like tcp/udp/sctp
etc). For this first cut, this allows modifying basic ethernet header.

The most important ethernet use case at the moment is when redirecting or
mirroring packets to a remote machine. The dst mac address needs a re-write
so that it doesn't get dropped or confuse an interconnecting (learning) switch
or dropped by a target machine (which looks at the dst mac). And at times
when flipping back the packet a swap of the MAC addresses is needed.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/net/tc_act/tc_skbmod.h        |  30 ++++
 include/uapi/linux/tc_act/tc_skbmod.h |  39 +++++
 net/sched/Kconfig                     |  11 ++
 net/sched/Makefile                    |   1 +
 net/sched/act_skbmod.c                | 293 ++++++++++++++++++++++++++++++++++
 5 files changed, 374 insertions(+)
 create mode 100644 include/net/tc_act/tc_skbmod.h
 create mode 100644 include/uapi/linux/tc_act/tc_skbmod.h
 create mode 100644 net/sched/act_skbmod.c

diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
new file mode 100644
index 0000000..644a211
--- /dev/null
+++ b/include/net/tc_act/tc_skbmod.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __NET_TC_SKBMOD_H
+#define __NET_TC_SKBMOD_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_skbmod.h>
+
+struct tcf_skbmod_params {
+	struct rcu_head	rcu; /* lets a replaced params block be kfree_rcu()'d */
+	u64	flags; /*up to 64 types of operations; extend if needed */
+	u8	eth_dst[ETH_ALEN]; /* copied into h_dest when SKBMOD_F_DMAC is set */
+	u16	eth_type; /* stored via htons(); written to h_proto on F_ETYPE */
+	u8	eth_src[ETH_ALEN]; /* copied into h_source when SKBMOD_F_SMAC is set */
+};
+
+struct tcf_skbmod {
+	struct tc_action	common;
+	struct tcf_skbmod_params __rcu *skbmod_p;
+};
+#define to_skbmod(a) ((struct tcf_skbmod *)a)
+
+#endif /* __NET_TC_SKBMOD_H */
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
new file mode 100644
index 0000000..10fc07d
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __LINUX_TC_SKBMOD_H
+#define __LINUX_TC_SKBMOD_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_SKBMOD 15
+
+#define SKBMOD_F_DMAC	0x1 /* overwrite the destination MAC address */
+#define SKBMOD_F_SMAC	0x2 /* overwrite the source MAC address */
+#define SKBMOD_F_ETYPE	0x4 /* overwrite the ethertype (h_proto) */
+#define SKBMOD_F_SWAPMAC 0x8 /* swap source and destination MAC addresses */
+
+struct tc_skbmod {
+	tc_gen;
+	__u64 flags;
+};
+
+enum {
+	TCA_SKBMOD_UNSPEC,
+	TCA_SKBMOD_TM,
+	TCA_SKBMOD_PARMS,
+	TCA_SKBMOD_DMAC,
+	TCA_SKBMOD_SMAC,
+	TCA_SKBMOD_ETYPE,
+	TCA_SKBMOD_PAD,
+	__TCA_SKBMOD_MAX
+};
+#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 72e3426..7795d5a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -749,6 +749,17 @@ config NET_ACT_CONNMARK
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_SKBMOD
+        tristate "skb data modification action"
+        depends on NET_CLS_ACT
+        ---help---
+         Say Y here to allow modification of skb data
+
+         If unsure, say N.
+
+         To compile this code as a module, choose M here: the
+         module will be called act_skbmod.
+
 config NET_ACT_IFE
         tristate "Inter-FE action based on IETF ForCES InterFE LFB"
         depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b9d046b..148ae0d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644
index 0000000..40a2dfd
--- /dev/null
+++ b/net/sched/act_skbmod.c
@@ -0,0 +1,293 @@
+/*
+ * net/sched/act_skbmod.c  skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK     15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+/* Edit @skb's Ethernet header; return verdict (TC_ACT_SHOT if unwritable). */
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	struct tcf_skbmod_params *p;
+	int action, err;
+	u64 flags;
+
+	tcf_lastuse_update(&d->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+	/* XXX: if you are going to edit more fields beyond ethernet header
+	 * (example when you add IP header replacement or vlan swap)
+	 * then MAX_EDIT_LEN needs to change appropriately
+	 */
+	err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+	if (unlikely(err)) /* best policy is to drop on the floor */
+		return TC_ACT_SHOT; /* bail out now: header must not be edited */
+
+	rcu_read_lock();
+	action = READ_ONCE(d->tcf_action);
+	if (unlikely(action == TC_ACT_SHOT)) {
+		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+		rcu_read_unlock();
+		return action;
+	}
+
+	p = rcu_dereference(d->skbmod_p);
+	flags = p->flags; /* local copy: still tested after rcu_read_unlock() */
+	if (flags & SKBMOD_F_DMAC)
+		ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+	if (flags & SKBMOD_F_SMAC)
+		ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+	if (flags & SKBMOD_F_ETYPE)
+		eth_hdr(skb)->h_proto = p->eth_type;
+	rcu_read_unlock();
+
+	if (flags & SKBMOD_F_SWAPMAC) {
+		u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
+		/*XXX: I am sure we can come up with more efficient swapping*/
+		ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest);
+		ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+		ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
+	}
+
+	return action;
+}
+
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+	[TCA_SKBMOD_PARMS]		= { .len = sizeof(struct tc_skbmod) },
+	[TCA_SKBMOD_DMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_SMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_ETYPE]		= { .type = NLA_U16 },
+};
+
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+	struct tcf_skbmod_params *p, *p_old;
+	struct tc_skbmod *parm;
+	struct tcf_skbmod *d;
+	bool exists = false;
+	u8 *daddr = NULL;
+	u8 *saddr = NULL;
+	u16 eth_type = 0;
+	u32 lflags = 0;
+	int ret = 0, err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_SKBMOD_PARMS])
+		return -EINVAL;
+
+	if (tb[TCA_SKBMOD_DMAC]) {
+		daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+		lflags |= SKBMOD_F_DMAC;
+	}
+
+	if (tb[TCA_SKBMOD_SMAC]) {
+		saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+		lflags |= SKBMOD_F_SMAC;
+	}
+
+	if (tb[TCA_SKBMOD_ETYPE]) {
+		eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+		lflags |= SKBMOD_F_ETYPE;
+	}
+
+	parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+	if (parm->flags & SKBMOD_F_SWAPMAC)
+		lflags = SKBMOD_F_SWAPMAC;
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!lflags)
+		return -EINVAL;
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_skbmod_ops, bind, true);
+		if (ret)
+			return ret;
+
+		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	d = to_skbmod(*a);
+
+	ASSERT_RTNL();
+	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+	if (unlikely(!p)) {
+		if (ovr)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	p->flags = lflags;
+	d->tcf_action = parm->action;
+
+	p_old = rtnl_dereference(d->skbmod_p);
+
+	if (ovr)
+		spin_lock_bh(&d->tcf_lock);
+
+	if (lflags & SKBMOD_F_DMAC)
+		ether_addr_copy(p->eth_dst, daddr);
+	if (lflags & SKBMOD_F_SMAC)
+		ether_addr_copy(p->eth_src, saddr);
+	if (lflags & SKBMOD_F_ETYPE)
+		p->eth_type = htons(eth_type);
+
+	rcu_assign_pointer(d->skbmod_p, p);
+	if (ovr)
+		spin_unlock_bh(&d->tcf_lock);
+
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+	return ret;
+}
+
+/* Dump this action's configuration into the netlink message @skb.
+ * The params are fetched with rtnl_dereference(), i.e. under RTNL, so no
+ * rcu_read_lock()/rcu_read_unlock() pair is needed around the reads.
+ * Returns skb->len on success, or -1 after trimming the partial attributes.
+ */
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p);
+	struct tc_skbmod opt = {
+		.index   = d->tcf_index,
+		.refcnt  = d->tcf_refcnt - ref,
+		.bindcnt = d->tcf_bindcnt - bind,
+		.action  = d->tcf_action,
+	};
+	struct tcf_t t;
+
+	opt.flags  = p->flags;
+	if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_DMAC) &&
+	    nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_SMAC) &&
+	    nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_ETYPE) &&
+	    nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &d->tcf_tm);
+	if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_skbmod_ops = {
+	.kind		=	"skbmod",
+	.type		=	TCA_ACT_SKBMOD,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbmod_run,
+	.dump		=	tcf_skbmod_dump,
+	.init		=	tcf_skbmod_init,
+	.walk		=	tcf_skbmod_walker,
+	.lookup		=	tcf_skbmod_search,
+	.size		=	sizeof(struct tcf_skbmod),
+};
+
+static __net_init int skbmod_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbmod_net_ops = {
+	.init = skbmod_init_net,
+	.exit = skbmod_exit_net,
+	.id   = &skbmod_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+static int __init skbmod_init_module(void)
+{
+	return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+static void __exit skbmod_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-09-12 23:40 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-06 13:37 [PATCH v4 net-next 1/1] net_sched: Introduce skbmod action Jamal Hadi Salim
2016-09-06 13:54 ` Jamal Hadi Salim
2016-09-06 23:35   ` David Miller
2016-09-06 14:30 ` Eric Dumazet
2016-09-07  3:29 ` kbuild test robot
2016-09-12 20:46 Jamal Hadi Salim
2016-09-12 21:58 ` Eric Dumazet
2016-09-12 22:08   ` Jamal Hadi Salim
2016-09-12 22:01 ` Eric Dumazet
2016-09-12 22:14   ` Jamal Hadi Salim
2016-09-12 22:26     ` Eric Dumazet
2016-09-12 23:02       ` Jamal Hadi Salim
2016-09-12 23:10         ` Eric Dumazet
2016-09-12 23:20           ` Eric Dumazet
2016-09-12 23:40             ` Jamal Hadi Salim
2016-09-12 23:26         ` Jamal Hadi Salim

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.