All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework
@ 2017-07-12 10:40 martinbj2008
  2017-07-12 10:40 ` [PATCH v1 net-next 2/5] drop_monitor: let dm trace state support ns martinbj2008
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: martinbj2008 @ 2017-07-12 10:40 UTC (permalink / raw)
  To: nhorman, davem; +Cc: netdev, martinbj2008, zhangjunweimartin

From: martin Zhang <zhangjunweimartin@didichuxing.com>

This is a patch series for drop monitor that adds net namespace support.

Introduce two structs to support net ns:

1. struct per_ns_dm_cb:
  As its name suggests, it holds the per net ns state.

  In this patch it is empty; the following patches add these fields:
  a. trace_state: every net ns has a switch to indicate the trace state.
  b. ns_dm_mutex: the mutex keeps operations exclusive within a single net ns.
  c. hw_stats_list: monitor for NAPI of net device.

2. ns_pcpu_dm_data
   It replaces per_cpu_dm_data on a per net ns basis.

   per_cpu_dm_data will only keep the dm_alert_work, and the other fields
will be moved to ns_pcpu_dm_data. They do the same thing as the current
code; the only difference is that they are now per net ns.

  A per-CPU work item is kept to send the alert netlink message.

Signed-off-by: martin Zhang <zhangjunweimartin@didichuxing.com>
---
Dropwatch is a very useful tool for diagnosing network problems,
and it has been a great help to us.
Dropwatch does not work inside a container (net namespace).
That is a pity, so let it support net ns.

 net/core/drop_monitor.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 70ccda2..6a75e04 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -32,6 +32,10 @@
 #include <trace/events/napi.h>
 
 #include <asm/unaligned.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <linux/smp.h>
 
 #define TRACE_ON 1
 #define TRACE_OFF 0
@@ -41,6 +45,13 @@
  * and the work handle that will send up
  * netlink alerts
  */
+
+struct ns_pcpu_dm_data {
+};
+
+struct per_ns_dm_cb {
+};
+
 static int trace_state = TRACE_OFF;
 static DEFINE_MUTEX(trace_state_mutex);
 
@@ -59,6 +70,7 @@ struct dm_hw_stat_delta {
 	unsigned long last_drop_val;
 };
 
+static int dm_net_id __read_mostly;
 static struct genl_family net_drop_monitor_family;
 
 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
@@ -382,6 +394,33 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 	.notifier_call = dropmon_net_event
 };
 
+static int __net_init dm_net_init(struct net *net)
+{
+	struct per_ns_dm_cb *ns_dm_cb;
+
+	ns_dm_cb = net_generic(net, dm_net_id);
+	if (!ns_dm_cb)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __net_exit dm_net_exit(struct net *net)
+{
+	struct per_ns_dm_cb *ns_dm_cb;
+
+	ns_dm_cb = net_generic(net, dm_net_id);
+	if (!ns_dm_cb)
+		return;
+}
+
+static struct pernet_operations dm_net_ops = {
+	.init = dm_net_init,
+	.exit = dm_net_exit,
+	.id   = &dm_net_id,
+	.size = sizeof(struct per_ns_dm_cb),
+};
+
 static int __init init_net_drop_monitor(void)
 {
 	struct per_cpu_dm_data *data;
@@ -393,6 +432,7 @@ static int __init init_net_drop_monitor(void)
 		pr_err("Unable to store program counters on this arch, Drop monitor failed\n");
 		return -ENOSPC;
 	}
+	rc = register_pernet_subsys(&dm_net_ops);
 
 	rc = genl_register_family(&net_drop_monitor_family);
 	if (rc) {
@@ -441,6 +481,7 @@ static void exit_net_drop_monitor(void)
 	 * or pending schedule calls
 	 */
 
+	unregister_pernet_subsys(&dm_net_ops);
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
 		del_timer_sync(&data->send_timer);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v1 net-next 2/5] drop_monitor: let dm trace state support ns
  2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
@ 2017-07-12 10:40 ` martinbj2008
  2017-07-12 10:40 ` [PATCH v1 net-next 3/5] drop_monitor: let hw_stats_list support net ns martinbj2008
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: martinbj2008 @ 2017-07-12 10:40 UTC (permalink / raw)
  To: nhorman, davem; +Cc: netdev, martinbj2008, zhangjunweimartin

From: martin Zhang <zhangjunweimartin@didichuxing.com>

Every net ns has its own trace_state,
and a reference count controls the trace state of the whole kernel.

trace_state in struct per_ns_dm_cb:
Just like the previous trace state, it records the trace state for
every net ns. Possible values are ON/OFF.

dm_trace_ref: records how many net namespaces are set to
TRACE_ON. It is increased when a net ns changes to ON,
and decreased when it changes to OFF.

Signed-off-by: martin Zhang <zhangjunweimartin@didichuxing.com>
---
 net/core/drop_monitor.c | 88 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 13 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 6a75e04..0cf25c3 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -49,10 +49,16 @@
 struct ns_pcpu_dm_data {
 };
 
+/**
+ * struct per_ns_dm_cb  - drop monitor control block in per net ns.
+ * @trace_state:    the trace state.
+ * @ns_dm_mutex:    protect whole per_ns_dm_cb.
+ */
 struct per_ns_dm_cb {
+	int trace_state;
+	struct mutex ns_dm_mutex;
 };
 
-static int trace_state = TRACE_OFF;
 static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
@@ -70,6 +76,7 @@ struct dm_hw_stat_delta {
 	unsigned long last_drop_val;
 };
 
+int dm_trace_ref;
 static int dm_net_id __read_mostly;
 static struct genl_family net_drop_monitor_family;
 
@@ -254,9 +261,16 @@ static int set_all_monitor_traces(int state)
 
 	mutex_lock(&trace_state_mutex);
 
-	if (state == trace_state) {
-		rc = -EAGAIN;
-		goto out_unlock;
+	//Cases: Only inc/dec reference value.
+	if (state == TRACE_ON && dm_trace_ref > 0)
+		goto skip_register_trace;
+	else if (state == TRACE_OFF && dm_trace_ref > 1)
+		goto skip_register_trace;
+
+	//Bad cases.
+	if (dm_trace_ref < 0 || (dm_trace_ref == 0 && state == TRACE_OFF)) {
+		rc = -EINPROGRESS;
+		goto skip_register_trace;
 	}
 
 	switch (state) {
@@ -294,12 +308,15 @@ static int set_all_monitor_traces(int state)
 		break;
 	}
 
-	if (!rc)
-		trace_state = state;
-	else
+skip_register_trace:
+	if (!rc) {
+		if (state == TRACE_ON)
+			dm_trace_ref++;
+		else if (state == TRACE_OFF)
+			dm_trace_ref--;
+	} else
 		rc = -EINPROGRESS;
 
-out_unlock:
 	mutex_unlock(&trace_state_mutex);
 
 	return rc;
@@ -315,22 +332,65 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 static int net_dm_cmd_trace(struct sk_buff *skb,
 			struct genl_info *info)
 {
+	int state;
+	struct net *net;
+	struct per_ns_dm_cb *ns_dm_cb;
+
+	if (!skb->sk)
+		return -ENOTSUPP;
+	net = sock_net(skb->sk);
+	ns_dm_cb = net_generic(net, dm_net_id);
+
+	if (!ns_dm_cb)
+		return -ENOMEM;
+
 	switch (info->genlhdr->cmd) {
 	case NET_DM_CMD_START:
-		return set_all_monitor_traces(TRACE_ON);
+		state = TRACE_ON;
+		break;
+
 	case NET_DM_CMD_STOP:
-		return set_all_monitor_traces(TRACE_OFF);
+		state = TRACE_OFF;
+		break;
+
+	default:
+		return -ENOTSUPP;
 	}
 
-	return -ENOTSUPP;
+	mutex_lock(&ns_dm_cb->ns_dm_mutex);
+	if (state == ns_dm_cb->trace_state) {
+		mutex_unlock(&ns_dm_cb->ns_dm_mutex);
+		return -EAGAIN;
+	}
+
+	if (set_all_monitor_traces(state) != 0) {
+		mutex_unlock(&ns_dm_cb->ns_dm_mutex);
+		return -ENOTSUPP;
+	}
+
+	ns_dm_cb->trace_state = state;
+	mutex_unlock(&ns_dm_cb->ns_dm_mutex);
+
+	return 0;
 }
 
 static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
 {
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net;
+	struct net_device *dev;
 	struct dm_hw_stat_delta *new_stat = NULL;
 	struct dm_hw_stat_delta *tmp;
+	struct per_ns_dm_cb *ns_dm_cb;
+
+	dev = netdev_notifier_info_to_dev(ptr);
+	if (!dev)
+		goto out;
+
+	net = dev_net(dev);
+	ns_dm_cb = net_generic(net, dm_net_id);
+	if (!ns_dm_cb)
+		goto out;
 
 	switch (event) {
 	case NETDEV_REGISTER:
@@ -350,7 +410,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
 			if (new_stat->dev == dev) {
 				new_stat->dev = NULL;
-				if (trace_state == TRACE_OFF) {
+				if (ns_dm_cb->trace_state == TRACE_OFF) {
 					list_del_rcu(&new_stat->list);
 					kfree_rcu(new_stat, rcu);
 					break;
@@ -402,6 +462,7 @@ static int __net_init dm_net_init(struct net *net)
 	if (!ns_dm_cb)
 		return -ENOMEM;
 
+	ns_dm_cb->trace_state = TRACE_OFF;
 	return 0;
 }
 
@@ -432,6 +493,7 @@ static int __init init_net_drop_monitor(void)
 		pr_err("Unable to store program counters on this arch, Drop monitor failed\n");
 		return -ENOSPC;
 	}
+	dm_trace_ref = 0;
 	rc = register_pernet_subsys(&dm_net_ops);
 
 	rc = genl_register_family(&net_drop_monitor_family);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v1 net-next 3/5] drop_monitor: let hw_stats_list support net ns
  2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
  2017-07-12 10:40 ` [PATCH v1 net-next 2/5] drop_monitor: let dm trace state support ns martinbj2008
@ 2017-07-12 10:40 ` martinbj2008
  2017-07-12 10:40 ` [PATCH v1 net-next 4/5] drop_monitor: let drop stat " martinbj2008
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: martinbj2008 @ 2017-07-12 10:40 UTC (permalink / raw)
  To: nhorman, davem; +Cc: netdev, martinbj2008, zhangjunweimartin

From: martin Zhang <zhangjunweimartin@didichuxing.com>

hw_stats_list is used to record NAPI state for net devices.
Every net device belongs to one net ns,
so every net ns has a list head to record them.

Signed-off-by: martin Zhang <zhangjunweimartin@didichuxing.com>
---
 net/core/drop_monitor.c | 54 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 0cf25c3..875e8b4 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -53,10 +53,12 @@ struct ns_pcpu_dm_data {
  * struct per_ns_dm_cb  - drop monitor control block in per net ns.
  * @trace_state:    the trace state.
  * @ns_dm_mutex:    protect whole per_ns_dm_cb.
+ * @hw_stats_list:  monitor for NAPI of net device.
  */
 struct per_ns_dm_cb {
 	int trace_state;
 	struct mutex ns_dm_mutex;
+	struct list_head hw_stats_list;
 };
 
 static DEFINE_MUTEX(trace_state_mutex);
@@ -85,7 +87,6 @@ struct dm_hw_stat_delta {
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);
 
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
@@ -225,16 +226,22 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *locatio
 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 				int work, int budget)
 {
+	struct net *net;
 	struct dm_hw_stat_delta *new_stat;
+	struct per_ns_dm_cb *ns_dm_net;
 
 	/*
 	 * Don't check napi structures with no associated device
 	 */
 	if (!napi->dev)
 		return;
+	net = dev_net(napi->dev);
+	ns_dm_net = net_generic(net, dm_net_id);
+	if (!ns_dm_net)
+		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+	list_for_each_entry_rcu(new_stat, &ns_dm_net->hw_stats_list, list) {
 		/*
 		 * only add a note to our monitor buffer if:
 		 * 1) this is the dev we received on
@@ -256,8 +263,6 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 static int set_all_monitor_traces(int state)
 {
 	int rc = 0;
-	struct dm_hw_stat_delta *new_stat = NULL;
-	struct dm_hw_stat_delta *temp;
 
 	mutex_lock(&trace_state_mutex);
 
@@ -290,16 +295,6 @@ static int set_all_monitor_traces(int state)
 
 		tracepoint_synchronize_unregister();
 
-		/*
-		 * Clean the device list
-		 */
-		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-			if (new_stat->dev == NULL) {
-				list_del_rcu(&new_stat->list);
-				kfree_rcu(new_stat, rcu);
-			}
-		}
-
 		module_put(THIS_MODULE);
 
 		break;
@@ -368,6 +363,19 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
 		return -ENOTSUPP;
 	}
 
+	if (state == TRACE_OFF) {
+		/* Clean the device list */
+		struct dm_hw_stat_delta *new_stat = NULL;
+		struct dm_hw_stat_delta *temp;
+		struct list_head *head = &ns_dm_cb->hw_stats_list;
+
+		list_for_each_entry_safe(new_stat, temp, head, list) {
+			if (!new_stat->dev) {
+				list_del_rcu(&new_stat->list);
+				kfree_rcu(new_stat, rcu);
+			}
+		}
+	}
 	ns_dm_cb->trace_state = state;
 	mutex_unlock(&ns_dm_cb->ns_dm_mutex);
 
@@ -382,6 +390,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 	struct dm_hw_stat_delta *new_stat = NULL;
 	struct dm_hw_stat_delta *tmp;
 	struct per_ns_dm_cb *ns_dm_cb;
+	struct list_head *head;
 
 	dev = netdev_notifier_info_to_dev(ptr);
 	if (!dev)
@@ -391,23 +400,23 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 	ns_dm_cb = net_generic(net, dm_net_id);
 	if (!ns_dm_cb)
 		goto out;
+	head = &ns_dm_cb->hw_stats_list;
 
 	switch (event) {
 	case NETDEV_REGISTER:
 		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
-
 		if (!new_stat)
 			goto out;
 
 		new_stat->dev = dev;
 		new_stat->last_rx = jiffies;
-		mutex_lock(&trace_state_mutex);
-		list_add_rcu(&new_stat->list, &hw_stats_list);
-		mutex_unlock(&trace_state_mutex);
+		mutex_lock(&ns_dm_cb->ns_dm_mutex);
+		list_add_rcu(&new_stat->list, head);
+		mutex_unlock(&ns_dm_cb->ns_dm_mutex);
 		break;
 	case NETDEV_UNREGISTER:
-		mutex_lock(&trace_state_mutex);
-		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
+		mutex_lock(&ns_dm_cb->ns_dm_mutex);
+		list_for_each_entry_safe(new_stat, tmp, head, list) {
 			if (new_stat->dev == dev) {
 				new_stat->dev = NULL;
 				if (ns_dm_cb->trace_state == TRACE_OFF) {
@@ -417,7 +426,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 				}
 			}
 		}
-		mutex_unlock(&trace_state_mutex);
+		mutex_unlock(&ns_dm_cb->ns_dm_mutex);
 		break;
 	}
 out:
@@ -462,7 +471,10 @@ static int __net_init dm_net_init(struct net *net)
 	if (!ns_dm_cb)
 		return -ENOMEM;
 
+	mutex_init(&ns_dm_cb->ns_dm_mutex);
 	ns_dm_cb->trace_state = TRACE_OFF;
+	INIT_LIST_HEAD(&ns_dm_cb->hw_stats_list);
+
 	return 0;
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v1 net-next 4/5] drop_monitor: let drop stat support net ns
  2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
  2017-07-12 10:40 ` [PATCH v1 net-next 2/5] drop_monitor: let dm trace state support ns martinbj2008
  2017-07-12 10:40 ` [PATCH v1 net-next 3/5] drop_monitor: let hw_stats_list support net ns martinbj2008
@ 2017-07-12 10:40 ` martinbj2008
  2017-07-12 18:44   ` kbuild test robot
  2017-07-12 18:44   ` [PATCH] drop_monitor: fix semicolon.cocci warnings kbuild test robot
  2017-07-12 10:40 ` [PATCH v1 net-next 5/5] drop_monitor: increase version when ns support is ready martinbj2008
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 11+ messages in thread
From: martinbj2008 @ 2017-07-12 10:40 UTC (permalink / raw)
  To: nhorman, davem; +Cc: netdev, martinbj2008, zhangjunweimartin

From: martin Zhang <zhangjunweimartin@didichuxing.com>

Move the detailed drop stats to per net ns.
Each net ns has its own per-CPU stats.

Keep the work item per CPU to send the netlink alert message.

All the net ns share one work item per CPU; the work can be scheduled
by any ns, and it sends messages in all the ns.

Signed-off-by: martin Zhang <zhangjunweimartin@didichuxing.com>
---
 net/core/drop_monitor.c | 123 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 91 insertions(+), 32 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 875e8b4..5828bf2 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -47,6 +47,10 @@
  */
 
 struct ns_pcpu_dm_data {
+	spinlock_t	lock;
+	struct sk_buff	*skb;
+	struct net	*net;
+	struct timer_list	send_timer;
 };
 
 /**
@@ -59,15 +63,13 @@ struct per_ns_dm_cb {
 	int trace_state;
 	struct mutex ns_dm_mutex;
 	struct list_head hw_stats_list;
+	struct ns_pcpu_dm_data __percpu *pcpu_data;
 };
 
 static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
-	spinlock_t		lock;
-	struct sk_buff		*skb;
 	struct work_struct	dm_alert_work;
-	struct timer_list	send_timer;
 };
 
 struct dm_hw_stat_delta {
@@ -88,7 +90,7 @@ struct dm_hw_stat_delta {
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 
-static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
+static struct sk_buff *reset_per_cpu_data(struct ns_pcpu_dm_data *ns_dm_data)
 {
 	size_t al;
 	struct net_dm_alert_msg *msg;
@@ -125,11 +127,11 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 	goto out;
 
 err:
-	mod_timer(&data->send_timer, jiffies + HZ / 10);
+	mod_timer(&ns_dm_data->send_timer, jiffies + HZ / 10);
 out:
-	spin_lock_irqsave(&data->lock, flags);
-	swap(data->skb, skb);
-	spin_unlock_irqrestore(&data->lock, flags);
+	spin_lock_irqsave(&ns_dm_data->lock, flags);
+	swap(ns_dm_data->skb, skb);
+	spin_unlock_irqrestore(&ns_dm_data->lock, flags);
 
 	if (skb) {
 		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
@@ -147,16 +149,30 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 
 static void send_dm_alert(struct work_struct *work)
 {
+	struct net *net;
 	struct sk_buff *skb;
-	struct per_cpu_dm_data *data;
-
-	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
-
-	skb = reset_per_cpu_data(data);
-
-	if (skb)
-		genlmsg_multicast(&net_drop_monitor_family, skb, 0,
-				  0, GFP_KERNEL);
+	struct ns_pcpu_dm_data *pcpu_data;
+	struct per_ns_dm_cb *ns_dm_net;
+	struct ns_pcpu_dm_data *data;
+
+	for_each_net_rcu(net) {
+		ns_dm_net = net_generic(net, dm_net_id);
+		if (!ns_dm_net)
+			continue;
+		if (ns_dm_net->trace_state == TRACE_OFF)
+			continue;
+
+		pcpu_data = ns_dm_net->pcpu_data;
+		if (!pcpu_data)
+			continue;
+
+		data = (struct ns_pcpu_dm_data *)this_cpu_ptr(pcpu_data);
+		WARN_ON(data->net != net);
+		skb = reset_per_cpu_data(data);
+		if (skb)
+			genlmsg_multicast_netns(&net_drop_monitor_family, net,
+						skb, 0, 0, GFP_KERNEL);
+	}
 }
 
 /*
@@ -166,9 +182,15 @@ static void send_dm_alert(struct work_struct *work)
  */
 static void sched_send_work(unsigned long _data)
 {
-	struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
+	int cpu;
+	struct per_cpu_dm_data *dm_data;
 
-	schedule_work(&data->dm_alert_work);
+	cpu = (int)_data;
+	if (unlikely(cpu < 0))
+		return;
+
+	dm_data = &per_cpu(dm_cpu_data, cpu);
+	schedule_work(&dm_data->dm_alert_work);
 }
 
 static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -178,14 +200,30 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	struct nlattr *nla;
 	int i;
 	struct sk_buff *dskb;
-	struct per_cpu_dm_data *data;
+	struct ns_pcpu_dm_data *data;
 	unsigned long flags;
+	struct net *net;
+	struct per_ns_dm_cb *ns_dm_net;
+
+	if (skb->dev)
+		net = dev_net(skb->dev);
+	else if (skb->sk)
+		net = sock_net(skb->sk);
+	else
+		return;
+
+	ns_dm_net = net_generic(net, dm_net_id);
+	if (unlikely(!ns_dm_net))
+		return;
+
+	data = this_cpu_ptr(ns_dm_net->pcpu_data);
+	if (unlikely(!data))
+		return;
 
 	local_irq_save(flags);
-	data = this_cpu_ptr(&dm_cpu_data);
 	spin_lock(&data->lock);
-	dskb = data->skb;
 
+	dskb = data->skb;
 	if (!dskb)
 		goto out;
 
@@ -465,7 +503,10 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 
 static int __net_init dm_net_init(struct net *net)
 {
+	int cpu;
+	struct ns_pcpu_dm_data *pcpu_data;
 	struct per_ns_dm_cb *ns_dm_cb;
+	struct ns_pcpu_dm_data *data;
 
 	ns_dm_cb = net_generic(net, dm_net_id);
 	if (!ns_dm_cb)
@@ -474,17 +515,45 @@ static int __net_init dm_net_init(struct net *net)
 	mutex_init(&ns_dm_cb->ns_dm_mutex);
 	ns_dm_cb->trace_state = TRACE_OFF;
 	INIT_LIST_HEAD(&ns_dm_cb->hw_stats_list);
+	pcpu_data = alloc_percpu(struct ns_pcpu_dm_data);
+	ns_dm_cb->pcpu_data = pcpu_data;
+	if (!pcpu_data)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		data = (struct ns_pcpu_dm_data *)per_cpu_ptr(pcpu_data, cpu);
+		spin_lock_init(&data->lock);
+		data->skb = NULL;
+		data->net = net;
+		setup_timer(&data->send_timer, sched_send_work,
+			    (unsigned long)cpu);
+		reset_per_cpu_data(data);
+	}
 
 	return 0;
 }
 
 static void __net_exit dm_net_exit(struct net *net)
 {
+	int cpu;
+	struct ns_pcpu_dm_data *pcpu_data;
 	struct per_ns_dm_cb *ns_dm_cb;
+	struct ns_pcpu_dm_data *data;
 
 	ns_dm_cb = net_generic(net, dm_net_id);
 	if (!ns_dm_cb)
 		return;
+
+	pcpu_data = ns_dm_cb->pcpu_data;
+	if (!pcpu_data)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		data = (struct ns_pcpu_dm_data *)per_cpu_ptr(pcpu_data, cpu);
+		if (data->skb)
+			kfree_skb(data->skb);
+		del_timer_sync(&data->send_timer);;
+	}
 }
 
 static struct pernet_operations dm_net_ops = {
@@ -526,10 +595,6 @@ static int __init init_net_drop_monitor(void)
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
 		INIT_WORK(&data->dm_alert_work, send_dm_alert);
-		setup_timer(&data->send_timer, sched_send_work,
-			    (unsigned long)data);
-		spin_lock_init(&data->lock);
-		reset_per_cpu_data(data);
 	}
 
 
@@ -558,13 +623,7 @@ static void exit_net_drop_monitor(void)
 	unregister_pernet_subsys(&dm_net_ops);
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		del_timer_sync(&data->send_timer);
 		cancel_work_sync(&data->dm_alert_work);
-		/*
-		 * At this point, we should have exclusive access
-		 * to this struct and can free the skb inside it
-		 */
-		kfree_skb(data->skb);
 	}
 
 	BUG_ON(genl_unregister_family(&net_drop_monitor_family));
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v1 net-next 5/5] drop_monitor: increase version when ns support is ready
  2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
                   ` (2 preceding siblings ...)
  2017-07-12 10:40 ` [PATCH v1 net-next 4/5] drop_monitor: let drop stat " martinbj2008
@ 2017-07-12 10:40 ` martinbj2008
  2017-07-12 13:37 ` [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework Neil Horman
  2017-07-12 15:21 ` David Miller
  5 siblings, 0 replies; 11+ messages in thread
From: martinbj2008 @ 2017-07-12 10:40 UTC (permalink / raw)
  To: nhorman, davem; +Cc: netdev, martinbj2008, zhangjunweimartin

From: martin Zhang <zhangjunweimartin@didichuxing.com>

1. Increase the DM netlink version from 2 to 3, as it now supports net ns.
2. Set netnsok to true.

Signed-off-by: martin Zhang <zhangjunweimartin@didichuxing.com>
---
 net/core/drop_monitor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5828bf2..064128b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -489,7 +489,8 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 static struct genl_family net_drop_monitor_family __ro_after_init = {
 	.hdrsize        = 0,
 	.name           = "NET_DM",
-	.version        = 2,
+	.version        = 3,
+	.netnsok        = 1,
 	.module		= THIS_MODULE,
 	.ops		= dropmon_ops,
 	.n_ops		= ARRAY_SIZE(dropmon_ops),
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework
  2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
                   ` (3 preceding siblings ...)
  2017-07-12 10:40 ` [PATCH v1 net-next 5/5] drop_monitor: increase version when ns support is ready martinbj2008
@ 2017-07-12 13:37 ` Neil Horman
  2017-07-12 16:58   ` Cong Wang
  2017-07-12 15:21 ` David Miller
  5 siblings, 1 reply; 11+ messages in thread
From: Neil Horman @ 2017-07-12 13:37 UTC (permalink / raw)
  To: martinbj2008; +Cc: davem, netdev, zhangjunweimartin

On Wed, Jul 12, 2017 at 06:40:49PM +0800, martinbj2008@gmail.com wrote:
> From: martin Zhang <zhangjunweimartin@didichuxing.com>
> 
> This is a serial patch for drop monitor, in order to support net namespace.
> 
> Import two struct to support net ns:
> 
> 1. struct per_ns_dm_cb:
>   Just like its name, it is used in per net ns.
> 
>   In this patch it is empty, but in following patch, these field will be added.
>   a. trace_state: every net ns has a switch to indicate the trace state.
>   b. ns_dm_mutex: the mutex will only work and keep exclusive operatons in a net ns.
>   c. hw_stats_list: monitor for NAPI of net device.
> 
> 2. ns_pcpu_dm_data
>    It is used to replace per_cpu_dm_data under per net ns.
> 
>    per_cpu_dm_data will only keep the dm_alert_work, and the other field
> will be moved to ns_pcpu_dm_data. They do same thing just like current
> code, and the only difference is under per net ns.
> 
>   Keep there is a work under percpu, to send alter netlink message.
> 
> Signed-off-by: martin Zhang <zhangjunweimartin@didichuxing.com>
> ---
> The dropwatch is a very useful tool to diagnose network problem,
> which give us greate help.
> Dropwatch could not work under container(net namespace).
> It is a pitty, so let it support net ns.
> 
Sorry, I'm having a hard time wrapping my head around this.  Why exactly is it
that dropwatch won't work in a namespaced environment?  IIRC, the kfree
tracepoints are namespace agnostic, and so running dropwatch anywhere should
result in seeing drops in all namespaces.  I grant that perhaps it would be nice
to filter on a namespace, but it should all 'just work' for some definition of
the term, no?

Neil

>  net/core/drop_monitor.c | 41 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 41 insertions(+)
> 
> diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
> index 70ccda2..6a75e04 100644
> --- a/net/core/drop_monitor.c
> +++ b/net/core/drop_monitor.c
> @@ -32,6 +32,10 @@
>  #include <trace/events/napi.h>
>  
>  #include <asm/unaligned.h>
> +#include <net/sock.h>
> +#include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +#include <linux/smp.h>
>  
>  #define TRACE_ON 1
>  #define TRACE_OFF 0
> @@ -41,6 +45,13 @@
>   * and the work handle that will send up
>   * netlink alerts
>   */
> +
> +struct ns_pcpu_dm_data {
> +};
> +
> +struct per_ns_dm_cb {
> +};
> +
>  static int trace_state = TRACE_OFF;
>  static DEFINE_MUTEX(trace_state_mutex);
>  
> @@ -59,6 +70,7 @@ struct dm_hw_stat_delta {
>  	unsigned long last_drop_val;
>  };
>  
> +static int dm_net_id __read_mostly;
>  static struct genl_family net_drop_monitor_family;
>  
>  static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
> @@ -382,6 +394,33 @@ static int dropmon_net_event(struct notifier_block *ev_block,
>  	.notifier_call = dropmon_net_event
>  };
>  
> +static int __net_init dm_net_init(struct net *net)
> +{
> +	struct per_ns_dm_cb *ns_dm_cb;
> +
> +	ns_dm_cb = net_generic(net, dm_net_id);
> +	if (!ns_dm_cb)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void __net_exit dm_net_exit(struct net *net)
> +{
> +	struct per_ns_dm_cb *ns_dm_cb;
> +
> +	ns_dm_cb = net_generic(net, dm_net_id);
> +	if (!ns_dm_cb)
> +		return;
> +}
> +
> +static struct pernet_operations dm_net_ops = {
> +	.init = dm_net_init,
> +	.exit = dm_net_exit,
> +	.id   = &dm_net_id,
> +	.size = sizeof(struct per_ns_dm_cb),
> +};
> +
>  static int __init init_net_drop_monitor(void)
>  {
>  	struct per_cpu_dm_data *data;
> @@ -393,6 +432,7 @@ static int __init init_net_drop_monitor(void)
>  		pr_err("Unable to store program counters on this arch, Drop monitor failed\n");
>  		return -ENOSPC;
>  	}
> +	rc = register_pernet_subsys(&dm_net_ops);
>  
>  	rc = genl_register_family(&net_drop_monitor_family);
>  	if (rc) {
> @@ -441,6 +481,7 @@ static void exit_net_drop_monitor(void)
>  	 * or pending schedule calls
>  	 */
>  
> +	unregister_pernet_subsys(&dm_net_ops);
>  	for_each_possible_cpu(cpu) {
>  		data = &per_cpu(dm_cpu_data, cpu);
>  		del_timer_sync(&data->send_timer);
> -- 
> 1.8.3.1
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework
  2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
                   ` (4 preceding siblings ...)
  2017-07-12 13:37 ` [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework Neil Horman
@ 2017-07-12 15:21 ` David Miller
  5 siblings, 0 replies; 11+ messages in thread
From: David Miller @ 2017-07-12 15:21 UTC (permalink / raw)
  To: martinbj2008; +Cc: nhorman, netdev, zhangjunweimartin


You must provide a proper "[PATCH vx net-next 0/N]" header posting with
a patch series, which describes at a high level what the patch series
on a whole is doing, how it is doing it, and why it is doing it that way.

Second, net-next is closed:

	http://vger.kernel.org/~davem/net-next.html

So you should resubmit this when it is open again.

Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework
  2017-07-12 13:37 ` [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework Neil Horman
@ 2017-07-12 16:58   ` Cong Wang
       [not found]     ` <eef69f4a65644a499665d3973bf5bc06@BJSGEXMBX01.didichuxing.com>
  0 siblings, 1 reply; 11+ messages in thread
From: Cong Wang @ 2017-07-12 16:58 UTC (permalink / raw)
  To: Neil Horman
  Cc: martinbj2008, David Miller, Linux Kernel Network Developers,
	zhangjunweimartin

On Wed, Jul 12, 2017 at 6:37 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> On Wed, Jul 12, 2017 at 06:40:49PM +0800, martinbj2008@gmail.com wrote:
>> The dropwatch is a very useful tool to diagnose network problem,
>> which give us greate help.
>> Dropwatch could not work under container(net namespace).
>> It is a pitty, so let it support net ns.
>>
> Sorry, Im having a hard time wrapping my head around this.  Why exactly is it
> that dropwatch won't work in a namespaced environment?  IIRC, the kfree
> tracepoints are namespace agnostic, and so running dropwatch anywhere should
> result in seeing drops in all namespaces.  I grant that perhaps it would be nice
> to filter on a namespace, but it should all 'just work' for some definition of
> the term, no?

Agreed.

And I doubt Martin's implementation which uses skb->sk to retrieve net
works for RX packets, since skb->sk is set very late (except with early demux)
on RX side but we can drop them at anytime...

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework
       [not found]     ` <eef69f4a65644a499665d3973bf5bc06@BJSGEXMBX01.didichuxing.com>
@ 2017-07-12 17:45       ` Cong Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Cong Wang @ 2017-07-12 17:45 UTC (permalink / raw)
  To: 张军伟(基础平台部)
  Cc: Neil Horman, Linux Kernel Network Developers, martinbj2008, David Miller

On Wed, Jul 12, 2017 at 10:08 AM, 张军伟(基础平台部)
<zhangjunweimartin@didichuxing.com> wrote:
> about skb->sk
> it is used as supplementary when skb->dev is empty,such as netlink message。
>
> +       if (skb->dev)
> +               net = dev_net(skb->dev);
> +       else if (skb->sk)
> +               net = sock_net(skb->sk);
> +       else
> +               return;

Check udp_set_dev_scratch().

Again, as Neil mentioned, the idea is arguable; it is actually harder to trace
skbs with your patch when they cross netns'es.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 net-next 4/5] drop_monitor: let drop stat support net ns
  2017-07-12 10:40 ` [PATCH v1 net-next 4/5] drop_monitor: let drop stat " martinbj2008
@ 2017-07-12 18:44   ` kbuild test robot
  2017-07-12 18:44   ` [PATCH] drop_monitor: fix semicolon.cocci warnings kbuild test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2017-07-12 18:44 UTC (permalink / raw)
  To: martinbj2008
  Cc: kbuild-all, nhorman, davem, netdev, martinbj2008, zhangjunweimartin

Hi martin,

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/martinbj2008-gmail-com/drop_monitor-import-netnamespace-framework/20170712-205015


coccinelle warnings: (new ones prefixed by >>)

>> net/core/drop_monitor.c:555:36-37: Unneeded semicolon

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] drop_monitor: fix semicolon.cocci warnings
  2017-07-12 10:40 ` [PATCH v1 net-next 4/5] drop_monitor: let drop stat " martinbj2008
  2017-07-12 18:44   ` kbuild test robot
@ 2017-07-12 18:44   ` kbuild test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2017-07-12 18:44 UTC (permalink / raw)
  To: martinbj2008
  Cc: kbuild-all, nhorman, davem, netdev, martinbj2008, zhangjunweimartin

net/core/drop_monitor.c:555:36-37: Unneeded semicolon


 Remove unneeded semicolon.

Generated by: scripts/coccinelle/misc/semicolon.cocci

Fixes: d5bf05101a5c ("drop_monitor: let drop stat support net ns")
CC: martin Zhang <zhangjunweimartin@didichuxing.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---

 drop_monitor.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -552,7 +552,7 @@ static void __net_exit dm_net_exit(struc
 		data = (struct ns_pcpu_dm_data *)per_cpu_ptr(pcpu_data, cpu);
 		if (data->skb)
 			kfree_skb(data->skb);
-		del_timer_sync(&data->send_timer);;
+		del_timer_sync(&data->send_timer);
 	}
 }
 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-07-12 18:45 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-12 10:40 [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework martinbj2008
2017-07-12 10:40 ` [PATCH v1 net-next 2/5] drop_monitor: let dm trace state support ns martinbj2008
2017-07-12 10:40 ` [PATCH v1 net-next 3/5] drop_monitor: let hw_stats_list support net ns martinbj2008
2017-07-12 10:40 ` [PATCH v1 net-next 4/5] drop_monitor: let drop stat " martinbj2008
2017-07-12 18:44   ` kbuild test robot
2017-07-12 18:44   ` [PATCH] drop_monitor: fix semicolon.cocci warnings kbuild test robot
2017-07-12 10:40 ` [PATCH v1 net-next 5/5] drop_monitor: increase version when ns support is ready martinbj2008
2017-07-12 13:37 ` [PATCH v1 net-next 1/5] drop_monitor: import netnamespace framework Neil Horman
2017-07-12 16:58   ` Cong Wang
     [not found]     ` <eef69f4a65644a499665d3973bf5bc06@BJSGEXMBX01.didichuxing.com>
2017-07-12 17:45       ` Cong Wang
2017-07-12 15:21 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.