* [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol @ 2009-03-03 17:04 Neil Horman 2009-03-03 18:19 ` Evgeniy Polyakov ` (2 more replies) 0 siblings, 3 replies; 31+ messages in thread From: Neil Horman @ 2009-03-03 17:04 UTC (permalink / raw) To: netdev; +Cc: nhorman, davem, kuznet, pekkas, jmorris, yoshfuji, kaber Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Signed-off-by: Neil Horman <nhorman@tuxdriver.com> include/linux/net_dropmon.h | 54 +++++++ net/core/drop_monitor.c | 307 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 361 insertions(+) diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h new file mode 100644 index 0000000..af11d26 --- /dev/null +++ b/include/linux/net_dropmon.h @@ -0,0 +1,54 @@ +#ifndef __NET_DROPMON_H +#define __NET_DROPMON_H + +#include <linux/netlink.h> + +struct net_dm_drop_point { + uint8_t pc[8]; + uint32_t count; +}; + +typedef enum { + NET_DM_CFG_VERSION = 0, + NET_DM_CFG_ALERT_COUNT, + NET_DM_CFG_ALERT_DELAY, + NET_DM_CFG_MAX, +} config_type_t; + +struct net_dm_config_entry { + config_type_t type; + uint64_t data; +}; + +struct net_dm_config_msg { + size_t entries; + struct net_dm_config_entry options[0]; +}; + +struct net_dm_alert_msg { + size_t entries; + struct net_dm_drop_point points[0]; +}; + +struct net_dm_user_msg { + union { + struct net_dm_config_msg user; + struct net_dm_alert_msg alert; + }u; +}; + +/* + * Group names + */ +#define NET_DM_GRP_ALERTS 1 + + +/* These are the netlink message types for this protocol */ + +#define NET_DM_BASE 0x10 /* Standard Netlink Messages below this */ +#define NET_DM_ALERT (NET_DM_BASE + 1) /* Alert about dropped packets */ +#define NET_DM_CONFIG (NET_DM_BASE + 2) /* Configuration message */ +#define NET_DM_START (NET_DM_BASE + 3) /* Start monitoring */ +#define NET_DM_STOP (NET_DM_BASE + 4) /* Stop monitoring */ +#define NET_DM_MAX (NET_DM_BASE + 5) +#endif diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 0000000..5074f2e --- /dev/null +++ b/net/core/drop_monitor.c @@ -0,0 +1,307 @@ +/* + * Monitoring code for network dropped packet alerts + * + * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/string.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/inet.h> +#include <linux/interrupt.h> +#include <linux/netpoll.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/netlink.h> +#include <linux/net_dropmon.h> +#include <linux/percpu.h> +#include <linux/timer.h> + +#include <trace/skb.h> + +#include <asm/unaligned.h> +#include <asm/bitops.h> + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +#define TRACE_ON 1 +#define TRACE_OFF 0 + +static void send_dm_alert(struct work_struct *unused); + + +/* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ +struct sock *dm_sock; + +struct per_cpu_dm_data { + struct work_struct dm_alert_work; + struct sk_buff *skb; + atomic_t dm_hit_count; + struct timer_list send_timer; +}; + +DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); + +static spinlock_t send_lock = SPIN_LOCK_UNLOCKED; +static int dm_hit_limit = 64; +static int dm_delay = 1; + +static void send_dm_alert(struct work_struct *unused) +{ + size_t al, size; + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + struct sk_buff *skb, *nskb; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + al = sizeof(struct nlmsghdr); + al += sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + + + /* + * Grab the skb we're about to send + */ + skb = data->skb; + + /* + * Replace it with a new one + */ + nskb = alloc_skb(al, GFP_KERNEL); + nlh = (struct nlmsghdr *)nskb->data; + memset(nlh, 0, sizeof(struct nlmsghdr)); + nlh->nlmsg_type = NET_DM_ALERT; + msg = NLMSG_DATA(nlh); + memset(msg, 0, al); + skb_put(nskb, sizeof(struct net_dm_alert_msg)); + skb_put(nskb, sizeof(struct nlmsghdr)); + + data->skb = nskb; + + /* + * Make sure to fix up the length field on the nlmsghdr + */ + nlh = (struct nlmsghdr *)skb->data; + msg = NLMSG_DATA(nlh); + + size = sizeof(struct nlmsghdr) + sizeof (struct net_dm_alert_msg); + size += msg->entries * sizeof(struct net_dm_drop_point); + nlh->nlmsg_len = NLMSG_LENGTH(size); + + /* + * And adjust the skb if we need to + */ + if (nlh->nlmsg_len > size) + skb_put(skb, (nlh->nlmsg_len-size)); + + /* + * Ship it! + */ + NETLINK_CB(skb).dst_group = NET_DM_GRP_ALERTS; + spin_lock(&send_lock); + netlink_broadcast(dm_sock, skb, 0, NET_DM_GRP_ALERTS, 0); + spin_unlock(&send_lock); + + /* + * Reset the per_cpu counter. This unlocks the trace point + * So that we can collect for subsequent drops + */ + atomic_set(&data->dm_hit_count, dm_hit_limit); + +} + +static void sched_send_work(unsigned long unused) +{ + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + schedule_work(&data->dm_alert_work); +} + +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + int i; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { + /* + * we're already at zero, discard this hit + */ + goto out; + } + + nlh = (struct nlmsghdr *)data->skb->data; + msg = NLMSG_DATA(nlh); + for (i=0; i < msg->entries; i++) { + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { + msg->points[i].count++; + goto out; + } + } + + /* + * We need to create a new entry + */ + skb_put(data->skb, sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; + msg->entries++; + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer_on(&data->send_timer, smp_processor_id()); + } + +out: + return; +} + +static int set_all_monitor_traces(int state) +{ + int rc = 0; + + switch (state) { + case TRACE_ON: + rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + break; + case TRACE_OFF: + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + + tracepoint_synchronize_unregister(); + break; + default: + rc = 1; + break; + } + + if (rc) + return -EFAULT; + return rc; +} + +static int dropmon_handle_msg(struct sk_buff *skb, + unsigned char type, unsigned int len) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + int status = 0; + + switch (type) { + case NET_DM_START: + set_all_monitor_traces(TRACE_ON); + break; + case NET_DM_STOP: + set_all_monitor_traces(TRACE_OFF); + break; + case NET_DM_CONFIG: + /* + * This is just a placeholder until + * this protocol has something to configure + */ + netlink_ack(skb, nlh, -ENOTSUPP); + break; + default: + status = -EINVAL; + } + return status; +} + + +static void drpmon_rcv(struct sk_buff *skb) +{ + int status, type, pid, flags, nlmsglen, skblen; + struct nlmsghdr *nlh; + + skblen = skb->len; + if (skblen < sizeof(*nlh)) + return; + + nlh = nlmsg_hdr(skb); + nlmsglen = nlh->nlmsg_len; + if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) + return; + + pid = nlh->nlmsg_pid; + flags = nlh->nlmsg_flags; + type = nlh->nlmsg_type; + + if (pid != 0) + return; + + if (!(flags & NLM_F_REQUEST)) + RCV_SKB_FAIL(-ECOMM); + + + if (type <= NET_DM_BASE) + return; + + if (type >= NET_DM_MAX) + RCV_SKB_FAIL(-EINVAL); + + + status = dropmon_handle_msg(skb, type, + nlmsglen - NLMSG_LENGTH(0)); + if (status < 0) + RCV_SKB_FAIL(status); + + if (flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + return; +} + +static int __init init_net_drop_monitor(void) +{ + int cpu; + size_t al; + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + struct per_cpu_dm_data *data; + printk(KERN_INFO "Initalizing network drop monitor service\n"); + + if (sizeof(void *) > 8) { + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + return -ENOSPC; + } + + dm_sock = netlink_kernel_create(&init_net, NETLINK_DRPMON, NET_DM_GRP_ALERTS, + drpmon_rcv, NULL, THIS_MODULE); + + if (dm_sock == NULL) { + printk(KERN_ERR "Could not create drop monitor socket\n"); + return -ENOMEM; + } + + al = sizeof(struct nlmsghdr); + al += sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + data->skb = alloc_skb(al, GFP_KERNEL); + skb_put(data->skb, sizeof(struct nlmsghdr)); + skb_put(data->skb, sizeof(struct net_dm_alert_msg)); + nlh = (struct nlmsghdr *)data->skb->data; + memset(nlh, 0, sizeof(struct nlmsghdr)); + nlh->nlmsg_type = NET_DM_ALERT; + msg = NLMSG_DATA(nlh); + memset(msg, 0, al); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + atomic_set(&data->dm_hit_count, dm_hit_limit); + init_timer(&data->send_timer); + data->send_timer.data = cpu; + data->send_timer.function = sched_send_work; + } + + return 0; +} + +subsys_initcall(init_net_drop_monitor); ^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 17:04 [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Neil Horman @ 2009-03-03 18:19 ` Evgeniy Polyakov 2009-03-03 19:21 ` Neil Horman 2009-03-05 19:27 ` Neil Horman 2009-03-11 19:51 ` Neil Horman 2 siblings, 1 reply; 31+ messages in thread From: Evgeniy Polyakov @ 2009-03-03 18:19 UTC (permalink / raw) To: Neil Horman; +Cc: netdev, davem, kuznet, pekkas, jmorris, yoshfuji, kaber Hi Neil. On Tue, Mar 03, 2009 at 12:04:35PM -0500, Neil Horman (nhorman@tuxdriver.com) wrote: > +typedef enum { > + NET_DM_CFG_VERSION = 0, > + NET_DM_CFG_ALERT_COUNT, > + NET_DM_CFG_ALERT_DELAY, > + NET_DM_CFG_MAX, > +} config_type_t; > + > +struct net_dm_config_entry { > + config_type_t type; > + uint64_t data; > +}; > + Ugh, please add either some comments about its alignment or some padding fields. > +struct net_dm_config_msg { > + size_t entries; > + struct net_dm_config_entry options[0]; > +}; Isn't size_t have different size on different platforms? > +/* > + * Globals, our netlink socket pointer > + * and the work handle that will send up > + * netlink alerts > + */ > +struct sock *dm_sock; > + > +struct per_cpu_dm_data { > + struct work_struct dm_alert_work; > + struct sk_buff *skb; > + atomic_t dm_hit_count; > + struct timer_list send_timer; > +}; > + > +DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); > + Static dm data? > +static spinlock_t send_lock = SPIN_LOCK_UNLOCKED; DEFINE_SPINLOCK > +static int dm_hit_limit = 64; > +static int dm_delay = 1; > + Configurable? > +static void send_dm_alert(struct work_struct *unused) > +{ > + size_t al, size; > + struct net_dm_alert_msg *msg; > + struct nlmsghdr *nlh; > + struct sk_buff *skb, *nskb; > + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); > + Should it be plain get_cpu_var() or disabled preemption? > + al = sizeof(struct nlmsghdr); Hmm, this worries a bit, shouldn't it use NLMSG_LENGTH() and friends? > + al += sizeof(struct net_dm_alert_msg); > + al += dm_hit_limit * sizeof(struct net_dm_drop_point); > + > + > + /* > + * Grab the skb we're about to send > + */ > + skb = data->skb; > + > + /* > + * Replace it with a new one > + */ > + nskb = alloc_skb(al, GFP_KERNEL); > + nlh = (struct nlmsghdr *)nskb->data; > + memset(nlh, 0, sizeof(struct nlmsghdr)); > + nlh->nlmsg_type = NET_DM_ALERT; > + msg = NLMSG_DATA(nlh); > + memset(msg, 0, al); > + skb_put(nskb, sizeof(struct net_dm_alert_msg)); > + skb_put(nskb, sizeof(struct nlmsghdr)); > + > + data->skb = nskb; > + > + /* > + * Make sure to fix up the length field on the nlmsghdr > + */ > + nlh = (struct nlmsghdr *)skb->data; > + msg = NLMSG_DATA(nlh); > + > + size = sizeof(struct nlmsghdr) + sizeof (struct net_dm_alert_msg); > + size += msg->entries * sizeof(struct net_dm_drop_point); > + nlh->nlmsg_len = NLMSG_LENGTH(size); > + > + /* > + * And adjust the skb if we need to > + */ > + if (nlh->nlmsg_len > size) > + skb_put(skb, (nlh->nlmsg_len-size)); > + > + /* > + * Ship it! > + */ > + NETLINK_CB(skb).dst_group = NET_DM_GRP_ALERTS; Additional space sneaked in. > + spin_lock(&send_lock); > + netlink_broadcast(dm_sock, skb, 0, NET_DM_GRP_ALERTS, 0); Another 3. Also why do you use zero allocation instead of GFP_ATOMIC? Also why do you use global lock here? > + spin_unlock(&send_lock); > + > + /* > + * Reset the per_cpu counter. This unlocks the trace point > + * So that we can collect for subsequent drops > + */ > + atomic_set(&data->dm_hit_count, dm_hit_limit); > + Trailing tab and unneded new line. > +} > + > +static void sched_send_work(unsigned long unused) > +{ > + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); > + Please add some comments that it is called from the timer interrupt and thus should not disable preemption around per-cpu variable. > + schedule_work(&data->dm_alert_work); > +} > + > +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) > +{ > + struct net_dm_alert_msg *msg; > + struct nlmsghdr *nlh; > + int i; > + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); > + > + > + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { > + /* > + * we're already at zero, discard this hit > + */ > + goto out; > + } > + > + nlh = (struct nlmsghdr *)data->skb->data; > + msg = NLMSG_DATA(nlh); > + for (i=0; i < msg->entries; i++) { > + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { Looks like a netlink message to/from userspace contains pointer. If this is the case, then it is not the way to go. > + msg->points[i].count++; > + goto out; > + } > + } > + > + /* > + * We need to create a new entry > + */ > + skb_put(data->skb, sizeof(struct net_dm_drop_point)); What if there is no space? > + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); Please change the structures so that they never contain variable size members like pointers or longs. > + msg->points[msg->entries].count = 1; > + msg->entries++; > + > + if (!timer_pending(&data->send_timer)) { > + data->send_timer.expires = jiffies + dm_delay * HZ; > + add_timer_on(&data->send_timer, smp_processor_id()); > + } > + > +out: > + return; > +} > +static void drpmon_rcv(struct sk_buff *skb) > +{ > + int status, type, pid, flags, nlmsglen, skblen; > + struct nlmsghdr *nlh; > + > + skblen = skb->len; > + if (skblen < sizeof(*nlh)) > + return; > + > + nlh = nlmsg_hdr(skb); > + nlmsglen = nlh->nlmsg_len; > + if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) > + return; > + > + pid = nlh->nlmsg_pid; > + flags = nlh->nlmsg_flags; > + type = nlh->nlmsg_type; > + > + if (pid != 0) > + return; > + > + if (!(flags & NLM_F_REQUEST)) > + RCV_SKB_FAIL(-ECOMM); > + > + Additional newline. > + if (type <= NET_DM_BASE) > + return; > + > + if (type >= NET_DM_MAX) > + RCV_SKB_FAIL(-EINVAL); > + > + And another one. > + status = dropmon_handle_msg(skb, type, > + nlmsglen - NLMSG_LENGTH(0)); > + if (status < 0) > + RCV_SKB_FAIL(status); > + > + if (flags & NLM_F_ACK) > + netlink_ack(skb, nlh, 0); > + return; > +} > + > +static int __init init_net_drop_monitor(void) > +{ > + int cpu; > + size_t al; > + struct net_dm_alert_msg *msg; > + struct nlmsghdr *nlh; > + struct per_cpu_dm_data *data; > + printk(KERN_INFO "Initalizing network drop monitor service\n"); > + > + if (sizeof(void *) > 8) { > + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); Why? :) > + return -ENOSPC; > + } > + > + dm_sock = netlink_kernel_create(&init_net, NETLINK_DRPMON, NET_DM_GRP_ALERTS, > + drpmon_rcv, NULL, THIS_MODULE); > + > + if (dm_sock == NULL) { > + printk(KERN_ERR "Could not create drop monitor socket\n"); > + return -ENOMEM; > + } > + > + al = sizeof(struct nlmsghdr); > + al += sizeof(struct net_dm_alert_msg); > + al += dm_hit_limit * sizeof(struct net_dm_drop_point); > + Please use NLMSG helpers here. > + for_each_present_cpu(cpu) { > + data = &per_cpu(dm_cpu_data, cpu); > + data->skb = alloc_skb(al, GFP_KERNEL); > + skb_put(data->skb, sizeof(struct nlmsghdr)); > + skb_put(data->skb, sizeof(struct net_dm_alert_msg)); > + nlh = (struct nlmsghdr *)data->skb->data; > + memset(nlh, 0, sizeof(struct nlmsghdr)); > + nlh->nlmsg_type = NET_DM_ALERT; > + msg = NLMSG_DATA(nlh); > + memset(msg, 0, al); > + INIT_WORK(&data->dm_alert_work, send_dm_alert); > + atomic_set(&data->dm_hit_count, dm_hit_limit); > + init_timer(&data->send_timer); > + data->send_timer.data = cpu; > + data->send_timer.function = sched_send_work; > + } > + > + return 0; > +} > + > +subsys_initcall(init_net_drop_monitor); > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Evgeniy Polyakov ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 18:19 ` Evgeniy Polyakov @ 2009-03-03 19:21 ` Neil Horman 2009-03-03 22:14 ` David Miller 2009-03-03 22:16 ` David Miller 0 siblings, 2 replies; 31+ messages in thread From: Neil Horman @ 2009-03-03 19:21 UTC (permalink / raw) To: Evgeniy Polyakov; +Cc: netdev, davem, kuznet, pekkas, jmorris, yoshfuji, kaber On Tue, Mar 03, 2009 at 09:19:09PM +0300, Evgeniy Polyakov wrote: > Hi Neil. > > On Tue, Mar 03, 2009 at 12:04:35PM -0500, Neil Horman (nhorman@tuxdriver.com) wrote: > > +typedef enum { > > + NET_DM_CFG_VERSION = 0, > > + NET_DM_CFG_ALERT_COUNT, > > + NET_DM_CFG_ALERT_DELAY, > > + NET_DM_CFG_MAX, > > +} config_type_t; > > + > > +struct net_dm_config_entry { > > + config_type_t type; > > + uint64_t data; > > +}; > > + > > Ugh, please add either some comments about its alignment or some padding > fields. > Yeah, I probably should align that to 64 bit boundaries for performance, thanks! > > +struct net_dm_config_msg { > > + size_t entries; > > + struct net_dm_config_entry options[0]; > > +}; > > Isn't size_t have different size on different platforms? > Probably, but we're only sending this data over netlink sockets, so despite the platform, I don't really see this as an issue. About the only place for concern I think are cases like x86_64 and ppc64, in the event you have a 64 bit kernel and 32 bit user space, and in those the app can be adjusted to compensate for this. I suppose I can fixate the size if its a real concern, but I don't think it really is. > > +/* > > + * Globals, our netlink socket pointer > > + * and the work handle that will send up > > + * netlink alerts > > + */ > > +struct sock *dm_sock; > > + > > +struct per_cpu_dm_data { > > + struct work_struct dm_alert_work; > > + struct sk_buff *skb; > > + atomic_t dm_hit_count; > > + struct timer_list send_timer; > > +}; > > + > > +DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); > > + > > Static dm data? > Noted, I can make it static, but I don't think its a big deal, given that its not exported anywhere, so its not accessible to anything else. > > +static spinlock_t send_lock = SPIN_LOCK_UNLOCKED; > > DEFINE_SPINLOCK > will do, thanks. > > +static int dm_hit_limit = 64; > > +static int dm_delay = 1; > > + > > Configurable? > Eventually, see implementation notes in part 0 of this series :) > > +static void send_dm_alert(struct work_struct *unused) > > +{ > > + size_t al, size; > > + struct net_dm_alert_msg *msg; > > + struct nlmsghdr *nlh; > > + struct sk_buff *skb, *nskb; > > + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); > > + > > Should it be plain get_cpu_var() or disabled preemption? > I don't think I really need to worry about it. The atomic_add_unless should gate us from multiple contexts on the same cpu getting into the critical region beyond. e.g. preemption should be safe here. > > + al = sizeof(struct nlmsghdr); > > Hmm, this worries a bit, shouldn't it use NLMSG_LENGTH() and friends? > Hmm, perhaps, the padding does go between the end of the nlmsghdr and the start of the data, doesn't it? I think its ok right now, since I use NLMSG_DATA to find the offset of the data (as long as we don't fill up the skbs). I should fix that, thank you! > > + al += sizeof(struct net_dm_alert_msg); > > + al += dm_hit_limit * sizeof(struct net_dm_drop_point); > > + > > + > > + /* > > + * Grab the skb we're about to send > > + */ > > + skb = data->skb; > > + > > + /* > > + * Replace it with a new one > > + */ > > + nskb = alloc_skb(al, GFP_KERNEL); > > + nlh = (struct nlmsghdr *)nskb->data; > > + memset(nlh, 0, sizeof(struct nlmsghdr)); > > + nlh->nlmsg_type = NET_DM_ALERT; > > + msg = NLMSG_DATA(nlh); > > + memset(msg, 0, al); > > + skb_put(nskb, sizeof(struct net_dm_alert_msg)); > > + skb_put(nskb, sizeof(struct nlmsghdr)); > > + > > + data->skb = nskb; > > + > > + /* > > + * Make sure to fix up the length field on the nlmsghdr > > + */ > > + nlh = (struct nlmsghdr *)skb->data; > > + msg = NLMSG_DATA(nlh); > > + > > + size = sizeof(struct nlmsghdr) + sizeof (struct net_dm_alert_msg); > > + size += msg->entries * sizeof(struct net_dm_drop_point); > > + nlh->nlmsg_len = NLMSG_LENGTH(size); > > + > > + /* > > + * And adjust the skb if we need to > > + */ > > + if (nlh->nlmsg_len > size) > > + skb_put(skb, (nlh->nlmsg_len-size)); > > + > > + /* > > + * Ship it! > > + */ > > + NETLINK_CB(skb).dst_group = NET_DM_GRP_ALERTS; > > Additional space sneaked in. > I'll correct that, thanks! > > + spin_lock(&send_lock); > > + netlink_broadcast(dm_sock, skb, 0, NET_DM_GRP_ALERTS, 0); > > Another 3. Also why do you use zero allocation instead of GFP_ATOMIC? > Also why do you use global lock here? > Not sure I understand your first question, why would I use GFP_ATOMIC? I'd rather keep the allocation of skb data out of the fast path entirely If at all possible. AS for the global lock, I was trying to avoid two processors sending on the socket at once, but as I look at it, its completely unneeded, isn't it? I'll correct that shortly. Thanks! > > + spin_unlock(&send_lock); > > + > > + /* > > + * Reset the per_cpu counter. This unlocks the trace point > > + * So that we can collect for subsequent drops > > + */ > > + atomic_set(&data->dm_hit_count, dm_hit_limit); > > + > > Trailing tab and unneded new line. > Will fix, thank you! > > +} > > + > > +static void sched_send_work(unsigned long unused) > > +{ > > + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); > > + > > Please add some comments that it is called from the timer interrupt and > thus should not disable preemption around per-cpu variable. > will do. Thank you! > > + schedule_work(&data->dm_alert_work); > > +} > > + > > +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) > > +{ > > + struct net_dm_alert_msg *msg; > > + struct nlmsghdr *nlh; > > + int i; > > + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); > > + > > + > > + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { > > + /* > > + * we're already at zero, discard this hit > > + */ > > + goto out; > > + } > > + > > + nlh = (struct nlmsghdr *)data->skb->data; > > + msg = NLMSG_DATA(nlh); > > + for (i=0; i < msg->entries; i++) { > > + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { > > Looks like a netlink message to/from userspace contains pointer. If this > is the case, then it is not the way to go. > It does, and that is exactly the idea. The intent here is for the user space application to be informed of where in the kernel a packet was dropped. The user space app then translates that into a symbol+offset so that the admin knows where the drop occured (using /proc/kallsyms, kernel debuigging info, etc). > > + msg->points[i].count++; > > + goto out; > > + } > > + } > > + > > + /* > > + * We need to create a new entry > > + */ > > + skb_put(data->skb, sizeof(struct net_dm_drop_point)); > > What if there is no space? > The skb is allocated so there will be exactly enough space. We allocate the skb so thata there is exactly enough room for the nlmsghdr the drop alert header and the dm_hit_limit number of drop points. Thats why each cpu has a dm_hit_counter, and if we decrement that counter to zero, we drop subsequent trace hits until the timer fires, the event thread sends the skb and allocates us a new one. > > + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); > > Please change the structures so that they never contain variable size > members like pointers or longs. > They don't, pc is an array of uint8_t's. > > + msg->points[msg->entries].count = 1; > > + msg->entries++; > > + > > + if (!timer_pending(&data->send_timer)) { > > + data->send_timer.expires = jiffies + dm_delay * HZ; > > + add_timer_on(&data->send_timer, smp_processor_id()); > > + } > > + > > +out: > > + return; > > +} > > > > +static void drpmon_rcv(struct sk_buff *skb) > > +{ > > + int status, type, pid, flags, nlmsglen, skblen; > > + struct nlmsghdr *nlh; > > + > > + skblen = skb->len; > > + if (skblen < sizeof(*nlh)) > > + return; > > + > > + nlh = nlmsg_hdr(skb); > > + nlmsglen = nlh->nlmsg_len; > > + if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) > > + return; > > + > > + pid = nlh->nlmsg_pid; > > + flags = nlh->nlmsg_flags; > > + type = nlh->nlmsg_type; > > + > > + if (pid != 0) > > + return; > > + > > + if (!(flags & NLM_F_REQUEST)) > > + RCV_SKB_FAIL(-ECOMM); > > + > > + > > Additional newline. > Will fix, thanks > > + if (type <= NET_DM_BASE) > > + return; > > + > > + if (type >= NET_DM_MAX) > > + RCV_SKB_FAIL(-EINVAL); > > + > > + > > And another one. > Ditto > > + status = dropmon_handle_msg(skb, type, > > + nlmsglen - NLMSG_LENGTH(0)); > > + if (status < 0) > > + RCV_SKB_FAIL(status); > > + > > + if (flags & NLM_F_ACK) > > + netlink_ack(skb, nlh, 0); > > + return; > > +} > > + > > +static int __init init_net_drop_monitor(void) > > +{ > > + int cpu; > > + size_t al; > > + struct net_dm_alert_msg *msg; > > + struct nlmsghdr *nlh; > > + struct per_cpu_dm_data *data; > > + printk(KERN_INFO "Initalizing network drop monitor service\n"); > > + > > + if (sizeof(void *) > 8) { > > + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); > > Why? :) > For the reason you pointed out above :) > > + return -ENOSPC; > > + } > > + > > + dm_sock = netlink_kernel_create(&init_net, NETLINK_DRPMON, NET_DM_GRP_ALERTS, > > + drpmon_rcv, NULL, THIS_MODULE); > > + > > + if (dm_sock == NULL) { > > + printk(KERN_ERR "Could not create drop monitor socket\n"); > > + return -ENOMEM; > > + } > > + > > + al = sizeof(struct nlmsghdr); > > + al += sizeof(struct net_dm_alert_msg); > > + al += dm_hit_limit * sizeof(struct net_dm_drop_point); > > + > > Please use NLMSG helpers here. > As noted earlier, I'll fix that up. > > + for_each_present_cpu(cpu) { > > + data = &per_cpu(dm_cpu_data, cpu); > > + data->skb = alloc_skb(al, GFP_KERNEL); > > + skb_put(data->skb, sizeof(struct nlmsghdr)); > > + skb_put(data->skb, sizeof(struct net_dm_alert_msg)); > > + nlh = (struct nlmsghdr *)data->skb->data; > > + memset(nlh, 0, sizeof(struct nlmsghdr)); > > + nlh->nlmsg_type = NET_DM_ALERT; > > + msg = NLMSG_DATA(nlh); > > + memset(msg, 0, al); > > + INIT_WORK(&data->dm_alert_work, send_dm_alert); > > + atomic_set(&data->dm_hit_count, dm_hit_limit); > > + init_timer(&data->send_timer); > > + data->send_timer.data = cpu; > > + data->send_timer.function = sched_send_work; > > + } > > + > > + return 0; > > +} > > + > > +subsys_initcall(init_net_drop_monitor); > > -- > > To unsubscribe from this list: send the line "unsubscribe netdev" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > Evgeniy Polyakov > Ok, thank you for the through review, I've noted all of these. I'll wait a few days to see if anyone else has comments, and will repost a new version of this part of the series when I think all the comments are in. Regards Neil ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 19:21 ` Neil Horman @ 2009-03-03 22:14 ` David Miller 2009-03-03 22:16 ` David Miller 1 sibling, 0 replies; 31+ messages in thread From: David Miller @ 2009-03-03 22:14 UTC (permalink / raw) To: nhorman; +Cc: zbr, netdev, kuznet, pekkas, jmorris, yoshfuji, kaber From: Neil Horman <nhorman@tuxdriver.com> Date: Tue, 3 Mar 2009 14:21:07 -0500 > On Tue, Mar 03, 2009 at 09:19:09PM +0300, Evgeniy Polyakov wrote: > > Hi Neil. > > > > On Tue, Mar 03, 2009 at 12:04:35PM -0500, Neil Horman (nhorman@tuxdriver.com) wrote: > > > +typedef enum { > > > + NET_DM_CFG_VERSION = 0, > > > + NET_DM_CFG_ALERT_COUNT, > > > + NET_DM_CFG_ALERT_DELAY, > > > + NET_DM_CFG_MAX, > > > +} config_type_t; > > > + > > > +struct net_dm_config_entry { > > > + config_type_t type; > > > + uint64_t data; > > > +}; > > > + > > > > Ugh, please add either some comments about its alignment or some padding > > fields. > > > Yeah, I probably should align that to 64 bit boundaries for performance, thanks! Actually you should use "aligned_u64" otherwise there will be compat issues on 32-bit x86 on x86_64/ia64 since 32-bit x86 only 4-byte aligns 64-bit objects unless told otherwise. And using anon-fixed type like an enum for "type" is also likely not such a good idea. I would just use a __u32 and some defines instead. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 19:21 ` Neil Horman 2009-03-03 22:14 ` David Miller @ 2009-03-03 22:16 ` David Miller 2009-03-04 10:06 ` Patrick McHardy 2009-03-04 11:44 ` Neil Horman 1 sibling, 2 replies; 31+ messages in thread From: David Miller @ 2009-03-03 22:16 UTC (permalink / raw) To: nhorman; +Cc: zbr, netdev, kuznet, pekkas, jmorris, yoshfuji, kaber From: Neil Horman <nhorman@tuxdriver.com> Date: Tue, 3 Mar 2009 14:21:07 -0500 > On Tue, Mar 03, 2009 at 09:19:09PM +0300, Evgeniy Polyakov wrote: > > > +struct net_dm_config_msg { > > > + size_t entries; > > > + struct net_dm_config_entry options[0]; > > > +}; > > > > Isn't size_t have different size on different platforms? > > > Probably, but we're only sending this data over netlink sockets, so despite the > platform, I don't really see this as an issue. About the only place for concern > I think are cases like x86_64 and ppc64, in the event you have a 64 bit kernel > and 32 bit user space, and in those the app can be adjusted to compensate for > this. I suppose I can fixate the size if its a real concern, but I don't think > it really is. Neil, that is _THE_ concern. You have to use portable types that will be both sized and aligned identically on both 32-bit and 64-bit variants of a given platform. This means size_t is absolutely not usable. We really don't have a clean way to add compat layer translators for netlink, so you have to get this right from the beginning. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 22:16 ` David Miller @ 2009-03-04 10:06 ` Patrick McHardy 2009-03-04 11:00 ` David Miller 2009-03-04 11:44 ` Neil Horman 1 sibling, 1 reply; 31+ messages in thread From: Patrick McHardy @ 2009-03-04 10:06 UTC (permalink / raw) To: David Miller; +Cc: nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji David Miller wrote: > From: Neil Horman <nhorman@tuxdriver.com> > Date: Tue, 3 Mar 2009 14:21:07 -0500 > >> On Tue, Mar 03, 2009 at 09:19:09PM +0300, Evgeniy Polyakov wrote: >>>> +struct net_dm_config_msg { >>>> + size_t entries; >>>> + struct net_dm_config_entry options[0]; >>>> +}; >>> Isn't size_t have different size on different platforms? >>> >> Probably, but we're only sending this data over netlink sockets, so despite the >> platform, I don't really see this as an issue. About the only place for concern >> I think are cases like x86_64 and ppc64, in the event you have a 64 bit kernel >> and 32 bit user space, and in those the app can be adjusted to compensate for >> this. I suppose I can fixate the size if its a real concern, but I don't think >> it really is. > > Neil, that is _THE_ concern. You have to use portable types that > will be both sized and aligned identically on both 32-bit and > 64-bit variants of a given platform. > > This means size_t is absolutely not usable. > > We really don't have a clean way to add compat layer translators > for netlink, so you have to get this right from the beginning. It should also be noted that netlink attributes only provide 4 byte alignment. Not sure what the current status of handling unaligned accesses on all architectures is, but something that might have to be taken into consideration. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-04 10:06 ` Patrick McHardy @ 2009-03-04 11:00 ` David Miller 2009-04-02 9:39 ` Herbert Xu 0 siblings, 1 reply; 31+ messages in thread From: David Miller @ 2009-03-04 11:00 UTC (permalink / raw) To: kaber; +Cc: nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Patrick McHardy <kaber@trash.net> Date: Wed, 04 Mar 2009 11:06:41 +0100 > It should also be noted that netlink attributes only provide 4 byte > alignment. Not sure what the current status of handling unaligned > accesses on all architectures is, but something that might have to > be taken into consideration. Indeed, we have hacks in libnl to work around some of these issues :-( ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-04 11:00 ` David Miller @ 2009-04-02 9:39 ` Herbert Xu 2009-04-02 9:50 ` David Miller 2009-04-02 9:52 ` David Miller 0 siblings, 2 replies; 31+ messages in thread From: Herbert Xu @ 2009-04-02 9:39 UTC (permalink / raw) To: David Miller Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji (Catching up with old emails) David Miller <davem@davemloft.net> wrote: > From: Patrick McHardy <kaber@trash.net> > Date: Wed, 04 Mar 2009 11:06:41 +0100 > >> It should also be noted that netlink attributes only provide 4 byte >> alignment. Not sure what the current status of handling unaligned >> accesses on all architectures is, but something that might have to >> be taken into consideration. > > Indeed, we have hacks in libnl to work around some of these > issues :-( Requiring the use of aligned_u64 instead of __u64 in netlink structs should help with new data structures, right? Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 9:39 ` Herbert Xu @ 2009-04-02 9:50 ` David Miller 2009-04-02 9:52 ` David Miller 1 sibling, 0 replies; 31+ messages in thread From: David Miller @ 2009-04-02 9:50 UTC (permalink / raw) To: herbert; +Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu, 2 Apr 2009 17:39:52 +0800 > (Catching up with old emails) > > David Miller <davem@davemloft.net> wrote: > > From: Patrick McHardy <kaber@trash.net> > > Date: Wed, 04 Mar 2009 11:06:41 +0100 > > > >> It should also be noted that netlink attributes only provide 4 byte > >> alignment. Not sure what the current status of handling unaligned > >> accesses on all architectures is, but something that might have to > >> be taken into consideration. > > > > Indeed, we have hacks in libnl to work around some of these > > issues :-( > > Requiring the use of aligned_u64 instead of __u64 in netlink > structs should help with new data structures, right? Yes, it should. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 9:39 ` Herbert Xu 2009-04-02 9:50 ` David Miller @ 2009-04-02 9:52 ` David Miller 2009-04-02 9:59 ` Herbert Xu 1 sibling, 1 reply; 31+ messages in thread From: David Miller @ 2009-04-02 9:52 UTC (permalink / raw) To: herbert; +Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu, 2 Apr 2009 17:39:52 +0800 > (Catching up with old emails) > > David Miller <davem@davemloft.net> wrote: > > From: Patrick McHardy <kaber@trash.net> > > Date: Wed, 04 Mar 2009 11:06:41 +0100 > > > >> It should also be noted that netlink attributes only provide 4 byte > >> alignment. Not sure what the current status of handling unaligned > >> accesses on all architectures is, but something that might have to > >> be taken into consideration. > > > > Indeed, we have hacks in libnl to work around some of these > > issues :-( > > Requiring the use of aligned_u64 instead of __u64 in netlink > structs should help with new data structures, right? I lied. It doesn't help, because the problem is that attributes can only ever be 4 byte aligned, so no matter what attributes you tag the types with they will be only 4-byte aligned. It has to do with how the netlink attributes get packed together into the messages. It's this crap: #define NLMSG_ALIGNTO 4 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) ... #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) Thus, nothing in an nlmsg can ever be more than 4 byte aligned. And this is hard coded into the protocol. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 9:52 ` David Miller @ 2009-04-02 9:59 ` Herbert Xu 2009-04-02 14:42 ` Patrick McHardy 2009-04-05 9:54 ` David Miller 0 siblings, 2 replies; 31+ messages in thread From: Herbert Xu @ 2009-04-02 9:59 UTC (permalink / raw) To: David Miller Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji On Thu, Apr 02, 2009 at 02:52:23AM -0700, David Miller wrote: > > It has to do with how the netlink attributes get packed together > into the messages. It's this crap: > > #define NLMSG_ALIGNTO 4 > #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) > ... > #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ > (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) > > Thus, nothing in an nlmsg can ever be more than 4 byte aligned. > And this is hard coded into the protocol. Ah yes, this is the killer. I suppose we can modify in-kernel helpers like nla_parse to deal with that by copying if it's unaligned. Though we'll have to figure out how to free the memory. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 9:59 ` Herbert Xu @ 2009-04-02 14:42 ` Patrick McHardy 2009-04-02 14:45 ` Herbert Xu 2009-04-05 9:54 ` David Miller 1 sibling, 1 reply; 31+ messages in thread From: Patrick McHardy @ 2009-04-02 14:42 UTC (permalink / raw) To: Herbert Xu Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji Herbert Xu wrote: > On Thu, Apr 02, 2009 at 02:52:23AM -0700, David Miller wrote: >> It has to do with how the netlink attributes get packed together >> into the messages. It's this crap: >> >> #define NLMSG_ALIGNTO 4 >> #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) >> ... >> #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ >> (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) >> >> Thus, nothing in an nlmsg can ever be more than 4 byte aligned. >> And this is hard coded into the protocol. > > Ah yes, this is the killer. > > I suppose we can modify in-kernel helpers like nla_parse to deal > with that by copying if it's unaligned. Though we'll have to > figure out how to free the memory. Actually we should be fine since the current netlink helpers only do bytewise copying anyways. And I think we've pretty much gotten rid of all the raw attribute accesses. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 14:42 ` Patrick McHardy @ 2009-04-02 14:45 ` Herbert Xu 2009-04-02 14:57 ` Patrick McHardy 2009-04-05 9:56 ` David Miller 0 siblings, 2 replies; 31+ messages in thread From: Herbert Xu @ 2009-04-02 14:45 UTC (permalink / raw) To: Patrick McHardy Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji On Thu, Apr 02, 2009 at 04:42:18PM +0200, Patrick McHardy wrote: > > Actually we should be fine since the current netlink helpers only do > bytewise copying anyways. And I think we've pretty much gotten rid of > all the raw attribute accesses. What about stuff like xfrm_userpolicy_info? I suppose if *everything* is copied then it wouldn't matter. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 14:45 ` Herbert Xu @ 2009-04-02 14:57 ` Patrick McHardy 2009-04-02 14:59 ` Herbert Xu 2009-04-05 9:56 ` David Miller 1 sibling, 1 reply; 31+ messages in thread From: Patrick McHardy @ 2009-04-02 14:57 UTC (permalink / raw) To: Herbert Xu Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji Herbert Xu wrote: > On Thu, Apr 02, 2009 at 04:42:18PM +0200, Patrick McHardy wrote: >> Actually we should be fine since the current netlink helpers only do >> bytewise copying anyways. And I think we've pretty much gotten rid of >> all the raw attribute accesses. > > What about stuff like xfrm_userpolicy_info? I suppose if *everything* > is copied then it wouldn't matter. I didn't spot the problem, it seems it alreay copies all (sub)structures that have larger alignment requirements. Presuming I simply missed it, I think it would be best to gradually move to a set of nested attributes since some of the structures also have holes that are currently not cleared during dumping (I guess not a big deal since its root-only) and IIRC there are some 32/64 bit compatibility issues. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 14:57 ` Patrick McHardy @ 2009-04-02 14:59 ` Herbert Xu 2009-04-02 15:06 ` Patrick McHardy 0 siblings, 1 reply; 31+ messages in thread From: Herbert Xu @ 2009-04-02 14:59 UTC (permalink / raw) To: Patrick McHardy Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji On Thu, Apr 02, 2009 at 04:57:36PM +0200, Patrick McHardy wrote: > > I didn't spot the problem, it seems it alreay copies all (sub)structures > that have larger alignment requirements. Oh no I didn't mean that xfrm_userpolicy_info has a problem, for all I know it might not even have a u64 inside :) If we can ensure that all netlink backends use the helpers that copy, even for structures then it should definitely work. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 14:59 ` Herbert Xu @ 2009-04-02 15:06 ` Patrick McHardy 2009-04-02 15:09 ` Herbert Xu 2009-04-05 9:57 ` David Miller 0 siblings, 2 replies; 31+ messages in thread From: Patrick McHardy @ 2009-04-02 15:06 UTC (permalink / raw) To: Herbert Xu Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji Herbert Xu wrote: > On Thu, Apr 02, 2009 at 04:57:36PM +0200, Patrick McHardy wrote: >> I didn't spot the problem, it seems it alreay copies all (sub)structures >> that have larger alignment requirements. > > Oh no I didn't mean that xfrm_userpolicy_info has a problem, for > all I know it might not even have a u64 inside :) :) > If we can ensure that all netlink backends use the helpers that > copy, even for structures then it should definitely work. Yes, that should always work. For new stuff I think we should try to avoid encapsulating larger structures in single attributes though since it forces us to do copy everything, instead of just the types that really require it (and the padding issues in case of at least non-priviledged interfaces). ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 15:06 ` Patrick McHardy @ 2009-04-02 15:09 ` Herbert Xu 2009-04-02 15:14 ` Patrick McHardy 2009-04-05 9:57 ` David Miller 1 sibling, 1 reply; 31+ messages in thread From: Herbert Xu @ 2009-04-02 15:09 UTC (permalink / raw) To: Patrick McHardy Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji On Thu, Apr 02, 2009 at 05:06:40PM +0200, Patrick McHardy wrote: > > Yes, that should always work. For new stuff I think we should try > to avoid encapsulating larger structures in single attributes though > since it forces us to do copy everything, instead of just the types > that really require it (and the padding issues in case of at least > non-priviledged interfaces). Are there any situations where we want to use a struct? Perhaps we can just ban it altogether for new code? Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 15:09 ` Herbert Xu @ 2009-04-02 15:14 ` Patrick McHardy 2009-04-02 15:30 ` Herbert Xu 0 siblings, 1 reply; 31+ messages in thread From: Patrick McHardy @ 2009-04-02 15:14 UTC (permalink / raw) To: Herbert Xu Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji Herbert Xu wrote: > On Thu, Apr 02, 2009 at 05:06:40PM +0200, Patrick McHardy wrote: >> Yes, that should always work. For new stuff I think we should try >> to avoid encapsulating larger structures in single attributes though >> since it forces us to do copy everything, instead of just the types >> that really require it (and the padding issues in case of at least >> non-priviledged interfaces). > > Are there any situations where we want to use a struct? Perhaps > we can just ban it altogether for new code? I'd prefer that as well. The only reason to do this is to save a few bytes and cycles for attribute encapsulation. I don't think this matters at all, judging by the fact that I've never seen a userspace implementation using message batching, the current users don't seem to care about performance *that* much. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 15:14 ` Patrick McHardy @ 2009-04-02 15:30 ` Herbert Xu 2009-04-05 9:59 ` David Miller 0 siblings, 1 reply; 31+ messages in thread From: Herbert Xu @ 2009-04-02 15:30 UTC (permalink / raw) To: Patrick McHardy Cc: David Miller, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji On Thu, Apr 02, 2009 at 05:14:50PM +0200, Patrick McHardy wrote: > > I'd prefer that as well. The only reason to do this is to save a few > bytes and cycles for attribute encapsulation. I don't think this > matters at all, judging by the fact that I've never seen a userspace > implementation using message batching, the current users don't seem > to care about performance *that* much. OK, let's all just keep an eye out for new struct users and see how it goes. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 15:30 ` Herbert Xu @ 2009-04-05 9:59 ` David Miller 2009-04-06 13:21 ` Patrick McHardy 0 siblings, 1 reply; 31+ messages in thread From: David Miller @ 2009-04-05 9:59 UTC (permalink / raw) To: herbert; +Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu, 2 Apr 2009 23:30:42 +0800 > On Thu, Apr 02, 2009 at 05:14:50PM +0200, Patrick McHardy wrote: > > > > I'd prefer that as well. The only reason to do this is to save a few > > bytes and cycles for attribute encapsulation. I don't think this > > matters at all, judging by the fact that I've never seen a userspace > > implementation using message batching, the current users don't seem > > to care about performance *that* much. > > OK, let's all just keep an eye out for new struct users and see > how it goes. It might be about time to start working on netlink-2 seriously. :-) We can start treating it like a real protocol, firmly define the endianness of everything to kill all the over-the-wire issues, etc. and deal with this 4-byte-align stuff as well. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-05 9:59 ` David Miller @ 2009-04-06 13:21 ` Patrick McHardy 2009-06-10 8:08 ` David Miller 0 siblings, 1 reply; 31+ messages in thread From: Patrick McHardy @ 2009-04-06 13:21 UTC (permalink / raw) To: David Miller Cc: herbert, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji David Miller wrote: > From: Herbert Xu <herbert@gondor.apana.org.au> > Date: Thu, 2 Apr 2009 23:30:42 +0800 > >> On Thu, Apr 02, 2009 at 05:14:50PM +0200, Patrick McHardy wrote: >>> I'd prefer that as well. The only reason to do this is to save a few >>> bytes and cycles for attribute encapsulation. I don't think this >>> matters at all, judging by the fact that I've never seen a userspace >>> implementation using message batching, the current users don't seem >>> to care about performance *that* much. >> OK, let's all just keep an eye out for new struct users and see >> how it goes. > > It might be about time to start working on netlink-2 seriously. > :-) > > We can start treating it like a real protocol, firmly define > the endianness of everything to kill all the over-the-wire > issues, etc. and deal with this 4-byte-align stuff as well. We could add a setsockopt option for userspace to specify "version 2" operation. The netlink attributes helpers would need to be aware of the currently used protocol version and could transparently choose a different encoding (also taking care of endianess etc). For the case of one value per attribute this should be a straight-forward conversion. In case of structures that contain values with different endianness, we'll probably need to define a new encoding (ideally also one attribute per value) for version 2. I'd also suggest to get rid of the 64k attribute and message size limit at the same time, this is making it quite hard to support transactional semantics for large updates (f.i. for nf_tables sets). ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-06 13:21 ` Patrick McHardy @ 2009-06-10 8:08 ` David Miller 2009-06-10 10:35 ` Patrick McHardy 0 siblings, 1 reply; 31+ messages in thread From: David Miller @ 2009-06-10 8:08 UTC (permalink / raw) To: kaber; +Cc: herbert, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Patrick McHardy <kaber@trash.net> Date: Mon, 06 Apr 2009 15:21:45 +0200 > David Miller wrote: >> We can start treating it like a real protocol, firmly define >> the endianness of everything to kill all the over-the-wire >> issues, etc. and deal with this 4-byte-align stuff as well. > > We could add a setsockopt option for userspace to specify "version 2" > operation. The netlink attributes helpers would need to be aware of > the currently used protocol version and could transparently choose > a different encoding (also taking care of endianess etc). For the > case of one value per attribute this should be a straight-forward > conversion. In case of structures that contain values with different > endianness, we'll probably need to define a new encoding (ideally > also one attribute per value) for version 2. Ok. > I'd also suggest to get rid of the 64k attribute and message size > limit at the same time, this is making it quite hard to support > transactional semantics for large updates (f.i. for nf_tables sets). Another option is to support continuations of some sort. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-06-10 8:08 ` David Miller @ 2009-06-10 10:35 ` Patrick McHardy 0 siblings, 0 replies; 31+ messages in thread From: Patrick McHardy @ 2009-06-10 10:35 UTC (permalink / raw) To: David Miller Cc: herbert, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji David Miller wrote: > From: Patrick McHardy <kaber@trash.net> > Date: Mon, 06 Apr 2009 15:21:45 +0200 > >> I'd also suggest to get rid of the 64k attribute and message size >> limit at the same time, this is making it quite hard to support >> transactional semantics for large updates (f.i. for nf_tables sets). > > Another option is to support continuations of some sort. Yes, something like NLM_F_MULTI to accumulate multiple messages might help. I'm not sure yet whether this can be used to make the entire operation atomic since the individual attributes still can't span more than one skb, but it looks worth a try. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 15:06 ` Patrick McHardy 2009-04-02 15:09 ` Herbert Xu @ 2009-04-05 9:57 ` David Miller 1 sibling, 0 replies; 31+ messages in thread From: David Miller @ 2009-04-05 9:57 UTC (permalink / raw) To: kaber; +Cc: herbert, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Patrick McHardy <kaber@trash.net> Date: Thu, 02 Apr 2009 17:06:40 +0200 > Herbert Xu wrote: > > If we can ensure that all netlink backends use the helpers that > > copy, even for structures then it should definitely work. > > Yes, that should always work. It should work on the kernel side, the user side is still broken and I can show you the hacks in libnl to prove it :-) ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 14:45 ` Herbert Xu 2009-04-02 14:57 ` Patrick McHardy @ 2009-04-05 9:56 ` David Miller 1 sibling, 0 replies; 31+ messages in thread From: David Miller @ 2009-04-05 9:56 UTC (permalink / raw) To: herbert; +Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu, 2 Apr 2009 22:45:49 +0800 > On Thu, Apr 02, 2009 at 04:42:18PM +0200, Patrick McHardy wrote: > > > > Actually we should be fine since the current netlink helpers only do > > bytewise copying anyways. And I think we've pretty much gotten rid of > > all the raw attribute accesses. > > What about stuff like xfrm_userpolicy_info? I suppose if *everything* > is copied then it wouldn't matter. You were saying? [ 6400.609361] Kernel unaligned access at TPC[10258734] copy_to_user_state+0x54/0x9c [xfrm_user] [ 6400.609498] Kernel unaligned access at TPC[10258734] copy_to_user_state+0x54/0x9c [xfrm_user] [ 6400.667575] Kernel unaligned access at TPC[10258734] copy_to_user_state+0x54/0x9c [xfrm_user] [ 6400.667707] Kernel unaligned access at TPC[10258734] copy_to_user_state+0x54/0x9c [xfrm_user] This problem has existed for a long time. In this specific case even if you use memcpy() (in fact memcpy() is being used here) GCC sees the types and says it can do aligned 64-bit loads and stores to do the memcpy() inline. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-04-02 9:59 ` Herbert Xu 2009-04-02 14:42 ` Patrick McHardy @ 2009-04-05 9:54 ` David Miller 1 sibling, 0 replies; 31+ messages in thread From: David Miller @ 2009-04-05 9:54 UTC (permalink / raw) To: herbert; +Cc: kaber, nhorman, zbr, netdev, kuznet, pekkas, jmorris, yoshfuji From: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu, 2 Apr 2009 17:59:57 +0800 > On Thu, Apr 02, 2009 at 02:52:23AM -0700, David Miller wrote: > > > > It has to do with how the netlink attributes get packed together > > into the messages. It's this crap: > > > > #define NLMSG_ALIGNTO 4 > > #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) > > ... > > #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ > > (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) > > > > Thus, nothing in an nlmsg can ever be more than 4 byte aligned. > > And this is hard coded into the protocol. > > Ah yes, this is the killer. > > I suppose we can modify in-kernel helpers like nla_parse to deal > with that by copying if it's unaligned. Though we'll have to > figure out how to free the memory. Note that this issue exists on the user side as well. libnl has some ugly hacks to deal with touching u64 stuff received in netlink messages due to this. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 22:16 ` David Miller 2009-03-04 10:06 ` Patrick McHardy @ 2009-03-04 11:44 ` Neil Horman 1 sibling, 0 replies; 31+ messages in thread From: Neil Horman @ 2009-03-04 11:44 UTC (permalink / raw) To: David Miller; +Cc: zbr, netdev, kuznet, pekkas, jmorris, yoshfuji, kaber On Tue, Mar 03, 2009 at 02:16:12PM -0800, David Miller wrote: > From: Neil Horman <nhorman@tuxdriver.com> > Date: Tue, 3 Mar 2009 14:21:07 -0500 > > > On Tue, Mar 03, 2009 at 09:19:09PM +0300, Evgeniy Polyakov wrote: > > > > +struct net_dm_config_msg { > > > > + size_t entries; > > > > + struct net_dm_config_entry options[0]; > > > > +}; > > > > > > Isn't size_t have different size on different platforms? > > > > > Probably, but we're only sending this data over netlink sockets, so despite the > > platform, I don't really see this as an issue. About the only place for concern > > I think are cases like x86_64 and ppc64, in the event you have a 64 bit kernel > > and 32 bit user space, and in those the app can be adjusted to compensate for > > this. I suppose I can fixate the size if its a real concern, but I don't think > > it really is. > > Neil, that is _THE_ concern. You have to use portable types that > will be both sized and aligned identically on both 32-bit and > 64-bit variants of a given platform. > > This means size_t is absolutely not usable. > > We really don't have a clean way to add compat layer translators > for netlink, so you have to get this right from the beginning. > Understood, I've changed this in my local tree, and will repost when I've collected remaining comments (I'd like to avoid my usual 30 revisions of patch :) ) Neil ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 17:04 [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Neil Horman 2009-03-03 18:19 ` Evgeniy Polyakov @ 2009-03-05 19:27 ` Neil Horman 2009-03-11 16:17 ` David Miller 2009-03-11 19:51 ` Neil Horman 2 siblings, 1 reply; 31+ messages in thread From: Neil Horman @ 2009-03-05 19:27 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, pekkas, jmorris, yoshfuji, kaber Ok, as requested new version of the patch to implement the drop monitor trace hook and netlink service Modification notes: 1) As requested, modified the code to use generic netlink sockets rather than create a new netlink protocol family. This implies that patch 1/5 can be dropped from this series entirely, since all it did was add that protocol family identifier 2) General checkpatch cleanup. 3) Fix some alignment issues in net_dropmon.h. Note that I used an explicit __attribute__, rather than aligned_u64, since the later doesn't appear to be exposed to user space. Let me know what you all think. Thanks! Neil Signed-off-by: Neil Horman <nhorman@tuxdriver.com> include/linux/net_dropmon.h | 56 +++++++++ net/core/drop_monitor.c | 265 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 321 insertions(+) Network Drop Monitor: Adding drop monitor implementation & Netlink protocol diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h new file mode 100644 index 0000000..7ef35cb --- /dev/null +++ b/include/linux/net_dropmon.h @@ -0,0 +1,56 @@ +#ifndef __NET_DROPMON_H +#define __NET_DROPMON_H + +#include <linux/netlink.h> + +struct net_dm_drop_point { + uint8_t pc[8]; + uint32_t count; +}; + +#define NET_DM_CFG_VERSION 0 +#define NET_DM_CFG_ALERT_COUNT 1 +#define NET_DM_CFG_ALERT_DELAY 2 +#define NET_DM_CFG_MAX 3 + +struct net_dm_config_entry { + uint32_t type; + uint64_t data __attribute__((aligned(8))); +}; + +struct net_dm_config_msg { + uint32_t entries; + struct net_dm_config_entry options[0]; +}; + +struct net_dm_alert_msg { + uint32_t entries; + struct net_dm_drop_point points[0]; +}; + +struct net_dm_user_msg { + union { + struct net_dm_config_msg user; + struct net_dm_alert_msg alert; + } u; +}; + + +/* These are the netlink message types for this protocol */ + +enum { + NET_DM_CMD_UNSPEC = 0, + NET_DM_CMD_ALERT, + NET_DM_CMD_CONFIG, + NET_DM_CMD_START, + NET_DM_CMD_STOP, + _NET_DM_CMD_MAX, +}; + +#define NET_DM_CMD_MAX (_NET_DM_CMD_MAX - 1) + +/* + * Our group identifiers + */ +#define NET_DM_GRP_ALERT 1 +#endif diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 0000000..a59ba6f --- /dev/null +++ b/net/core/drop_monitor.c @@ -0,0 +1,265 @@ +/* + * Monitoring code for network dropped packet alerts + * + * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/string.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/inet.h> +#include <linux/interrupt.h> +#include <linux/netpoll.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/netlink.h> +#include <linux/net_dropmon.h> +#include <linux/percpu.h> +#include <linux/timer.h> +#include <linux/bitops.h> +#include <net/genetlink.h> + +#include <trace/skb.h> + +#include <asm/unaligned.h> + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +#define TRACE_ON 1 +#define TRACE_OFF 0 + +static void send_dm_alert(struct work_struct *unused); + + +/* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ +struct sock *dm_sock; + +struct per_cpu_dm_data { + struct work_struct dm_alert_work; + struct sk_buff *skb; + atomic_t dm_hit_count; + struct timer_list send_timer; +}; + +static struct genl_family net_drop_monitor_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "NET_DM", + .version = 1, + .maxattr = NET_DM_CMD_MAX, +}; + +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); + +static int dm_hit_limit = 64; +static int dm_delay = 1; + + +static void reset_per_cpu_data(struct per_cpu_dm_data *data) +{ + size_t al; + struct net_dm_alert_msg *msg; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + data->skb = genlmsg_new(al, GFP_KERNEL); + genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); + memset(msg, 0, al); + atomic_set(&data->dm_hit_count, dm_hit_limit); +} + +static void send_dm_alert(struct work_struct *unused) +{ + struct sk_buff *skb; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + /* + * Grab the skb we're about to send + */ + skb = data->skb; + + /* + * Replace it with a new one + */ + reset_per_cpu_data(data); + + /* + * Ship it! + */ + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + +} + +/* + * This is the timer function to delay the sending of an alert + * in the event that more drops will arrive during the + * hysteresis period. Note that it operates under the timer interrupt + * so we don't need to disable preemption here + */ +static void sched_send_work(unsigned long unused) +{ + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + schedule_work(&data->dm_alert_work); +} + +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + int i; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { + /* + * we're already at zero, discard this hit + */ + goto out; + } + + nlh = (struct nlmsghdr *)data->skb->data; + msg = genlmsg_data(nlmsg_data(nlh)); + for (i = 0; i < msg->entries; i++) { + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { + msg->points[i].count++; + goto out; + } + } + + /* + * We need to create a new entry + */ + __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; + msg->entries++; + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer_on(&data->send_timer, smp_processor_id()); + } + +out: + return; +} + +static int set_all_monitor_traces(int state) +{ + int rc = 0; + + switch (state) { + case TRACE_ON: + rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + break; + case TRACE_OFF: + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + + tracepoint_synchronize_unregister(); + break; + default: + rc = 1; + break; + } + + if (rc) + return -EINPROGRESS; + return rc; +} + + +static int net_dm_cmd_config(struct sk_buff *skb, + struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int net_dm_cmd_trace(struct sk_buff *skb, + struct genl_info *info) +{ + switch (info->genlhdr->cmd) { + case NET_DM_CMD_START: + return set_all_monitor_traces(TRACE_ON); + break; + case NET_DM_CMD_STOP: + return set_all_monitor_traces(TRACE_OFF); + break; + } + + return -ENOTSUPP; +} + + +static struct genl_ops dropmon_ops[] = { + { + .cmd = NET_DM_CMD_CONFIG, + .doit = net_dm_cmd_config, + }, + { + .cmd = NET_DM_CMD_START, + .doit = net_dm_cmd_trace, + }, + { + .cmd = NET_DM_CMD_STOP, + .doit = net_dm_cmd_trace, + }, +}; + +static int __init init_net_drop_monitor(void) +{ + int cpu; + int rc, i, ret; + struct per_cpu_dm_data *data; + printk(KERN_INFO "Initalizing network drop monitor service\n"); + + if (sizeof(void *) > 8) { + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + return -ENOSPC; + } + + if (genl_register_family(&net_drop_monitor_family) < 0) { + printk(KERN_ERR "Could not create drop monitor netlink family\n"); + return -EFAULT; + } + + rc = -EFAULT; + + for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { + ret = genl_register_ops(&net_drop_monitor_family, + &dropmon_ops[i]); + if (ret) { + printk(KERN_CRIT "failed to register operation %d\n", + dropmon_ops[i].cmd); + goto out_unreg; + } + } + + rc = 0; + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + reset_per_cpu_data(data); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + init_timer(&data->send_timer); + data->send_timer.data = cpu; + data->send_timer.function = sched_send_work; + } + goto out; + +out_unreg: + genl_unregister_family(&net_drop_monitor_family); +out: + return rc; +} + +late_initcall(init_net_drop_monitor); ^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-05 19:27 ` Neil Horman @ 2009-03-11 16:17 ` David Miller 0 siblings, 0 replies; 31+ messages in thread From: David Miller @ 2009-03-11 16:17 UTC (permalink / raw) To: nhorman; +Cc: netdev, kuznet, pekkas, jmorris, yoshfuji, kaber From: Neil Horman <nhorman@tuxdriver.com> Date: Thu, 5 Mar 2009 14:27:06 -0500 > +struct net_dm_drop_point { > + uint8_t pc[8]; > + uint32_t count; > +}; Please use "__u8", "__u32", etc. and you'll get those as a result of your linux/netlink.h include at the top of this file. > +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) Please don't embed return statements in macors, this makes the code nearly impossible to audit. I don't even see this macro being used in the code :-) Otherwise this looks fine. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-03 17:04 [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Neil Horman 2009-03-03 18:19 ` Evgeniy Polyakov 2009-03-05 19:27 ` Neil Horman @ 2009-03-11 19:51 ` Neil Horman 2009-03-13 19:10 ` David Miller 2 siblings, 1 reply; 31+ messages in thread From: Neil Horman @ 2009-03-11 19:51 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, pekkas, jmorris, yoshfuji, kaber On Tue, Mar 03, 2009 at 12:04:35PM -0500, Neil Horman wrote: > > As requested, respinning to latest net-next tree Modification notes: 1) Converted use of unit*_t to __u* in net_dropmon.h Neil Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Signed-off-by: Neil Horman <nhorman@tuxdriver.com> include/linux/net_dropmon.h | 56 +++++++++ net/core/drop_monitor.c | 263 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 319 insertions(+) diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h new file mode 100644 index 0000000..0217fb8 --- /dev/null +++ b/include/linux/net_dropmon.h @@ -0,0 +1,56 @@ +#ifndef __NET_DROPMON_H +#define __NET_DROPMON_H + +#include <linux/netlink.h> + +struct net_dm_drop_point { + __u8 pc[8]; + __u32 count; +}; + +#define NET_DM_CFG_VERSION 0 +#define NET_DM_CFG_ALERT_COUNT 1 +#define NET_DM_CFG_ALERT_DELAY 2 +#define NET_DM_CFG_MAX 3 + +struct net_dm_config_entry { + __u32 type; + __u64 data __attribute__((aligned(8))); +}; + +struct net_dm_config_msg { + __u32 entries; + struct net_dm_config_entry options[0]; +}; + +struct net_dm_alert_msg { + __u32 entries; + struct net_dm_drop_point points[0]; +}; + +struct net_dm_user_msg { + union { + struct net_dm_config_msg user; + struct net_dm_alert_msg alert; + } u; +}; + + +/* These are the netlink message types for this protocol */ + +enum { + NET_DM_CMD_UNSPEC = 0, + NET_DM_CMD_ALERT, + NET_DM_CMD_CONFIG, + NET_DM_CMD_START, + NET_DM_CMD_STOP, + _NET_DM_CMD_MAX, +}; + +#define NET_DM_CMD_MAX (_NET_DM_CMD_MAX - 1) + +/* + * Our group identifiers + */ +#define NET_DM_GRP_ALERT 1 +#endif diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 0000000..9fd0dc3 --- /dev/null +++ b/net/core/drop_monitor.c @@ -0,0 +1,263 @@ +/* + * Monitoring code for network dropped packet alerts + * + * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/string.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/inet.h> +#include <linux/interrupt.h> +#include <linux/netpoll.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/netlink.h> +#include <linux/net_dropmon.h> +#include <linux/percpu.h> +#include <linux/timer.h> +#include <linux/bitops.h> +#include <net/genetlink.h> + +#include <trace/skb.h> + +#include <asm/unaligned.h> + +#define TRACE_ON 1 +#define TRACE_OFF 0 + +static void send_dm_alert(struct work_struct *unused); + + +/* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ +struct sock *dm_sock; + +struct per_cpu_dm_data { + struct work_struct dm_alert_work; + struct sk_buff *skb; + atomic_t dm_hit_count; + struct timer_list send_timer; +}; + +static struct genl_family net_drop_monitor_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "NET_DM", + .version = 1, + .maxattr = NET_DM_CMD_MAX, +}; + +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); + +static int dm_hit_limit = 64; +static int dm_delay = 1; + + +static void reset_per_cpu_data(struct per_cpu_dm_data *data) +{ + size_t al; + struct net_dm_alert_msg *msg; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + data->skb = genlmsg_new(al, GFP_KERNEL); + genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); + memset(msg, 0, al); + atomic_set(&data->dm_hit_count, dm_hit_limit); +} + +static void send_dm_alert(struct work_struct *unused) +{ + struct sk_buff *skb; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + /* + * Grab the skb we're about to send + */ + skb = data->skb; + + /* + * Replace it with a new one + */ + reset_per_cpu_data(data); + + /* + * Ship it! + */ + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + +} + +/* + * This is the timer function to delay the sending of an alert + * in the event that more drops will arrive during the + * hysteresis period. Note that it operates under the timer interrupt + * so we don't need to disable preemption here + */ +static void sched_send_work(unsigned long unused) +{ + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + schedule_work(&data->dm_alert_work); +} + +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + int i; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { + /* + * we're already at zero, discard this hit + */ + goto out; + } + + nlh = (struct nlmsghdr *)data->skb->data; + msg = genlmsg_data(nlmsg_data(nlh)); + for (i = 0; i < msg->entries; i++) { + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { + msg->points[i].count++; + goto out; + } + } + + /* + * We need to create a new entry + */ + __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; + msg->entries++; + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer_on(&data->send_timer, smp_processor_id()); + } + +out: + return; +} + +static int set_all_monitor_traces(int state) +{ + int rc = 0; + + switch (state) { + case TRACE_ON: + rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + break; + case TRACE_OFF: + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + + tracepoint_synchronize_unregister(); + break; + default: + rc = 1; + break; + } + + if (rc) + return -EINPROGRESS; + return rc; +} + + +static int net_dm_cmd_config(struct sk_buff *skb, + struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int net_dm_cmd_trace(struct sk_buff *skb, + struct genl_info *info) +{ + switch (info->genlhdr->cmd) { + case NET_DM_CMD_START: + return set_all_monitor_traces(TRACE_ON); + break; + case NET_DM_CMD_STOP: + return set_all_monitor_traces(TRACE_OFF); + break; + } + + return -ENOTSUPP; +} + + +static struct genl_ops dropmon_ops[] = { + { + .cmd = NET_DM_CMD_CONFIG, + .doit = net_dm_cmd_config, + }, + { + .cmd = NET_DM_CMD_START, + .doit = net_dm_cmd_trace, + }, + { + .cmd = NET_DM_CMD_STOP, + .doit = net_dm_cmd_trace, + }, +}; + +static int __init init_net_drop_monitor(void) +{ + int cpu; + int rc, i, ret; + struct per_cpu_dm_data *data; + printk(KERN_INFO "Initalizing network drop monitor service\n"); + + if (sizeof(void *) > 8) { + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + return -ENOSPC; + } + + if (genl_register_family(&net_drop_monitor_family) < 0) { + printk(KERN_ERR "Could not create drop monitor netlink family\n"); + return -EFAULT; + } + + rc = -EFAULT; + + for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { + ret = genl_register_ops(&net_drop_monitor_family, + &dropmon_ops[i]); + if (ret) { + printk(KERN_CRIT "failed to register operation %d\n", + dropmon_ops[i].cmd); + goto out_unreg; + } + } + + rc = 0; + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + reset_per_cpu_data(data); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + init_timer(&data->send_timer); + data->send_timer.data = cpu; + data->send_timer.function = sched_send_work; + } + goto out; + +out_unreg: + genl_unregister_family(&net_drop_monitor_family); +out: + return rc; +} + +late_initcall(init_net_drop_monitor); ^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol 2009-03-11 19:51 ` Neil Horman @ 2009-03-13 19:10 ` David Miller 0 siblings, 0 replies; 31+ messages in thread From: David Miller @ 2009-03-13 19:10 UTC (permalink / raw) To: nhorman; +Cc: netdev, kuznet, pekkas, jmorris, yoshfuji, kaber From: Neil Horman <nhorman@tuxdriver.com> Date: Wed, 11 Mar 2009 15:51:26 -0400 > Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Applied. ^ permalink raw reply [flat|nested] 31+ messages in thread
end of thread, other threads:[~2009-06-10 10:35 UTC | newest] Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2009-03-03 17:04 [Patch 4/5] Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Neil Horman 2009-03-03 18:19 ` Evgeniy Polyakov 2009-03-03 19:21 ` Neil Horman 2009-03-03 22:14 ` David Miller 2009-03-03 22:16 ` David Miller 2009-03-04 10:06 ` Patrick McHardy 2009-03-04 11:00 ` David Miller 2009-04-02 9:39 ` Herbert Xu 2009-04-02 9:50 ` David Miller 2009-04-02 9:52 ` David Miller 2009-04-02 9:59 ` Herbert Xu 2009-04-02 14:42 ` Patrick McHardy 2009-04-02 14:45 ` Herbert Xu 2009-04-02 14:57 ` Patrick McHardy 2009-04-02 14:59 ` Herbert Xu 2009-04-02 15:06 ` Patrick McHardy 2009-04-02 15:09 ` Herbert Xu 2009-04-02 15:14 ` Patrick McHardy 2009-04-02 15:30 ` Herbert Xu 2009-04-05 9:59 ` David Miller 2009-04-06 13:21 ` Patrick McHardy 2009-06-10 8:08 ` David Miller 2009-06-10 10:35 ` Patrick McHardy 2009-04-05 9:57 ` David Miller 2009-04-05 9:56 ` David Miller 2009-04-05 9:54 ` David Miller 2009-03-04 11:44 ` Neil Horman 2009-03-05 19:27 ` Neil Horman 2009-03-11 16:17 ` David Miller 2009-03-11 19:51 ` Neil Horman 2009-03-13 19:10 ` David Miller
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.