From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jesper Dangaard Brouer Subject: [bpf-next PATCH 3/4] xdp: add tracepoint for devmap like cpumap have Date: Wed, 09 May 2018 15:13:09 +0200 Message-ID: <152587158988.20423.12304211614933998174.stgit@firesoul> References: <152587152136.20423.14493673928480468024.stgit@firesoul> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Cc: Christoph Hellwig , =?utf-8?b?QmrDtnJu?= =?utf-8?b?VMO2cGVs?= , Magnus Karlsson To: netdev@vger.kernel.org, Daniel Borkmann , Alexei Starovoitov , Jesper Dangaard Brouer Return-path: Received: from mx3-rdu2.redhat.com ([66.187.233.73]:35844 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S934727AbeEINNL (ORCPT ); Wed, 9 May 2018 09:13:11 -0400 In-Reply-To: <152587152136.20423.14493673928480468024.stgit@firesoul> Sender: netdev-owner@vger.kernel.org List-ID: Notice how this allow us get XDP statistic without affecting the XDP performance, as tracepoint is no-longer activated on a per packet basis. The xdp_monitor sample/tool is updated to use this new tracepoint. Signed-off-by: Jesper Dangaard Brouer --- include/linux/bpf.h | 6 ++++- include/trace/events/xdp.h | 39 +++++++++++++++++++++++++++++++++++ kernel/bpf/devmap.c | 25 ++++++++++++++++++----- net/core/filter.c | 2 +- samples/bpf/xdp_monitor_kern.c | 39 +++++++++++++++++++++++++++++++++++ samples/bpf/xdp_monitor_user.c | 44 +++++++++++++++++++++++++++++++++++++++- 6 files changed, 146 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6457343f6dfa..066b5c67c71f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -489,7 +489,8 @@ struct xdp_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp); +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); @@ -574,7 +575,8 @@ static inline void __dev_map_flush(struct bpf_map *map) struct xdp_buff; static inline -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx) { return 0; } diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 96104610d40e..2e9ef0650144 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -229,6 +229,45 @@ TRACE_EVENT(xdp_cpumap_enqueue, __entry->to_cpu) ); +TRACE_EVENT(xdp_devmap_xmit, + + TP_PROTO(const struct bpf_map *map, u32 map_index, + int sent, int drops, + const struct net_device *from_dev, + const struct net_device *to_dev), + + TP_ARGS(map, map_index, sent, drops, from_dev, to_dev), + + TP_STRUCT__entry( + __field(int, map_id) + __field(u32, act) + __field(u32, map_index) + __field(int, drops) + __field(int, sent) + __field(int, from_ifindex) + __field(int, to_ifindex) + ), + + TP_fast_assign( + __entry->map_id = map->id; + __entry->act = XDP_REDIRECT; + __entry->map_index = map_index; + __entry->drops = drops; + __entry->sent = sent; + __entry->from_ifindex = from_dev->ifindex; + __entry->to_ifindex = to_dev->ifindex; + ), + + TP_printk("ndo_xdp_xmit" + " map_id=%d map_index=%d action=%s" + " sent=%d drops=%d" + " from_ifindex=%d to_ifindex=%d", + __entry->map_id, __entry->map_index, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->sent, __entry->drops, + __entry->from_ifindex, __entry->to_ifindex) +); + #endif /* _TRACE_XDP_H */ #include diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index cab72c100bb5..6f84100723b0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -50,6 +50,7 @@ #include #include #include +#include #define DEV_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) @@ -57,6 +58,7 @@ #define DEV_MAP_BULK_SIZE 16 struct xdp_bulk_queue { struct xdp_frame *q[DEV_MAP_BULK_SIZE]; + struct net_device *dev_rx; unsigned int count; }; @@ -219,8 +221,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit) static int bq_xmit_all(struct bpf_dtab_netdev *obj, struct xdp_bulk_queue *bq) { - unsigned int processed = 0, drops = 0; struct net_device *dev = obj->dev; + int sent = 0, drops = 0; int i; if (unlikely(!bq->count)) @@ -241,10 +243,13 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, drops++; xdp_return_frame(xdpf); } - processed++; + sent++; } bq->count = 0; + trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, + sent, drops, bq->dev_rx, dev); + bq->dev_rx = NULL; return 0; } @@ -301,18 +306,28 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key) /* Runs under RCU-read-side, plus in softirq under NAPI protection. * Thus, safe percpu variable access. */ -static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) bq_xmit_all(obj, bq); + /* Ingress dev_rx will be the same for all xdp_frame's in + * bulk_queue, because bq stored per-CPU and must be flushed + * from net_device drivers NAPI func end. + */ + if (!bq->dev_rx) + bq->dev_rx = dev_rx; + bq->q[bq->count++] = xdpf; return 0; } -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, + struct net_device *dev_rx) { struct net_device *dev = dst->dev; struct xdp_frame *xdpf; @@ -325,7 +340,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp) if (unlikely(!xdpf)) return -EOVERFLOW; - err = bq_enqueue(dst, xdpf); + err = bq_enqueue(dst, xdpf, dev_rx); if (err) return err; diff --git a/net/core/filter.c b/net/core/filter.c index 258d243099e1..8b7924368dc1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3022,7 +3022,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, case BPF_MAP_TYPE_DEVMAP: { struct bpf_dtab_netdev *dst = fwd; - err = dev_map_enqueue(dst, xdp); + err = dev_map_enqueue(dst, xdp, dev_rx); if (err) return err; __dev_map_insert_ctx(map, index); diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 211db8ded0de..2854aa0665ea 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -208,3 +208,42 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) return 0; } + +struct bpf_map_def SEC("maps") devmap_xmit_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct devmap_xmit_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + u32 map_index; // offset:16; size:4; signed:0; + int drops; // offset:20; size:4; signed:1; + int sent; // offset:24; size:4; signed:1; + int from_ifindex; // offset:28; size:4; signed:1; + int to_ifindex; // offset:32; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_devmap_xmit") +int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->sent; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + rec->info += 1; + + return 1; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 894bc64c2cac..4aaf1ab1927d 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -141,6 +141,7 @@ struct stats_record { struct record_u64 xdp_exception[XDP_ACTION_MAX]; struct record xdp_cpumap_kthread; struct record xdp_cpumap_enqueue[MAX_CPUS]; + struct record xdp_devmap_xmit; }; static bool map_collect_record(int fd, __u32 key, struct record *rec) @@ -397,7 +398,7 @@ static void stats_print(struct stats_record *stats_rec, info = calc_info(r, p, t); if (info > 0) i_str = "sched"; - if (pps > 0) + if (pps > 0 || drop > 0) printf(fmt1, "cpumap-kthread", i, pps, drop, info, i_str); } @@ -409,6 +410,42 @@ static void stats_print(struct stats_record *stats_rec, printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); } + /* devmap ndo_xdp_xmit stats */ + { + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s\n"; + struct record *rec, *prev; + double drop, info; + char *i_str = ""; + + rec = &stats_rec->xdp_devmap_xmit; + prev = &stats_prev->xdp_devmap_xmit; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, t); + drop = calc_drop(r, p, t); + info = calc_info(r, p, t); + if (info > 0) { + i_str = "bulk-average"; + info = (pps+drop) / info; /* calc avg bulk */ + } + if (pps > 0 || drop > 0) + printf(fmt1, "devmap-xmit", + i, pps, drop, info, i_str); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop(&rec->total, &prev->total, t); + info = calc_info(&rec->total, &prev->total, t); + if (info > 0) { + i_str = "bulk-average"; + info = (pps+drop) / info; /* calc avg bulk */ + } + printf(fmt2, "devmap-xmit", "total", pps, drop, info, i_str); + } + printf("\n"); } @@ -437,6 +474,9 @@ static bool stats_collect(struct stats_record *rec) fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); + fd = map_data[4].fd; /* map4: devmap_xmit_cnt */ + map_collect_record(fd, 0, &rec->xdp_devmap_xmit); + return true; } @@ -480,6 +520,7 @@ static struct stats_record *alloc_stats_record(void) rec_sz = sizeof(struct datarec); rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); + rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz); for (i = 0; i < MAX_CPUS; i++) rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); @@ -498,6 +539,7 @@ static void free_stats_record(struct stats_record *r) free(r->xdp_exception[i].cpu); free(r->xdp_cpumap_kthread.cpu); + free(r->xdp_devmap_xmit.cpu); for (i = 0; i < MAX_CPUS; i++) free(r->xdp_cpumap_enqueue[i].cpu);