From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org
Subject: [PATCH v3] IB/core: Trace points for diagnosing completion queue issues
Date: Wed, 09 Oct 2019 12:54:40 -0400 [thread overview]
Message-ID: <20191009165219.2202.56785.stgit@manet.1015granger.net> (raw)
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
drivers/infiniband/core/Makefile | 2
drivers/infiniband/core/cq.c | 29 +++--
drivers/infiniband/core/trace.c | 14 ++
include/rdma/ib_verbs.h | 2
include/trace/events/rdma_core.h | 218 ++++++++++++++++++++++++++++++++++++++
5 files changed, 255 insertions(+), 10 deletions(-)
create mode 100644 drivers/infiniband/core/trace.c
create mode 100644 include/trace/events/rdma_core.h
Changes since v2:
- Removed extraneous changes to include/trace/events/rdma.h
Changes since RFC:
- Addressed comments from Parav Pandit <parav@mellanox.com>
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 09881bd..68d9e27 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,7 +11,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- nldev.o restrack.o counters.o
+ nldev.o restrack.o counters.o trace.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index bbfded6..bcde992 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -7,6 +7,8 @@
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include <trace/events/rdma_core.h>
+
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
#define IB_POLL_BATCH_DIRECT 8
@@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w)
dim->state = DIM_START_MEASURE;
+ trace_cq_modify(cq, comps, usec);
cq->device->ops.modify_cq(cq, comps, usec);
}
@@ -70,13 +73,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
{
int i, n, completed = 0;
- /*
- * budget might be (-1) if the caller does not
- * want to bound this call, thus we need unsigned
- * minimum here.
- */
- while ((n = ib_poll_cq(cq, min_t(u32, batch,
- budget - completed), wcs)) > 0) {
+ trace_cq_process(cq);
+ while ((n = ib_poll_cq(cq, batch, wcs)) > 0) {
+ trace_cq_poll(cq, batch, n);
for (i = 0; i < n; i++) {
struct ib_wc *wc = &wcs[i];
@@ -87,9 +86,15 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
}
completed += n;
-
if (n != batch || (budget != -1 && completed >= budget))
break;
+
+ /*
+ * budget might be (-1) if the caller does not
+ * want to bound this call, thus we need unsigned
+ * minimum here.
+ */
+ batch = min_t(u32, batch, budget - completed);
}
return completed;
@@ -131,8 +136,10 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
if (completed < budget) {
irq_poll_complete(&cq->iop);
- if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
+ trace_cq_reschedule(cq);
irq_poll_sched(&cq->iop);
+ }
}
if (dim)
@@ -143,6 +150,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
irq_poll_sched(&cq->iop);
}
@@ -162,6 +170,7 @@ static void ib_cq_poll_work(struct work_struct *work)
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
queue_work(cq->comp_wq, &cq->work);
}
@@ -239,6 +248,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
goto out_destroy_cq;
}
+ trace_cq_alloc(cq, comp_vector, poll_ctx);
return cq;
out_destroy_cq:
@@ -304,6 +314,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
WARN_ON_ONCE(1);
}
+ trace_cq_free(cq);
rdma_restrack_del(&cq->res);
cq->device->ops.destroy_cq(cq, udata);
if (cq->dim)
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
new file mode 100644
index 0000000..6c3514b
--- /dev/null
+++ b/drivers/infiniband/core/trace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/ib_verbs.h>
+
+#include <trace/events/rdma_core.h>
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6a47ba8..95a6bce 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1555,6 +1555,8 @@ struct ib_cq {
};
struct workqueue_struct *comp_wq;
struct dim *dim;
+ ktime_t timestamp;
+ bool interrupt;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
diff --git a/include/trace/events/rdma_core.h b/include/trace/events/rdma_core.h
new file mode 100644
index 0000000..c1397a3
--- /dev/null
+++ b/include/trace/events/rdma_core.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdma_core
+
+#if !defined(_TRACE_RDMA_CORE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RDMA_CORE_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/restrack.h>
+
+/*
+ * enum ib_poll_context, from include/rdma/ib_verbs.h
+ */
+#define IB_POLL_CTX_LIST \
+ ib_poll_ctx(DIRECT) \
+ ib_poll_ctx(SOFTIRQ) \
+ ib_poll_ctx(WORKQUEUE) \
+ ib_poll_ctx_end(UNBOUND_WORKQUEUE)
+
+#undef ib_poll_ctx
+#undef ib_poll_ctx_end
+
+#define ib_poll_ctx(x) TRACE_DEFINE_ENUM(IB_POLL_##x);
+#define ib_poll_ctx_end(x) TRACE_DEFINE_ENUM(IB_POLL_##x);
+
+IB_POLL_CTX_LIST
+
+#undef ib_poll_ctx
+#undef ib_poll_ctx_end
+
+#define ib_poll_ctx(x) { IB_POLL_##x, #x },
+#define ib_poll_ctx_end(x) { IB_POLL_##x, #x }
+
+#define rdma_show_ib_poll_ctx(x) \
+ __print_symbolic(x, IB_POLL_CTX_LIST)
+
+/**
+ ** Completion Queue events
+ **/
+
+TRACE_EVENT(cq_schedule,
+ TP_PROTO(
+ struct ib_cq *cq
+ ),
+
+ TP_ARGS(cq),
+
+ TP_STRUCT__entry(
+ __field(u32, id)
+ ),
+
+ TP_fast_assign(
+ cq->timestamp = ktime_get();
+ cq->interrupt = true;
+
+ __entry->id = cq->res.id;
+ ),
+
+ TP_printk("id %u", __entry->id)
+);
+
+TRACE_EVENT(cq_reschedule,
+ TP_PROTO(
+ struct ib_cq *cq
+ ),
+
+ TP_ARGS(cq),
+
+ TP_STRUCT__entry(
+ __field(u32, id)
+ ),
+
+ TP_fast_assign(
+ cq->timestamp = ktime_get();
+ cq->interrupt = false;
+
+ __entry->id = cq->res.id;
+ ),
+
+ TP_printk("id %u", __entry->id)
+);
+
+TRACE_EVENT(cq_process,
+ TP_PROTO(
+ const struct ib_cq *cq
+ ),
+
+ TP_ARGS(cq),
+
+ TP_STRUCT__entry(
+ __field(s64, latency)
+ __field(u32, id)
+ __field(bool, interrupt)
+ ),
+
+ TP_fast_assign(
+ ktime_t latency = ktime_sub(ktime_get(), cq->timestamp);
+
+ __entry->id = cq->res.id;
+ __entry->latency = ktime_to_us(latency);
+ __entry->interrupt = cq->interrupt;
+ ),
+
+ TP_printk("id %u wake-up took %lld [us] from %s",
+ __entry->id, __entry->latency,
+ __entry->interrupt ? "interrupt" : "reschedule"
+ )
+);
+
+TRACE_EVENT(cq_poll,
+ TP_PROTO(
+ const struct ib_cq *cq,
+ int requested,
+ int rc
+ ),
+
+ TP_ARGS(cq, requested, rc),
+
+ TP_STRUCT__entry(
+ __field(u32, id)
+ __field(int, requested)
+ __field(int, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->id = cq->res.id;
+ __entry->requested = requested;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("id %u requested %d, returned %d",
+ __entry->id, __entry->requested, __entry->rc
+ )
+);
+
+TRACE_EVENT(cq_modify,
+ TP_PROTO(
+ const struct ib_cq *cq,
+ u16 comps,
+ u16 usec
+ ),
+
+ TP_ARGS(cq, comps, usec),
+
+ TP_STRUCT__entry(
+ __field(u32, id)
+ __field(unsigned int, comps)
+ __field(unsigned int, usec)
+ ),
+
+ TP_fast_assign(
+ __entry->id = cq->res.id;
+ __entry->comps = comps;
+ __entry->usec = usec;
+ ),
+
+ TP_printk("id %u comps=%u usec=%u",
+ __entry->id, __entry->comps, __entry->usec
+ )
+);
+
+TRACE_EVENT(cq_alloc,
+ TP_PROTO(
+ const struct ib_cq *cq,
+ int comp_vector,
+ enum ib_poll_context poll_ctx
+ ),
+
+ TP_ARGS(cq, comp_vector, poll_ctx),
+
+ TP_STRUCT__entry(
+ __field(u32, id)
+ __field(int, comp_vector)
+ __field(unsigned long, poll_ctx)
+ ),
+
+ TP_fast_assign(
+ __entry->id = cq->res.id;
+ __entry->comp_vector = comp_vector;
+ __entry->poll_ctx = poll_ctx;
+ ),
+
+ TP_printk("id %u comp_vector=%d poll_ctx=%s",
+ __entry->id, __entry->comp_vector,
+ rdma_show_ib_poll_ctx(__entry->poll_ctx)
+ )
+);
+
+TRACE_EVENT(cq_free,
+ TP_PROTO(
+ const struct ib_cq *cq
+ ),
+
+ TP_ARGS(cq),
+
+ TP_STRUCT__entry(
+ __field(u32, id)
+ ),
+
+ TP_fast_assign(
+ __entry->id = cq->res.id;
+ ),
+
+ TP_printk("id %u", __entry->id)
+);
+
+#endif /* _TRACE_RDMA_CORE_H */
+
+#include <trace/define_trace.h>
next reply other threads:[~2019-10-09 16:54 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-09 16:54 Chuck Lever [this message]
2019-10-11 17:26 ` [PATCH v3] IB/core: Trace points for diagnosing completion queue issues Parav Pandit
2019-10-11 17:38 ` Chuck Lever
2019-10-11 17:44 ` Chuck Lever
2019-10-11 17:46 ` Parav Pandit
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191009165219.2202.56785.stgit@manet.1015granger.net \
--to=chuck.lever@oracle.com \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).