* [PATCH v2 2/3] RDMA/cm: Replace pr_debug() call sites with tracepoints
2020-07-20 17:16 [PATCH v2 0/3] IB CM tracepoints Chuck Lever
2020-07-20 17:16 ` [PATCH v2 1/3] RDMA/core: Move the rdma_show_ib_cm_event() macro Chuck Lever
@ 2020-07-20 17:16 ` Chuck Lever
2020-07-20 17:16 ` [PATCH v2 3/3] RDMA/cm: Add tracepoints to track MAD send operations Chuck Lever
2020-07-31 19:07 ` [PATCH v2 0/3] IB CM tracepoints Jason Gunthorpe
3 siblings, 0 replies; 5+ messages in thread
From: Chuck Lever @ 2020-07-20 17:16 UTC (permalink / raw)
To: linux-rdma
In the interest of converging on a common instrumentation
infrastructure, modernize the pr_debug() call sites added by commit
119bf81793ea ("IB/cm: Add debug prints to ib_cm"). The new
tracepoints appear in a new "ib_cma" subsystem.
The conversion is somewhat mechanical. Someone more familiar with
the semantics of the recorded information might suggest additional
data capture.
Some benefits include:
- Tracepoints enable "always on" reporting of these errors
- The error records are structured and compact
- Tracepoints provide hooks for eBPF scripts
Sample output:
nfsd-1954 [003] 62.017901: icm_dreq_skipped: local_id=1998890974 remote_id=1129750393 state=DREQ_RCVD lap_state=LAP_UNINIT
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
drivers/infiniband/core/Makefile | 2
drivers/infiniband/core/cm.c | 80 +++------
drivers/infiniband/core/cm_trace.c | 15 ++
drivers/infiniband/core/cm_trace.h | 309 ++++++++++++++++++++++++++++++++++++
4 files changed, 351 insertions(+), 55 deletions(-)
create mode 100644 drivers/infiniband/core/cm_trace.c
create mode 100644 drivers/infiniband/core/cm_trace.h
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 24cb71a16a28..ccf2670ef45e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -17,7 +17,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
-ib_cm-y := cm.o
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0d1377232933..78554e679297 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -27,6 +27,7 @@
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -2124,8 +2125,7 @@ static int cm_req_handler(struct cm_work *work)
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
- pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id));
+ trace_icm_no_listener_err(&cm_id_priv->id);
cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
goto destroy;
@@ -2274,8 +2274,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2348,8 +2347,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
@@ -2465,7 +2463,7 @@ static int cm_rep_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ trace_icm_remote_no_priv_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2479,11 +2477,10 @@ static int cm_rep_handler(struct cm_work *work)
break;
default:
ret = -EINVAL;
- pr_debug(
- "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
+ trace_icm_rep_unknown_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
- IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
@@ -2500,7 +2497,7 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ trace_icm_insert_failed_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
@@ -2517,9 +2514,8 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug(
- "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
@@ -2646,9 +2642,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_skipped(&cm_id_priv->id);
return -EINVAL;
}
@@ -2722,10 +2716,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
- pr_debug(
- "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_drep_err(&cm_id_priv->id);
kfree(private_data);
return -EINVAL;
}
@@ -2810,9 +2801,8 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug(
- "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
@@ -2858,9 +2848,7 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2945,9 +2933,7 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
state);
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
return -EINVAL;
}
@@ -3060,9 +3046,7 @@ static int cm_rej_handler(struct cm_work *work)
}
/* fall through */
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
goto out;
}
@@ -3118,9 +3102,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
}
/* fall through */
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_send_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
goto error1;
}
@@ -3229,9 +3211,7 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
- pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
@@ -3765,8 +3745,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
- pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
- state, ib_wc_status_msg(wc_status));
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3889,7 +3868,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3925,8 +3904,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@@ -4123,9 +4101,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4173,9 +4149,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4235,9 +4209,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 000000000000..8f3482f66338
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 000000000000..c69e28564913
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class, \
+ icm_dreq_skipped, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id) \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 3/3] RDMA/cm: Add tracepoints to track MAD send operations
2020-07-20 17:16 [PATCH v2 0/3] IB CM tracepoints Chuck Lever
2020-07-20 17:16 ` [PATCH v2 1/3] RDMA/core: Move the rdma_show_ib_cm_event() macro Chuck Lever
2020-07-20 17:16 ` [PATCH v2 2/3] RDMA/cm: Replace pr_debug() call sites with tracepoints Chuck Lever
@ 2020-07-20 17:16 ` Chuck Lever
2020-07-31 19:07 ` [PATCH v2 0/3] IB CM tracepoints Jason Gunthorpe
3 siblings, 0 replies; 5+ messages in thread
From: Chuck Lever @ 2020-07-20 17:16 UTC (permalink / raw)
To: linux-rdma
Surface the operation of MAD exchanges during connection
establishment. Some samples:
[root@klimt ~]# trace-cmd report -F ib_cma
cpus=4
kworker/0:4-123 [000] 60.677388: icm_send_rep: local_id=1965336542 remote_id=1096195961 state=REQ_RCVD lap_state=LAP_UNINIT
kworker/u8:11-391 [002] 60.678808: icm_send_req: local_id=1982113758 remote_id=0 state=IDLE lap_state=LAP_UNINIT
kworker/0:4-123 [000] 60.679652: icm_send_rtu: local_id=1982113758 remote_id=1079418745 state=REP_RCVD lap_state=LAP_UNINIT
nfsd-1954 [001] 60.691350: icm_send_rep: local_id=1998890974 remote_id=1129750393 state=MRA_REQ_SENT lap_state=LAP_UNINIT
nfsd-1954 [003] 62.017931: icm_send_drep: local_id=1998890974 remote_id=1129750393 state=TIMEWAIT lap_state=LAP_UNINIT
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
drivers/infiniband/core/cm.c | 22 +++++++-
drivers/infiniband/core/cm_trace.h | 105 ++++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 78554e679297..43b32f5ce3da 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1564,6 +1564,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ trace_icm_send_req(&cm_id_priv->id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = ib_post_send_mad(cm_id_priv->msg, NULL);
if (ret) {
@@ -1611,6 +1612,9 @@ static int cm_issue_rej(struct cm_port *port,
IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -1962,6 +1966,7 @@ static void cm_dup_req_handler(struct cm_work *work,
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -2288,6 +2293,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2359,6 +2365,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2440,6 +2447,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -2661,6 +2669,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
@@ -2731,6 +2740,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@@ -2780,6 +2790,9 @@ static int cm_issue_drep(struct cm_port *port,
IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -2937,6 +2950,7 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
}
+ trace_icm_send_rej(&cm_id_priv->id, reason);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
@@ -3115,6 +3129,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
msg_response, service_timeout,
private_data, private_data_len);
+ trace_icm_send_mra(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
@@ -3485,10 +3500,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
+ if (cm_id->state == IB_CM_IDLE) {
+ trace_icm_send_sidr_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
- else
+ } else {
ret = -EINVAL;
+ }
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3650,6 +3667,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_free_msg(msg);
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
index c69e28564913..8e53982f9250 100644
--- a/drivers/infiniband/core/cm_trace.h
+++ b/drivers/infiniband/core/cm_trace.h
@@ -80,6 +80,59 @@ IB_CM_LAP_STATE_LIST
#define show_ib_cm_lap_state(x) \
__print_symbolic(x, IB_CM_LAP_STATE_LIST)
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
DECLARE_EVENT_CLASS(icm_id_class,
TP_PROTO(
@@ -111,6 +164,56 @@ DECLARE_EVENT_CLASS(icm_id_class,
)
);
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
#define DEFINE_CM_ERR_EVENT(name) \
DEFINE_EVENT(icm_id_class, \
icm_##name##_err, \
@@ -172,6 +275,8 @@ DECLARE_EVENT_CLASS(icm_local_class,
), \
TP_ARGS(local_id, remote_id))
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
DEFINE_CM_LOCAL_EVENT(staleconn_err);
DEFINE_CM_LOCAL_EVENT(no_priv_err);
^ permalink raw reply related [flat|nested] 5+ messages in thread