From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Serguei Smirnov <ssmirnov@whamcloud.com>,
Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 11/15] lnet: add mechanism for dumping lnd peer debug info
Date: Thu, 27 Oct 2022 10:05:38 -0400 [thread overview]
Message-ID: <1666879542-10737-12-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1666879542-10737-1-git-send-email-jsimmons@infradead.org>
From: Serguei Smirnov <ssmirnov@whamcloud.com>
Add ability to dump lnd peer debug info:
lnetctl debug peer --nid=<nid>
The debug info is dumped to the log as D_CONSOLE by the respective
lnd and can be retrieved with "lctl dk" or seen in syslog.
This mechanism has been added for socklnd and o2iblnd peers.
WC-bug-id: https://jira.whamcloud.com/browse/LU-15234
Lustre-commit: 950e59ced18d49e9f ("LU-15234 lnet: add mechanism for dumping lnd peer debug info")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48566
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
net/lnet/klnds/o2iblnd/o2iblnd.c | 96 +++++++++++++++++++++++++++++++++++++++-
net/lnet/klnds/socklnd/socklnd.c | 51 ++++++++++++++++++++-
2 files changed, 143 insertions(+), 4 deletions(-)
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.c b/net/lnet/klnds/o2iblnd/o2iblnd.c
index 14dd686..d2e4ce9 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.c
@@ -422,7 +422,96 @@ void kiblnd_unlink_peer_locked(struct kib_peer_ni *peer_ni)
kiblnd_peer_decref(peer_ni);
}
-static int kiblnd_get_peer_info(struct lnet_ni *ni, int index,
+/* Log one receive descriptor at D_CONSOLE: the descriptor address followed
+ * by the type and credits fields read from the message it points to.
+ */
+static void
+kiblnd_debug_rx(struct kib_rx *rx)
+{
+	struct kib_msg *msg = rx->rx_msg;
+
+	CDEBUG(D_CONSOLE, " %p msg_type %x cred %d\n",
+	       rx, msg->ibm_type, msg->ibm_credits);
+}
+
+/* Log one transmit descriptor at D_CONSOLE.
+ *
+ * Printed in order: descriptor address, tx_sending, tx_queued, tx_waiting,
+ * tx_status, the deadline converted to nanoseconds, the cookie, one marker
+ * per tx_lntmsg[] slot ("!" when the slot is set, "-" when it is NULL), and
+ * the type and credits fields of the attached message.
+ */
+static void
+kiblnd_debug_tx(struct kib_tx *tx)
+{
+	struct kib_msg *msg = tx->tx_msg;
+	const char *lntmsg0 = tx->tx_lntmsg[0] ? "!" : "-";
+	const char *lntmsg1 = tx->tx_lntmsg[1] ? "!" : "-";
+
+	CDEBUG(D_CONSOLE,
+	       " %p snd %d q %d w %d rc %d dl %lld cookie %#llx msg %s%s type %x cred %d\n",
+	       tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
+	       tx->tx_status, ktime_to_ns(tx->tx_deadline), tx->tx_cookie,
+	       lntmsg0, lntmsg1,
+	       msg->ibm_type, msg->ibm_credits);
+}
+
+/* Dump the state of one connection to the debug log at D_CONSOLE.
+ *
+ * Emits a summary (refcount, version, peer NID, state, posted and credit
+ * counters, comms error), then every descriptor on the early-rx list and on
+ * each tx list, and finally each entry of the ibc_rxs[] array.
+ * conn->ibc_lock is held from the first line of output to the last.
+ */
+static void
+kiblnd_debug_conn(struct kib_conn *conn)
+{
+	struct kib_rx *rx;
+	struct kib_tx *tx;
+	int idx;
+
+	spin_lock(&conn->ibc_lock);
+
+	/* connection summary */
+	CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s:\n",
+	       atomic_read(&conn->ibc_refcount), conn,
+	       conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+	CDEBUG(D_CONSOLE,
+	       " state %d nposted %d/%d cred %d o_cred %d r_cred %d\n",
+	       conn->ibc_state, conn->ibc_noops_posted,
+	       conn->ibc_nsends_posted, conn->ibc_credits,
+	       conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
+	CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error);
+
+	/* receives linked on the early-rx list */
+	CDEBUG(D_CONSOLE, " early_rxs:\n");
+	list_for_each_entry(rx, &conn->ibc_early_rxs, rx_list)
+		kiblnd_debug_rx(rx);
+
+	/* transmit lists, one section per list */
+	CDEBUG(D_CONSOLE, " tx_noops:\n");
+	list_for_each_entry(tx, &conn->ibc_tx_noops, tx_list)
+		kiblnd_debug_tx(tx);
+
+	CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
+	list_for_each_entry(tx, &conn->ibc_tx_queue_nocred, tx_list)
+		kiblnd_debug_tx(tx);
+
+	CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n");
+	list_for_each_entry(tx, &conn->ibc_tx_queue_rsrvd, tx_list)
+		kiblnd_debug_tx(tx);
+
+	CDEBUG(D_CONSOLE, " tx_queue:\n");
+	list_for_each_entry(tx, &conn->ibc_tx_queue, tx_list)
+		kiblnd_debug_tx(tx);
+
+	CDEBUG(D_CONSOLE, " active_txs:\n");
+	list_for_each_entry(tx, &conn->ibc_active_txs, tx_list)
+		kiblnd_debug_tx(tx);
+
+	/* every slot of the connection's rx array */
+	CDEBUG(D_CONSOLE, " rxs:\n");
+	for (idx = 0; idx < IBLND_RX_MSGS(conn); idx++)
+		kiblnd_debug_rx(&conn->ibc_rxs[idx]);
+
+	spin_unlock(&conn->ibc_lock);
+}
+
+/* Dump debug info for one o2iblnd peer to the debug log at D_CONSOLE.
+ *
+ * Logs the peer's last_alive, races, reconnected and error fields, then
+ * calls kiblnd_debug_conn() on every connection linked on
+ * peer_ni->ibp_conns, numbering them from 0 in list order.
+ */
+static void
+kiblnd_dump_peer_debug_info(struct kib_peer_ni *peer_ni)
+{
+	struct kib_conn *conn;
+	int count = 0;
+
+	CDEBUG(D_CONSOLE, "[last_alive, races, reconnected, error]: %lld, %d, %d, %d\n",
+	       peer_ni->ibp_last_alive,
+	       peer_ni->ibp_races,
+	       peer_ni->ibp_reconnected,
+	       peer_ni->ibp_error);
+
+	/* The dump only reads the list and never unlinks a connection, so
+	 * the plain iterator is used rather than the _safe variant.
+	 */
+	list_for_each_entry(conn, &peer_ni->ibp_conns, ibc_list) {
+		CDEBUG(D_CONSOLE, "Conn %d:\n", count);
+		kiblnd_debug_conn(conn);
+		count++;
+	}
+}
+
+static int kiblnd_get_peer_info(struct lnet_ni *ni, lnet_nid_t nid, int index,
lnet_nid_t *nidp, int *count)
{
struct kib_peer_ni *peer_ni;
@@ -437,6 +526,9 @@ static int kiblnd_get_peer_info(struct lnet_ni *ni, int index,
if (peer_ni->ibp_ni != ni)
continue;
+ if (peer_ni->ibp_nid == nid)
+ kiblnd_dump_peer_debug_info(peer_ni);
+
if (index-- > 0)
continue;
@@ -1065,7 +1157,7 @@ static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
lnet_nid_t nid = 0;
int count = 0;
- rc = kiblnd_get_peer_info(ni, data->ioc_count,
+ rc = kiblnd_get_peer_info(ni, data->ioc_nid, data->ioc_count,
&nid, &count);
data->ioc_nid = nid;
data->ioc_count = count;
diff --git a/net/lnet/klnds/socklnd/socklnd.c b/net/lnet/klnds/socklnd/socklnd.c
index 8d3c0d6..996d3a9 100644
--- a/net/lnet/klnds/socklnd/socklnd.c
+++ b/net/lnet/klnds/socklnd/socklnd.c
@@ -277,6 +277,52 @@ struct ksock_peer_ni *
ksocknal_peer_decref(peer_ni);
}
+/* Dump per-connection debug info for one socklnd peer at D_CONSOLE.
+ *
+ * For each connection linked on peer_ni->ksnp_conns, numbered from 0 in
+ * list order, this logs:
+ *  - type, closing flag and the conn/sock reference counts;
+ *  - rx scheduled/ready flags and rx state;
+ *  - number of entries on the tx queue, tx scheduled flag, last post
+ *    time, tx ready flag and tx deadline;
+ *  - the scheduler's nconns and cpt, when a scheduler is attached.
+ */
+static void
+ksocknal_dump_peer_debug_info(struct ksock_peer_ni *peer_ni)
+{
+	struct ksock_conn *conn;
+	struct list_head *txtmp;
+	int ccount = 0;
+
+	list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
+		int txcount = 0;
+
+		/* An empty queue simply yields zero iterations, so no
+		 * separate list_empty() check is needed.
+		 */
+		list_for_each(txtmp, &conn->ksnc_tx_queue)
+			txcount++;
+
+		CDEBUG(D_CONSOLE, "Conn %d [type, closing, crefcnt, srefcnt]: %d, %d, %d, %d\n",
+		       ccount,
+		       conn->ksnc_type,
+		       conn->ksnc_closing,
+		       refcount_read(&conn->ksnc_conn_refcount),
+		       refcount_read(&conn->ksnc_sock_refcount));
+		CDEBUG(D_CONSOLE, "Conn %d rx [scheduled, ready, state]: %d, %d, %d\n",
+		       ccount,
+		       conn->ksnc_rx_scheduled,
+		       conn->ksnc_rx_ready,
+		       conn->ksnc_rx_state);
+		/* This line is labelled "tx", so report the tx-side ready
+		 * flag and deadline rather than the rx ones.
+		 */
+		CDEBUG(D_CONSOLE,
+		       "Conn %d tx [txqcnt, scheduled, last_post, ready, deadline]: %d, %d, %lld, %d, %lld\n",
+		       ccount,
+		       txcount,
+		       conn->ksnc_tx_scheduled,
+		       conn->ksnc_tx_last_post,
+		       conn->ksnc_tx_ready,
+		       conn->ksnc_tx_deadline);
+
+		if (conn->ksnc_scheduler)
+			CDEBUG(D_CONSOLE, "Conn %d sched [nconns, cpt]: %d, %d\n",
+			       ccount,
+			       conn->ksnc_scheduler->kss_nconns,
+			       conn->ksnc_scheduler->kss_cpt);
+
+		ccount++;
+	}
+}
+
static int
ksocknal_get_peer_info(struct lnet_ni *ni, int index,
struct lnet_processid *id, u32 *myip, u32 *peer_ip,
@@ -295,9 +341,9 @@ struct ksock_peer_ni *
if (index-- > 0)
continue;
+ *id = peer_ni->ksnp_id;
conn_cb = peer_ni->ksnp_conn_cb;
if (!conn_cb) {
- *id = peer_ni->ksnp_id;
*myip = 0;
*peer_ip = 0;
*port = 0;
@@ -305,7 +351,8 @@ struct ksock_peer_ni *
*share_count = 0;
rc = 0;
} else {
- *id = peer_ni->ksnp_id;
+ ksocknal_dump_peer_debug_info(peer_ni);
+
if (conn_cb->ksnr_addr.ss_family == AF_INET) {
struct sockaddr_in *sa;
--
1.8.3.1
_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org
next prev parent reply other threads:[~2022-10-27 14:24 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-27 14:05 [lustre-devel] [PATCH 00/15] lustre: sync to OpenSFS Oct 27, 2022 James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 01/15] lnet: o2iblnd: Avoid NULL md deref James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 02/15] lnet: support IPv6 in lnet_inet_enumerate() James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 03/15] lustre: sec: retry ro mount if read-only flag set James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 04/15] lustre: ptlrpc: reduce lock contention in ptlrpc_free_committed James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 05/15] lustre: llite: only statfs for projid if PROJINHERIT set James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 06/15] lustre: llite: revert: "lustre: llite: prevent mulitple group locks" James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 07/15] lustre: ldlm: group lock fix James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 08/15] lustre: llite: adjust read count as file got truncated James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 09/15] lnet: Discovery queue and deletion race James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 10/15] lustre: statahead: avoid to block ptlrpcd interpret context James Simmons
2022-10-27 14:05 ` James Simmons [this message]
2022-10-27 14:05 ` [lustre-devel] [PATCH 12/15] lnet: ksocklnd: fix irq lock inversion while calling sk_data_ready() James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 13/15] lustre: obdclass: fix race in class_del_profile James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 14/15] lnet: use 'fallthrough' pseudo keyword for switch James Simmons
2022-10-27 14:05 ` [lustre-devel] [PATCH 15/15] lustre: " James Simmons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1666879542-10737-12-git-send-email-jsimmons@infradead.org \
--to=jsimmons@infradead.org \
--cc=adilger@whamcloud.com \
--cc=green@whamcloud.com \
--cc=lustre-devel@lists.lustre.org \
--cc=neilb@suse.de \
--cc=ssmirnov@whamcloud.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).