All of lore.kernel.org
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Serguei Smirnov <ssmirnov@whamcloud.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 07/15] lnet: socklnd: detect link state to set fatal error on ni
Date: Wed,  7 Jul 2021 15:11:08 -0400	[thread overview]
Message-ID: <1625685076-1964-8-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1625685076-1964-1-git-send-email-jsimmons@infradead.org>

From: Serguei Smirnov <ssmirnov@whamcloud.com>

To help avoid selecting lnet ni which corresponds to a downed
ethernet link for sending, add a mechanism for detecting link
events in socklnd. On link up/down events, find corresponding
ni and toggle ni_fatal_error_on flag, similar to o2iblnd way.

WC-bug-id: https://jira.whamcloud.com/browse/LU-14742
Lustre-commit: fc2df80e96dc5db9f ("LU-14742 socklnd: detect link state to set fatal error on ni")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/43952
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 net/lnet/klnds/socklnd/socklnd.c | 78 ++++++++++++++++++++++++++++++++++++++++
 net/lnet/klnds/socklnd/socklnd.h |  1 +
 2 files changed, 79 insertions(+)

diff --git a/net/lnet/klnds/socklnd/socklnd.c b/net/lnet/klnds/socklnd/socklnd.c
index eb8c736..e15f1c0 100644
--- a/net/lnet/klnds/socklnd/socklnd.c
+++ b/net/lnet/klnds/socklnd/socklnd.c
@@ -1843,6 +1843,78 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 	}
 }
 
+static int ksocknal_get_link_status(struct net_device *dev)
+{
+	int ret = -1;
+
+	LASSERT(dev);
+
+	if (!netif_running(dev))
+		ret = 0;
+	/* Some devices may not be providing link settings */
+	else if (dev->ethtool_ops->get_link)
+		ret = dev->ethtool_ops->get_link(dev);
+
+	return ret;
+}
+
+static int
+ksocknal_handle_link_state_change(struct net_device *dev,
+				  unsigned char operstate)
+{
+	struct lnet_ni *ni;
+	struct ksock_net *net;
+	struct ksock_net *cnxt;
+	int ifindex;
+	unsigned char link_down = !(operstate == IF_OPER_UP);
+
+	ifindex = dev->ifindex;
+
+	if (!ksocknal_data.ksnd_nnets)
+		goto out;
+
+	list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets,
+				 ksnn_list) {
+		if (net->ksnn_interface.ksni_index != ifindex)
+			continue;
+		ni = net->ksnn_ni;
+		if (link_down)
+			atomic_set(&ni->ni_fatal_error_on, link_down);
+		else
+			atomic_set(&ni->ni_fatal_error_on,
+				   (ksocknal_get_link_status(dev) == 0));
+	}
+out:
+	return 0;
+}
+
+
+/************************************
+ * Net device notifier event handler
+ ************************************/
+static int ksocknal_device_event(struct notifier_block *unused,
+				 unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	unsigned char operstate;
+
+	operstate = dev->operstate;
+
+	switch (event) {
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+	case NETDEV_CHANGE:
+		ksocknal_handle_link_state_change(dev, operstate);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ksocknal_notifier_block = {
+	.notifier_call = ksocknal_device_event,
+};
+
 static void
 ksocknal_base_shutdown(void)
 {
@@ -1852,6 +1924,9 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 
 	LASSERT(!ksocknal_data.ksnd_nnets);
 
+	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL)
+		unregister_netdevice_notifier(&ksocknal_notifier_block);
+
 	switch (ksocknal_data.ksnd_init) {
 	default:
 		LASSERT(0);
@@ -2015,6 +2090,8 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 		goto failed;
 	}
 
+	register_netdevice_notifier(&ksocknal_notifier_block);
+
 	/* flag everything initialised */
 	ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
 
@@ -2297,6 +2374,7 @@ static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
 	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
 				ntohl(((struct sockaddr_in *)&ksi->ksni_addr)->sin_addr.s_addr));
 	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
+	net->ksnn_ni = ni;
 	ksocknal_data.ksnd_nnets++;
 
 	return 0;
diff --git a/net/lnet/klnds/socklnd/socklnd.h b/net/lnet/klnds/socklnd/socklnd.h
index dac8559..357769a 100644
--- a/net/lnet/klnds/socklnd/socklnd.h
+++ b/net/lnet/klnds/socklnd/socklnd.h
@@ -175,6 +175,7 @@ struct ksock_net {
 	struct list_head	ksnn_list;		/* chain on global list */
 	atomic_t		ksnn_npeers;		/* # peers */
 	struct ksock_interface	ksnn_interface;		/* IP interface */
+	struct lnet_ni		*ksnn_ni;
 };
 
 /* When the ksock_net is shut down, this bias is added to
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-07-07 19:11 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-07 19:11 [lustre-devel] [PATCH 00/15] lustre: updates to OpenSFS tree as of July 7 2021 James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 01/15] lustre: osc: Notify server if cache discard takes a long time James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 02/15] lustre: osc: Move shrink update to per-write James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 03/15] lustre: client: don't panic for mgs evictions James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 04/15] lnet: Add health ping stats James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 05/15] lnet: Ensure ref taken when queueing for discovery James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 06/15] lnet: Correct distance calculation of local NIDs James Simmons
2021-07-07 19:11 ` James Simmons [this message]
2021-07-07 19:11 ` [lustre-devel] [PATCH 08/15] lustre: mdt: New connect flag for non-open-by-fid lock request James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 09/15] lustre: obdclass: Wake up entire queue of requests on close completion James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 10/15] lnet: add netlink infrastructure James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 11/15] lustre: llite: parallelize direct i/o issuance James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 12/15] lustre: osc: Don't get time for each page James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 13/15] lustre: clio: Implement real list splice James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 14/15] lustre: osc: Simplify clipping for transient pages James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 15/15] lustre: mgc: configurable wait-to-reprocess time James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1625685076-1964-8-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    --cc=ssmirnov@whamcloud.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.