All of lore.kernel.org
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Serguei Smirnov <ssmirnov@whamcloud.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 16/24] lnet: socklnd: fix link state detection
Date: Tue, 21 Sep 2021 22:19:53 -0400	[thread overview]
Message-ID: <1632277201-6920-17-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1632277201-6920-1-git-send-email-jsimmons@infradead.org>

From: Serguei Smirnov <ssmirnov@whamcloud.com>

Because it matches only on the device index, the link detection
implemented in LU-14742 can confuse link events for virtual
interfaces with link events for the interface that LNet was
actually configured to use. Fix this by identifying the event
source more precisely: match on both the device name and the
device index.

Also, to make sure the link fatal state is cleared only when
the device is bound to the IP address used at NI creation,
subscribe to inetaddr events in addition to the netdev events.

Fixes: 1db29e184712 ("lnet: socklnd: detect link state to set fatal error on ni")
WC-bug-id: https://jira.whamcloud.com/browse/LU-14954
Lustre-commit: 008795508d65bb40b ("LU-14954 socklnd: fix link state detection")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/44732
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 net/lnet/klnds/socklnd/socklnd.c | 132 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 122 insertions(+), 10 deletions(-)

diff --git a/net/lnet/klnds/socklnd/socklnd.c b/net/lnet/klnds/socklnd/socklnd.c
index 7397ac7..b014aa8 100644
--- a/net/lnet/klnds/socklnd/socklnd.c
+++ b/net/lnet/klnds/socklnd/socklnd.c
@@ -1896,11 +1896,15 @@ static int ksocknal_get_link_status(struct net_device *dev)
 
 	LASSERT(dev);
 
-	if (!netif_running(dev))
+	if (!netif_running(dev)) {
 		ret = 0;
+		CDEBUG(D_NET, "device not running\n");
+	}
 	/* Some devices may not be providing link settings */
-	else if (dev->ethtool_ops->get_link)
+	else if (dev->ethtool_ops->get_link) {
 		ret = dev->ethtool_ops->get_link(dev);
+		CDEBUG(D_NET, "get_link returns %u\n", ret);
+	}
 
 	return ret;
 }
@@ -1909,11 +1913,16 @@ static int ksocknal_get_link_status(struct net_device *dev)
 ksocknal_handle_link_state_change(struct net_device *dev,
 				  unsigned char operstate)
 {
-	struct lnet_ni *ni;
+	struct lnet_ni *ni = NULL;
 	struct ksock_net *net;
 	struct ksock_net *cnxt;
 	int ifindex;
 	unsigned char link_down = !(operstate == IF_OPER_UP);
+	struct in_device *in_dev;
+	bool found_ip = false;
+	struct ksock_interface *ksi = NULL;
+	struct sockaddr_in *sa;
+	const struct in_ifaddr *ifa;
 
 	ifindex = dev->ifindex;
 
@@ -1922,20 +1931,91 @@ static int ksocknal_get_link_status(struct net_device *dev)
 
 	list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets,
 				 ksnn_list) {
-		if (net->ksnn_interface.ksni_index != ifindex)
+
+		ksi = &net->ksnn_interface;
+		sa = (void *)&ksi->ksni_addr;
+		found_ip = false;
+
+		if (ksi->ksni_index != ifindex ||
+		    strcmp(ksi->ksni_name, dev->name))
 			continue;
+
 		ni = net->ksnn_ni;
-		if (link_down)
+
+		in_dev = __in_dev_get_rtnl(dev);
+		if (!in_dev) {
+			CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
+			       dev->name);
+			CDEBUG(D_NET, "set link fatal state to 1\n");
+			atomic_set(&ni->ni_fatal_error_on, 1);
+			continue;
+		}
+		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+			if (sa->sin_addr.s_addr == ifa->ifa_local)
+				found_ip = true;
+		}
+
+		if (!found_ip) {
+			CDEBUG(D_NET, "Interface %s has no matching ip\n",
+			       dev->name);
+			CDEBUG(D_NET, "set link fatal state to 1\n");
+			atomic_set(&ni->ni_fatal_error_on, 1);
+			continue;
+		}
+
+		if (link_down) {
+			CDEBUG(D_NET, "set link fatal state to 1\n");
 			atomic_set(&ni->ni_fatal_error_on, link_down);
-		else
+		} else {
+			CDEBUG(D_NET, "set link fatal state to %u\n",
+			       (ksocknal_get_link_status(dev) == 0));
 			atomic_set(&ni->ni_fatal_error_on,
 				   (ksocknal_get_link_status(dev) == 0));
+		}
 	}
 out:
 	return 0;
 }
 
 
+static int
+ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
+{
+	struct lnet_ni *ni;
+	struct ksock_net *net;
+	struct ksock_net *cnxt;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	int ifindex;
+	struct ksock_interface *ksi = NULL;
+	struct sockaddr_in *sa;
+
+	if (!ksocknal_data.ksnd_nnets)
+		goto out;
+
+	ifindex = event_netdev->ifindex;
+
+	list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets,
+				 ksnn_list) {
+
+		ksi = &net->ksnn_interface;
+		sa = (void *)&ksi->ksni_addr;
+
+		if (ksi->ksni_index != ifindex ||
+		    strcmp(ksi->ksni_name, event_netdev->name))
+			continue;
+
+		if (sa->sin_addr.s_addr == ifa->ifa_local) {
+			CDEBUG(D_NET, "set link fatal state to %u\n",
+			       (event == NETDEV_DOWN));
+			ni = net->ksnn_ni;
+			atomic_set(&ni->ni_fatal_error_on,
+				   (event == NETDEV_DOWN));
+		}
+	}
+out:
+	return 0;
+}
+
 /************************************
  * Net device notifier event handler
  ************************************/
@@ -1947,6 +2027,9 @@ static int ksocknal_device_event(struct notifier_block *unused,
 
 	operstate = dev->operstate;
 
+	CDEBUG(D_NET, "devevent: status=%ld, iface=%s ifindex %d state %u\n",
+	       event, dev->name, dev->ifindex, operstate);
+
 	switch (event) {
 	case NETDEV_UP:
 	case NETDEV_DOWN:
@@ -1958,10 +2041,36 @@ static int ksocknal_device_event(struct notifier_block *unused,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block ksocknal_notifier_block = {
+/************************************
+ * Inetaddr notifier event handler
+ ************************************/
+static int ksocknal_inetaddr_event(struct notifier_block *unused,
+				   unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+
+	CDEBUG(D_NET, "addrevent: status %ld ip addr %pI4, netmask %pI4.\n",
+	       event, &ifa->ifa_address, &ifa->ifa_mask);
+
+	switch (event) {
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+	case NETDEV_CHANGE:
+		ksocknal_handle_inetaddr_change(ifa, event);
+		break;
+
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ksocknal_dev_notifier_block = {
 	.notifier_call = ksocknal_device_event,
 };
 
+static struct notifier_block ksocknal_inetaddr_notifier_block = {
+	.notifier_call = ksocknal_inetaddr_event,
+};
+
 static void
 ksocknal_base_shutdown(void)
 {
@@ -1971,8 +2080,10 @@ static int ksocknal_device_event(struct notifier_block *unused,
 
 	LASSERT(!ksocknal_data.ksnd_nnets);
 
-	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL)
-		unregister_netdevice_notifier(&ksocknal_notifier_block);
+	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) {
+		unregister_netdevice_notifier(&ksocknal_dev_notifier_block);
+		unregister_inetaddr_notifier(&ksocknal_inetaddr_notifier_block);
+	}
 
 	switch (ksocknal_data.ksnd_init) {
 	default:
@@ -2135,7 +2246,8 @@ static int ksocknal_device_event(struct notifier_block *unused,
 		goto failed;
 	}
 
-	register_netdevice_notifier(&ksocknal_notifier_block);
+	register_netdevice_notifier(&ksocknal_dev_notifier_block);
+	register_inetaddr_notifier(&ksocknal_inetaddr_notifier_block);
 
 	/* flag everything initialised */
 	ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-09-22  2:21 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-22  2:19 [lustre-devel] [PATCH 00/24] lustre: Update to OpenSFS Sept 21, 2021 James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 01/24] lnet: Lock primary NID logic James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 02/24] lustre: quota: enforce block quota for chgrp James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 03/24] lnet: introduce struct lnet_nid James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 04/24] lnet: add string formating/parsing for IPv6 nids James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 05/24] lnet: change lpni_nid in lnet_peer_ni to lnet_nid James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 06/24] lnet: change lp_primary_nid to struct lnet_nid James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 07/24] lnet: change lp_disc_*_nid " James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 08/24] lnet: socklnd: factor out key calculation for ksnd_peers James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 09/24] lnet: introduce lnet_processid for ksock_peer_ni James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 10/24] lnet: enhance connect/accept to support large addr James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 11/24] lnet: change lr_nid to struct lnet_nid James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 12/24] lnet: extend rspt_next_hop_nid in lnet_rsp_tracker James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 13/24] lustre: ptlrpc: two replay lock threads James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 14/24] lustre: llite: Always do lookup on ENOENT in open James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 15/24] lustre: llite: Remove inode locking in ll_fsync James Simmons
2021-09-22  2:19 ` James Simmons [this message]
2021-09-22  2:19 ` [lustre-devel] [PATCH 17/24] lustre: llite: check read only mount for setquota James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 18/24] lustre: llite: don't touch vma after filemap_fault James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 19/24] lnet: Check for -ESHUTDOWN in lnet_parse James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 20/24] lustre: obdclass: EAGAIN after rhashtable_walk_next() James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 21/24] lustre: sec: filename encryption James Simmons
2021-09-22  2:19 ` [lustre-devel] [PATCH 22/24] lustre: uapi: fixup UAPI headers for native Linux client James Simmons
2021-09-22  2:20 ` [lustre-devel] [PATCH 23/24] lustre: ptlrpc: separate out server code for wiretest James Simmons
2021-09-22  2:20 ` [lustre-devel] [PATCH 24/24] lustre: pcc: VM_WRITE should not trigger layout write James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1632277201-6920-17-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    --cc=ssmirnov@whamcloud.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.