linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Alexander Aring <aahringo@redhat.com>,
	David Teigland <teigland@redhat.com>,
	Sasha Levin <sashal@kernel.org>,
	cluster-devel@redhat.com
Subject: [PATCH AUTOSEL 5.13 16/59] fs: dlm: reconnect if socket error report occurs
Date: Mon,  5 Jul 2021 11:27:32 -0400	[thread overview]
Message-ID: <20210705152815.1520546-16-sashal@kernel.org> (raw)
In-Reply-To: <20210705152815.1520546-1-sashal@kernel.org>

From: Alexander Aring <aahringo@redhat.com>

[ Upstream commit ba868d9deaab2bb1c09e50650127823925154802 ]

This patch changes the reconnect handling so that a reconnect is
triggered when the socket error report callback is invoked. This also
handles reconnects in the non-blocking connect case, which is currently
missing. If the error ECONNREFUSED is reported, we delay the reconnect
by one second.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/dlm/lowcomms.c | 60 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 47bf99373f3e..cdc50e9a5ab0 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -79,6 +79,8 @@ struct connection {
 #define CF_CLOSING 8
 #define CF_SHUTDOWN 9
 #define CF_CONNECTED 10
+#define CF_RECONNECT 11
+#define CF_DELAY_CONNECT 12
 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
 	spinlock_t writequeue_lock;
 	void (*connect_action) (struct connection *);	/* What to do to connect */
@@ -87,6 +89,7 @@ struct connection {
 #define MAX_CONNECT_RETRIES 3
 	struct hlist_node list;
 	struct connection *othercon;
+	struct connection *sendcon;
 	struct work_struct rwork; /* Receive workqueue */
 	struct work_struct swork; /* Send workqueue */
 	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
@@ -585,6 +588,22 @@ static void lowcomms_error_report(struct sock *sk)
 				   dlm_config.ci_tcp_port, sk->sk_err,
 				   sk->sk_err_soft);
 	}
+
+	/* below sendcon only handling */
+	if (test_bit(CF_IS_OTHERCON, &con->flags))
+		con = con->sendcon;
+
+	switch (sk->sk_err) {
+	case ECONNREFUSED:
+		set_bit(CF_DELAY_CONNECT, &con->flags);
+		break;
+	default:
+		break;
+	}
+
+	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
+		queue_work(send_workqueue, &con->swork);
+
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	if (orig_report)
@@ -702,6 +721,8 @@ static void close_connection(struct connection *con, bool and_other,
 	con->rx_leftover = 0;
 	con->retries = 0;
 	clear_bit(CF_CONNECTED, &con->flags);
+	clear_bit(CF_DELAY_CONNECT, &con->flags);
+	clear_bit(CF_RECONNECT, &con->flags);
 	mutex_unlock(&con->sock_mutex);
 	clear_bit(CF_CLOSING, &con->flags);
 }
@@ -840,18 +861,15 @@ static int receive_from_sock(struct connection *con)
 
 out_close:
 	mutex_unlock(&con->sock_mutex);
-	if (ret != -EAGAIN) {
-		/* Reconnect when there is something to send */
+	if (ret == 0) {
 		close_connection(con, false, true, false);
-		if (ret == 0) {
-			log_print("connection %p got EOF from %d",
-				  con, con->nodeid);
-			/* handling for tcp shutdown */
-			clear_bit(CF_SHUTDOWN, &con->flags);
-			wake_up(&con->shutdown_wait);
-			/* signal to breaking receive worker */
-			ret = -1;
-		}
+		log_print("connection %p got EOF from %d",
+			  con, con->nodeid);
+		/* handling for tcp shutdown */
+		clear_bit(CF_SHUTDOWN, &con->flags);
+		wake_up(&con->shutdown_wait);
+		/* signal to breaking receive worker */
+		ret = -1;
 	}
 	return ret;
 }
@@ -939,6 +957,7 @@ static int accept_from_sock(struct listen_connection *con)
 
 			lockdep_set_subclass(&othercon->sock_mutex, 1);
 			newcon->othercon = othercon;
+			othercon->sendcon = newcon;
 		} else {
 			/* close other sock con if we have something new */
 			close_connection(othercon, false, true, false);
@@ -1503,7 +1522,7 @@ static void send_to_sock(struct connection *con)
 				cond_resched();
 				goto out;
 			} else if (ret < 0)
-				goto send_error;
+				goto out;
 		}
 
 		/* Don't starve people filling buffers */
@@ -1520,14 +1539,6 @@ static void send_to_sock(struct connection *con)
 	mutex_unlock(&con->sock_mutex);
 	return;
 
-send_error:
-	mutex_unlock(&con->sock_mutex);
-	close_connection(con, false, false, true);
-	/* Requeue the send work. When the work daemon runs again, it will try
-	   a new connection, then call this function again. */
-	queue_work(send_workqueue, &con->swork);
-	return;
-
 out_connect:
 	mutex_unlock(&con->sock_mutex);
 	queue_work(send_workqueue, &con->swork);
@@ -1602,8 +1613,15 @@ static void process_send_sockets(struct work_struct *work)
 	struct connection *con = container_of(work, struct connection, swork);
 
 	clear_bit(CF_WRITE_PENDING, &con->flags);
-	if (con->sock == NULL) /* not mutex protected so check it inside too */
+
+	if (test_and_clear_bit(CF_RECONNECT, &con->flags))
+		close_connection(con, false, false, true);
+
+	if (con->sock == NULL) { /* not mutex protected so check it inside too */
+		if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
+			msleep(1000);
 		con->connect_action(con);
+	}
 	if (!list_empty(&con->writequeue))
 		send_to_sock(con);
 }
-- 
2.30.2


  parent reply	other threads:[~2021-07-05 15:28 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-05 15:27 [PATCH AUTOSEL 5.13 01/59] HID: do not use down_interruptible() when unbinding devices Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 02/59] EDAC/ti: Add missing MODULE_DEVICE_TABLE Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 03/59] ACPI: PM: s2idle: Add missing LPS0 functions for AMD Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 04/59] ACPI: scan: Rearrange dep_unmet initialization Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 05/59] ACPI: processor idle: Fix up C-state latency if not ordered Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 06/59] hv_utils: Fix passing zero to 'PTR_ERR' warning Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 07/59] lib: vsprintf: Fix handling of number field widths in vsscanf Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 08/59] Input: goodix - platform/x86: touchscreen_dmi - Move upside down quirks to touchscreen_dmi.c Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 09/59] platform/x86: touchscreen_dmi: Add an extra entry for the upside down Goodix touchscreen on Teclast X89 tablets Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 10/59] platform/x86: touchscreen_dmi: Add info for the Goodix GT912 panel of TM800A550L tablets Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 11/59] ACPI: EC: Make more Asus laptops use ECDT _GPE Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 12/59] block_dump: remove block_dump feature in mark_inode_dirty() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 13/59] blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 14/59] blk-mq: clear stale request in tags->rq[] before freeing one request pool Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 15/59] fs: dlm: fix srcu read lock usage Sasha Levin
2021-07-05 15:27 ` Sasha Levin [this message]
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 17/59] fs: dlm: cancel work sync othercon Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 18/59] fs: dlm: fix connection tcp EOF handling Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 19/59] random32: Fix implicit truncation warning in prandom_seed_state() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 20/59] open: don't silently ignore unknown O-flags in openat2() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 21/59] drivers: hv: Fix missing error code in vmbus_connect() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 22/59] fs: dlm: fix lowcomms_start error case Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 23/59] fs: dlm: fix memory leak when fenced Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 24/59] ACPICA: Fix memory leak caused by _CID repair function Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 25/59] ACPI: bus: Call kobject_put() in acpi_init() error path Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 26/59] ACPI: resources: Add checks for ACPI IRQ override Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 27/59] HID: hid-input: add Surface Go battery quirk Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 28/59] HID: sony: fix freeze when inserting ghlive ps3/wii dongles Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 29/59] block: fix race between adding/removing rq qos and normal IO Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 30/59] platform/x86: asus-nb-wmi: Revert "Drop duplicate DMI quirk structures" Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 31/59] platform/x86: asus-nb-wmi: Revert "add support for ASUS ROG Zephyrus G14 and G15" Sasha Levin
2021-07-05 17:08   ` Hans de Goede
2021-07-05 17:09     ` Hans de Goede
2021-07-09 22:50       ` Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 32/59] platform/x86: toshiba_acpi: Fix missing error code in toshiba_acpi_setup_keyboard() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 33/59] nvme-pci: fix var. type for increasing cq_head Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 34/59] nvmet-fc: do not check for invalid target port in nvmet_fc_handle_fcp_rqst() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 35/59] EDAC/Intel: Do not load EDAC driver when running as a guest Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 36/59] tools/power/x86/intel-speed-select: Fix uncore memory frequency display Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 37/59] PCI: hv: Add check for hyperv_initialized in init_hv_pci_drv() Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 38/59] cifs: improve fallocate emulation Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 39/59] cifs: fix check of dfs interlinks Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 40/59] cifs: retry lookup and readdir when EAGAIN is returned Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 41/59] smb3: fix uninitialized value for port in witness protocol move Sasha Levin
2021-07-05 15:27 ` [PATCH AUTOSEL 5.13 42/59] cifs: fix SMB1 error path in cifs_get_file_info_unix Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210705152815.1520546-16-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=aahringo@redhat.com \
    --cc=cluster-devel@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=teigland@redhat.com \
    --subject='Re: [PATCH AUTOSEL 5.13 16/59] fs: dlm: reconnect if socket error report occurs' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for the NNTP newsgroup(s).