From: Md Haris Iqbal <haris.iqbal@ionos.com>
To: linux-rdma@vger.kernel.org
Cc: bvanassche@acm.org, leon@kernel.org, dledford@redhat.com,
jgg@ziepe.ca, haris.iqbal@ionos.com, jinpu.wang@ionos.com,
Aleksei Marov <aleksei.marov@ionos.com>
Subject: [PATCH for-next 5/5] RDMA/rtrs-clt: Do stop and failover outside reconnect work.
Date: Fri, 14 Jan 2022 16:47:53 +0100 [thread overview]
Message-ID: <20220114154753.983568-6-haris.iqbal@ionos.com> (raw)
In-Reply-To: <20220114154753.983568-1-haris.iqbal@ionos.com>
From: Jack Wang <jinpu.wang@ionos.com>
We can't do instant reconnect, not to DDoS server, but
we should stop and failover earlier, so there is less
service interruption.
To avoid deadlock, as error_recovery is called from different
callback like rdma event or hb error handler, add a new err
recovery_work.
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Aleksei Marov <aleksei.marov@ionos.com>
Reviewed-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Md Haris Iqbal <haris.iqbal@ionos.com>
---
drivers/infiniband/ulp/rtrs/rtrs-clt.c | 40 ++++++++++++++------------
drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1 +
2 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 05de9ec7c99a..b159471a8959 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -297,6 +297,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path,
return changed;
}
+static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path);
static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
{
struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
@@ -304,16 +305,7 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
if (rtrs_clt_change_state_from_to(clt_path,
RTRS_CLT_CONNECTED,
RTRS_CLT_RECONNECTING)) {
- struct rtrs_clt_sess *clt = clt_path->clt;
- unsigned int delay_ms;
-
- /*
- * Normal scenario, reconnect if we were successfully connected
- */
- delay_ms = clt->reconnect_delay_sec * 1000;
- queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork,
- msecs_to_jiffies(delay_ms +
- prandom_u32() % RTRS_RECONNECT_SEED));
+ queue_work(rtrs_wq, &clt_path->err_recovery_work);
} else {
/*
* Error can happen just on establishing new connection,
@@ -1501,6 +1493,22 @@ static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path)
static void rtrs_clt_reconnect_work(struct work_struct *work);
static void rtrs_clt_close_work(struct work_struct *work);
+static void rtrs_clt_err_recovery_work(struct work_struct *work)
+{
+ struct rtrs_clt_path *clt_path;
+ struct rtrs_clt_sess *clt;
+ int delay_ms;
+
+ clt_path = container_of(work, struct rtrs_clt_path, err_recovery_work);
+ clt = clt_path->clt;
+ delay_ms = clt->reconnect_delay_sec * 1000;
+ rtrs_clt_stop_and_destroy_conns(clt_path);
+ queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork,
+ msecs_to_jiffies(delay_ms +
+ prandom_u32() %
+ RTRS_RECONNECT_SEED));
+}
+
static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt,
const struct rtrs_addr *path,
size_t con_num, u32 nr_poll_queues)
@@ -1552,6 +1560,7 @@ static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt,
clt_path->state = RTRS_CLT_CONNECTING;
atomic_set(&clt_path->connected_cnt, 0);
INIT_WORK(&clt_path->close_work, rtrs_clt_close_work);
+ INIT_WORK(&clt_path->err_recovery_work, rtrs_clt_err_recovery_work);
INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work);
rtrs_clt_init_hb(clt_path);
@@ -2321,6 +2330,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
clt_path = container_of(work, struct rtrs_clt_path, close_work);
+ cancel_work_sync(&clt_path->err_recovery_work);
cancel_delayed_work_sync(&clt_path->reconnect_dwork);
rtrs_clt_stop_and_destroy_conns(clt_path);
rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL);
@@ -2633,7 +2643,6 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
{
struct rtrs_clt_path *clt_path;
struct rtrs_clt_sess *clt;
- unsigned int delay_ms;
int err;
clt_path = container_of(to_delayed_work(work), struct rtrs_clt_path,
@@ -2650,8 +2659,6 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
}
clt_path->reconnect_attempts++;
- /* Stop everything */
- rtrs_clt_stop_and_destroy_conns(clt_path);
msleep(RTRS_RECONNECT_BACKOFF);
if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) {
err = init_path(clt_path);
@@ -2664,11 +2671,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
reconnect_again:
if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) {
clt_path->stats->reconnects.fail_cnt++;
- delay_ms = clt->reconnect_delay_sec * 1000;
- queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork,
- msecs_to_jiffies(delay_ms +
- prandom_u32() %
- RTRS_RECONNECT_SEED));
+ queue_work(rtrs_wq, &clt_path->err_recovery_work);
}
}
@@ -2900,6 +2903,7 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path)
&old_state);
if (changed) {
clt_path->reconnect_attempts = 0;
+ rtrs_clt_stop_and_destroy_conns(clt_path);
queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0);
}
if (changed || old_state == RTRS_CLT_RECONNECTING) {
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index d1b18a154ae0..f848c0392d98 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -134,6 +134,7 @@ struct rtrs_clt_path {
struct rtrs_clt_io_req *reqs;
struct delayed_work reconnect_dwork;
struct work_struct close_work;
+ struct work_struct err_recovery_work;
unsigned int reconnect_attempts;
bool established;
struct rtrs_rbuf *rbufs;
--
2.25.1
next prev parent reply other threads:[~2022-01-14 15:48 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-14 15:47 [PATCH for-next 0/5] Misc update for RTRS Md Haris Iqbal
2022-01-14 15:47 ` [PATCH for-next 1/5] RDMA/rtrs: fix CHECK:BRACES type warning Md Haris Iqbal
2022-01-14 15:47 ` [PATCH for-next 2/5] RDMA/rtrs-clt: fix CHECK type warnings Md Haris Iqbal
2022-01-28 14:55 ` Jason Gunthorpe
2022-02-07 16:07 ` Haris Iqbal
2022-01-14 15:47 ` [PATCH for-next 3/5] " Md Haris Iqbal
2022-01-14 15:47 ` [PATCH for-next 4/5] RDMA/rtrs-clt: Update one outdated comment in path_it_deinit Md Haris Iqbal
2022-01-14 15:47 ` Md Haris Iqbal [this message]
2022-01-26 13:56 ` [PATCH for-next 0/5] Misc update for RTRS Haris Iqbal
2022-01-28 15:00 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220114154753.983568-6-haris.iqbal@ionos.com \
--to=haris.iqbal@ionos.com \
--cc=aleksei.marov@ionos.com \
--cc=bvanassche@acm.org \
--cc=dledford@redhat.com \
--cc=jgg@ziepe.ca \
--cc=jinpu.wang@ionos.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.