linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] iscsi: Perform connection failure entirely in kernel space
@ 2019-12-09 18:20 Gabriel Krisman Bertazi
  2019-12-09 18:29 ` Lee Duncan
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2019-12-09 18:20 UTC (permalink / raw)
  To: lduncan, cleech, martin.petersen
  Cc: linux-scsi, open-iscsi, Bharath Ravi, kernel, Dave Clausen,
	Nick Black, Vaibhav Nagarnaik, Anatol Pomazau, Tahsin Erdogan,
	Frank Mayhar, Junho Ryu, Khazhismel Kumykov,
	Gabriel Krisman Bertazi

From: Bharath Ravi <rbharath@google.com>

Connection failure processing depends on a daemon being present to (at
least) stop the connection and start recovery.  This is a problem on a
multipath scenario, where if the daemon failed for whatever reason, the
SCSI path is never marked as down, multipath won't perform the
failover and IO to the device will be forever waiting for that
connection to come back.

This patch implements an optional feature in the iscsi module, to
perform the connection failure inside the kernel.  This way, the
failover can happen and pending IO can continue even if the daemon is
dead. Once the daemon comes alive again, it can perform recovery
procedures if applicable.

Co-developed-by: Dave Clausen <dclausen@google.com>
Signed-off-by: Dave Clausen <dclausen@google.com>
Co-developed-by: Nick Black <nlb@google.com>
Signed-off-by: Nick Black <nlb@google.com>
Co-developed-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
Co-developed-by: Anatol Pomazau <anatol@google.com>
Signed-off-by: Anatol Pomazau <anatol@google.com>
Co-developed-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Co-developed-by: Frank Mayhar <fmayhar@google.com>
Signed-off-by: Frank Mayhar <fmayhar@google.com>
Co-developed-by: Junho Ryu <jayr@google.com>
Signed-off-by: Junho Ryu <jayr@google.com>
Co-developed-by: Khazhismel Kumykov <khazhy@google.com>
Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
Signed-off-by: Bharath Ravi <rbharath@google.com>
Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 46 +++++++++++++++++++++++++++++
 include/scsi/scsi_transport_iscsi.h |  1 +
 2 files changed, 47 insertions(+)

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 417b868d8735..7251b2b5b272 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -36,6 +36,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session);
 EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp);
 
+static bool kern_conn_failure;
+module_param(kern_conn_failure, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(kern_conn_failure,
+		 "Allow the kernel to detect and disable broken connections "
+		 "without requiring userspace intervention");
+
 static int dbg_session;
 module_param_named(debug_session, dbg_session, int,
 		   S_IRUGO | S_IWUSR);
@@ -84,6 +90,12 @@ struct iscsi_internal {
 	struct transport_container session_cont;
 };
 
+/* Worker to perform connection failure on unresponsive connections
+ * completely in kernel space.
+ */
+static void stop_conn_work_fn(struct work_struct *work);
+static DECLARE_WORK(stop_conn_work, stop_conn_work_fn);
+
 static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
 static struct workqueue_struct *iscsi_eh_timer_workq;
 
@@ -1609,6 +1621,7 @@ static DEFINE_MUTEX(rx_queue_mutex);
 static LIST_HEAD(sesslist);
 static DEFINE_SPINLOCK(sesslock);
 static LIST_HEAD(connlist);
+static LIST_HEAD(connlist_err);
 static DEFINE_SPINLOCK(connlock);
 
 static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn)
@@ -2245,6 +2258,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
 
 	mutex_init(&conn->ep_mutex);
 	INIT_LIST_HEAD(&conn->conn_list);
+	INIT_LIST_HEAD(&conn->conn_list_err);
 	conn->transport = transport;
 	conn->cid = cid;
 
@@ -2291,6 +2305,7 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
 
 	spin_lock_irqsave(&connlock, flags);
 	list_del(&conn->conn_list);
+	list_del(&conn->conn_list_err);
 	spin_unlock_irqrestore(&connlock, flags);
 
 	transport_unregister_device(&conn->dev);
@@ -2405,6 +2420,28 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
 }
 EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
 
+static void stop_conn_work_fn(struct work_struct *work)
+{
+	struct iscsi_cls_conn *conn, *tmp;
+	unsigned long flags;
+	LIST_HEAD(recovery_list);
+
+	spin_lock_irqsave(&connlock, flags);
+	if (list_empty(&connlist_err)) {
+		spin_unlock_irqrestore(&connlock, flags);
+		return;
+	}
+	list_splice_init(&connlist_err, &recovery_list);
+	spin_unlock_irqrestore(&connlock, flags);
+
+	mutex_lock(&rx_queue_mutex);
+	list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
+		conn->transport->stop_conn(conn, STOP_CONN_RECOVER);
+		list_del_init(&conn->conn_list_err);
+	}
+	mutex_unlock(&rx_queue_mutex);
+}
+
 void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 {
 	struct nlmsghdr	*nlh;
@@ -2412,6 +2449,15 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 	struct iscsi_uevent *ev;
 	struct iscsi_internal *priv;
 	int len = nlmsg_total_size(sizeof(*ev));
+	unsigned long flags;
+
+	if (kern_conn_failure) {
+		spin_lock_irqsave(&connlock, flags);
+		list_add(&conn->conn_list_err, &connlist_err);
+		spin_unlock_irqrestore(&connlock, flags);
+
+		queue_work(system_unbound_wq, &stop_conn_work);
+	}
 
 	priv = iscsi_if_transport_lookup(conn->transport);
 	if (!priv)
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 325ae731d9ad..2129dc9e2dec 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -190,6 +190,7 @@ extern void iscsi_ping_comp_event(uint32_t host_no,
 
 struct iscsi_cls_conn {
 	struct list_head conn_list;	/* item in connlist */
+	struct list_head conn_list_err;	/* item in connlist_err */
 	void *dd_data;			/* LLD private data */
 	struct iscsi_transport *transport;
 	uint32_t cid;			/* connection id */
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] iscsi: Perform connection failure entirely in kernel space
  2019-12-09 18:20 [PATCH] iscsi: Perform connection failure entirely in kernel space Gabriel Krisman Bertazi
@ 2019-12-09 18:29 ` Lee Duncan
  2019-12-09 18:39   ` Gabriel Krisman Bertazi
  2019-12-11  0:05 ` Mike Christie
  2019-12-17  2:16 ` Khazhismel Kumykov
  2 siblings, 1 reply; 6+ messages in thread
From: Lee Duncan @ 2019-12-09 18:29 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi, cleech, martin.petersen
  Cc: linux-scsi, open-iscsi, Bharath Ravi, kernel, Dave Clausen,
	Nick Black, Vaibhav Nagarnaik, Anatol Pomazau, Tahsin Erdogan,
	Frank Mayhar, Junho Ryu, Khazhismel Kumykov

On 12/9/19 10:20 AM, Gabriel Krisman Bertazi wrote:
> From: Bharath Ravi <rbharath@google.com>
> 
> Connection failure processing depends on a daemon being present to (at
> least) stop the connection and start recovery.  This is a problem on a
> multipath scenario, where if the daemon failed for whatever reason, the
> SCSI path is never marked as down, multipath won't perform the
> failover and IO to the device will be forever waiting for that
> connection to come back.
> 
> This patch implements an optional feature in the iscsi module, to
> perform the connection failure inside the kernel.  This way, the
> failover can happen and pending IO can continue even if the daemon is
> dead. Once the daemon comes alive again, it can perform recovery
> procedures if applicable.


I don't suppose you've tested how this plays with the daemon, when present?


> 
> Co-developed-by: Dave Clausen <dclausen@google.com>
> Signed-off-by: Dave Clausen <dclausen@google.com>
> Co-developed-by: Nick Black <nlb@google.com>
> Signed-off-by: Nick Black <nlb@google.com>
> Co-developed-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
> Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
> Co-developed-by: Anatol Pomazau <anatol@google.com>
> Signed-off-by: Anatol Pomazau <anatol@google.com>
> Co-developed-by: Tahsin Erdogan <tahsin@google.com>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> Co-developed-by: Frank Mayhar <fmayhar@google.com>
> Signed-off-by: Frank Mayhar <fmayhar@google.com>
> Co-developed-by: Junho Ryu <jayr@google.com>
> Signed-off-by: Junho Ryu <jayr@google.com>
> Co-developed-by: Khazhismel Kumykov <khazhy@google.com>
> Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
> Signed-off-by: Bharath Ravi <rbharath@google.com>
> Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> ---
>  drivers/scsi/scsi_transport_iscsi.c | 46 +++++++++++++++++++++++++++++
>  include/scsi/scsi_transport_iscsi.h |  1 +
>  2 files changed, 47 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
> index 417b868d8735..7251b2b5b272 100644
> --- a/drivers/scsi/scsi_transport_iscsi.c
> +++ b/drivers/scsi/scsi_transport_iscsi.c
> @@ -36,6 +36,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp);
>  
> +static bool kern_conn_failure;
> +module_param(kern_conn_failure, bool, S_IRUGO|S_IWUSR);
> +MODULE_PARM_DESC(kern_conn_failure,
> +		 "Allow the kernel to detect and disable broken connections "
> +		 "without requiring userspace intervention");
> +
>  static int dbg_session;
>  module_param_named(debug_session, dbg_session, int,
>  		   S_IRUGO | S_IWUSR);
> @@ -84,6 +90,12 @@ struct iscsi_internal {
>  	struct transport_container session_cont;
>  };
>  
> +/* Worker to perform connection failure on unresponsive connections
> + * completely in kernel space.
> + */
> +static void stop_conn_work_fn(struct work_struct *work);
> +static DECLARE_WORK(stop_conn_work, stop_conn_work_fn);
> +
>  static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
>  static struct workqueue_struct *iscsi_eh_timer_workq;
>  
> @@ -1609,6 +1621,7 @@ static DEFINE_MUTEX(rx_queue_mutex);
>  static LIST_HEAD(sesslist);
>  static DEFINE_SPINLOCK(sesslock);
>  static LIST_HEAD(connlist);
> +static LIST_HEAD(connlist_err);
>  static DEFINE_SPINLOCK(connlock);
>  
>  static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn)
> @@ -2245,6 +2258,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
>  
>  	mutex_init(&conn->ep_mutex);
>  	INIT_LIST_HEAD(&conn->conn_list);
> +	INIT_LIST_HEAD(&conn->conn_list_err);
>  	conn->transport = transport;
>  	conn->cid = cid;
>  
> @@ -2291,6 +2305,7 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
>  
>  	spin_lock_irqsave(&connlock, flags);
>  	list_del(&conn->conn_list);
> +	list_del(&conn->conn_list_err);
>  	spin_unlock_irqrestore(&connlock, flags);
>  
>  	transport_unregister_device(&conn->dev);
> @@ -2405,6 +2420,28 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
>  }
>  EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
>  
> +static void stop_conn_work_fn(struct work_struct *work)
> +{
> +	struct iscsi_cls_conn *conn, *tmp;
> +	unsigned long flags;
> +	LIST_HEAD(recovery_list);
> +
> +	spin_lock_irqsave(&connlock, flags);
> +	if (list_empty(&connlist_err)) {
> +		spin_unlock_irqrestore(&connlock, flags);
> +		return;
> +	}
> +	list_splice_init(&connlist_err, &recovery_list);
> +	spin_unlock_irqrestore(&connlock, flags);
> +
> +	mutex_lock(&rx_queue_mutex);
> +	list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
> +		conn->transport->stop_conn(conn, STOP_CONN_RECOVER);
> +		list_del_init(&conn->conn_list_err);
> +	}
> +	mutex_unlock(&rx_queue_mutex);
> +}
> +
>  void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>  {
>  	struct nlmsghdr	*nlh;
> @@ -2412,6 +2449,15 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>  	struct iscsi_uevent *ev;
>  	struct iscsi_internal *priv;
>  	int len = nlmsg_total_size(sizeof(*ev));
> +	unsigned long flags;
> +
> +	if (kern_conn_failure) {
> +		spin_lock_irqsave(&connlock, flags);
> +		list_add(&conn->conn_list_err, &connlist_err);
> +		spin_unlock_irqrestore(&connlock, flags);
> +
> +		queue_work(system_unbound_wq, &stop_conn_work);
> +	}
>  
>  	priv = iscsi_if_transport_lookup(conn->transport);
>  	if (!priv)
> diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
> index 325ae731d9ad..2129dc9e2dec 100644
> --- a/include/scsi/scsi_transport_iscsi.h
> +++ b/include/scsi/scsi_transport_iscsi.h
> @@ -190,6 +190,7 @@ extern void iscsi_ping_comp_event(uint32_t host_no,
>  
>  struct iscsi_cls_conn {
>  	struct list_head conn_list;	/* item in connlist */
> +	struct list_head conn_list_err;	/* item in connlist_err */
>  	void *dd_data;			/* LLD private data */
>  	struct iscsi_transport *transport;
>  	uint32_t cid;			/* connection id */
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iscsi: Perform connection failure entirely in kernel space
  2019-12-09 18:29 ` Lee Duncan
@ 2019-12-09 18:39   ` Gabriel Krisman Bertazi
  0 siblings, 0 replies; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2019-12-09 18:39 UTC (permalink / raw)
  To: Lee Duncan
  Cc: cleech, martin.petersen, linux-scsi, open-iscsi, Bharath Ravi,
	kernel, Dave Clausen, Nick Black, Vaibhav Nagarnaik,
	Anatol Pomazau, Tahsin Erdogan, Frank Mayhar, Junho Ryu,
	Khazhismel Kumykov

Lee Duncan <LDuncan@suse.com> writes:

> On 12/9/19 10:20 AM, Gabriel Krisman Bertazi wrote:
>> From: Bharath Ravi <rbharath@google.com>
>> 
>> Connection failure processing depends on a daemon being present to (at
>> least) stop the connection and start recovery.  This is a problem on a
>> multipath scenario, where if the daemon failed for whatever reason, the
>> SCSI path is never marked as down, multipath won't perform the
>> failover and IO to the device will be forever waiting for that
>> connection to come back.
>> 
>> This patch implements an optional feature in the iscsi module, to
>> perform the connection failure inside the kernel.  This way, the
>> failover can happen and pending IO can continue even if the daemon is
>> dead. Once the daemon comes alive again, it can perform recovery
>> procedures if applicable.
>
>
> I don't suppose you've tested how this plays with the daemon, when present?

Hello,

Yes, I did.  It seemed to work properly over TCP.

The stop calls are serialized by the rx_mutex, whoever gets it first,
gets the job done.  The second should return immediately since the socket
is no longer there.

What am I missing?

-- 
Gabriel Krisman Bertazi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iscsi: Perform connection failure entirely in kernel space
  2019-12-09 18:20 [PATCH] iscsi: Perform connection failure entirely in kernel space Gabriel Krisman Bertazi
  2019-12-09 18:29 ` Lee Duncan
@ 2019-12-11  0:05 ` Mike Christie
  2019-12-11  0:23   ` Mike Christie
  2019-12-17  2:16 ` Khazhismel Kumykov
  2 siblings, 1 reply; 6+ messages in thread
From: Mike Christie @ 2019-12-11  0:05 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi, lduncan, cleech, martin.petersen
  Cc: linux-scsi, open-iscsi, Bharath Ravi, kernel, Dave Clausen,
	Nick Black, Vaibhav Nagarnaik, Anatol Pomazau, Tahsin Erdogan,
	Frank Mayhar, Junho Ryu, Khazhismel Kumykov

On 12/09/2019 12:20 PM, Gabriel Krisman Bertazi wrote:
> From: Bharath Ravi <rbharath@google.com>
> 
> Connection failure processing depends on a daemon being present to (at
> least) stop the connection and start recovery.  This is a problem on a
> multipath scenario, where if the daemon failed for whatever reason, the
> SCSI path is never marked as down, multipath won't perform the
> failover and IO to the device will be forever waiting for that
> connection to come back.
> 
> This patch implements an optional feature in the iscsi module, to
> perform the connection failure inside the kernel.  This way, the
> failover can happen and pending IO can continue even if the daemon is
> dead. Once the daemon comes alive again, it can perform recovery
> procedures if applicable.
> 
> Co-developed-by: Dave Clausen <dclausen@google.com>
> Signed-off-by: Dave Clausen <dclausen@google.com>
> Co-developed-by: Nick Black <nlb@google.com>
> Signed-off-by: Nick Black <nlb@google.com>
> Co-developed-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
> Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
> Co-developed-by: Anatol Pomazau <anatol@google.com>
> Signed-off-by: Anatol Pomazau <anatol@google.com>
> Co-developed-by: Tahsin Erdogan <tahsin@google.com>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> Co-developed-by: Frank Mayhar <fmayhar@google.com>
> Signed-off-by: Frank Mayhar <fmayhar@google.com>
> Co-developed-by: Junho Ryu <jayr@google.com>
> Signed-off-by: Junho Ryu <jayr@google.com>
> Co-developed-by: Khazhismel Kumykov <khazhy@google.com>
> Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
> Signed-off-by: Bharath Ravi <rbharath@google.com>
> Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> ---
>  drivers/scsi/scsi_transport_iscsi.c | 46 +++++++++++++++++++++++++++++
>  include/scsi/scsi_transport_iscsi.h |  1 +
>  2 files changed, 47 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
> index 417b868d8735..7251b2b5b272 100644
> --- a/drivers/scsi/scsi_transport_iscsi.c
> +++ b/drivers/scsi/scsi_transport_iscsi.c
> @@ -36,6 +36,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp);
>  
> +static bool kern_conn_failure;
> +module_param(kern_conn_failure, bool, S_IRUGO|S_IWUSR);
> +MODULE_PARM_DESC(kern_conn_failure,
> +		 "Allow the kernel to detect and disable broken connections "
> +		 "without requiring userspace intervention");
> +
>  static int dbg_session;
>  module_param_named(debug_session, dbg_session, int,
>  		   S_IRUGO | S_IWUSR);
> @@ -84,6 +90,12 @@ struct iscsi_internal {
>  	struct transport_container session_cont;
>  };
>  
> +/* Worker to perform connection failure on unresponsive connections
> + * completely in kernel space.
> + */
> +static void stop_conn_work_fn(struct work_struct *work);
> +static DECLARE_WORK(stop_conn_work, stop_conn_work_fn);
> +
>  static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
>  static struct workqueue_struct *iscsi_eh_timer_workq;
>  
> @@ -1609,6 +1621,7 @@ static DEFINE_MUTEX(rx_queue_mutex);
>  static LIST_HEAD(sesslist);
>  static DEFINE_SPINLOCK(sesslock);
>  static LIST_HEAD(connlist);
> +static LIST_HEAD(connlist_err);
>  static DEFINE_SPINLOCK(connlock);
>  
>  static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn)
> @@ -2245,6 +2258,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
>  
>  	mutex_init(&conn->ep_mutex);
>  	INIT_LIST_HEAD(&conn->conn_list);
> +	INIT_LIST_HEAD(&conn->conn_list_err);
>  	conn->transport = transport;
>  	conn->cid = cid;
>  
> @@ -2291,6 +2305,7 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
>  
>  	spin_lock_irqsave(&connlock, flags);
>  	list_del(&conn->conn_list);
> +	list_del(&conn->conn_list_err);
>  	spin_unlock_irqrestore(&connlock, flags);
>  
>  	transport_unregister_device(&conn->dev);
> @@ -2405,6 +2420,28 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
>  }
>  EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
>  
> +static void stop_conn_work_fn(struct work_struct *work)
> +{
> +	struct iscsi_cls_conn *conn, *tmp;
> +	unsigned long flags;
> +	LIST_HEAD(recovery_list);
> +
> +	spin_lock_irqsave(&connlock, flags);
> +	if (list_empty(&connlist_err)) {
> +		spin_unlock_irqrestore(&connlock, flags);
> +		return;
> +	}
> +	list_splice_init(&connlist_err, &recovery_list);
> +	spin_unlock_irqrestore(&connlock, flags);
> +
> +	mutex_lock(&rx_queue_mutex);
> +	list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
> +		conn->transport->stop_conn(conn, STOP_CONN_RECOVER);
> +		list_del_init(&conn->conn_list_err);
> +	}
> +	mutex_unlock(&rx_queue_mutex);
> +}
> +
>  void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>  {
>  	struct nlmsghdr	*nlh;
> @@ -2412,6 +2449,15 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>  	struct iscsi_uevent *ev;
>  	struct iscsi_internal *priv;
>  	int len = nlmsg_total_size(sizeof(*ev));
> +	unsigned long flags;
> +
> +	if (kern_conn_failure) {
> +		spin_lock_irqsave(&connlock, flags);
> +		list_add(&conn->conn_list_err, &connlist_err);
> +		spin_unlock_irqrestore(&connlock, flags);
> +
> +		queue_work(system_unbound_wq, &stop_conn_work);
> +	}
>  

Do you need the modparam? I think you could handle this issue and the
similar one during shutdown at the same time, and you would always want
to do the kernel based error handler when userspace is not answering for
both cases.

You could do the following:

- Modify __iscsi_block_session so it does the stop_conn callout instead
of reverse, and change the iscsi_stop_conn/ISCSI_UEVENT_STOP_CONN:
related code accordingly.

- In iscsi_conn_error_event you would then do:

iscsi_multicast_skb();
iscsi_block_session();

- You can then drop the system_state check in iscsi_eh_cmd_timed_out
because those running commands are always handled by the stop_conn call
in __iscsi_block_session now.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iscsi: Perform connection failure entirely in kernel space
  2019-12-11  0:05 ` Mike Christie
@ 2019-12-11  0:23   ` Mike Christie
  0 siblings, 0 replies; 6+ messages in thread
From: Mike Christie @ 2019-12-11  0:23 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi, lduncan, cleech, martin.petersen
  Cc: linux-scsi, open-iscsi, Bharath Ravi, kernel, Dave Clausen,
	Nick Black, Vaibhav Nagarnaik, Anatol Pomazau, Tahsin Erdogan,
	Frank Mayhar, Junho Ryu, Khazhismel Kumykov

On 12/10/2019 06:05 PM, Mike Christie wrote:
> On 12/09/2019 12:20 PM, Gabriel Krisman Bertazi wrote:
>> From: Bharath Ravi <rbharath@google.com>
>>
>> Connection failure processing depends on a daemon being present to (at
>> least) stop the connection and start recovery.  This is a problem on a
>> multipath scenario, where if the daemon failed for whatever reason, the
>> SCSI path is never marked as down, multipath won't perform the
>> failover and IO to the device will be forever waiting for that
>> connection to come back.
>>
>> This patch implements an optional feature in the iscsi module, to
>> perform the connection failure inside the kernel.  This way, the
>> failover can happen and pending IO can continue even if the daemon is
>> dead. Once the daemon comes alive again, it can perform recovery
>> procedures if applicable.
>>
>> Co-developed-by: Dave Clausen <dclausen@google.com>
>> Signed-off-by: Dave Clausen <dclausen@google.com>
>> Co-developed-by: Nick Black <nlb@google.com>
>> Signed-off-by: Nick Black <nlb@google.com>
>> Co-developed-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
>> Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
>> Co-developed-by: Anatol Pomazau <anatol@google.com>
>> Signed-off-by: Anatol Pomazau <anatol@google.com>
>> Co-developed-by: Tahsin Erdogan <tahsin@google.com>
>> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
>> Co-developed-by: Frank Mayhar <fmayhar@google.com>
>> Signed-off-by: Frank Mayhar <fmayhar@google.com>
>> Co-developed-by: Junho Ryu <jayr@google.com>
>> Signed-off-by: Junho Ryu <jayr@google.com>
>> Co-developed-by: Khazhismel Kumykov <khazhy@google.com>
>> Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
>> Signed-off-by: Bharath Ravi <rbharath@google.com>
>> Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
>> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
>> ---
>>  drivers/scsi/scsi_transport_iscsi.c | 46 +++++++++++++++++++++++++++++
>>  include/scsi/scsi_transport_iscsi.h |  1 +
>>  2 files changed, 47 insertions(+)
>>
>> diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
>> index 417b868d8735..7251b2b5b272 100644
>> --- a/drivers/scsi/scsi_transport_iscsi.c
>> +++ b/drivers/scsi/scsi_transport_iscsi.c
>> @@ -36,6 +36,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session);
>>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp);
>>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp);
>>  
>> +static bool kern_conn_failure;
>> +module_param(kern_conn_failure, bool, S_IRUGO|S_IWUSR);
>> +MODULE_PARM_DESC(kern_conn_failure,
>> +		 "Allow the kernel to detect and disable broken connections "
>> +		 "without requiring userspace intervention");
>> +
>>  static int dbg_session;
>>  module_param_named(debug_session, dbg_session, int,
>>  		   S_IRUGO | S_IWUSR);
>> @@ -84,6 +90,12 @@ struct iscsi_internal {
>>  	struct transport_container session_cont;
>>  };
>>  
>> +/* Worker to perform connection failure on unresponsive connections
>> + * completely in kernel space.
>> + */
>> +static void stop_conn_work_fn(struct work_struct *work);
>> +static DECLARE_WORK(stop_conn_work, stop_conn_work_fn);
>> +
>>  static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
>>  static struct workqueue_struct *iscsi_eh_timer_workq;
>>  
>> @@ -1609,6 +1621,7 @@ static DEFINE_MUTEX(rx_queue_mutex);
>>  static LIST_HEAD(sesslist);
>>  static DEFINE_SPINLOCK(sesslock);
>>  static LIST_HEAD(connlist);
>> +static LIST_HEAD(connlist_err);
>>  static DEFINE_SPINLOCK(connlock);
>>  
>>  static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn)
>> @@ -2245,6 +2258,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
>>  
>>  	mutex_init(&conn->ep_mutex);
>>  	INIT_LIST_HEAD(&conn->conn_list);
>> +	INIT_LIST_HEAD(&conn->conn_list_err);
>>  	conn->transport = transport;
>>  	conn->cid = cid;
>>  
>> @@ -2291,6 +2305,7 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
>>  
>>  	spin_lock_irqsave(&connlock, flags);
>>  	list_del(&conn->conn_list);
>> +	list_del(&conn->conn_list_err);
>>  	spin_unlock_irqrestore(&connlock, flags);
>>  
>>  	transport_unregister_device(&conn->dev);
>> @@ -2405,6 +2420,28 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
>>  }
>>  EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
>>  
>> +static void stop_conn_work_fn(struct work_struct *work)
>> +{
>> +	struct iscsi_cls_conn *conn, *tmp;
>> +	unsigned long flags;
>> +	LIST_HEAD(recovery_list);
>> +
>> +	spin_lock_irqsave(&connlock, flags);
>> +	if (list_empty(&connlist_err)) {
>> +		spin_unlock_irqrestore(&connlock, flags);
>> +		return;
>> +	}
>> +	list_splice_init(&connlist_err, &recovery_list);
>> +	spin_unlock_irqrestore(&connlock, flags);
>> +
>> +	mutex_lock(&rx_queue_mutex);
>> +	list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
>> +		conn->transport->stop_conn(conn, STOP_CONN_RECOVER);
>> +		list_del_init(&conn->conn_list_err);
>> +	}
>> +	mutex_unlock(&rx_queue_mutex);
>> +}
>> +
>>  void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>>  {
>>  	struct nlmsghdr	*nlh;
>> @@ -2412,6 +2449,15 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>>  	struct iscsi_uevent *ev;
>>  	struct iscsi_internal *priv;
>>  	int len = nlmsg_total_size(sizeof(*ev));
>> +	unsigned long flags;
>> +
>> +	if (kern_conn_failure) {
>> +		spin_lock_irqsave(&connlock, flags);
>> +		list_add(&conn->conn_list_err, &connlist_err);
>> +		spin_unlock_irqrestore(&connlock, flags);
>> +
>> +		queue_work(system_unbound_wq, &stop_conn_work);
>> +	}
>>  
> 
> Do you need the modparam? I think you could handle this issue and the
> similar one during shutdown at the same time, and you would always want
> to do the kernel based error handler when userspace is not answering for
> both cases.
> 
> You could do the following:
> 
> - Modify __iscsi_block_session so it does the stop_conn callout instead
> of reverse, and change the iscsi_stop_conn/ISCSI_UEVENT_STOP_CONN:
> related code accordingly.

Oh yeah, on second thought, I think I like how your new function above
calls into the stop_conn callout and everything works like it did
before. Ignore the __iscsi_block_session changes. But, I would drop the
modparam, always queue the work, and then fix up the system_state check.



> 
> - In iscsi_conn_error_event you would then do:
> 
> iscsi_multicast_skb();
> iscsi_block_session();
> 
> - You can then drop the system_state check in iscsi_eh_cmd_timed_out
> because those running commands are always handled by the stop_conn call
> in __iscsi_block_session now.
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] iscsi: Perform connection failure entirely in kernel space
  2019-12-09 18:20 [PATCH] iscsi: Perform connection failure entirely in kernel space Gabriel Krisman Bertazi
  2019-12-09 18:29 ` Lee Duncan
  2019-12-11  0:05 ` Mike Christie
@ 2019-12-17  2:16 ` Khazhismel Kumykov
  2 siblings, 0 replies; 6+ messages in thread
From: Khazhismel Kumykov @ 2019-12-17  2:16 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi
  Cc: lduncan, Chris Leech, Martin K. Petersen, linux-scsi,
	'Khazhismel Kumykov' via open-iscsi, Bharath Ravi,
	kernel, Dave Clausen, Nick Black, Vaibhav Nagarnaik,
	Anatol Pomazau, Tahsin Erdogan, Frank Mayhar, Junho Ryu

[-- Attachment #1: Type: text/plain, Size: 6689 bytes --]

On Mon, Dec 9, 2019 at 10:21 AM Gabriel Krisman Bertazi
<krisman@collabora.com> wrote:
>
> From: Bharath Ravi <rbharath@google.com>
>
> Connection failure processing depends on a daemon being present to (at
> least) stop the connection and start recovery.  This is a problem on a
> multipath scenario, where if the daemon failed for whatever reason, the
> SCSI path is never marked as down, multipath won't perform the
> failover and IO to the device will be forever waiting for that
> connection to come back.
>
> This patch implements an optional feature in the iscsi module, to
> perform the connection failure inside the kernel.  This way, the
> failover can happen and pending IO can continue even if the daemon is
> dead. Once the daemon comes alive again, it can perform recovery
> procedures if applicable.
>
> Co-developed-by: Dave Clausen <dclausen@google.com>
> Signed-off-by: Dave Clausen <dclausen@google.com>
> Co-developed-by: Nick Black <nlb@google.com>
> Signed-off-by: Nick Black <nlb@google.com>
> Co-developed-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
> Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
> Co-developed-by: Anatol Pomazau <anatol@google.com>
> Signed-off-by: Anatol Pomazau <anatol@google.com>
> Co-developed-by: Tahsin Erdogan <tahsin@google.com>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> Co-developed-by: Frank Mayhar <fmayhar@google.com>
> Signed-off-by: Frank Mayhar <fmayhar@google.com>
> Co-developed-by: Junho Ryu <jayr@google.com>
> Signed-off-by: Junho Ryu <jayr@google.com>
> Co-developed-by: Khazhismel Kumykov <khazhy@google.com>
> Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
> Signed-off-by: Bharath Ravi <rbharath@google.com>
> Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> ---
>  drivers/scsi/scsi_transport_iscsi.c | 46 +++++++++++++++++++++++++++++
>  include/scsi/scsi_transport_iscsi.h |  1 +
>  2 files changed, 47 insertions(+)
>
> diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
> index 417b868d8735..7251b2b5b272 100644
> --- a/drivers/scsi/scsi_transport_iscsi.c
> +++ b/drivers/scsi/scsi_transport_iscsi.c
> @@ -36,6 +36,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp);
>
> +static bool kern_conn_failure;
> +module_param(kern_conn_failure, bool, S_IRUGO|S_IWUSR);
> +MODULE_PARM_DESC(kern_conn_failure,
> +                "Allow the kernel to detect and disable broken connections "
> +                "without requiring userspace intervention");
> +
>  static int dbg_session;
>  module_param_named(debug_session, dbg_session, int,
>                    S_IRUGO | S_IWUSR);
> @@ -84,6 +90,12 @@ struct iscsi_internal {
>         struct transport_container session_cont;
>  };
>
> +/* Worker to perform connection failure on unresponsive connections
> + * completely in kernel space.
> + */
> +static void stop_conn_work_fn(struct work_struct *work);
> +static DECLARE_WORK(stop_conn_work, stop_conn_work_fn);
> +
>  static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
>  static struct workqueue_struct *iscsi_eh_timer_workq;
>
> @@ -1609,6 +1621,7 @@ static DEFINE_MUTEX(rx_queue_mutex);
>  static LIST_HEAD(sesslist);
>  static DEFINE_SPINLOCK(sesslock);
>  static LIST_HEAD(connlist);
> +static LIST_HEAD(connlist_err);
>  static DEFINE_SPINLOCK(connlock);
>
>  static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn)
> @@ -2245,6 +2258,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
>
>         mutex_init(&conn->ep_mutex);
>         INIT_LIST_HEAD(&conn->conn_list);
> +       INIT_LIST_HEAD(&conn->conn_list_err);
>         conn->transport = transport;
>         conn->cid = cid;
>
> @@ -2291,6 +2305,7 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
>
>         spin_lock_irqsave(&connlock, flags);
>         list_del(&conn->conn_list);
> +       list_del(&conn->conn_list_err);
>         spin_unlock_irqrestore(&connlock, flags);
>
>         transport_unregister_device(&conn->dev);
> @@ -2405,6 +2420,28 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
>  }
>  EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
>
> +static void stop_conn_work_fn(struct work_struct *work)
> +{
> +       struct iscsi_cls_conn *conn, *tmp;
> +       unsigned long flags;
> +       LIST_HEAD(recovery_list);
> +
> +       spin_lock_irqsave(&connlock, flags);
> +       if (list_empty(&connlist_err)) {
> +               spin_unlock_irqrestore(&connlock, flags);
> +               return;
> +       }
> +       list_splice_init(&connlist_err, &recovery_list);
> +       spin_unlock_irqrestore(&connlock, flags);
> +
> +       mutex_lock(&rx_queue_mutex);
> +       list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
> +               conn->transport->stop_conn(conn, STOP_CONN_RECOVER);
> +               list_del_init(&conn->conn_list_err);
> +       }
> +       mutex_unlock(&rx_queue_mutex);
Holding rx_queue_mutex for the entire conn_list_err may be problematic
for long conn_list_err, could we drop on need_resched perhaps, or otherwise
limit how long we hold this?
> +}
> +
>  void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>  {
>         struct nlmsghdr *nlh;
> @@ -2412,6 +2449,15 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>         struct iscsi_uevent *ev;
>         struct iscsi_internal *priv;
>         int len = nlmsg_total_size(sizeof(*ev));
> +       unsigned long flags;
> +
> +       if (kern_conn_failure) {
> +               spin_lock_irqsave(&connlock, flags);
> +               list_add(&conn->conn_list_err, &connlist_err);
> +               spin_unlock_irqrestore(&connlock, flags);
> +
> +               queue_work(system_unbound_wq, &stop_conn_work);
> +       }
>
>         priv = iscsi_if_transport_lookup(conn->transport);
>         if (!priv)
> diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
> index 325ae731d9ad..2129dc9e2dec 100644
> --- a/include/scsi/scsi_transport_iscsi.h
> +++ b/include/scsi/scsi_transport_iscsi.h
> @@ -190,6 +190,7 @@ extern void iscsi_ping_comp_event(uint32_t host_no,
>
>  struct iscsi_cls_conn {
>         struct list_head conn_list;     /* item in connlist */
> +       struct list_head conn_list_err; /* item in connlist_err */
>         void *dd_data;                  /* LLD private data */
>         struct iscsi_transport *transport;
>         uint32_t cid;                   /* connection id */
> --
> 2.24.0
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4843 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-12-17  2:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-09 18:20 [PATCH] iscsi: Perform connection failure entirely in kernel space Gabriel Krisman Bertazi
2019-12-09 18:29 ` Lee Duncan
2019-12-09 18:39   ` Gabriel Krisman Bertazi
2019-12-11  0:05 ` Mike Christie
2019-12-11  0:23   ` Mike Christie
2019-12-17  2:16 ` Khazhismel Kumykov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).