All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages
@ 2018-07-03  7:57 Martin Wilck
  2018-07-03  7:57 ` [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION Martin Wilck
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Martin Wilck @ 2018-07-03  7:57 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: dm-devel, Martin Wilck

Map the PRINT_DEBUG macro of the ALUA code to a condlog call, to
be able to better debug SCSI error conditions.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/prioritizers/alua_rtpg.c | 34 +++++++++++----------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c
index e4315020..ce405b55 100644
--- a/libmultipath/prioritizers/alua_rtpg.c
+++ b/libmultipath/prioritizers/alua_rtpg.c
@@ -27,20 +27,14 @@
 #include "../prio.h"
 #include "../discovery.h"
 #include "../unaligned.h"
+#include "../debug.h"
 #include "alua_rtpg.h"
 
 #define SENSE_BUFF_LEN  32
 #define SGIO_TIMEOUT     60000
 
-/*
- * Macro used to print debug messaged.
- */
-#if DEBUG > 0
 #define PRINT_DEBUG(f, a...) \
-		fprintf(stderr, "DEBUG: " f, ##a)
-#else
-#define PRINT_DEBUG(f, a...)
-#endif
+	condlog(4, "alua: " f, ##a)
 
 /*
  * Optionally print the commands sent and the data received a hex dump.
@@ -144,12 +138,12 @@ do_inquiry(int fd, int evpd, unsigned int codepage,
 	hdr.timeout		= get_prio_timeout(timeout, SGIO_TIMEOUT);
 
 	if (ioctl(fd, SG_IO, &hdr) < 0) {
-		PRINT_DEBUG("do_inquiry: IOCTL failed!\n");
+		PRINT_DEBUG("do_inquiry: IOCTL failed!");
 		return -RTPG_INQUIRY_FAILED;
 	}
 
 	if (scsi_error(&hdr)) {
-		PRINT_DEBUG("do_inquiry: SCSI error!\n");
+		PRINT_DEBUG("do_inquiry: SCSI error!");
 		return -RTPG_INQUIRY_FAILED;
 	}
 	PRINT_HEX((unsigned char *) resp, resplen);
@@ -189,7 +183,7 @@ get_sysfs_pg83(struct path *pp, unsigned char *buff, int buflen)
 	}
 
 	if (!parent || sysfs_get_vpd(parent, 0x83, buff, buflen) <= 0) {
-		PRINT_DEBUG("failed to read sysfs vpd pg83\n");
+		PRINT_DEBUG("failed to read sysfs vpd pg83");
 		return -1;
 	}
 	return 0;
@@ -208,7 +202,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
 	buf = (unsigned char *)malloc(buflen);
 	if (!buf) {
 		PRINT_DEBUG("malloc failed: could not allocate"
-			     "%u bytes\n", buflen);
+			     "%u bytes", buflen);
 		return -RTPG_RTPG_FAILED;
 	}
 
@@ -230,7 +224,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
 			buf = (unsigned char *)malloc(scsi_buflen);
 			if (!buf) {
 				PRINT_DEBUG("malloc failed: could not allocate"
-					    "%u bytes\n", scsi_buflen);
+					    "%u bytes", scsi_buflen);
 				return -RTPG_RTPG_FAILED;
 			}
 			buflen = scsi_buflen;
@@ -248,7 +242,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
 			struct vpd83_tpg_dscr *p;
 			if (rc != -RTPG_NO_TPG_IDENTIFIER) {
 				PRINT_DEBUG("get_target_port_group: more "
-					    "than one TPG identifier found!\n");
+					    "than one TPG identifier found!");
 				continue;
 			}
 			p  = (struct vpd83_tpg_dscr *)dscr->data;
@@ -258,7 +252,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
 
 	if (rc == -RTPG_NO_TPG_IDENTIFIER) {
 		PRINT_DEBUG("get_target_port_group: "
-			    "no TPG identifier found!\n");
+			    "no TPG identifier found!");
 	}
 out:
 	free(buf);
@@ -293,7 +287,7 @@ do_rtpg(int fd, void* resp, long resplen, unsigned int timeout)
 		return -RTPG_RTPG_FAILED;
 
 	if (scsi_error(&hdr)) {
-		PRINT_DEBUG("do_rtpg: SCSI error!\n");
+		PRINT_DEBUG("do_rtpg: SCSI error!");
 		return -RTPG_RTPG_FAILED;
 	}
 	PRINT_HEX(resp, resplen);
@@ -315,7 +309,7 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
 	buf = (unsigned char *)malloc(buflen);
 	if (!buf) {
 		PRINT_DEBUG ("malloc failed: could not allocate"
-			"%u bytes\n", buflen);
+			"%u bytes", buflen);
 		return -RTPG_RTPG_FAILED;
 	}
 	memset(buf, 0, buflen);
@@ -330,7 +324,7 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
 		buf = (unsigned char *)malloc(scsi_buflen);
 		if (!buf) {
 			PRINT_DEBUG ("malloc failed: could not allocate"
-				"%u bytes\n", scsi_buflen);
+				"%u bytes", scsi_buflen);
 			return -RTPG_RTPG_FAILED;
 		}
 		buflen = scsi_buflen;
@@ -347,9 +341,9 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
 			if (rc != -RTPG_TPG_NOT_FOUND) {
 				PRINT_DEBUG("get_asymmetric_access_state: "
 					"more than one entry with same port "
-					"group.\n");
+					"group.");
 			} else {
-				PRINT_DEBUG("pref=%i\n", dscr->b0);
+				condlog(5, "pref=%i", dscr->b0);
 				rc = rtpg_tpg_dscr_get_aas(dscr);
 			}
 		}
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION
  2018-07-03  7:57 [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Martin Wilck
@ 2018-07-03  7:57 ` Martin Wilck
  2018-07-13 19:08   ` Benjamin Marzinski
  2018-07-03  7:57 ` [PATCH 3/3] libmultipath: more debug output in alua prioritizer Martin Wilck
  2018-07-13 19:08 ` [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Benjamin Marzinski
  2 siblings, 1 reply; 6+ messages in thread
From: Martin Wilck @ 2018-07-03  7:57 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: dm-devel, Martin Wilck

Use similar logic as the kernel for retrying ALUA commands to
avoid misinterpreting temporary failures as fatal errors.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/prioritizers/alua_rtpg.c | 59 +++++++++++++++++++++++----
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c
index ce405b55..34b5f3ce 100644
--- a/libmultipath/prioritizers/alua_rtpg.c
+++ b/libmultipath/prioritizers/alua_rtpg.c
@@ -69,10 +69,20 @@ print_hex(unsigned char *p, unsigned long len)
 #define SCSI_COMMAND_TERMINATED 0x22
 #define SG_ERR_DRIVER_SENSE     0x08
 #define RECOVERED_ERROR 0x01
+#define NOT_READY 0x2
+#define UNIT_ATTENTION 0x6
+
+enum scsi_disposition {
+	SCSI_GOOD = 0,
+	SCSI_ERROR,
+	SCSI_RETRY,
+};
 
 static int
-scsi_error(struct sg_io_hdr *hdr)
+scsi_error(struct sg_io_hdr *hdr, int opcode)
 {
+	int sense_key, asc, ascq;
+
 	/* Treat SG_ERR here to get rid of sg_err.[ch] */
 	hdr->status &= 0x7e;
 
@@ -81,29 +91,44 @@ scsi_error(struct sg_io_hdr *hdr)
 		(hdr->host_status == 0)   &&
 		(hdr->driver_status == 0)
 	) {
-		return 0;
+		return SCSI_GOOD;
 	}
 
+	sense_key = asc = ascq = -1;
 	if (
 		(hdr->status == SCSI_CHECK_CONDITION)    ||
 		(hdr->status == SCSI_COMMAND_TERMINATED) ||
 		((hdr->driver_status & 0xf) == SG_ERR_DRIVER_SENSE)
 	) {
 		if (hdr->sbp && (hdr->sb_len_wr > 2)) {
-			int		sense_key;
 			unsigned char *	sense_buffer = hdr->sbp;
 
-			if (sense_buffer[0] & 0x2)
+			if (sense_buffer[0] & 0x2) {
 				sense_key = sense_buffer[1] & 0xf;
-			else
+				if (hdr->sb_len_wr > 3)
+					asc = sense_buffer[2];
+				if (hdr->sb_len_wr > 4)
+					ascq = sense_buffer[3];
+			} else {
 				sense_key = sense_buffer[2] & 0xf;
+				if (hdr->sb_len_wr > 13)
+					asc = sense_buffer[12];
+				if (hdr->sb_len_wr > 14)
+					ascq = sense_buffer[13];
+			}
 
 			if (sense_key == RECOVERED_ERROR)
-				return 0;
+				return SCSI_GOOD;
 		}
 	}
 
-	return 1;
+	PRINT_DEBUG("alua: SCSI error for command %02x: status %02x, sense %02x/%02x/%02x",
+		    opcode, hdr->status, sense_key, asc, ascq);
+
+	if (sense_key == UNIT_ATTENTION || sense_key == NOT_READY)
+		return SCSI_RETRY;
+	else
+		return SCSI_ERROR;
 }
 
 /*
@@ -116,7 +141,9 @@ do_inquiry(int fd, int evpd, unsigned int codepage,
 	struct inquiry_command	cmd;
 	struct sg_io_hdr	hdr;
 	unsigned char		sense[SENSE_BUFF_LEN];
+	int rc, retry_count = 3;
 
+retry:
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.op = OPERATION_CODE_INQUIRY;
 	if (evpd) {
@@ -142,9 +169,15 @@ do_inquiry(int fd, int evpd, unsigned int codepage,
 		return -RTPG_INQUIRY_FAILED;
 	}
 
-	if (scsi_error(&hdr)) {
+	rc = scsi_error(&hdr, OPERATION_CODE_INQUIRY);
+	if (rc == SCSI_ERROR) {
 		PRINT_DEBUG("do_inquiry: SCSI error!");
 		return -RTPG_INQUIRY_FAILED;
+	} else if (rc == SCSI_RETRY) {
+		if (--retry_count >= 0)
+			goto retry;
+		PRINT_DEBUG("do_inquiry: retries exhausted!");
+		return -RTPG_INQUIRY_FAILED;
 	}
 	PRINT_HEX((unsigned char *) resp, resplen);
 
@@ -265,7 +298,9 @@ do_rtpg(int fd, void* resp, long resplen, unsigned int timeout)
 	struct rtpg_command	cmd;
 	struct sg_io_hdr	hdr;
 	unsigned char		sense[SENSE_BUFF_LEN];
+	int retry_count = 3, rc;
 
+retry:
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.op			= OPERATION_CODE_RTPG;
 	rtpg_command_set_service_action(&cmd);
@@ -286,9 +321,15 @@ do_rtpg(int fd, void* resp, long resplen, unsigned int timeout)
 	if (ioctl(fd, SG_IO, &hdr) < 0)
 		return -RTPG_RTPG_FAILED;
 
-	if (scsi_error(&hdr)) {
+	rc = scsi_error(&hdr, OPERATION_CODE_RTPG);
+	if (rc == SCSI_ERROR) {
 		PRINT_DEBUG("do_rtpg: SCSI error!");
 		return -RTPG_RTPG_FAILED;
+	} else if (rc == SCSI_RETRY) {
+		if (--retry_count >= 0)
+			goto retry;
+		PRINT_DEBUG("do_rtpg: retries exhausted!");
+		return -RTPG_RTPG_FAILED;
 	}
 	PRINT_HEX(resp, resplen);
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] libmultipath: more debug output in alua prioritizer
  2018-07-03  7:57 [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Martin Wilck
  2018-07-03  7:57 ` [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION Martin Wilck
@ 2018-07-03  7:57 ` Martin Wilck
  2018-07-13 19:09   ` Benjamin Marzinski
  2018-07-13 19:08 ` [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Benjamin Marzinski
  2 siblings, 1 reply; 6+ messages in thread
From: Martin Wilck @ 2018-07-03  7:57 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: dm-devel, Martin Wilck

Help debug problems with ALUA commands by emitting more
meaningful error messages.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/prioritizers/alua.c      |  5 ++++-
 libmultipath/prioritizers/alua_rtpg.c | 16 ++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/libmultipath/prioritizers/alua.c b/libmultipath/prioritizers/alua.c
index 03d0a0e0..b24e2d48 100644
--- a/libmultipath/prioritizers/alua.c
+++ b/libmultipath/prioritizers/alua.c
@@ -67,8 +67,11 @@ get_alua_info(struct path * pp, unsigned int timeout)
 	}
 	condlog(3, "%s: reported target port group is %i", pp->dev, tpg);
 	rc = get_asymmetric_access_state(pp->fd, tpg, timeout);
-	if (rc < 0)
+	if (rc < 0) {
+		condlog(2, "%s: get_asymmetric_access_state returned %d",
+			__func__, rc);
 		return -ALUA_PRIO_GETAAS_FAILED;
+	}
 
 	condlog(3, "%s: aas = %02x [%s]%s", pp->dev, rc, aas_print_string(rc),
 		(rc & 0x80) ? " [preferred]" : "");
diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c
index 34b5f3ce..811ce7a2 100644
--- a/libmultipath/prioritizers/alua_rtpg.c
+++ b/libmultipath/prioritizers/alua_rtpg.c
@@ -19,6 +19,7 @@
 #include <sys/ioctl.h>
 #include <inttypes.h>
 #include <libudev.h>
+#include <errno.h>
 
 #define __user
 #include <scsi/sg.h>
@@ -318,8 +319,11 @@ retry:
 	hdr.sbp			= sense;
 	hdr.timeout		= get_prio_timeout(timeout, SGIO_TIMEOUT);
 
-	if (ioctl(fd, SG_IO, &hdr) < 0)
+	if (ioctl(fd, SG_IO, &hdr) < 0) {
+		condlog(2, "%s: sg ioctl failed: %s",
+			__func__, strerror(errno));
 		return -RTPG_RTPG_FAILED;
+	}
 
 	rc = scsi_error(&hdr, OPERATION_CODE_RTPG);
 	if (rc == SCSI_ERROR) {
@@ -355,8 +359,10 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
 	}
 	memset(buf, 0, buflen);
 	rc = do_rtpg(fd, buf, buflen, timeout);
-	if (rc < 0)
+	if (rc < 0) {
+		PRINT_DEBUG("%s: do_rtpg returned %d", __func__, rc);
 		goto out;
+	}
 	scsi_buflen = get_unaligned_be32(&buf[0]) + 4;
 	if (scsi_buflen > UINT_MAX)
 		scsi_buflen = UINT_MAX;
@@ -364,8 +370,8 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
 		free(buf);
 		buf = (unsigned char *)malloc(scsi_buflen);
 		if (!buf) {
-			PRINT_DEBUG ("malloc failed: could not allocate"
-				"%u bytes", scsi_buflen);
+			PRINT_DEBUG("malloc failed: could not allocate %"
+				    PRIu64 " bytes", scsi_buflen);
 			return -RTPG_RTPG_FAILED;
 		}
 		buflen = scsi_buflen;
@@ -389,6 +395,8 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
 			}
 		}
 	}
+	if (rc == -RTPG_TPG_NOT_FOUND)
+		condlog(2, "%s: port group %d not found", __func__, tpg);
 out:
 	free(buf);
 	return rc;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages
  2018-07-03  7:57 [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Martin Wilck
  2018-07-03  7:57 ` [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION Martin Wilck
  2018-07-03  7:57 ` [PATCH 3/3] libmultipath: more debug output in alua prioritizer Martin Wilck
@ 2018-07-13 19:08 ` Benjamin Marzinski
  2 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2018-07-13 19:08 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel

On Tue, Jul 03, 2018 at 09:57:05AM +0200, Martin Wilck wrote:
> Map the PRINT_DEBUG macro of the ALUA code to a condlog call, to
> be able to better debug SCSI error conditions.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/prioritizers/alua_rtpg.c | 34 +++++++++++----------------
>  1 file changed, 14 insertions(+), 20 deletions(-)
> 
> diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c
> index e4315020..ce405b55 100644
> --- a/libmultipath/prioritizers/alua_rtpg.c
> +++ b/libmultipath/prioritizers/alua_rtpg.c
> @@ -27,20 +27,14 @@
>  #include "../prio.h"
>  #include "../discovery.h"
>  #include "../unaligned.h"
> +#include "../debug.h"
>  #include "alua_rtpg.h"
>  
>  #define SENSE_BUFF_LEN  32
>  #define SGIO_TIMEOUT     60000
>  
> -/*
> - * Macro used to print debug messaged.
> - */
> -#if DEBUG > 0
>  #define PRINT_DEBUG(f, a...) \
> -		fprintf(stderr, "DEBUG: " f, ##a)
> -#else
> -#define PRINT_DEBUG(f, a...)
> -#endif
> +	condlog(4, "alua: " f, ##a)
>  
>  /*
>   * Optionally print the commands sent and the data received a hex dump.
> @@ -144,12 +138,12 @@ do_inquiry(int fd, int evpd, unsigned int codepage,
>  	hdr.timeout		= get_prio_timeout(timeout, SGIO_TIMEOUT);
>  
>  	if (ioctl(fd, SG_IO, &hdr) < 0) {
> -		PRINT_DEBUG("do_inquiry: IOCTL failed!\n");
> +		PRINT_DEBUG("do_inquiry: IOCTL failed!");
>  		return -RTPG_INQUIRY_FAILED;
>  	}
>  
>  	if (scsi_error(&hdr)) {
> -		PRINT_DEBUG("do_inquiry: SCSI error!\n");
> +		PRINT_DEBUG("do_inquiry: SCSI error!");
>  		return -RTPG_INQUIRY_FAILED;
>  	}
>  	PRINT_HEX((unsigned char *) resp, resplen);
> @@ -189,7 +183,7 @@ get_sysfs_pg83(struct path *pp, unsigned char *buff, int buflen)
>  	}
>  
>  	if (!parent || sysfs_get_vpd(parent, 0x83, buff, buflen) <= 0) {
> -		PRINT_DEBUG("failed to read sysfs vpd pg83\n");
> +		PRINT_DEBUG("failed to read sysfs vpd pg83");
>  		return -1;
>  	}
>  	return 0;
> @@ -208,7 +202,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
>  	buf = (unsigned char *)malloc(buflen);
>  	if (!buf) {
>  		PRINT_DEBUG("malloc failed: could not allocate"
> -			     "%u bytes\n", buflen);
> +			     "%u bytes", buflen);
>  		return -RTPG_RTPG_FAILED;
>  	}
>  
> @@ -230,7 +224,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
>  			buf = (unsigned char *)malloc(scsi_buflen);
>  			if (!buf) {
>  				PRINT_DEBUG("malloc failed: could not allocate"
> -					    "%u bytes\n", scsi_buflen);
> +					    "%u bytes", scsi_buflen);
>  				return -RTPG_RTPG_FAILED;
>  			}
>  			buflen = scsi_buflen;
> @@ -248,7 +242,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
>  			struct vpd83_tpg_dscr *p;
>  			if (rc != -RTPG_NO_TPG_IDENTIFIER) {
>  				PRINT_DEBUG("get_target_port_group: more "
> -					    "than one TPG identifier found!\n");
> +					    "than one TPG identifier found!");
>  				continue;
>  			}
>  			p  = (struct vpd83_tpg_dscr *)dscr->data;
> @@ -258,7 +252,7 @@ get_target_port_group(struct path * pp, unsigned int timeout)
>  
>  	if (rc == -RTPG_NO_TPG_IDENTIFIER) {
>  		PRINT_DEBUG("get_target_port_group: "
> -			    "no TPG identifier found!\n");
> +			    "no TPG identifier found!");
>  	}
>  out:
>  	free(buf);
> @@ -293,7 +287,7 @@ do_rtpg(int fd, void* resp, long resplen, unsigned int timeout)
>  		return -RTPG_RTPG_FAILED;
>  
>  	if (scsi_error(&hdr)) {
> -		PRINT_DEBUG("do_rtpg: SCSI error!\n");
> +		PRINT_DEBUG("do_rtpg: SCSI error!");
>  		return -RTPG_RTPG_FAILED;
>  	}
>  	PRINT_HEX(resp, resplen);
> @@ -315,7 +309,7 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
>  	buf = (unsigned char *)malloc(buflen);
>  	if (!buf) {
>  		PRINT_DEBUG ("malloc failed: could not allocate"
> -			"%u bytes\n", buflen);
> +			"%u bytes", buflen);
>  		return -RTPG_RTPG_FAILED;
>  	}
>  	memset(buf, 0, buflen);
> @@ -330,7 +324,7 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
>  		buf = (unsigned char *)malloc(scsi_buflen);
>  		if (!buf) {
>  			PRINT_DEBUG ("malloc failed: could not allocate"
> -				"%u bytes\n", scsi_buflen);
> +				"%u bytes", scsi_buflen);
>  			return -RTPG_RTPG_FAILED;
>  		}
>  		buflen = scsi_buflen;
> @@ -347,9 +341,9 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
>  			if (rc != -RTPG_TPG_NOT_FOUND) {
>  				PRINT_DEBUG("get_asymmetric_access_state: "
>  					"more than one entry with same port "
> -					"group.\n");
> +					"group.");
>  			} else {
> -				PRINT_DEBUG("pref=%i\n", dscr->b0);
> +				condlog(5, "pref=%i", dscr->b0);
>  				rc = rtpg_tpg_dscr_get_aas(dscr);
>  			}
>  		}
> -- 
> 2.17.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION
  2018-07-03  7:57 ` [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION Martin Wilck
@ 2018-07-13 19:08   ` Benjamin Marzinski
  0 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2018-07-13 19:08 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel

On Tue, Jul 03, 2018 at 09:57:06AM +0200, Martin Wilck wrote:
> Use similar logic as the kernel for retrying ALUA commands to
> avoid misinterpreting temporary failures as fatal errors.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/prioritizers/alua_rtpg.c | 59 +++++++++++++++++++++++----
>  1 file changed, 50 insertions(+), 9 deletions(-)
> 
> diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c
> index ce405b55..34b5f3ce 100644
> --- a/libmultipath/prioritizers/alua_rtpg.c
> +++ b/libmultipath/prioritizers/alua_rtpg.c
> @@ -69,10 +69,20 @@ print_hex(unsigned char *p, unsigned long len)
>  #define SCSI_COMMAND_TERMINATED 0x22
>  #define SG_ERR_DRIVER_SENSE     0x08
>  #define RECOVERED_ERROR 0x01
> +#define NOT_READY 0x2
> +#define UNIT_ATTENTION 0x6
> +
> +enum scsi_disposition {
> +	SCSI_GOOD = 0,
> +	SCSI_ERROR,
> +	SCSI_RETRY,
> +};
>  
>  static int
> -scsi_error(struct sg_io_hdr *hdr)
> +scsi_error(struct sg_io_hdr *hdr, int opcode)
>  {
> +	int sense_key, asc, ascq;
> +
>  	/* Treat SG_ERR here to get rid of sg_err.[ch] */
>  	hdr->status &= 0x7e;
>  
> @@ -81,29 +91,44 @@ scsi_error(struct sg_io_hdr *hdr)
>  		(hdr->host_status == 0)   &&
>  		(hdr->driver_status == 0)
>  	) {
> -		return 0;
> +		return SCSI_GOOD;
>  	}
>  
> +	sense_key = asc = ascq = -1;
>  	if (
>  		(hdr->status == SCSI_CHECK_CONDITION)    ||
>  		(hdr->status == SCSI_COMMAND_TERMINATED) ||
>  		((hdr->driver_status & 0xf) == SG_ERR_DRIVER_SENSE)
>  	) {
>  		if (hdr->sbp && (hdr->sb_len_wr > 2)) {
> -			int		sense_key;
>  			unsigned char *	sense_buffer = hdr->sbp;
>  
> -			if (sense_buffer[0] & 0x2)
> +			if (sense_buffer[0] & 0x2) {
>  				sense_key = sense_buffer[1] & 0xf;
> -			else
> +				if (hdr->sb_len_wr > 3)
> +					asc = sense_buffer[2];
> +				if (hdr->sb_len_wr > 4)
> +					ascq = sense_buffer[3];
> +			} else {
>  				sense_key = sense_buffer[2] & 0xf;
> +				if (hdr->sb_len_wr > 13)
> +					asc = sense_buffer[12];
> +				if (hdr->sb_len_wr > 14)
> +					ascq = sense_buffer[13];
> +			}
>  
>  			if (sense_key == RECOVERED_ERROR)
> -				return 0;
> +				return SCSI_GOOD;
>  		}
>  	}
>  
> -	return 1;
> +	PRINT_DEBUG("alua: SCSI error for command %02x: status %02x, sense %02x/%02x/%02x",
> +		    opcode, hdr->status, sense_key, asc, ascq);
> +
> +	if (sense_key == UNIT_ATTENTION || sense_key == NOT_READY)
> +		return SCSI_RETRY;
> +	else
> +		return SCSI_ERROR;
>  }
>  
>  /*
> @@ -116,7 +141,9 @@ do_inquiry(int fd, int evpd, unsigned int codepage,
>  	struct inquiry_command	cmd;
>  	struct sg_io_hdr	hdr;
>  	unsigned char		sense[SENSE_BUFF_LEN];
> +	int rc, retry_count = 3;
>  
> +retry:
>  	memset(&cmd, 0, sizeof(cmd));
>  	cmd.op = OPERATION_CODE_INQUIRY;
>  	if (evpd) {
> @@ -142,9 +169,15 @@ do_inquiry(int fd, int evpd, unsigned int codepage,
>  		return -RTPG_INQUIRY_FAILED;
>  	}
>  
> -	if (scsi_error(&hdr)) {
> +	rc = scsi_error(&hdr, OPERATION_CODE_INQUIRY);
> +	if (rc == SCSI_ERROR) {
>  		PRINT_DEBUG("do_inquiry: SCSI error!");
>  		return -RTPG_INQUIRY_FAILED;
> +	} else if (rc == SCSI_RETRY) {
> +		if (--retry_count >= 0)
> +			goto retry;
> +		PRINT_DEBUG("do_inquiry: retries exhausted!");
> +		return -RTPG_INQUIRY_FAILED;
>  	}
>  	PRINT_HEX((unsigned char *) resp, resplen);
>  
> @@ -265,7 +298,9 @@ do_rtpg(int fd, void* resp, long resplen, unsigned int timeout)
>  	struct rtpg_command	cmd;
>  	struct sg_io_hdr	hdr;
>  	unsigned char		sense[SENSE_BUFF_LEN];
> +	int retry_count = 3, rc;
>  
> +retry:
>  	memset(&cmd, 0, sizeof(cmd));
>  	cmd.op			= OPERATION_CODE_RTPG;
>  	rtpg_command_set_service_action(&cmd);
> @@ -286,9 +321,15 @@ do_rtpg(int fd, void* resp, long resplen, unsigned int timeout)
>  	if (ioctl(fd, SG_IO, &hdr) < 0)
>  		return -RTPG_RTPG_FAILED;
>  
> -	if (scsi_error(&hdr)) {
> +	rc = scsi_error(&hdr, OPERATION_CODE_RTPG);
> +	if (rc == SCSI_ERROR) {
>  		PRINT_DEBUG("do_rtpg: SCSI error!");
>  		return -RTPG_RTPG_FAILED;
> +	} else if (rc == SCSI_RETRY) {
> +		if (--retry_count >= 0)
> +			goto retry;
> +		PRINT_DEBUG("do_rtpg: retries exhausted!");
> +		return -RTPG_RTPG_FAILED;
>  	}
>  	PRINT_HEX(resp, resplen);
>  
> -- 
> 2.17.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 3/3] libmultipath: more debug output in alua prioritizer
  2018-07-03  7:57 ` [PATCH 3/3] libmultipath: more debug output in alua prioritizer Martin Wilck
@ 2018-07-13 19:09   ` Benjamin Marzinski
  0 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2018-07-13 19:09 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel

On Tue, Jul 03, 2018 at 09:57:07AM +0200, Martin Wilck wrote:
> Help debug problems with ALUA commands by emitting more
> meaningful error messages.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/prioritizers/alua.c      |  5 ++++-
>  libmultipath/prioritizers/alua_rtpg.c | 16 ++++++++++++----
>  2 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/libmultipath/prioritizers/alua.c b/libmultipath/prioritizers/alua.c
> index 03d0a0e0..b24e2d48 100644
> --- a/libmultipath/prioritizers/alua.c
> +++ b/libmultipath/prioritizers/alua.c
> @@ -67,8 +67,11 @@ get_alua_info(struct path * pp, unsigned int timeout)
>  	}
>  	condlog(3, "%s: reported target port group is %i", pp->dev, tpg);
>  	rc = get_asymmetric_access_state(pp->fd, tpg, timeout);
> -	if (rc < 0)
> +	if (rc < 0) {
> +		condlog(2, "%s: get_asymmetric_access_state returned %d",
> +			__func__, rc);
>  		return -ALUA_PRIO_GETAAS_FAILED;
> +	}
>  
>  	condlog(3, "%s: aas = %02x [%s]%s", pp->dev, rc, aas_print_string(rc),
>  		(rc & 0x80) ? " [preferred]" : "");
> diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c
> index 34b5f3ce..811ce7a2 100644
> --- a/libmultipath/prioritizers/alua_rtpg.c
> +++ b/libmultipath/prioritizers/alua_rtpg.c
> @@ -19,6 +19,7 @@
>  #include <sys/ioctl.h>
>  #include <inttypes.h>
>  #include <libudev.h>
> +#include <errno.h>
>  
>  #define __user
>  #include <scsi/sg.h>
> @@ -318,8 +319,11 @@ retry:
>  	hdr.sbp			= sense;
>  	hdr.timeout		= get_prio_timeout(timeout, SGIO_TIMEOUT);
>  
> -	if (ioctl(fd, SG_IO, &hdr) < 0)
> +	if (ioctl(fd, SG_IO, &hdr) < 0) {
> +		condlog(2, "%s: sg ioctl failed: %s",
> +			__func__, strerror(errno));
>  		return -RTPG_RTPG_FAILED;
> +	}
>  
>  	rc = scsi_error(&hdr, OPERATION_CODE_RTPG);
>  	if (rc == SCSI_ERROR) {
> @@ -355,8 +359,10 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
>  	}
>  	memset(buf, 0, buflen);
>  	rc = do_rtpg(fd, buf, buflen, timeout);
> -	if (rc < 0)
> +	if (rc < 0) {
> +		PRINT_DEBUG("%s: do_rtpg returned %d", __func__, rc);
>  		goto out;
> +	}
>  	scsi_buflen = get_unaligned_be32(&buf[0]) + 4;
>  	if (scsi_buflen > UINT_MAX)
>  		scsi_buflen = UINT_MAX;
> @@ -364,8 +370,8 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
>  		free(buf);
>  		buf = (unsigned char *)malloc(scsi_buflen);
>  		if (!buf) {
> -			PRINT_DEBUG ("malloc failed: could not allocate"
> -				"%u bytes", scsi_buflen);
> +			PRINT_DEBUG("malloc failed: could not allocate %"
> +				    PRIu64 " bytes", scsi_buflen);
>  			return -RTPG_RTPG_FAILED;
>  		}
>  		buflen = scsi_buflen;
> @@ -389,6 +395,8 @@ get_asymmetric_access_state(int fd, unsigned int tpg, unsigned int timeout)
>  			}
>  		}
>  	}
> +	if (rc == -RTPG_TPG_NOT_FOUND)
> +		condlog(2, "%s: port group %d not found", __func__, tpg);
>  out:
>  	free(buf);
>  	return rc;
> -- 
> 2.17.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-07-13 19:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-03  7:57 [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Martin Wilck
2018-07-03  7:57 ` [PATCH 2/3] libmultipath: alua: retry RTPG for NOT_READY and UNIT_ATTENTION Martin Wilck
2018-07-13 19:08   ` Benjamin Marzinski
2018-07-03  7:57 ` [PATCH 3/3] libmultipath: more debug output in alua prioritizer Martin Wilck
2018-07-13 19:09   ` Benjamin Marzinski
2018-07-13 19:08 ` [PATCH 1/3] libmultipath: alua_rtpg: use condlog for error messages Benjamin Marzinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.