All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
@ 2021-08-26  8:21 ` Daniel Wagner
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Wagner @ 2021-08-26  8:21 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Hannes Reinecke, Sagi Grimberg, yi.he, Daniel Wagner

The spec says

  7.4.6.1 Digest Error handling

  When a host detects a data digest error in a C2HData PDU, that host
  shall continue processing C2HData PDUs associated with the command and
  when the command processing has completed, if a successful status was
  returned by the controller, the host shall fail the command with a
  non-fatal transport error.

Currently the transport is reset when a data digest error is
detected. Instead, when a digest error is detected, mark the final
status as NVME_SC_DATA_XFER_ERROR and let the upper layer handle
the error.

In order to keep track of the final result, maintain a status field in
the nvme_tcp_request object and use it to override the completion queue
status (which might be successful even though a digest error has been
detected) when completing the request.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
The status member is placed so that it fills up a hole in struct
nvme_tcp_request:

struct nvme_tcp_request {
        struct nvme_request        req;                  /*     0    32 */
        void *                     pdu;                  /*    32     8 */
        struct nvme_tcp_queue *    queue;                /*    40     8 */
        u32                        data_len;             /*    48     4 */
        u32                        pdu_len;              /*    52     4 */
        u32                        pdu_sent;             /*    56     4 */
        u16                        ttag;                 /*    60     2 */
        u16                        status;               /*    62     2 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct list_head           entry;                /*    64    16 */
        struct llist_node          lentry;               /*    80     8 */
        __le32                     ddgst;                /*    88     4 */

        /* XXX 4 bytes hole, try to pack */

        struct bio *               curr_bio;             /*    96     8 */
        struct iov_iter            iter;                 /*   104    40 */
        /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
        size_t                     offset;               /*   144     8 */
        size_t                     data_sent;            /*   152     8 */
        enum nvme_tcp_send_state   state;                /*   160     4 */

        /* size: 168, cachelines: 3, members: 16 */
        /* sum members: 160, holes: 1, sum holes: 4 */
        /* padding: 4 */
        /* last cacheline: 40 bytes */
};

v3:
 - initialize req->status in nvme_tcp_setup_cmd_pdu()
 - add rb tag from Hannes

v2:
 - https://lore.kernel.org/linux-nvme/20210825124259.28707-1-dwagner@suse.de/
 - moved 'status' from nvme_tcp_queue to nvme_tcp_request.

v1:
 - https://lore.kernel.org/linux-nvme/20210805121541.77613-1-dwagner@suse.de/

drivers/nvme/host/tcp.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 645025620154..29ef0f74f620 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -45,6 +45,7 @@ struct nvme_tcp_request {
 	u32			pdu_len;
 	u32			pdu_sent;
 	u16			ttag;
+	u16			status;
 	struct list_head	entry;
 	struct llist_node	lentry;
 	__le32			ddgst;
@@ -485,7 +486,9 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
 static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		struct nvme_completion *cqe)
 {
+	struct nvme_tcp_request *req;
 	struct request *rq;
+	u16 status;
 
 	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
 	if (!rq) {
@@ -496,7 +499,12 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		return -EINVAL;
 	}
 
-	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+	req = blk_mq_rq_to_pdu(rq);
+	status = req->status;
+	if (status == NVME_SC_SUCCESS)
+		status = cqe->status;
+
+	if (!nvme_try_complete_req(rq, status, cqe->result))
 		nvme_complete_rq(rq);
 	queue->nr_cqe++;
 
@@ -758,7 +766,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-				nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+				nvme_tcp_end_request(rq, req->status);
 				queue->nr_cqe++;
 			}
 			nvme_tcp_init_recv_ctx(queue);
@@ -788,18 +796,24 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 		return 0;
 
 	if (queue->recv_ddgst != queue->exp_ddgst) {
+		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+					pdu->command_id);
+		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+
+		req->status = NVME_SC_DATA_XFER_ERROR;
+
 		dev_err(queue->ctrl->ctrl.device,
 			"data digest error: recv %#x expected %#x\n",
 			le32_to_cpu(queue->recv_ddgst),
 			le32_to_cpu(queue->exp_ddgst));
-		return -EIO;
 	}
 
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
 		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
 					pdu->command_id);
+		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
-		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+		nvme_tcp_end_request(rq, req->status);
 		queue->nr_cqe++;
 	}
 
@@ -2293,6 +2307,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 		return ret;
 
 	req->state = NVME_TCP_SEND_CMD_PDU;
+	req->status = NVME_SC_SUCCESS;
 	req->offset = 0;
 	req->data_sent = 0;
 	req->pdu_len = 0;
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
@ 2021-08-26  8:21 ` Daniel Wagner
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Wagner @ 2021-08-26  8:21 UTC (permalink / raw)
  To: linux-nvme
  Cc: linux-kernel, Hannes Reinecke, Sagi Grimberg, yi.he, Daniel Wagner

The spec says

  7.4.6.1 Digest Error handling

  When a host detects a data digest error in a C2HData PDU, that host
  shall continue processing C2HData PDUs associated with the command and
  when the command processing has completed, if a successful status was
  returned by the controller, the host shall fail the command with a
  non-fatal transport error.

Currently the transport is reset when a data digest error is
detected. Instead, when a digest error is detected, mark the final
status as NVME_SC_DATA_XFER_ERROR and let the upper layer handle
the error.

In order to keep track of the final result, maintain a status field in
the nvme_tcp_request object and use it to override the completion queue
status (which might be successful even though a digest error has been
detected) when completing the request.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
The status member is placed so that it fills up a hole in struct
nvme_tcp_request:

struct nvme_tcp_request {
        struct nvme_request        req;                  /*     0    32 */
        void *                     pdu;                  /*    32     8 */
        struct nvme_tcp_queue *    queue;                /*    40     8 */
        u32                        data_len;             /*    48     4 */
        u32                        pdu_len;              /*    52     4 */
        u32                        pdu_sent;             /*    56     4 */
        u16                        ttag;                 /*    60     2 */
        u16                        status;               /*    62     2 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct list_head           entry;                /*    64    16 */
        struct llist_node          lentry;               /*    80     8 */
        __le32                     ddgst;                /*    88     4 */

        /* XXX 4 bytes hole, try to pack */

        struct bio *               curr_bio;             /*    96     8 */
        struct iov_iter            iter;                 /*   104    40 */
        /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
        size_t                     offset;               /*   144     8 */
        size_t                     data_sent;            /*   152     8 */
        enum nvme_tcp_send_state   state;                /*   160     4 */

        /* size: 168, cachelines: 3, members: 16 */
        /* sum members: 160, holes: 1, sum holes: 4 */
        /* padding: 4 */
        /* last cacheline: 40 bytes */
};

v3:
 - initialize req->status in nvme_tcp_setup_cmd_pdu()
 - add rb tag from Hannes

v2:
 - https://lore.kernel.org/linux-nvme/20210825124259.28707-1-dwagner@suse.de/
 - moved 'status' from nvme_tcp_queue to nvme_tcp_request.

v1:
 - https://lore.kernel.org/linux-nvme/20210805121541.77613-1-dwagner@suse.de/

drivers/nvme/host/tcp.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 645025620154..29ef0f74f620 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -45,6 +45,7 @@ struct nvme_tcp_request {
 	u32			pdu_len;
 	u32			pdu_sent;
 	u16			ttag;
+	u16			status;
 	struct list_head	entry;
 	struct llist_node	lentry;
 	__le32			ddgst;
@@ -485,7 +486,9 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
 static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		struct nvme_completion *cqe)
 {
+	struct nvme_tcp_request *req;
 	struct request *rq;
+	u16 status;
 
 	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
 	if (!rq) {
@@ -496,7 +499,12 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		return -EINVAL;
 	}
 
-	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+	req = blk_mq_rq_to_pdu(rq);
+	status = req->status;
+	if (status == NVME_SC_SUCCESS)
+		status = cqe->status;
+
+	if (!nvme_try_complete_req(rq, status, cqe->result))
 		nvme_complete_rq(rq);
 	queue->nr_cqe++;
 
@@ -758,7 +766,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-				nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+				nvme_tcp_end_request(rq, req->status);
 				queue->nr_cqe++;
 			}
 			nvme_tcp_init_recv_ctx(queue);
@@ -788,18 +796,24 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 		return 0;
 
 	if (queue->recv_ddgst != queue->exp_ddgst) {
+		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+					pdu->command_id);
+		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+
+		req->status = NVME_SC_DATA_XFER_ERROR;
+
 		dev_err(queue->ctrl->ctrl.device,
 			"data digest error: recv %#x expected %#x\n",
 			le32_to_cpu(queue->recv_ddgst),
 			le32_to_cpu(queue->exp_ddgst));
-		return -EIO;
 	}
 
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
 		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
 					pdu->command_id);
+		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
-		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+		nvme_tcp_end_request(rq, req->status);
 		queue->nr_cqe++;
 	}
 
@@ -2293,6 +2307,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 		return ret;
 
 	req->state = NVME_TCP_SEND_CMD_PDU;
+	req->status = NVME_SC_SUCCESS;
 	req->offset = 0;
 	req->data_sent = 0;
 	req->pdu_len = 0;
-- 
2.29.2


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
  2021-08-26  8:21 ` Daniel Wagner
  (?)
@ 2021-08-27  0:45   ` kernel test robot
  -1 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2021-08-27  0:45 UTC (permalink / raw)
  To: Daniel Wagner, linux-nvme
  Cc: kbuild-all, linux-kernel, Hannes Reinecke, Sagi Grimberg, yi.he,
	Daniel Wagner

[-- Attachment #1: Type: text/plain, Size: 3395 bytes --]

Hi Daniel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20210825]
[cannot apply to linux/master linus/master v5.14-rc7 v5.14-rc6 v5.14-rc5 v5.14-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Daniel-Wagner/nvme-tcp-Do-not-reset-transport-on-data-digest-errors/20210826-162342
base:    7636510f976d75b860848884169ba985c8f844d8
config: arc-randconfig-s031-20210826 (attached as .config)
compiler: arc-elf-gcc (GCC) 11.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-348-gf0e6938b-dirty
        # https://github.com/0day-ci/linux/commit/92725dd7ae69044f33fd17f708da5d3079a041ce
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Daniel-Wagner/nvme-tcp-Do-not-reset-transport-on-data-digest-errors/20210826-162342
        git checkout 92725dd7ae69044f33fd17f708da5d3079a041ce
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=arc 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/nvme/host/tcp.c:505:24: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned short [assigned] [usertype] status @@     got restricted __le16 [usertype] status @@
   drivers/nvme/host/tcp.c:505:24: sparse:     expected unsigned short [assigned] [usertype] status
   drivers/nvme/host/tcp.c:505:24: sparse:     got restricted __le16 [usertype] status
>> drivers/nvme/host/tcp.c:507:40: sparse: sparse: incorrect type in argument 2 (different base types) @@     expected restricted __le16 [usertype] status @@     got unsigned short [assigned] [usertype] status @@
   drivers/nvme/host/tcp.c:507:40: sparse:     expected restricted __le16 [usertype] status
   drivers/nvme/host/tcp.c:507:40: sparse:     got unsigned short [assigned] [usertype] status

vim +505 drivers/nvme/host/tcp.c

   485	
   486	static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
   487			struct nvme_completion *cqe)
   488	{
   489		struct nvme_tcp_request *req;
   490		struct request *rq;
   491		u16 status;
   492	
   493		rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
   494		if (!rq) {
   495			dev_err(queue->ctrl->ctrl.device,
   496				"got bad cqe.command_id %#x on queue %d\n",
   497				cqe->command_id, nvme_tcp_queue_id(queue));
   498			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
   499			return -EINVAL;
   500		}
   501	
   502		req = blk_mq_rq_to_pdu(rq);
   503		status = req->status;
   504		if (status == NVME_SC_SUCCESS)
 > 505			status = cqe->status;
   506	
 > 507		if (!nvme_try_complete_req(rq, status, cqe->result))
   508			nvme_complete_rq(rq);
   509		queue->nr_cqe++;
   510	
   511		return 0;
   512	}
   513	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 41302 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
@ 2021-08-27  0:45   ` kernel test robot
  0 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2021-08-27  0:45 UTC (permalink / raw)
  To: Daniel Wagner, linux-nvme
  Cc: kbuild-all, linux-kernel, Hannes Reinecke, Sagi Grimberg, yi.he,
	Daniel Wagner

[-- Attachment #1: Type: text/plain, Size: 3395 bytes --]

Hi Daniel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20210825]
[cannot apply to linux/master linus/master v5.14-rc7 v5.14-rc6 v5.14-rc5 v5.14-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Daniel-Wagner/nvme-tcp-Do-not-reset-transport-on-data-digest-errors/20210826-162342
base:    7636510f976d75b860848884169ba985c8f844d8
config: arc-randconfig-s031-20210826 (attached as .config)
compiler: arc-elf-gcc (GCC) 11.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-348-gf0e6938b-dirty
        # https://github.com/0day-ci/linux/commit/92725dd7ae69044f33fd17f708da5d3079a041ce
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Daniel-Wagner/nvme-tcp-Do-not-reset-transport-on-data-digest-errors/20210826-162342
        git checkout 92725dd7ae69044f33fd17f708da5d3079a041ce
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=arc 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/nvme/host/tcp.c:505:24: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned short [assigned] [usertype] status @@     got restricted __le16 [usertype] status @@
   drivers/nvme/host/tcp.c:505:24: sparse:     expected unsigned short [assigned] [usertype] status
   drivers/nvme/host/tcp.c:505:24: sparse:     got restricted __le16 [usertype] status
>> drivers/nvme/host/tcp.c:507:40: sparse: sparse: incorrect type in argument 2 (different base types) @@     expected restricted __le16 [usertype] status @@     got unsigned short [assigned] [usertype] status @@
   drivers/nvme/host/tcp.c:507:40: sparse:     expected restricted __le16 [usertype] status
   drivers/nvme/host/tcp.c:507:40: sparse:     got unsigned short [assigned] [usertype] status

vim +505 drivers/nvme/host/tcp.c

   485	
   486	static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
   487			struct nvme_completion *cqe)
   488	{
   489		struct nvme_tcp_request *req;
   490		struct request *rq;
   491		u16 status;
   492	
   493		rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
   494		if (!rq) {
   495			dev_err(queue->ctrl->ctrl.device,
   496				"got bad cqe.command_id %#x on queue %d\n",
   497				cqe->command_id, nvme_tcp_queue_id(queue));
   498			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
   499			return -EINVAL;
   500		}
   501	
   502		req = blk_mq_rq_to_pdu(rq);
   503		status = req->status;
   504		if (status == NVME_SC_SUCCESS)
 > 505			status = cqe->status;
   506	
 > 507		if (!nvme_try_complete_req(rq, status, cqe->result))
   508			nvme_complete_rq(rq);
   509		queue->nr_cqe++;
   510	
   511		return 0;
   512	}
   513	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 41302 bytes --]

[-- Attachment #3: Type: text/plain, Size: 158 bytes --]

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
@ 2021-08-27  0:45   ` kernel test robot
  0 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2021-08-27  0:45 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3470 bytes --]

Hi Daniel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20210825]
[cannot apply to linux/master linus/master v5.14-rc7 v5.14-rc6 v5.14-rc5 v5.14-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Daniel-Wagner/nvme-tcp-Do-not-reset-transport-on-data-digest-errors/20210826-162342
base:    7636510f976d75b860848884169ba985c8f844d8
config: arc-randconfig-s031-20210826 (attached as .config)
compiler: arc-elf-gcc (GCC) 11.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-348-gf0e6938b-dirty
        # https://github.com/0day-ci/linux/commit/92725dd7ae69044f33fd17f708da5d3079a041ce
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Daniel-Wagner/nvme-tcp-Do-not-reset-transport-on-data-digest-errors/20210826-162342
        git checkout 92725dd7ae69044f33fd17f708da5d3079a041ce
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=arc 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/nvme/host/tcp.c:505:24: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned short [assigned] [usertype] status @@     got restricted __le16 [usertype] status @@
   drivers/nvme/host/tcp.c:505:24: sparse:     expected unsigned short [assigned] [usertype] status
   drivers/nvme/host/tcp.c:505:24: sparse:     got restricted __le16 [usertype] status
>> drivers/nvme/host/tcp.c:507:40: sparse: sparse: incorrect type in argument 2 (different base types) @@     expected restricted __le16 [usertype] status @@     got unsigned short [assigned] [usertype] status @@
   drivers/nvme/host/tcp.c:507:40: sparse:     expected restricted __le16 [usertype] status
   drivers/nvme/host/tcp.c:507:40: sparse:     got unsigned short [assigned] [usertype] status

vim +505 drivers/nvme/host/tcp.c

   485	
   486	static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
   487			struct nvme_completion *cqe)
   488	{
   489		struct nvme_tcp_request *req;
   490		struct request *rq;
   491		u16 status;
   492	
   493		rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
   494		if (!rq) {
   495			dev_err(queue->ctrl->ctrl.device,
   496				"got bad cqe.command_id %#x on queue %d\n",
   497				cqe->command_id, nvme_tcp_queue_id(queue));
   498			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
   499			return -EINVAL;
   500		}
   501	
   502		req = blk_mq_rq_to_pdu(rq);
   503		status = req->status;
   504		if (status == NVME_SC_SUCCESS)
 > 505			status = cqe->status;
   506	
 > 507		if (!nvme_try_complete_req(rq, status, cqe->result))
   508			nvme_complete_rq(rq);
   509		queue->nr_cqe++;
   510	
   511		return 0;
   512	}
   513	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 41302 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
  2021-08-26  8:21 ` Daniel Wagner
@ 2021-08-30 11:25   ` Sagi Grimberg
  -1 siblings, 0 replies; 7+ messages in thread
From: Sagi Grimberg @ 2021-08-30 11:25 UTC (permalink / raw)
  To: Daniel Wagner, linux-nvme; +Cc: linux-kernel, Hannes Reinecke, yi.he



On 8/26/21 1:21 AM, Daniel Wagner wrote:
> The spec says
> 
>    7.4.6.1 Digest Error handling
> 
>    When a host detects a data digest error in a C2HData PDU, that host
>    shall continue processing C2HData PDUs associated with the command and
>    when the command processing has completed, if a successful status was
>    returned by the controller, the host shall fail the command with a
>    non-fatal transport error.
> 
> Currently the transport is reseted when a data digest error is
> detected. Instead, when a digest error is detected, mark the final
> status as NVME_SC_DATA_XFER_ERROR and let the upper layer handle
> the error.
> 
> In order to keep track of the final result maintain a status field in
> nvme_tcp_request object and use it to overwrite the completion queue
> status (which might be successful even though a digest error has been
> detected) when completing the request.
> 
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Daniel Wagner <dwagner@suse.de>
> ---
> The status member placed so that it fills up a hole in struct
> nvme_tcp_request:
> 
> struct nvme_tcp_request {
>          struct nvme_request        req;                  /*     0    32 */
>          void *                     pdu;                  /*    32     8 */
>          struct nvme_tcp_queue *    queue;                /*    40     8 */
>          u32                        data_len;             /*    48     4 */
>          u32                        pdu_len;              /*    52     4 */
>          u32                        pdu_sent;             /*    56     4 */
>          u16                        ttag;                 /*    60     2 */
>          u16                        status;               /*    62     2 */
>          /* --- cacheline 1 boundary (64 bytes) --- */
>          struct list_head           entry;                /*    64    16 */
>          struct llist_node          lentry;               /*    80     8 */
>          __le32                     ddgst;                /*    88     4 */
> 
>          /* XXX 4 bytes hole, try to pack */
> 
>          struct bio *               curr_bio;             /*    96     8 */
>          struct iov_iter            iter;                 /*   104    40 */
>          /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
>          size_t                     offset;               /*   144     8 */
>          size_t                     data_sent;            /*   152     8 */
>          enum nvme_tcp_send_state   state;                /*   160     4 */
> 
>          /* size: 168, cachelines: 3, members: 16 */
>          /* sum members: 160, holes: 1, sum holes: 4 */
>          /* padding: 4 */
>          /* last cacheline: 40 bytes */
> };
> 
> v3:
>   - initialize req->status in nvme_tcp_setup_cmd_pdu()
>   - add rb tag from Hannes
> 
> v2:
>   - https://lore.kernel.org/linux-nvme/20210825124259.28707-1-dwagner@suse.de/
>   - moved 'status' from nvme_tcp_queue to nvme_tcp_request.
> 
> v1:
>   - https://lore.kernel.org/linux-nvme/20210805121541.77613-1-dwagner@suse.de/
> 
> drivers/nvme/host/tcp.c | 23 +++++++++++++++++++----
>   1 file changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 645025620154..29ef0f74f620 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -45,6 +45,7 @@ struct nvme_tcp_request {
>   	u32			pdu_len;
>   	u32			pdu_sent;
>   	u16			ttag;
> +	u16			status;
>   	struct list_head	entry;
>   	struct llist_node	lentry;
>   	__le32			ddgst;
> @@ -485,7 +486,9 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
>   static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
>   		struct nvme_completion *cqe)
>   {
> +	struct nvme_tcp_request *req;
>   	struct request *rq;
> +	u16 status;
>   
>   	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
>   	if (!rq) {
> @@ -496,7 +499,12 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
>   		return -EINVAL;
>   	}
>   
> -	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
> +	req = blk_mq_rq_to_pdu(rq);
> +	status = req->status;
> +	if (status == NVME_SC_SUCCESS)
> +		status = cqe->status;

Maybe more intuitive to skip the local status variable?
	/*  */
	if (req->status == NVME_SC_SUCCESS)
		req->status = cqe->status;

This way it is always consistent completing with req->status.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] nvme-tcp: Do not reset transport on data digest errors
@ 2021-08-30 11:25   ` Sagi Grimberg
  0 siblings, 0 replies; 7+ messages in thread
From: Sagi Grimberg @ 2021-08-30 11:25 UTC (permalink / raw)
  To: Daniel Wagner, linux-nvme; +Cc: linux-kernel, Hannes Reinecke, yi.he



On 8/26/21 1:21 AM, Daniel Wagner wrote:
> The spec says
> 
>    7.4.6.1 Digest Error handling
> 
>    When a host detects a data digest error in a C2HData PDU, that host
>    shall continue processing C2HData PDUs associated with the command and
>    when the command processing has completed, if a successful status was
>    returned by the controller, the host shall fail the command with a
>    non-fatal transport error.
> 
> Currently the transport is reseted when a data digest error is
> detected. Instead, when a digest error is detected, mark the final
> status as NVME_SC_DATA_XFER_ERROR and let the upper layer handle
> the error.
> 
> In order to keep track of the final result maintain a status field in
> nvme_tcp_request object and use it to overwrite the completion queue
> status (which might be successful even though a digest error has been
> detected) when completing the request.
> 
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Daniel Wagner <dwagner@suse.de>
> ---
> The status member placed so that it fills up a hole in struct
> nvme_tcp_request:
> 
> struct nvme_tcp_request {
>          struct nvme_request        req;                  /*     0    32 */
>          void *                     pdu;                  /*    32     8 */
>          struct nvme_tcp_queue *    queue;                /*    40     8 */
>          u32                        data_len;             /*    48     4 */
>          u32                        pdu_len;              /*    52     4 */
>          u32                        pdu_sent;             /*    56     4 */
>          u16                        ttag;                 /*    60     2 */
>          u16                        status;               /*    62     2 */
>          /* --- cacheline 1 boundary (64 bytes) --- */
>          struct list_head           entry;                /*    64    16 */
>          struct llist_node          lentry;               /*    80     8 */
>          __le32                     ddgst;                /*    88     4 */
> 
>          /* XXX 4 bytes hole, try to pack */
> 
>          struct bio *               curr_bio;             /*    96     8 */
>          struct iov_iter            iter;                 /*   104    40 */
>          /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
>          size_t                     offset;               /*   144     8 */
>          size_t                     data_sent;            /*   152     8 */
>          enum nvme_tcp_send_state   state;                /*   160     4 */
> 
>          /* size: 168, cachelines: 3, members: 16 */
>          /* sum members: 160, holes: 1, sum holes: 4 */
>          /* padding: 4 */
>          /* last cacheline: 40 bytes */
> };
> 
> v3:
>   - initialize req->status in nvme_tcp_setup_cmd_pdu()
>   - add rb tag from Hannes
> 
> v2:
>   - https://lore.kernel.org/linux-nvme/20210825124259.28707-1-dwagner@suse.de/
>   - moved 'status' from nvme_tcp_queue to nvme_tcp_request.
> 
> v1:
>   - https://lore.kernel.org/linux-nvme/20210805121541.77613-1-dwagner@suse.de/
> 
> drivers/nvme/host/tcp.c | 23 +++++++++++++++++++----
>   1 file changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 645025620154..29ef0f74f620 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -45,6 +45,7 @@ struct nvme_tcp_request {
>   	u32			pdu_len;
>   	u32			pdu_sent;
>   	u16			ttag;
> +	u16			status;
>   	struct list_head	entry;
>   	struct llist_node	lentry;
>   	__le32			ddgst;
> @@ -485,7 +486,9 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
>   static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
>   		struct nvme_completion *cqe)
>   {
> +	struct nvme_tcp_request *req;
>   	struct request *rq;
> +	u16 status;
>   
>   	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
>   	if (!rq) {
> @@ -496,7 +499,12 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
>   		return -EINVAL;
>   	}
>   
> -	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
> +	req = blk_mq_rq_to_pdu(rq);
> +	status = req->status;
> +	if (status == NVME_SC_SUCCESS)
> +		status = cqe->status;

Maybe more intuitive to skip the local status variable?
	/*  */
	if (req->status == NVME_SC_SUCCESS)
		req->status = cqe->status;

This way it is always consistent completing with req->status.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-08-30 11:25 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-26  8:21 [PATCH v3] nvme-tcp: Do not reset transport on data digest errors Daniel Wagner
2021-08-26  8:21 ` Daniel Wagner
2021-08-27  0:45 ` kernel test robot
2021-08-27  0:45   ` kernel test robot
2021-08-27  0:45   ` kernel test robot
2021-08-30 11:25 ` Sagi Grimberg
2021-08-30 11:25   ` Sagi Grimberg

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.