qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] hw/block/nvme: zoned fixes
@ 2021-01-19 13:54 Klaus Jensen
  2021-01-19 13:54 ` [PATCH 1/2] hw/block/nvme: fix zone boundary check for append Klaus Jensen
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Klaus Jensen @ 2021-01-19 13:54 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen, Max Reitz,
	Klaus Jensen, Keith Busch

From: Klaus Jensen <k.jensen@samsung.com>

Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
refactors the zone write check function to return status codes in a
different order if there are multiple zone write violations that apply.

Klaus Jensen (2):
  hw/block/nvme: fix zone boundary check for append
  hw/block/nvme: refactor the logic for zone write checks

 hw/block/nvme.c       | 89 +++++++++++++++++++++----------------------
 hw/block/trace-events |  5 +++
 2 files changed, 48 insertions(+), 46 deletions(-)

-- 
2.30.0



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] hw/block/nvme: fix zone boundary check for append
  2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
@ 2021-01-19 13:54 ` Klaus Jensen
  2021-01-26  4:55   ` Dmitry Fomichev
  2021-01-19 13:55 ` [PATCH 2/2] hw/block/nvme: refactor the logic for zone write checks Klaus Jensen
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Klaus Jensen @ 2021-01-19 13:54 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Niklas Cassel, qemu-block, Dmitry Fomichev,
	Klaus Jensen, Max Reitz, Klaus Jensen, Keith Busch

From: Klaus Jensen <k.jensen@samsung.com>

When a zone append is processed the controller checks that validity of
the write before assigning the LBA to the append command. This causes
the boundary check to be wrong.

Fix this by checking the write *after* assigning the LBA. Remove the
append special case from the nvme_check_zone_write and open code it in
nvme_do_write, assigning the slba when basic sanity checks have been
performed. Then check the validity of the resulting write like any other
write command.

In the process, also fix a missing endianness conversion for the zone
append ALBA.

Reported-by: Niklas Cassel <Niklas.Cassel@wdc.com>
Cc: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 309c26db8ff7..f05dea657b01 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1133,7 +1133,7 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
 
 static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
                                       NvmeZone *zone, uint64_t slba,
-                                      uint32_t nlb, bool append)
+                                      uint32_t nlb)
 {
     uint16_t status;
 
@@ -1147,16 +1147,8 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
         trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
     } else {
         assert(nvme_wp_is_valid(zone));
-        if (append) {
-            if (unlikely(slba != zone->d.zslba)) {
-                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
-                status = NVME_INVALID_FIELD;
-            }
-            if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
-                trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
-                status = NVME_INVALID_FIELD;
-            }
-        } else if (unlikely(slba != zone->w_ptr)) {
+
+        if (unlikely(slba != zone->w_ptr)) {
             trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
                                                zone->w_ptr);
             status = NVME_ZONE_INVALID_WRITE;
@@ -1294,10 +1286,9 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
     }
 }
 
-static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
-                                     uint32_t nlb)
+static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
+                                 uint32_t nlb)
 {
-    uint64_t result = zone->w_ptr;
     uint8_t zs;
 
     zone->w_ptr += nlb;
@@ -1313,8 +1304,6 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
             nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
         }
     }
-
-    return result;
 }
 
 static inline bool nvme_is_write(NvmeRequest *req)
@@ -1692,7 +1681,24 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
     if (ns->params.zoned) {
         zone = nvme_get_zone_by_slba(ns, slba);
 
-        status = nvme_check_zone_write(n, ns, zone, slba, nlb, append);
+        if (append) {
+            if (unlikely(slba != zone->d.zslba)) {
+                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
+                status = NVME_INVALID_FIELD;
+                goto invalid;
+            }
+
+            if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
+                trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
+                status = NVME_INVALID_FIELD;
+                goto invalid;
+            }
+
+            slba = zone->w_ptr;
+            res->slba = cpu_to_le64(slba);
+        }
+
+        status = nvme_check_zone_write(n, ns, zone, slba, nlb);
         if (status != NVME_SUCCESS) {
             goto invalid;
         }
@@ -1702,11 +1708,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
             goto invalid;
         }
 
-        if (append) {
-            slba = zone->w_ptr;
-        }
-
-        res->slba = nvme_advance_zone_wp(ns, zone, nlb);
+        nvme_advance_zone_wp(ns, zone, nlb);
     }
 
     data_offset = nvme_l2b(ns, slba);
-- 
2.30.0



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] hw/block/nvme: refactor the logic for zone write checks
  2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
  2021-01-19 13:54 ` [PATCH 1/2] hw/block/nvme: fix zone boundary check for append Klaus Jensen
@ 2021-01-19 13:55 ` Klaus Jensen
  2021-01-26  4:58   ` Dmitry Fomichev
  2021-01-20 11:10 ` [PATCH 0/2] hw/block/nvme: zoned fixes Niklas Cassel
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Klaus Jensen @ 2021-01-19 13:55 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen, Max Reitz,
	Klaus Jensen, Keith Busch

From: Klaus Jensen <k.jensen@samsung.com>

Refactor the zone write check logic such that the most "meaningful"
error is returned first. That is, first, if the zone is not writable,
return an appropriate status code for that. Then, make sure we are
actually writing at the write pointer and finally check that we do not
cross the zone write boundary. This aligns with the "priority" of status
codes for zone read checks.

Also add a couple of additional descriptive trace events and remove an
always true assert.

Cc: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c       | 47 +++++++++++++++++++------------------------
 hw/block/trace-events |  5 +++++
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f05dea657b01..193a27259dda 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1106,56 +1106,51 @@ static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
 
 static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
 {
-    uint16_t status;
+    uint64_t zslba = zone->d.zslba;
 
     switch (nvme_get_zone_state(zone)) {
     case NVME_ZONE_STATE_EMPTY:
     case NVME_ZONE_STATE_IMPLICITLY_OPEN:
     case NVME_ZONE_STATE_EXPLICITLY_OPEN:
     case NVME_ZONE_STATE_CLOSED:
-        status = NVME_SUCCESS;
-        break;
+        return NVME_SUCCESS;
     case NVME_ZONE_STATE_FULL:
-        status = NVME_ZONE_FULL;
-        break;
+        trace_pci_nvme_err_zone_is_full(zslba);
+        return NVME_ZONE_FULL;
     case NVME_ZONE_STATE_OFFLINE:
-        status = NVME_ZONE_OFFLINE;
-        break;
+        trace_pci_nvme_err_zone_is_offline(zslba);
+        return NVME_ZONE_OFFLINE;
     case NVME_ZONE_STATE_READ_ONLY:
-        status = NVME_ZONE_READ_ONLY;
-        break;
+        trace_pci_nvme_err_zone_is_read_only(zslba);
+        return NVME_ZONE_READ_ONLY;
     default:
         assert(false);
     }
-
-    return status;
 }
 
 static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
                                       NvmeZone *zone, uint64_t slba,
                                       uint32_t nlb)
 {
+    uint64_t zcap = nvme_zone_wr_boundary(zone);
     uint16_t status;
 
-    if (unlikely((slba + nlb) > nvme_zone_wr_boundary(zone))) {
-        status = NVME_ZONE_BOUNDARY_ERROR;
-    } else {
-        status = nvme_check_zone_state_for_write(zone);
+    status = nvme_check_zone_state_for_write(zone);
+    if (status) {
+        return status;
     }
 
-    if (status != NVME_SUCCESS) {
-        trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
-    } else {
-        assert(nvme_wp_is_valid(zone));
-
-        if (unlikely(slba != zone->w_ptr)) {
-            trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
-                                               zone->w_ptr);
-            status = NVME_ZONE_INVALID_WRITE;
-        }
+    if (unlikely(slba != zone->w_ptr)) {
+        trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr);
+        return NVME_ZONE_INVALID_WRITE;
     }
 
-    return status;
+    if (unlikely((slba + nlb) > zcap)) {
+        trace_pci_nvme_err_zone_boundary(slba, nlb, zcap);
+        return NVME_ZONE_BOUNDARY_ERROR;
+    }
+
+    return NVME_SUCCESS;
 }
 
 static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 78d76b0a71c1..c80b02b85ac9 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -127,6 +127,11 @@ pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "
 pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32""
 pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64""
 pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64""
+pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64""
+pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64""
+pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64""
+pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64""
+pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64""
 pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint32_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
 pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint32_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
 pci_nvme_err_append_too_large(uint64_t slba, uint32_t nlb, uint8_t zasl) "slba=%"PRIu64", nlb=%"PRIu32", zasl=%"PRIu8""
-- 
2.30.0



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/2] hw/block/nvme: zoned fixes
  2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
  2021-01-19 13:54 ` [PATCH 1/2] hw/block/nvme: fix zone boundary check for append Klaus Jensen
  2021-01-19 13:55 ` [PATCH 2/2] hw/block/nvme: refactor the logic for zone write checks Klaus Jensen
@ 2021-01-20 11:10 ` Niklas Cassel
  2021-01-25  7:25 ` Klaus Jensen
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Niklas Cassel @ 2021-01-20 11:10 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen,
	qemu-devel, Max Reitz, Keith Busch

On Tue, Jan 19, 2021 at 02:54:58PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
> refactors the zone write check function to return status codes in a
> different order if there are multiple zone write violations that apply.
> 
> Klaus Jensen (2):
>   hw/block/nvme: fix zone boundary check for append
>   hw/block/nvme: refactor the logic for zone write checks
> 
>  hw/block/nvme.c       | 89 +++++++++++++++++++++----------------------
>  hw/block/trace-events |  5 +++
>  2 files changed, 48 insertions(+), 46 deletions(-)
> 
> -- 
> 2.30.0
> 
> 

For the series:
Tested-by: Niklas Cassel <niklas.cassel@wdc.com>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/2] hw/block/nvme: zoned fixes
  2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
                   ` (2 preceding siblings ...)
  2021-01-20 11:10 ` [PATCH 0/2] hw/block/nvme: zoned fixes Niklas Cassel
@ 2021-01-25  7:25 ` Klaus Jensen
  2021-01-25 21:48   ` Dmitry Fomichev
  2021-01-27 17:42 ` Keith Busch
  2021-01-28  6:24 ` Klaus Jensen
  5 siblings, 1 reply; 12+ messages in thread
From: Klaus Jensen @ 2021-01-25  7:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen, Max Reitz,
	Keith Busch

[-- Attachment #1: Type: text/plain, Size: 653 bytes --]

On Jan 19 14:54, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
> refactors the zone write check function to return status codes in a
> different order if there are multiple zone write violations that apply.
> 
> Klaus Jensen (2):
>   hw/block/nvme: fix zone boundary check for append
>   hw/block/nvme: refactor the logic for zone write checks
> 
>  hw/block/nvme.c       | 89 +++++++++++++++++++++----------------------
>  hw/block/trace-events |  5 +++
>  2 files changed, 48 insertions(+), 46 deletions(-)
> 

Pinging for a review. Dmitry maybe?

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/2] hw/block/nvme: zoned fixes
  2021-01-25  7:25 ` Klaus Jensen
@ 2021-01-25 21:48   ` Dmitry Fomichev
  0 siblings, 0 replies; 12+ messages in thread
From: Dmitry Fomichev @ 2021-01-25 21:48 UTC (permalink / raw)
  To: its, qemu-devel; +Cc: kwolf, kbusch, k.jensen, qemu-block, mreitz

On Mon, 2021-01-25 at 08:25 +0100, Klaus Jensen wrote:
> On Jan 19 14:54, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
> > refactors the zone write check function to return status codes in a
> > different order if there are multiple zone write violations that apply.
> > 
> > Klaus Jensen (2):
> >   hw/block/nvme: fix zone boundary check for append
> >   hw/block/nvme: refactor the logic for zone write checks
> > 
> >  hw/block/nvme.c       | 89 +++++++++++++++++++++----------------------
> >  hw/block/trace-events |  5 +++
> >  2 files changed, 48 insertions(+), 46 deletions(-)
> > 
> 
> Pinging for a review. Dmitry maybe?

I am testing it now, will post my review shortly.

DF

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hw/block/nvme: fix zone boundary check for append
  2021-01-19 13:54 ` [PATCH 1/2] hw/block/nvme: fix zone boundary check for append Klaus Jensen
@ 2021-01-26  4:55   ` Dmitry Fomichev
  2021-01-26  6:58     ` Klaus Jensen
  0 siblings, 1 reply; 12+ messages in thread
From: Dmitry Fomichev @ 2021-01-26  4:55 UTC (permalink / raw)
  To: its, qemu-devel
  Cc: kwolf, Niklas Cassel, qemu-block, k.jensen, mreitz, kbusch

On Tue, 2021-01-19 at 14:54 +0100, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> When a zone append is processed the controller checks that validity of
> the write before assigning the LBA to the append command. This causes
> the boundary check to be wrong.
> 
> Fix this by checking the write *after* assigning the LBA. Remove the
> append special case from the nvme_check_zone_write and open code it in
> nvme_do_write, assigning the slba when basic sanity checks have been
> performed. Then check the validity of the resulting write like any other
> write command.
> 

Klaus,

I tested this series and it works fine. I however think that the problem that
Niklas has found can be fixed in the much simpler way by applying the
following to the existing code -

--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1152,6 +1152,9 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
                 trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
                 status = NVME_INVALID_FIELD;
             }
+            if (zone->w_ptr + nlb > nvme_zone_wr_boundary(zone)) {
+                status = NVME_ZONE_BOUNDARY_ERROR;
+            }
             if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
                 trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
                 status = NVME_INVALID_FIELD;

I am going to send a few patches that take this approach, please take a look. In my
testing, it works just as well :)

> 
> In the process, also fix a missing endianness conversion for the zone
> append ALBA.

Great catch! This could be placed to a separate patch though...
A few more comments below.


Reported-by: Niklas Cassel <Niklas.Cassel@wdc.com>
Cc: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 309c26db8ff7..f05dea657b01 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1133,7 +1133,7 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
 
 static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
                                       NvmeZone *zone, uint64_t slba,
-                                      uint32_t nlb, bool append)
+                                      uint32_t nlb)
 {
     uint16_t status;
 
@@ -1147,16 +1147,8 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
         trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
     } else {
         assert(nvme_wp_is_valid(zone));
-        if (append) {
-            if (unlikely(slba != zone->d.zslba)) {
-                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
-                status = NVME_INVALID_FIELD;
-            }
-            if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
-                trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
-                status = NVME_INVALID_FIELD;
-            }
-        } else if (unlikely(slba != zone->w_ptr)) {
+
+        if (unlikely(slba != zone->w_ptr)) {
             trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
                                                zone->w_ptr);
             status = NVME_ZONE_INVALID_WRITE;
@@ -1294,10 +1286,9 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
     }
 }
 
-static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
-                                     uint32_t nlb)
+static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
+                                 uint32_t nlb)
 {
-    uint64_t result = zone->w_ptr;
     uint8_t zs;
 
     zone->w_ptr += nlb;
@@ -1313,8 +1304,6 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
             nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
         }
     }
-
-    return result;
 }

 static inline bool nvme_is_write(NvmeRequest *req)
@@ -1692,7 +1681,24 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
     if (ns->params.zoned) {
         zone = nvme_get_zone_by_slba(ns, slba);

-        status = nvme_check_zone_write(n, ns, zone, slba, nlb, append);
+        if (append) {

This is what I see as a drawback of this approach.
You have to move the ZA checks that were previously nicely tucked in
nvme_check_zone_write() to the spot below and now this validation is done
in two different places for appends and regular writes. This can be avoided.
 
+            if (unlikely(slba != zone->d.zslba)) {
+                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
+                status = NVME_INVALID_FIELD;
+                goto invalid;
+            }
+
+            if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
+                trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
+                status = NVME_INVALID_FIELD;
+                goto invalid;
+            }
+
+            slba = zone->w_ptr;
+            res->slba = cpu_to_le64(slba);

It is a bit premature to set the result here since the nvme_check_zone_write() below
can still fail. As I recall, ALBA is only returned by a successful command. Again,
good find about endianness!

Regards,
Dmitry

+        }
+
+        status = nvme_check_zone_write(n, ns, zone, slba, nlb);
         if (status != NVME_SUCCESS) {
             goto invalid;
         }
@@ -1702,11 +1708,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
             goto invalid;
         }
 

-        if (append) {
-            slba = zone->w_ptr;
-        }
-
-        res->slba = nvme_advance_zone_wp(ns, zone, nlb);
+        nvme_advance_zone_wp(ns, zone, nlb);
     }
 

     data_offset = nvme_l2b(ns, slba);


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] hw/block/nvme: refactor the logic for zone write checks
  2021-01-19 13:55 ` [PATCH 2/2] hw/block/nvme: refactor the logic for zone write checks Klaus Jensen
@ 2021-01-26  4:58   ` Dmitry Fomichev
  0 siblings, 0 replies; 12+ messages in thread
From: Dmitry Fomichev @ 2021-01-26  4:58 UTC (permalink / raw)
  To: its, qemu-devel; +Cc: kwolf, kbusch, k.jensen, qemu-block, mreitz

On Tue, 2021-01-19 at 14:55 +0100, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Refactor the zone write check logic such that the most "meaningful"
> error is returned first. That is, first, if the zone is not writable,
> return an appropriate status code for that. Then, make sure we are
> actually writing at the write pointer and finally check that we do not
> cross the zone write boundary. This aligns with the "priority" of status
> codes for zone read checks.

Right, all boundary checks need to be performed after validating the zone
state and WP checks. 

> 
> Also add a couple of additional descriptive trace events and remove an
> always true assert.

Adding the new trace events looks like a great idea. I think that assert
is useful, but I have no qualms about removing it.

Overall, this looks good to me, except the part about open coding
ZA validation :)

Best regards,
Dmitry

> 
> Cc: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c       | 47 +++++++++++++++++++------------------------
>  hw/block/trace-events |  5 +++++
>  2 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f05dea657b01..193a27259dda 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1106,56 +1106,51 @@ static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
>  
> 
> 
> 
>  static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
>  {
> -    uint16_t status;
> +    uint64_t zslba = zone->d.zslba;
>  
> 
> 
> 
>      switch (nvme_get_zone_state(zone)) {
>      case NVME_ZONE_STATE_EMPTY:
>      case NVME_ZONE_STATE_IMPLICITLY_OPEN:
>      case NVME_ZONE_STATE_EXPLICITLY_OPEN:
>      case NVME_ZONE_STATE_CLOSED:
> -        status = NVME_SUCCESS;
> -        break;
> +        return NVME_SUCCESS;
>      case NVME_ZONE_STATE_FULL:
> -        status = NVME_ZONE_FULL;
> -        break;
> +        trace_pci_nvme_err_zone_is_full(zslba);
> +        return NVME_ZONE_FULL;
>      case NVME_ZONE_STATE_OFFLINE:
> -        status = NVME_ZONE_OFFLINE;
> -        break;
> +        trace_pci_nvme_err_zone_is_offline(zslba);
> +        return NVME_ZONE_OFFLINE;
>      case NVME_ZONE_STATE_READ_ONLY:
> -        status = NVME_ZONE_READ_ONLY;
> -        break;
> +        trace_pci_nvme_err_zone_is_read_only(zslba);
> +        return NVME_ZONE_READ_ONLY;
>      default:
>          assert(false);
>      }
> -
> -    return status;
>  }
>  
>  static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>                                        NvmeZone *zone, uint64_t slba,
>                                        uint32_t nlb)
>  {
> +    uint64_t zcap = nvme_zone_wr_boundary(zone);
>      uint16_t status;
>  
> 
> 
> 
> -    if (unlikely((slba + nlb) > nvme_zone_wr_boundary(zone))) {
> -        status = NVME_ZONE_BOUNDARY_ERROR;
> -    } else {
> -        status = nvme_check_zone_state_for_write(zone);
> +    status = nvme_check_zone_state_for_write(zone);
> +    if (status) {
> +        return status;
>      }
>  
> 
> 
> 
> -    if (status != NVME_SUCCESS) {
> -        trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
> -    } else {
> -        assert(nvme_wp_is_valid(zone));
> -
> -        if (unlikely(slba != zone->w_ptr)) {
> -            trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
> -                                               zone->w_ptr);
> -            status = NVME_ZONE_INVALID_WRITE;
> -        }
> +    if (unlikely(slba != zone->w_ptr)) {
> +        trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr);
> +        return NVME_ZONE_INVALID_WRITE;
>      }
>  
> 
> 
> 
> -    return status;
> +    if (unlikely((slba + nlb) > zcap)) {
> +        trace_pci_nvme_err_zone_boundary(slba, nlb, zcap);
> +        return NVME_ZONE_BOUNDARY_ERROR;
> +    }
> +
> +    return NVME_SUCCESS;
>  }
>  
> 
>  static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 78d76b0a71c1..c80b02b85ac9 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -127,6 +127,11 @@ pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "
>  pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32""
>  pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64""
>  pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64""
> +pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64""
> +pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64""
> +pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64""
> +pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64""
> +pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64""
>  pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint32_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
>  pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint32_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
>  pci_nvme_err_append_too_large(uint64_t slba, uint32_t nlb, uint8_t zasl) "slba=%"PRIu64", nlb=%"PRIu32", zasl=%"PRIu8""


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hw/block/nvme: fix zone boundary check for append
  2021-01-26  4:55   ` Dmitry Fomichev
@ 2021-01-26  6:58     ` Klaus Jensen
  0 siblings, 0 replies; 12+ messages in thread
From: Klaus Jensen @ 2021-01-26  6:58 UTC (permalink / raw)
  To: Dmitry Fomichev
  Cc: kwolf, Niklas Cassel, qemu-block, k.jensen, qemu-devel, mreitz, kbusch

[-- Attachment #1: Type: text/plain, Size: 6588 bytes --]

On Jan 26 04:55, Dmitry Fomichev wrote:
> On Tue, 2021-01-19 at 14:54 +0100, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > When a zone append is processed the controller checks that validity of
> > the write before assigning the LBA to the append command. This causes
> > the boundary check to be wrong.
> > 
> > Fix this by checking the write *after* assigning the LBA. Remove the
> > append special case from the nvme_check_zone_write and open code it in
> > nvme_do_write, assigning the slba when basic sanity checks have been
> > performed. Then check the validity of the resulting write like any other
> > write command.
> > 
> 
> Klaus,
> 
> I tested this series and it works fine. I however think that the problem that
> Niklas has found can be fixed in the much simpler way by applying the
> following to the existing code -
> 
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1152,6 +1152,9 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>                  trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
>                  status = NVME_INVALID_FIELD;
>              }
> +            if (zone->w_ptr + nlb > nvme_zone_wr_boundary(zone)) {
> +                status = NVME_ZONE_BOUNDARY_ERROR;
> +            }
>              if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
>                  trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
>                  status = NVME_INVALID_FIELD;
> 
> I am going to send a few patches that take this approach, please take a look. In my
> testing, it works just as well :)
> 
> > 
> > In the process, also fix a missing endianness conversion for the zone
> > append ALBA.
> 
> Great catch! This could be placed to a separate patch though...
> A few more comments below.
> 
> 
> Reported-by: Niklas Cassel <Niklas.Cassel@wdc.com>
> Cc: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 46 ++++++++++++++++++++++++----------------------
>  1 file changed, 24 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 309c26db8ff7..f05dea657b01 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1133,7 +1133,7 @@ static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
>  
>  static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>                                        NvmeZone *zone, uint64_t slba,
> -                                      uint32_t nlb, bool append)
> +                                      uint32_t nlb)
>  {
>      uint16_t status;
>  
> @@ -1147,16 +1147,8 @@ static uint16_t nvme_check_zone_write(NvmeCtrl *n, NvmeNamespace *ns,
>          trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
>      } else {
>          assert(nvme_wp_is_valid(zone));
> -        if (append) {
> -            if (unlikely(slba != zone->d.zslba)) {
> -                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
> -                status = NVME_INVALID_FIELD;
> -            }
> -            if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
> -                trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
> -                status = NVME_INVALID_FIELD;
> -            }
> -        } else if (unlikely(slba != zone->w_ptr)) {
> +
> +        if (unlikely(slba != zone->w_ptr)) {
>              trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
>                                                 zone->w_ptr);
>              status = NVME_ZONE_INVALID_WRITE;
> @@ -1294,10 +1286,9 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
>      }
>  }
>  
> -static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
> -                                     uint32_t nlb)
> +static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
> +                                 uint32_t nlb)
>  {
> -    uint64_t result = zone->w_ptr;
>      uint8_t zs;
>  
>      zone->w_ptr += nlb;
> @@ -1313,8 +1304,6 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
>              nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
>          }
>      }
> -
> -    return result;
>  }
> 
>  static inline bool nvme_is_write(NvmeRequest *req)
> @@ -1692,7 +1681,24 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
>      if (ns->params.zoned) {
>          zone = nvme_get_zone_by_slba(ns, slba);
> 
> -        status = nvme_check_zone_write(n, ns, zone, slba, nlb, append);
> +        if (append) {
> 
> This is what I see as a drawback of this approach.
> You have to move the ZA checks that were previously nicely tucked in
> nvme_check_zone_write() to the spot below and now this validation is done
> in two different places for appends and regular writes. This can be avoided.
> 

OK. However this means that other commands that write (Write Zeroes,
Copy) to zones has to go through that special case branch even though it
is a special case of regular writes. This is completely irrelevant for
performance so that's not my point, I just found it cleaner to tuck it
away as a special case in write.

> +            if (unlikely(slba != zone->d.zslba)) {
> +                trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
> +                status = NVME_INVALID_FIELD;
> +                goto invalid;
> +            }
> +
> +            if (nvme_l2b(ns, nlb) > (n->page_size << n->zasl)) {
> +                trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
> +                status = NVME_INVALID_FIELD;
> +                goto invalid;
> +            }
> +
> +            slba = zone->w_ptr;
> +            res->slba = cpu_to_le64(slba);
> 
> It is a bit premature to set the result here since the nvme_check_zone_write() below
> can still fail. As I recall, ALBA is only returned by a successful command. Again,
> good find about endianness!
> 

There has always been a branch in finalize that clears it to zero on
error.

My patch also has the effect of not setting ALBA for regular writes. To
change that you are gonna have to add a 'if (append)' special case in
nvme_do_write anyway.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/2] hw/block/nvme: zoned fixes
  2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
                   ` (3 preceding siblings ...)
  2021-01-25  7:25 ` Klaus Jensen
@ 2021-01-27 17:42 ` Keith Busch
  2021-01-27 23:30   ` Dmitry Fomichev
  2021-01-28  6:24 ` Klaus Jensen
  5 siblings, 1 reply; 12+ messages in thread
From: Keith Busch @ 2021-01-27 17:42 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen,
	qemu-devel, Max Reitz

On Tue, Jan 19, 2021 at 02:54:58PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
> refactors the zone write check function to return status codes in a
> different order if there are multiple zone write violations that apply.

Looks good to me.

Reviewed-by: Keith Busch <kbusch@kernel.org>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 0/2] hw/block/nvme: zoned fixes
  2021-01-27 17:42 ` Keith Busch
@ 2021-01-27 23:30   ` Dmitry Fomichev
  0 siblings, 0 replies; 12+ messages in thread
From: Dmitry Fomichev @ 2021-01-27 23:30 UTC (permalink / raw)
  To: Keith Busch, Klaus Jensen
  Cc: Kevin Wolf, Klaus Jensen, qemu-devel, qemu-block, Max Reitz

> -----Original Message-----
> From: Keith Busch <kbusch@kernel.org>
> Sent: Wednesday, January 27, 2021 12:42 PM
> To: Klaus Jensen <its@irrelevant.dk>
> Cc: qemu-devel@nongnu.org; Kevin Wolf <kwolf@redhat.com>; Max Reitz
> <mreitz@redhat.com>; qemu-block@nongnu.org; Dmitry Fomichev
> <Dmitry.Fomichev@wdc.com>; Klaus Jensen <k.jensen@samsung.com>
> Subject: Re: [PATCH 0/2] hw/block/nvme: zoned fixes
> 
> On Tue, Jan 19, 2021 at 02:54:58PM +0100, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> >
> > Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
> > refactors the zone write check function to return status codes in a
> > different order if there are multiple zone write violations that apply.
> 
> Looks good to me.
> 
> Reviewed-by: Keith Busch <kbusch@kernel.org>

Besides aesthetics, I don't have any complaints about this series :)

Tested-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Reviewed-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/2] hw/block/nvme: zoned fixes
  2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
                   ` (4 preceding siblings ...)
  2021-01-27 17:42 ` Keith Busch
@ 2021-01-28  6:24 ` Klaus Jensen
  5 siblings, 0 replies; 12+ messages in thread
From: Klaus Jensen @ 2021-01-28  6:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 676 bytes --]

On Jan 19 14:54, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Patch [1/2] fixes the zone append bug reported by Niklas. [2/2]
> refactors the zone write check function to return status codes in a
> different order if there are multiple zone write violations that apply.
> 
> Klaus Jensen (2):
>   hw/block/nvme: fix zone boundary check for append
>   hw/block/nvme: refactor the logic for zone write checks
> 
>  hw/block/nvme.c       | 89 +++++++++++++++++++++----------------------
>  hw/block/trace-events |  5 +++
>  2 files changed, 48 insertions(+), 46 deletions(-)
> 
> -- 
> 2.30.0
> 
> 

Thanks all, applied to nvme-next.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-01-28  6:29 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-19 13:54 [PATCH 0/2] hw/block/nvme: zoned fixes Klaus Jensen
2021-01-19 13:54 ` [PATCH 1/2] hw/block/nvme: fix zone boundary check for append Klaus Jensen
2021-01-26  4:55   ` Dmitry Fomichev
2021-01-26  6:58     ` Klaus Jensen
2021-01-19 13:55 ` [PATCH 2/2] hw/block/nvme: refactor the logic for zone write checks Klaus Jensen
2021-01-26  4:58   ` Dmitry Fomichev
2021-01-20 11:10 ` [PATCH 0/2] hw/block/nvme: zoned fixes Niklas Cassel
2021-01-25  7:25 ` Klaus Jensen
2021-01-25 21:48   ` Dmitry Fomichev
2021-01-27 17:42 ` Keith Busch
2021-01-27 23:30   ` Dmitry Fomichev
2021-01-28  6:24 ` Klaus Jensen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).