From: Dmitry Fomichev <dmitry.fomichev@wdc.com>
To: "Keith Busch" <kbusch@kernel.org>,
"Klaus Jensen" <k.jensen@samsung.com>,
"Kevin Wolf" <kwolf@redhat.com>,
"Philippe Mathieu-Daudé" <philmd@redhat.com>,
"Maxim Levitsky" <mlevitsk@redhat.com>,
"Fam Zheng" <fam@euphon.net>
Cc: Niklas Cassel <niklas.cassel@wdc.com>,
Damien Le Moal <damien.lemoal@wdc.com>,
qemu-block@nongnu.org, Dmitry Fomichev <dmitry.fomichev@wdc.com>,
qemu-devel@nongnu.org,
Alistair Francis <alistair.francis@wdc.com>,
Matias Bjorling <matias.bjorling@wdc.com>
Subject: [PATCH v5 10/14] hw/block/nvme: Introduce max active and open zone limits
Date: Mon, 28 Sep 2020 11:35:24 +0900 [thread overview]
Message-ID: <20200928023528.15260-11-dmitry.fomichev@wdc.com> (raw)
In-Reply-To: <20200928023528.15260-1-dmitry.fomichev@wdc.com>
Add two namespace properties, "max_active" and "max_open", to control
the maximum number of zones that can be active or open. Both default
to 0, which means no limit. Once these properties are set to non-default
values, the limits are checked during I/O and zone open operations, and
Too Many Active or Too Many Open command status is returned if they
would be exceeded.
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
---
hw/block/nvme-ns.c | 42 +++++++++++++++++++-
hw/block/nvme-ns.h | 42 ++++++++++++++++++++
hw/block/nvme.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 181 insertions(+), 2 deletions(-)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 6d9dc9205b..63a2e3f47d 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -126,6 +126,28 @@ void nvme_remove_zone(NvmeNamespace *ns, NvmeZoneList *zl, NvmeZone *zone)
zone->prev = zone->next = 0;
}
+/*
+ * Unlink the first zone from list zl and return it, or NULL if zl is empty.
+ */
+NvmeZone *nvme_remove_zone_head(NvmeNamespace *ns, NvmeZoneList *zl)
+{
+ NvmeZone *zone = nvme_peek_zone_head(ns, zl);
+
+ if (zone) {
+ --zl->size;
+ if (zl->size == 0) { /* removed the only entry: mark the list empty */
+ zl->head = NVME_ZONE_LIST_NIL;
+ zl->tail = NVME_ZONE_LIST_NIL;
+ } else { /* the successor becomes the new head and has no predecessor */
+ zl->head = zone->next;
+ ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
+ }
+ zone->prev = zone->next = 0; /* clear links, same as nvme_remove_zone() */
+ }
+
+ return zone;
+}
+
static int nvme_calc_zone_geometry(NvmeNamespace *ns, Error **errp)
{
uint64_t zone_size, zone_cap;
@@ -156,6 +178,20 @@ static int nvme_calc_zone_geometry(NvmeNamespace *ns, Error **errp)
ns->zone_size_log2 = 63 - clz64(ns->zone_size);
}
+ /* Make sure that the values of all ZNS properties are sane */
+ if (ns->params.max_open_zones > nz) {
+ error_setg(errp,
+ "max_open_zones value %u exceeds the number of zones %u",
+ ns->params.max_open_zones, nz);
+ return -1;
+ }
+ if (ns->params.max_active_zones > nz) {
+ error_setg(errp,
+ "max_active_zones value %u exceeds the number of zones %u",
+ ns->params.max_active_zones, nz);
+ return -1;
+ }
+
return 0;
}
@@ -215,8 +251,8 @@ static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
/* MAR/MOR are zeroes-based, 0xffffffff means no limit */
- id_ns_z->mar = 0xffffffff;
- id_ns_z->mor = 0xffffffff;
+ id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
+ id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
id_ns_z->zoc = 0;
id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
@@ -312,6 +348,8 @@ static Property nvme_ns_props[] = {
params.zone_capacity_mb, 0),
DEFINE_PROP_BOOL("cross_zone_read", NvmeNamespace,
params.cross_zone_read, false),
+ DEFINE_PROP_UINT32("max_active", NvmeNamespace, params.max_active_zones, 0),
+ DEFINE_PROP_UINT32("max_open", NvmeNamespace, params.max_open_zones, 0),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index daa13546c4..0664fe0892 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -45,6 +45,8 @@ typedef struct NvmeNamespaceParams {
bool cross_zone_read;
uint64_t zone_size_mb;
uint64_t zone_capacity_mb;
+ uint32_t max_active_zones;
+ uint32_t max_open_zones;
} NvmeNamespaceParams;
typedef struct NvmeNamespace {
@@ -66,6 +68,8 @@ typedef struct NvmeNamespace {
uint64_t zone_capacity;
uint64_t zone_array_size;
uint32_t zone_size_log2;
+ int32_t nr_open_zones;
+ int32_t nr_active_zones;
NvmeNamespaceParams params;
} NvmeNamespace;
@@ -189,7 +193,45 @@ static inline NvmeZone *nvme_next_zone_in_list(NvmeNamespace *ns, NvmeZone *z,
return &ns->zone_array[z->next];
}
+static inline void nvme_aor_inc_open(NvmeNamespace *ns)
+{ /* Count one more open zone; counter is untouched if max_open_zones is 0. */
+ assert(ns->nr_open_zones >= 0);
+ if (ns->params.max_open_zones) {
+ ns->nr_open_zones++;
+ assert(ns->nr_open_zones <= ns->params.max_open_zones); /* over limit is a bug */
+ }
+}
+
+static inline void nvme_aor_dec_open(NvmeNamespace *ns)
+{ /* Count one less open zone; counter is untouched if max_open_zones is 0. */
+ if (ns->params.max_open_zones) {
+ assert(ns->nr_open_zones > 0); /* decrementing below zero is a bug */
+ ns->nr_open_zones--;
+ }
+ assert(ns->nr_open_zones >= 0);
+}
+
+static inline void nvme_aor_inc_active(NvmeNamespace *ns)
+{ /* Count one more active zone; counter is untouched if max_active_zones is 0. */
+ assert(ns->nr_active_zones >= 0);
+ if (ns->params.max_active_zones) {
+ ns->nr_active_zones++;
+ assert(ns->nr_active_zones <= ns->params.max_active_zones); /* over limit is a bug */
+ }
+}
+
+static inline void nvme_aor_dec_active(NvmeNamespace *ns)
+{ /* Count one less active zone; counter is untouched if max_active_zones is 0. */
+ if (ns->params.max_active_zones) {
+ assert(ns->nr_active_zones > 0); /* decrementing below zero is a bug */
+ ns->nr_active_zones--;
+ assert(ns->nr_active_zones >= ns->nr_open_zones); /* never fewer active than open */
+ }
+ assert(ns->nr_active_zones >= 0);
+}
+
void nvme_add_zone_tail(NvmeNamespace *ns, NvmeZoneList *zl, NvmeZone *zone);
void nvme_remove_zone(NvmeNamespace *ns, NvmeZoneList *zl, NvmeZone *zone);
+NvmeZone *nvme_remove_zone_head(NvmeNamespace *ns, NvmeZoneList *zl);
#endif /* NVME_NS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 38e25a4d1f..40947aa659 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -168,6 +168,26 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
}
}
+/*
+ * Check if act more active and opn more open zones fit within the limits.
+ * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
+ */
+static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
+{
+ if (ns->params.max_active_zones != 0 && /* a limit of 0 is never enforced */
+ ns->nr_active_zones + act > ns->params.max_active_zones) {
+ trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
+ return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
+ }
+ if (ns->params.max_open_zones != 0 && /* a limit of 0 is never enforced */
+ ns->nr_open_zones + opn > ns->params.max_open_zones) {
+ trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
+ return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
+ }
+
+ return NVME_SUCCESS; /* neither enforced limit would be exceeded */
+}
+
static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
hwaddr low = n->ctrl_mem.addr;
@@ -1035,6 +1055,40 @@ static uint16_t nvme_check_zone_read(NvmeNamespace *ns, NvmeZone *zone,
return status;
}
+static void nvme_auto_transition_zone(NvmeNamespace *ns, bool implicit,
+ bool adding_active) /* NOTE(review): adding_active is never read here */
+{ /* Does nothing unless implicit is set and the open-zone limit is reached. */
+ NvmeZone *zone;
+
+ if (implicit && ns->params.max_open_zones &&
+ ns->nr_open_zones == ns->params.max_open_zones) {
+ zone = nvme_remove_zone_head(ns, ns->imp_open_zones);
+ if (zone) {
+ /*
+ * Automatically close this implicitly open zone to free an open slot.
+ */
+ nvme_aor_dec_open(ns);
+ nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
+ }
+ }
+}
+
+static uint16_t nvme_auto_open_zone(NvmeNamespace *ns, NvmeZone *zone)
+{ /* Check whether zone can be opened within the active/open limits. */
+ uint16_t status = NVME_SUCCESS; /* returned as-is for all other zone states */
+ uint8_t zs = nvme_get_zone_state(zone);
+
+ if (zs == NVME_ZONE_STATE_EMPTY) {
+ nvme_auto_transition_zone(ns, true, true);
+ status = nvme_aor_check(ns, 1, 1); /* asks for one active and one open slot */
+ } else if (zs == NVME_ZONE_STATE_CLOSED) {
+ nvme_auto_transition_zone(ns, true, false);
+ status = nvme_aor_check(ns, 0, 1); /* asks for one open slot only */
+ }
+
+ return status;
+}
+
static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
{
return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
@@ -1080,7 +1134,11 @@ static bool nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req,
switch (zs) {
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_EMPTY:
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
/* fall through */
@@ -1109,7 +1167,10 @@ static uint64_t nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
zs = nvme_get_zone_state(zone);
switch (zs) {
case NVME_ZONE_STATE_EMPTY:
+ nvme_aor_inc_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_inc_open(ns);
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
}
}
@@ -1282,6 +1343,11 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
return NVME_ZONE_INVALID_WRITE | NVME_DNR;
}
+ status = nvme_auto_open_zone(ns, zone);
+ if (status != NVME_SUCCESS) {
+ return status;
+ }
+
req->cqe.result64 = nvme_advance_zone_wp(ns, zone, nlb);
}
@@ -1349,6 +1415,12 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req, bool append)
status = NVME_ZONE_INVALID_WRITE | NVME_DNR;
goto invalid;
}
+
+ status = nvme_auto_open_zone(ns, zone);
+ if (status != NVME_SUCCESS) {
+ return status;
+ }
+
req->fill_ofs = -1LL;
} else {
status = nvme_check_zone_read(ns, zone, slba, nlb);
@@ -1434,9 +1506,27 @@ static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
uint8_t state)
{
+ uint16_t status;
+
switch (state) {
case NVME_ZONE_STATE_EMPTY:
+ nvme_auto_transition_zone(ns, false, true);
+ status = nvme_aor_check(ns, 1, 0);
+ if (status != NVME_SUCCESS) {
+ return status;
+ }
+ nvme_aor_inc_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ status = nvme_aor_check(ns, 0, 1);
+ if (status != NVME_SUCCESS) {
+ if (state == NVME_ZONE_STATE_EMPTY) {
+ nvme_aor_dec_active(ns);
+ }
+ return status;
+ }
+ nvme_aor_inc_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
/* fall through */
@@ -1458,6 +1548,7 @@ static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
/* fall through */
case NVME_ZONE_STATE_CLOSED:
@@ -1479,7 +1570,11 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_EMPTY:
zone->w_ptr = nvme_zone_wr_boundary(zone);
zone->d.wp = zone->w_ptr;
@@ -1505,7 +1600,11 @@ static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
switch (state) {
case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_aor_dec_open(ns);
+ /* fall through */
case NVME_ZONE_STATE_CLOSED:
+ nvme_aor_dec_active(ns);
+ /* fall through */
case NVME_ZONE_STATE_FULL:
zone->w_ptr = zone->d.zslba;
zone->d.wp = zone->w_ptr;
--
2.21.0
next prev parent reply other threads:[~2020-09-28 2:44 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-28 2:35 [PATCH v5 00/14] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 01/14] hw/block/nvme: Report actual LBA data shift in LBAF Dmitry Fomichev
2020-09-28 8:51 ` Klaus Jensen
2020-09-28 2:35 ` [PATCH v5 02/14] hw/block/nvme: Add Commands Supported and Effects log Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 03/14] hw/block/nvme: Introduce the Namespace Types definitions Dmitry Fomichev
2020-09-30 8:08 ` Klaus Jensen
2020-09-30 15:21 ` Keith Busch
2020-09-28 2:35 ` [PATCH v5 04/14] hw/block/nvme: Define trace events related to NS Types Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 05/14] hw/block/nvme: Add support for Namespace Types Dmitry Fomichev
2020-09-30 8:15 ` Klaus Jensen
2020-09-30 12:47 ` Niklas Cassel
2020-10-01 11:22 ` Niklas Cassel
2020-10-01 15:29 ` Keith Busch
2020-10-01 15:50 ` Niklas Cassel
2020-10-01 15:59 ` Keith Busch
2020-10-01 16:23 ` Niklas Cassel
2020-10-01 17:08 ` Keith Busch
2020-10-01 22:15 ` Klaus Jensen
2020-10-01 22:30 ` Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 06/14] hw/block/nvme: Add support for active/inactive namespaces Dmitry Fomichev
2020-09-30 13:50 ` Niklas Cassel
2020-10-04 23:54 ` Dmitry Fomichev
2020-10-05 11:26 ` Niklas Cassel
2020-09-28 2:35 ` [PATCH v5 07/14] hw/block/nvme: Make Zoned NS Command Set definitions Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 08/14] hw/block/nvme: Define Zoned NS Command Set trace events Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 09/14] hw/block/nvme: Support Zoned Namespace Command Set Dmitry Fomichev
2020-09-28 6:44 ` Klaus Jensen
2020-09-28 10:42 ` Klaus Jensen
2020-09-30 5:20 ` Klaus Jensen
2020-10-05 0:53 ` Dmitry Fomichev
2020-09-30 5:59 ` Klaus Jensen
2020-10-04 23:48 ` Dmitry Fomichev
2020-09-30 14:50 ` Niklas Cassel
2020-09-30 18:23 ` Klaus Jensen
2020-10-04 23:57 ` Dmitry Fomichev
2020-10-05 11:41 ` Niklas Cassel
2020-10-05 23:08 ` Dmitry Fomichev
2020-09-30 15:12 ` Niklas Cassel
2020-09-28 2:35 ` Dmitry Fomichev [this message]
2020-09-28 2:35 ` [PATCH v5 11/14] hw/block/nvme: Support Zone Descriptor Extensions Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 12/14] hw/block/nvme: Add injection of Offline/Read-Only zones Dmitry Fomichev
2020-09-28 2:35 ` [PATCH v5 13/14] hw/block/nvme: Use zone metadata file for persistence Dmitry Fomichev
2020-09-28 7:51 ` Klaus Jensen
2020-09-29 15:43 ` Dmitry Fomichev
2020-09-29 16:46 ` Klaus Jensen
2020-09-28 2:35 ` [PATCH v5 14/14] hw/block/nvme: Document zoned parameters in usage text Dmitry Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200928023528.15260-11-dmitry.fomichev@wdc.com \
--to=dmitry.fomichev@wdc.com \
--cc=alistair.francis@wdc.com \
--cc=damien.lemoal@wdc.com \
--cc=fam@euphon.net \
--cc=k.jensen@samsung.com \
--cc=kbusch@kernel.org \
--cc=kwolf@redhat.com \
--cc=matias.bjorling@wdc.com \
--cc=mlevitsk@redhat.com \
--cc=niklas.cassel@wdc.com \
--cc=philmd@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).