qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Lukasz Maniak <lukasz.maniak@linux.intel.com>
To: qemu-devel@nongnu.org
Cc: "Fam Zheng" <fam@euphon.net>, "Kevin Wolf" <kwolf@redhat.com>,
	qemu-block@nongnu.org,
	"Łukasz Gieryk" <lukasz.gieryk@linux.intel.com>,
	"Lukasz Maniak" <lukasz.maniak@linux.intel.com>,
	"Klaus Jensen" <its@irrelevant.dk>,
	"Hanna Reitz" <hreitz@redhat.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Keith Busch" <kbusch@kernel.org>,
	"Philippe Mathieu-Daudé" <philmd@redhat.com>
Subject: [PATCH 07/15] hw/nvme: Add support for Secondary Controller List
Date: Thu,  7 Oct 2021 18:23:58 +0200	[thread overview]
Message-ID: <20211007162406.1920374-8-lukasz.maniak@linux.intel.com> (raw)
In-Reply-To: <20211007162406.1920374-1-lukasz.maniak@linux.intel.com>

Introduce handling for Secondary Controller List (Identify command with
CNS value of 15h).

Secondary controller ids are unique in the subsystem, hence they are
reserved by it upon initialization of the primary controller to the
number of sriov_max_vfs.

ID reservation requires the addition of an intermediate controller slot
state, so the reserved controller has the address 0xFFFF.
A secondary controller is in the reserved state when it has no virtual
function assigned, but its primary controller is realized.
Secondary controller reservations are released to NULL when its primary
controller is unregistered.

Signed-off-by: Lukasz Maniak <lukasz.maniak@linux.intel.com>
---
 hw/nvme/ctrl.c       | 42 ++++++++++++++++++++++++-
 hw/nvme/ns.c         |  2 +-
 hw/nvme/nvme.h       | 16 +++++++++-
 hw/nvme/subsys.c     | 74 ++++++++++++++++++++++++++++++++++++++------
 hw/nvme/trace-events |  1 +
 include/block/nvme.h | 20 ++++++++++++
 6 files changed, 143 insertions(+), 12 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d2fde3dd07..9687a7322c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4545,6 +4545,14 @@ static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req)
     return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap, sizeof(NvmePriCtrlCap), req);
 }
 
+static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
+{
+    trace_pci_nvme_identify_sec_ctrl_list(le16_to_cpu(n->pri_ctrl_cap.cntlid),
+                                          n->sec_ctrl_list.numcntl);
+
+    return nvme_c2h(n, (uint8_t *)&n->sec_ctrl_list, sizeof(NvmeSecCtrlList), req);
+}
+
 static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
                                      bool active)
 {
@@ -4765,6 +4773,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
         return nvme_identify_ctrl_list(n, req, false);
     case NVME_ID_CNS_PRIMARY_CTRL_CAP:
         return nvme_identify_pri_ctrl_cap(n, req);
+    case NVME_ID_CNS_SECONDARY_CTRL_LIST:
+        return nvme_identify_sec_ctrl_list(n, req);
     case NVME_ID_CNS_CS_NS:
         return nvme_identify_ns_csi(n, req, true);
     case NVME_ID_CNS_CS_NS_PRESENT:
@@ -6306,6 +6316,9 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
 static void nvme_init_state(NvmeCtrl *n)
 {
     NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+    NvmeSecCtrlList *list = &n->sec_ctrl_list;
+    NvmeSecCtrlEntry *sctrl;
+    int i;
 
     /* add one to max_ioqpairs to account for the admin queue pair */
     n->reg_size = pow2ceil(sizeof(NvmeBar) +
@@ -6317,6 +6330,12 @@ static void nvme_init_state(NvmeCtrl *n)
     n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
     n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
 
+    list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
+    for (i = 0; i < n->params.sriov_max_vfs; i++) {
+        sctrl = &list->sec[i];
+        sctrl->pcid = cpu_to_le16(n->cntlid);
+    }
+
     cap->cntlid = cpu_to_le16(n->cntlid);
 }
 
@@ -6362,6 +6381,27 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
     memory_region_set_enabled(&n->pmr.dev->mr, false);
 }
 
+static void nvme_update_vfs(PCIDevice *pci_dev, uint16_t prev_num_vfs,
+                            uint16_t num_vfs)
+{
+    NvmeCtrl *n = NVME(pci_dev);
+    uint16_t num_active_vfs = MAX(prev_num_vfs, num_vfs);
+    bool vf_enable = (prev_num_vfs < num_vfs);
+    uint16_t i;
+
+    /*
+     * As per SR-IOV design,
+     * VF count can only go from 0 to a set value and vice versa.
+     */
+    for (i = 0; i < num_active_vfs; i++) {
+        if (vf_enable) {
+            n->sec_ctrl_list.sec[i].vfn = cpu_to_le16(i + 1);
+        } else {
+            n->sec_ctrl_list.sec[i].vfn = 0;
+        }
+    }
+}
+
 static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
                             uint64_t bar_size)
 {
@@ -6370,7 +6410,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
 
     pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
                        n->params.sriov_max_vfs, n->params.sriov_max_vfs,
-                       NVME_VF_OFFSET, NVME_VF_STRIDE, NULL);
+                       NVME_VF_OFFSET, NVME_VF_STRIDE, nvme_update_vfs);
 
     pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
                               PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index b7cf1494e7..c70aed8c66 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -517,7 +517,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
             for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
                 NvmeCtrl *ctrl = subsys->ctrls[i];
 
-                if (ctrl) {
+                if (ctrl && ctrl != SUBSYS_SLOT_RSVD) {
                     nvme_attach_ns(ctrl, ns);
                 }
             }
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 479817f66e..fd229f06f0 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -24,7 +24,7 @@
 
 #include "block/nvme.h"
 
-#define NVME_MAX_CONTROLLERS 32
+#define NVME_MAX_CONTROLLERS 256
 #define NVME_MAX_NAMESPACES  256
 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
 
@@ -43,6 +43,7 @@ typedef struct NvmeBus {
 #define TYPE_NVME_SUBSYS "nvme-subsys"
 #define NVME_SUBSYS(obj) \
     OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+#define SUBSYS_SLOT_RSVD (void *)0xFFFF
 
 typedef struct NvmeSubsystem {
     DeviceState parent_obj;
@@ -463,6 +464,7 @@ typedef struct NvmeCtrl {
     } features;
 
     NvmePriCtrlCap  pri_ctrl_cap;
+    NvmeSecCtrlList sec_ctrl_list;
 } NvmeCtrl;
 
 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
@@ -497,6 +499,18 @@ static inline uint16_t nvme_cid(NvmeRequest *req)
     return le16_to_cpu(req->cqe.cid);
 }
 
+static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
+{
+    PCIDevice *pci_dev = &n->parent_obj;
+    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
+
+    if (pci_is_vf(pci_dev)) {
+        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
+    }
+
+    return NULL;
+}
+
 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
 uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 495dcff5eb..43c295056f 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -11,20 +11,71 @@
 
 #include "nvme.h"
 
-int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
 {
     NvmeSubsystem *subsys = n->subsys;
-    int cntlid;
+    NvmeSecCtrlList *list = &n->sec_ctrl_list;
+    NvmeSecCtrlEntry *sctrl;
+    int i, cnt = 0;
+
+    for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) {
+        if (!subsys->ctrls[i]) {
+            sctrl = &list->sec[cnt];
+            sctrl->scid = cpu_to_le16(i);
+            subsys->ctrls[i] = SUBSYS_SLOT_RSVD;
+            cnt++;
+        }
+    }
+
+    return cnt;
+}
 
-    for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
-        if (!subsys->ctrls[cntlid]) {
-            break;
+static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n)
+{
+    NvmeSubsystem *subsys = n->subsys;
+    NvmeSecCtrlList *list = &n->sec_ctrl_list;
+    NvmeSecCtrlEntry *sctrl;
+    int i, cntlid;
+
+    for (i = 0; i < n->params.sriov_max_vfs; i++) {
+        sctrl = &list->sec[i];
+        cntlid = le16_to_cpu(sctrl->scid);
+
+        if (cntlid) {
+            assert(subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD);
+            subsys->ctrls[cntlid] = NULL;
+            sctrl->scid = 0;
         }
     }
+}
 
-    if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
-        error_setg(errp, "no more free controller id");
-        return -1;
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+    NvmeSubsystem *subsys = n->subsys;
+    NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
+    int cntlid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
+
+    if (pci_is_vf(&n->parent_obj)) {
+        cntlid = le16_to_cpu(sctrl->scid);
+    } else {
+        for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+            if (!subsys->ctrls[cntlid]) {
+                break;
+            }
+        }
+
+        if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+            error_setg(errp, "no more free controller id");
+            return -1;
+        }
+
+        num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs);
+        if (num_rsvd != num_vfs) {
+            nvme_subsys_unreserve_cntlids(n);
+            error_setg(errp,
+                       "no more free controller ids for secondary controllers");
+            return -1;
+        }
     }
 
     subsys->ctrls[cntlid] = n;
@@ -34,7 +85,12 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 
 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
 {
-    subsys->ctrls[n->cntlid] = NULL;
+    if (pci_is_vf(&n->parent_obj)) {
+        subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD;
+    } else {
+        subsys->ctrls[n->cntlid] = NULL;
+        nvme_subsys_unreserve_cntlids(n);
+    }
 }
 
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 1014ebceb6..dd2aac3418 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -53,6 +53,7 @@ pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
 pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
 pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16""
 pci_nvme_identify_pri_ctrl_cap(uint16_t cntlid) "identify primary controller capabilities cntlid=%"PRIu16""
+pci_nvme_identify_sec_ctrl_list(uint16_t cntlid, uint8_t numcntl) "identify secondary controller list cntlid=%"PRIu16" numcntl=%"PRIu8""
 pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
 pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
 pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index f69bd1d14f..96595ea8f1 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1018,6 +1018,7 @@ enum NvmeIdCns {
     NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
     NVME_ID_CNS_CTRL_LIST             = 0x13,
     NVME_ID_CNS_PRIMARY_CTRL_CAP      = 0x14,
+    NVME_ID_CNS_SECONDARY_CTRL_LIST   = 0x15,
     NVME_ID_CNS_CS_NS_PRESENT_LIST    = 0x1a,
     NVME_ID_CNS_CS_NS_PRESENT         = 0x1b,
     NVME_ID_CNS_IO_COMMAND_SET        = 0x1c,
@@ -1487,6 +1488,23 @@ typedef struct QEMU_PACKED NvmePriCtrlCap {
     uint8_t     rsvd80[4016];
 } NvmePriCtrlCap;
 
+typedef struct QEMU_PACKED NvmeSecCtrlEntry {
+    uint16_t    scid;
+    uint16_t    pcid;
+    uint8_t     scs;
+    uint8_t     rsvd5[3];
+    uint16_t    vfn;
+    uint16_t    nvq;
+    uint16_t    nvi;
+    uint8_t     rsvd14[18];
+} NvmeSecCtrlEntry;
+
+typedef struct QEMU_PACKED NvmeSecCtrlList {
+    uint8_t             numcntl;
+    uint8_t             rsvd1[31];
+    NvmeSecCtrlEntry    sec[127];
+} NvmeSecCtrlList;
+
 static inline void _nvme_check_size(void)
 {
     QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
@@ -1520,5 +1538,7 @@ static inline void _nvme_check_size(void)
     QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8);
     QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
 }
 #endif
-- 
2.25.1



  parent reply	other threads:[~2021-10-07 17:14 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-07 16:23 [PATCH 00/15] hw/nvme: SR-IOV with Virtualization Enhancements Lukasz Maniak
2021-10-07 16:23 ` [PATCH 01/15] pcie: Set default and supported MaxReadReq to 512 Lukasz Maniak
2021-10-07 22:12   ` Michael S. Tsirkin
2021-10-26 14:36     ` Lukasz Maniak
2021-10-26 15:37       ` Knut Omang
2021-10-07 16:23 ` [PATCH 02/15] pcie: Add support for Single Root I/O Virtualization (SR/IOV) Lukasz Maniak
2021-10-07 16:23 ` [PATCH 03/15] pcie: Add some SR/IOV API documentation in docs/pcie_sriov.txt Lukasz Maniak
2021-10-07 16:23 ` [PATCH 04/15] pcie: Add callback preceding SR-IOV VFs update Lukasz Maniak
2021-10-12  7:25   ` Michael S. Tsirkin
2021-10-12 16:06     ` Lukasz Maniak
2021-10-13  9:10       ` Michael S. Tsirkin
2021-10-15 16:24         ` Lukasz Maniak
2021-10-15 17:30           ` Michael S. Tsirkin
2021-10-20 13:30             ` Lukasz Maniak
2021-10-07 16:23 ` [PATCH 05/15] hw/nvme: Add support for SR-IOV Lukasz Maniak
2021-10-20 19:07   ` Klaus Jensen
2021-10-21 14:33     ` Lukasz Maniak
2021-11-02 14:33   ` Klaus Jensen
2021-11-02 17:33     ` Lukasz Maniak
2021-11-04 14:30       ` Lukasz Maniak
2021-11-08  7:56         ` Klaus Jensen
2021-11-10 13:42           ` Lukasz Maniak
2021-11-10 16:39             ` Klaus Jensen
2021-10-07 16:23 ` [PATCH 06/15] hw/nvme: Add support for Primary Controller Capabilities Lukasz Maniak
2021-11-02 14:34   ` Klaus Jensen
2021-10-07 16:23 ` Lukasz Maniak [this message]
2021-11-02 14:35   ` [PATCH 07/15] hw/nvme: Add support for Secondary Controller List Klaus Jensen
2021-10-07 16:23 ` [PATCH 08/15] pcie: Add 1.2 version token for the Power Management Capability Lukasz Maniak
2021-10-07 16:24 ` [PATCH 09/15] hw/nvme: Implement the Function Level Reset Lukasz Maniak
2021-11-02 14:35   ` Klaus Jensen
2021-10-07 16:24 ` [PATCH 10/15] hw/nvme: Make max_ioqpairs and msix_qsize configurable in runtime Lukasz Maniak
2021-10-18 10:06   ` Philippe Mathieu-Daudé
2021-10-18 15:53     ` Łukasz Gieryk
2021-10-20 19:06   ` Klaus Jensen
2021-10-21 13:40     ` Łukasz Gieryk
2021-11-03 12:11       ` Klaus Jensen
2021-10-20 19:26   ` Klaus Jensen
2021-10-07 16:24 ` [PATCH 11/15] hw/nvme: Calculate BAR atributes in a function Lukasz Maniak
2021-10-18  9:52   ` Philippe Mathieu-Daudé
2021-10-07 16:24 ` [PATCH 12/15] hw/nvme: Initialize capability structures for primary/secondary controllers Lukasz Maniak
2021-11-03 12:07   ` Klaus Jensen
2021-11-04 15:48     ` Łukasz Gieryk
2021-11-05  8:46       ` Łukasz Gieryk
2021-11-05 14:04         ` Łukasz Gieryk
2021-11-08  8:25           ` Klaus Jensen
2021-11-08 13:57             ` Łukasz Gieryk
2021-11-09 12:22               ` Klaus Jensen
2021-10-07 16:24 ` [PATCH 13/15] pcie: Add helpers to the SR/IOV API Lukasz Maniak
2021-10-26 16:57   ` Knut Omang
2021-10-07 16:24 ` [PATCH 14/15] hw/nvme: Add support for the Virtualization Management command Lukasz Maniak
2021-10-07 16:24 ` [PATCH 15/15] docs: Add documentation for SR-IOV and Virtualization Enhancements Lukasz Maniak
2021-10-08  6:31 ` [PATCH 00/15] hw/nvme: SR-IOV with " Klaus Jensen
2021-10-26 18:20 ` Klaus Jensen
2021-10-27 16:49   ` Lukasz Maniak
2021-11-02  7:24     ` Klaus Jensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211007162406.1920374-8-lukasz.maniak@linux.intel.com \
    --to=lukasz.maniak@linux.intel.com \
    --cc=fam@euphon.net \
    --cc=hreitz@redhat.com \
    --cc=its@irrelevant.dk \
    --cc=kbusch@kernel.org \
    --cc=kwolf@redhat.com \
    --cc=lukasz.gieryk@linux.intel.com \
    --cc=philmd@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).