On Oct 7 18:23, Lukasz Maniak wrote: > This patch implements initial support for Single Root I/O Virtualization > on an NVMe device. > > Essentially, it allows to define the maximum number of virtual functions > supported by the NVMe controller via sriov_max_vfs parameter. > > Passing a non-zero value to sriov_max_vfs triggers reporting of SR-IOV > capability by a physical controller and ARI capability by both the > physical and virtual function devices. > > NVMe controllers created via virtual functions mirror functionally > the physical controller, which may not entirely be the case, thus > consideration would be needed on the way to limit the capabilities of > the VF. > > NVMe subsystem is required for the use of SR-IOV. > > Signed-off-by: Lukasz Maniak > --- > hw/nvme/ctrl.c | 74 ++++++++++++++++++++++++++++++++++++++-- > hw/nvme/nvme.h | 1 + > include/hw/pci/pci_ids.h | 1 + > 3 files changed, 73 insertions(+), 3 deletions(-) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 6a571d18cf..ad79ff0c00 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -35,6 +35,7 @@ > * mdts=,vsl=, \ > * zoned.zasl=, \ > * zoned.auto_transition=, \ > + * sriov_max_vfs= \ > * subsys= > * -device nvme-ns,drive=,bus=,nsid=,\ > * zoned=, \ > @@ -106,6 +107,12 @@ > * transitioned to zone state closed for resource management purposes. > * Defaults to 'on'. > * > + * - `sriov_max_vfs` > + * Indicates the maximum number of PCIe virtual functions supported > + * by the controller. The default value is 0. Specifying a non-zero value > + * enables reporting of both SR-IOV and ARI capabilities by the NVMe device. > + * Virtual function controllers will not report SR-IOV capability. 
> + * > * nvme namespace device parameters > * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > * - `shared` > @@ -160,6 +167,7 @@ > #include "sysemu/block-backend.h" > #include "sysemu/hostmem.h" > #include "hw/pci/msix.h" > +#include "hw/pci/pcie_sriov.h" > #include "migration/vmstate.h" > > #include "nvme.h" > @@ -175,6 +183,9 @@ > #define NVME_TEMPERATURE_CRITICAL 0x175 > #define NVME_NUM_FW_SLOTS 1 > #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) > +#define NVME_MAX_VFS 127 > +#define NVME_VF_OFFSET 0x1 > +#define NVME_VF_STRIDE 1 > > #define NVME_GUEST_ERR(trace, fmt, ...) \ > do { \ > @@ -5583,6 +5594,10 @@ static void nvme_ctrl_reset(NvmeCtrl *n) > g_free(event); > } > > + if (!pci_is_vf(&n->parent_obj) && n->params.sriov_max_vfs) { > + pcie_sriov_pf_disable_vfs(&n->parent_obj); > + } > + > n->aer_queued = 0; > n->outstanding_aers = 0; > n->qs_created = false; > @@ -6264,6 +6279,19 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) > error_setg(errp, "vsl must be non-zero"); > return; > } > + > + if (params->sriov_max_vfs) { > + if (!n->subsys) { > + error_setg(errp, "subsystem is required for the use of SR-IOV"); > + return; > + } > + > + if (params->sriov_max_vfs > NVME_MAX_VFS) { > + error_setg(errp, "sriov_max_vfs must be between 0 and %d", > + NVME_MAX_VFS); > + return; > + } > + } > } > > static void nvme_init_state(NvmeCtrl *n) > @@ -6321,6 +6349,20 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) > memory_region_set_enabled(&n->pmr.dev->mr, false); > } > > +static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, > + uint64_t bar_size) > +{ > + uint16_t vf_dev_id = n->params.use_intel_id ? > + PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; > + > + pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, > + n->params.sriov_max_vfs, n->params.sriov_max_vfs, > + NVME_VF_OFFSET, NVME_VF_STRIDE, NULL); Did you consider adding a new device for the virtual function device, "nvmevf"? 
Down the road, it might help with handling the variations in VF capabilities that you describe.