All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu PATCH v2 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command
       [not found]   ` <CGME20230725183956uscas1p17a64ec512cdf5b9348451926d6f0b224@uscas1p1.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-04 14:19         ` Jonathan Cameron via
  0 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Based on CXL spec 3.0 Table 8-94 (Identify Memory Device Output
Payload), the dynamic capacity event log size should be part of the
output of the Identify Memory Device command.
Add dc_event_log_size to the output payload so the host can read it.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index ad7a6116e4..b013e30314 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,8 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+/* Experimental value: dynamic capacity event log size */
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -519,8 +521,9 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
         uint16_t inject_poison_limit;
         uint8_t poison_caps;
         uint8_t qos_telemetry_caps;
+        uint16_t dc_event_log_size;
     } QEMU_PACKED *id;
-    QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+    QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 
     CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
     CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
@@ -543,6 +546,7 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
     st24_le_p(id->poison_list_max_mer, 256);
     /* No limit - so limited by main poison record limit */
     stw_le_p(&id->inject_poison_limit, 0);
+    stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
     *len = sizeof(*id);
     return CXL_MBOX_SUCCESS;
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support
       [not found]   ` <CGME20230725183956uscas1p296403063c710f4b546d4fec7650915c4@uscas1p2.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-04 15:24         ` Jonathan Cameron
  0 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Per CXL spec 3.0, add a dynamic capacity region representation based on
Table 8-126 and extend the CXL type3 device definition to include DC region
information. Also, based on section 8.2.9.8.9.1, add 'Get Dynamic Capacity
Configuration' mailbox command support.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 72 +++++++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3.c          |  6 ++++
 include/hw/cxl/cxl_device.h | 17 +++++++++
 3 files changed, 95 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index b013e30314..0fe9f3eb5d 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -81,6 +81,8 @@ enum {
         #define GET_POISON_LIST        0x0
         #define INJECT_POISON          0x1
         #define CLEAR_POISON           0x2
+    DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
+        #define GET_DC_CONFIG          0x0
     PHYSICAL_SWITCH = 0x51
         #define IDENTIFY_SWITCH_DEVICE      0x0
 };
@@ -939,6 +941,71 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * cxl spec 3.0: 8.2.9.8.9.1
+ * Get Dynamic Capacity Configuration
+ **/
+static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
+        CXLDeviceState *cxl_dstate,
+        uint16_t *len)
+{
+    struct get_dyn_cap_config_in_pl {
+        uint8_t region_cnt;
+        uint8_t start_region_id;
+    } QEMU_PACKED;
+
+    struct get_dyn_cap_config_out_pl {
+        uint8_t num_regions;
+        uint8_t rsvd1[7];
+        struct {
+            uint64_t base;
+            uint64_t decode_len;
+            uint64_t region_len;
+            uint64_t block_size;
+            uint32_t dsmadhandle;
+            uint8_t flags;
+            uint8_t rsvd2[3];
+        } QEMU_PACKED records[];
+    } QEMU_PACKED;
+
+    struct get_dyn_cap_config_in_pl *in = (void *)cmd->payload;
+    struct get_dyn_cap_config_out_pl *out = (void *)cmd->payload;
+    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+            cxl_dstate);
+    uint16_t record_count = 0, i;
+    uint16_t out_pl_len;
+    uint8_t start_region_id = in->start_region_id;
+
+    if (start_region_id >= ct3d->dc.num_regions) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
+            in->region_cnt);
+
+    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+    memset(out, 0, out_pl_len);
+    out->num_regions = record_count;
+    for (i = 0; i < record_count; i++) {
+        stq_le_p(&out->records[i].base,
+                ct3d->dc.regions[start_region_id + i].base);
+        stq_le_p(&out->records[i].decode_len,
+                ct3d->dc.regions[start_region_id + i].decode_len);
+        stq_le_p(&out->records[i].region_len,
+                ct3d->dc.regions[start_region_id + i].len);
+        stq_le_p(&out->records[i].block_size,
+                ct3d->dc.regions[start_region_id + i].block_size);
+        stl_le_p(&out->records[i].dsmadhandle,
+                ct3d->dc.regions[start_region_id + i].dsmadhandle);
+        out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
+    }
+
+    *len = out_pl_len;
+    return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -977,6 +1044,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
         cmd_media_inject_poison, 8, 0 },
     [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
         cmd_media_clear_poison, 72, 0 },
+    [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
+        cmd_dcd_get_dyn_cap_config, 2, 0 },
 };
 
 static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
@@ -1164,6 +1233,9 @@ void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate, bool switch_cci)
     }
     for (int set = 0; set < 256; set++) {
         for (int cmd = 0; cmd < 256; cmd++) {
+            if (!cxl_dstate->is_dcd && set == DCD_CONFIG) {
+                continue;
+            }
             if (cxl_dstate->cxl_cmd_set[set][cmd].handler) {
                 struct cxl_cmd *c = &cxl_dstate->cxl_cmd_set[set][cmd];
                 struct cel_log *log =
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 862107c5ef..4d68824dfe 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1046,6 +1046,12 @@ static void ct3d_reset(DeviceState *dev)
     uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
     uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
 
+    if (ct3d->dc.num_regions) {
+        ct3d->cxl_dstate.is_dcd = true;
+    } else {
+        ct3d->cxl_dstate.is_dcd = false;
+    }
+
     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
     cxl_device_register_init_common(&ct3d->cxl_dstate);
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index cd7f28dba8..dae39da438 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -212,6 +212,7 @@ typedef struct cxl_device_state {
     uint64_t mem_size;
     uint64_t pmem_size;
     uint64_t vmem_size;
+    bool is_dcd;
 
     struct cxl_cmd (*cxl_cmd_set)[256];
     CPMUState cpmu[CXL_NUM_CPMU_INSTANCES];
@@ -382,6 +383,17 @@ typedef struct CXLPoison {
 typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 #define CXL_POISON_LIST_LIMIT 256
 
+#define DCD_MAX_REGION_NUM 8
+
+typedef struct CXLDCD_Region {
+    uint64_t base;
+    uint64_t decode_len; /* in multiples of 256MB */
+    uint64_t len;
+    uint64_t block_size;
+    uint32_t dsmadhandle;
+    uint8_t flags;
+} CXLDCD_Region;
+
 struct CXLType3Dev {
     /* Private */
     PCIDevice parent_obj;
@@ -413,6 +425,11 @@ struct CXLType3Dev {
     unsigned int poison_list_cnt;
     bool poison_list_overflowed;
     uint64_t poison_list_overflow_ts;
+
+    struct dynamic_capacity {
+        uint8_t num_regions; /* 0-8 regions */
+        struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
+    } dc;
 };
 
 #define TYPE_CXL_TYPE3 "cxl-type3"
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices
       [not found]   ` <CGME20230725183956uscas1p153242eb4b12cb9cb6529476b4e9058c4@uscas1p1.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-04 15:27         ` Jonathan Cameron via
  0 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 5 +++--
 hw/mem/cxl_type3.c          | 8 ++++----
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 0fe9f3eb5d..dd5ea95af8 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -540,7 +540,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
 
     snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
-    stq_le_p(&id->total_capacity, cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+    stq_le_p(&id->total_capacity,
+            cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
     stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
     stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
     stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
@@ -879,7 +880,7 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
     struct clear_poison_pl *in = (void *)cmd->payload;
 
     dpa = ldq_le_p(&in->dpa);
-    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
         return CXL_MBOX_INVALID_PA;
     }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 4d68824dfe..3d7acffcb7 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -748,7 +748,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         }
         address_space_init(&ct3d->hostvmem_as, vmr, v_name);
         ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-        ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+        ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
         g_free(v_name);
     }
 
@@ -771,7 +771,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         }
         address_space_init(&ct3d->hostpmem_as, pmr, p_name);
         ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-        ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+        ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
         g_free(p_name);
     }
 
@@ -984,7 +984,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
         return -EINVAL;
     }
 
-    if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
         return -EINVAL;
     }
 
@@ -1148,7 +1148,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
         return false;
     }
 
-    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
         return false;
     }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index dae39da438..503c344326 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -209,7 +209,7 @@ typedef struct cxl_device_state {
     } timestamp;
 
     /* memory region size, HDM */
-    uint64_t mem_size;
+    uint64_t static_mem_size;
     uint64_t pmem_size;
     uint64_t vmem_size;
     bool is_dcd;
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 0/9] Enabling DCD emulation support in Qemu
       [not found] <CGME20230725183956uscas1p154e945516c2a4091479f4906d7652648@uscas1p1.samsung.com>
@ 2023-07-25 18:39 ` Fan Ni
       [not found]   ` <CGME20230725183956uscas1p17a64ec512cdf5b9348451926d6f0b224@uscas1p1.samsung.com>
                     ` (8 more replies)
  0 siblings, 9 replies; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

v1[1]->v2:

1. Fix a regression reported by Ira [2].
2. Fix a compile warning due to uninitialized 'rip' in the qmp processing function.


[1] https://lore.kernel.org/linux-cxl/20230724162313.34196-1-fan.ni@samsung.com/T/#t
[2] https://lore.kernel.org/linux-cxl/64bfe7b090843_12757b2945b@iweiny-mobl.notmuch/T/#m09983a3dbaa9135a850e345d86714bf2ab957ef6

Fan Ni (9):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
    payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
    and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
    type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
    devices
  hw/mem/cxl_type3: Add host backend and address space handling for DC
    regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
    list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
    dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
    extents
  hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

 hw/cxl/cxl-mailbox-utils.c  | 424 +++++++++++++++++++++++++++-
 hw/mem/cxl_type3.c          | 545 +++++++++++++++++++++++++++++++++---
 hw/mem/cxl_type3_stubs.c    |   6 +
 include/hw/cxl/cxl_device.h |  50 +++-
 include/hw/cxl/cxl_events.h |  16 ++
 qapi/cxl.json               |  49 ++++
 6 files changed, 1044 insertions(+), 46 deletions(-)

-- 
2.25.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 4/9] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices
       [not found]   ` <CGME20230725183956uscas1p2008fba59779b70405c74d28a30e4fbaa@uscas1p2.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-04 15:55         ` Jonathan Cameron via
  0 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

With this change, DC regions can be created when setting up memory for a
type3 memory device.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To keep things simple, other region parameters
such as region base, length, and block size are hard coded. If needed,
properties for these parameters can be added easily.

With the change, we can create DC regions with proper kernel side
support as below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region> /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x40000000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x40000000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/mem/cxl_type3.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 3d7acffcb7..b29bb2309a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -707,6 +707,34 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
     }
 }
 
+/*
+ * Create a dc region to test "Get Dynamic Capacity Configuration" command.
+ */
+static int cxl_create_dc_regions(CXLType3Dev *ct3d)
+{
+    int i;
+    uint64_t region_base = (ct3d->hostvmem ? ct3d->hostvmem->size : 0)
+        + (ct3d->hostpmem ? ct3d->hostpmem->size : 0);
+    uint64_t region_len = (uint64_t)2 * 1024 * 1024 * 1024;
+    uint64_t decode_len = 4; /* 4*256MB */
+    uint64_t blk_size = 2 * 1024 * 1024;
+    struct CXLDCD_Region *region;
+
+    for (i = 0; i < ct3d->dc.num_regions; i++) {
+        region = &ct3d->dc.regions[i];
+        region->base = region_base;
+        region->decode_len = decode_len;
+        region->len = region_len;
+        region->block_size = blk_size;
+        /* dsmad_handle is set when creating cdat table entries */
+        region->flags = 0;
+
+        region_base += region->len;
+    }
+
+    return 0;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
     DeviceState *ds = DEVICE(ct3d);
@@ -775,6 +803,10 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         g_free(p_name);
     }
 
+    if (cxl_create_dc_regions(ct3d)) {
+        return false;
+    }
+
     return true;
 }
 
@@ -1068,6 +1100,7 @@ static Property ct3_props[] = {
     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
     DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
+    DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support
       [not found]   ` <CGME20230725183957uscas1p28b38d294f90b97f99769466cc533b4de@uscas1p2.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-07 11:55         ` Jonathan Cameron via
  2023-09-08 13:12       ` Jørgen Hansen
  0 siblings, 2 replies; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Add a dynamic capacity extent list representation to the definition of
CXLType3Dev and add the Get Dynamic Capacity Extent List mailbox command per
CXL spec 3.0: 8.2.9.8.9.2.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 71 +++++++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3.c          |  1 +
 include/hw/cxl/cxl_device.h | 23 ++++++++++++
 3 files changed, 95 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 0511b8e6f7..3d25a9697e 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -83,6 +83,7 @@ enum {
         #define CLEAR_POISON           0x2
     DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
         #define GET_DC_CONFIG          0x0
+        #define GET_DYN_CAP_EXT_LIST   0x1
     PHYSICAL_SWITCH = 0x51
         #define IDENTIFY_SWITCH_DEVICE      0x0
 };
@@ -1018,6 +1019,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * cxl spec 3.0: 8.2.9.8.9.2
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
+        CXLDeviceState *cxl_dstate,
+        uint16_t *len)
+{
+    struct get_dyn_cap_ext_list_in_pl {
+        uint32_t extent_cnt;
+        uint32_t start_extent_id;
+    } QEMU_PACKED;
+
+    struct get_dyn_cap_ext_list_out_pl {
+        uint32_t count;
+        uint32_t total_extents;
+        uint32_t generation_num;
+        uint8_t rsvd[4];
+        CXLDCExtent_raw records[];
+    } QEMU_PACKED;
+
+    struct get_dyn_cap_ext_list_in_pl *in = (void *)cmd->payload;
+    struct get_dyn_cap_ext_list_out_pl *out = (void *)cmd->payload;
+    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+            cxl_dstate);
+    uint16_t record_count = 0, i = 0, record_done = 0;
+    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
+    CXLDCD_Extent *ent;
+    uint16_t out_pl_len;
+    uint32_t start_extent_id = in->start_extent_id;
+
+    if (start_extent_id > ct3d->dc.total_extent_count) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    record_count = MIN(in->extent_cnt,
+            ct3d->dc.total_extent_count - start_extent_id);
+
+    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+    /* May need more processing here in the future */
+    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+    memset(out, 0, out_pl_len);
+    stl_le_p(&out->count, record_count);
+    stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
+    stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
+
+    if (record_count > 0) {
+        QTAILQ_FOREACH(ent, extent_list, node) {
+            if (i++ < start_extent_id) {
+                continue;
+            }
+            stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
+            stq_le_p(&out->records[record_done].len, ent->len);
+            memcpy(&out->records[record_done].tag, ent->tag, 0x10);
+            stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
+            record_done++;
+            if (record_done == record_count) {
+                break;
+            }
+        }
+    }
+
+    *len = out_pl_len;
+    return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1058,6 +1126,9 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
         cmd_media_clear_poison, 72, 0 },
     [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
         cmd_dcd_get_dyn_cap_config, 2, 0 },
+    [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+        "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+        8, 0 },
 };
 
 static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 76bbd9f785..f1170b8047 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -789,6 +789,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 
         region_base += region->len;
     }
+    QTAILQ_INIT(&ct3d->dc.extents);
 
     return 0;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 1c99b05a66..3a338b3b37 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -385,6 +385,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 
 #define DCD_MAX_REGION_NUM 8
 
+typedef struct CXLDCD_Extent_raw {
+    uint64_t start_dpa;
+    uint64_t len;
+    uint8_t tag[0x10];
+    uint16_t shared_seq;
+    uint8_t rsvd[0x6];
+} QEMU_PACKED CXLDCExtent_raw;
+
+typedef struct CXLDCD_Extent {
+    uint64_t start_dpa;
+    uint64_t len;
+    uint8_t tag[0x10];
+    uint16_t shared_seq;
+    uint8_t rsvd[0x6];
+
+    QTAILQ_ENTRY(CXLDCD_Extent) node;
+} CXLDCD_Extent;
+typedef QTAILQ_HEAD(, CXLDCD_Extent) CXLDCDExtentList;
+
 typedef struct CXLDCD_Region {
     uint64_t base;
     uint64_t decode_len; /* in multiples of 256MB */
@@ -433,6 +452,10 @@ struct CXLType3Dev {
 
         uint8_t num_regions; /* 0-8 regions */
         struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
+        CXLDCDExtentList extents;
+
+        uint32_t total_extent_count;
+        uint32_t ext_list_gen_seq;
     } dc;
 };
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
       [not found]   ` <CGME20230725183957uscas1p2ca5293c7229ab989ad1a2d95395436a6@uscas1p2.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-07  8:53         ` Jonathan Cameron via
  2023-08-30 12:08       ` Jørgen Hansen
  0 siblings, 2 replies; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

A DPA range in a DC region is not valid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether each DC block in the region is backed by a DC extent.
Each bit in the bitmap represents one DC block. When a DC extent is
added, the bits of all blocks in the extent are set; they are
cleared when the extent is released.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
 include/hw/cxl/cxl_device.h |   1 +
 2 files changed, 156 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 41a828598a..51943a36fc 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
         /* dsmad_handle is set when creating cdat table entries */
         region->flags = 0;
 
+        region->blk_bitmap = bitmap_new(region->len / region->block_size);
+        if (!region->blk_bitmap) {
+            break;
+        }
+
         region_base += region->len;
     }
+
+    if (i < ct3d->dc.num_regions) {
+        while (--i >= 0) {
+            g_free(ct3d->dc.regions[i].blk_bitmap);
+        }
+        return -1;
+    }
+
     QTAILQ_INIT(&ct3d->dc.extents);
 
     return 0;
 }
 
+static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
+{
+    int i;
+    struct CXLDCD_Region *region;
+
+    for (i = 0; i < ct3d->dc.num_regions; i++) {
+        region = &ct3d->dc.regions[i];
+        g_free(region->blk_bitmap);
+    }
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
     DeviceState *ds = DEVICE(ct3d);
@@ -1021,6 +1045,7 @@ err_free_special_ops:
     g_free(regs->special_ops);
 err_address_space_free:
     if (ct3d->dc.host_dc) {
+        cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
     }
     if (ct3d->hostpmem) {
@@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
     spdm_sock_fini(ct3d->doe_spdm.socket);
     g_free(regs->special_ops);
     if (ct3d->dc.host_dc) {
+        cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
     }
     if (ct3d->hostpmem) {
@@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
     }
 }
 
+/*
+ * This function marks the dpa range [dpa, dpa + len) as backed and
+ * accessible. This happens when a dc extent is added and accepted by the
+ * host.
+ */
+static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+        uint64_t len)
+{
+    int i;
+    CXLDCD_Region *region = &ct3d->dc.regions[0];
+
+    if (dpa < region->base
+            || dpa >= region->base + ct3d->dc.total_capacity)
+        return;
+
+    /*
+     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+     * Region 0 being used for the lowest DPA of Dynamic Capacity and
+     * Region 7 for the highest DPA.
+     * So we check from the last region to find where the dpa belongs.
+     * access across multiple regions is not allowed.
+     **/
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            break;
+        }
+    }
+
+    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+            len / region->block_size);
+}
+
+/*
+ * This function checks whether a dpa range [dpa, dpa + len) has been backed
+ * with dc extents. It is used when validating reads/writes to dc regions.
+ */
+static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+        uint64_t len)
+{
+    int i;
+    CXLDCD_Region *region = &ct3d->dc.regions[0];
+    uint64_t nbits;
+    long nr;
+
+    if (dpa < region->base
+            || dpa >= region->base + ct3d->dc.total_capacity)
+        return false;
+
+    /*
+     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+     * Region 0 being used for the lowest DPA of Dynamic Capacity and
+     * Region 7 for the highest DPA.
+     * So we check from the last region to find where the dpa belongs.
+     * access across multiple regions is not allowed.
+     */
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            break;
+        }
+    }
+
+    nr = (dpa - region->base) / region->block_size;
+    nbits = len / region->block_size;
+    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
+}
+
+/*
+ * This function marks the dpa range [dpa, dpa + len) as unbacked and
+ * inaccessible. This happens when the dc extent covering the range is
+ * released.
+ */
+static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+        uint64_t len)
+{
+    int i;
+    CXLDCD_Region *region = &ct3d->dc.regions[0];
+    uint64_t nbits;
+    long nr;
+
+    if (dpa < region->base
+            || dpa >= region->base + ct3d->dc.total_capacity)
+        return;
+
+    /*
+     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+     * Region 0 being used for the lowest DPA of Dynamic Capacity and
+     * Region 7 for the highest DPA.
+     * So we check from the last region to find where the dpa belongs.
+     * access across multiple regions is not allowed.
+     */
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            break;
+        }
+    }
+
+    nr = (dpa - region->base) / region->block_size;
+    nbits = len / region->block_size;
+    bitmap_clear(region->blk_bitmap, nr, nbits);
+}
+
 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
 {
     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
@@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
         *as = &ct3d->hostpmem_as;
         *dpa_offset -= vmr_size;
     } else {
+        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
+            return -ENODEV;
+        }
+
         *as = &ct3d->dc.host_dc_as;
         *dpa_offset -= (vmr_size + pmr_size);
     }
@@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
     }
 
     g_free(extents);
+
+    /* Another choice is to do the set/clear after getting mailbox response*/
+    list = records;
+    while (list) {
+        dpa = list->value->dpa * 1024 * 1024;
+        len = list->value->len * 1024 * 1024;
+        rid = list->value->region_id;
+
+        switch (type) {
+        case DC_EVENT_ADD_CAPACITY:
+            set_region_block_backed(dcd, dpa, len);
+            break;
+        case DC_EVENT_RELEASE_CAPACITY:
+            clear_region_block_backed(dcd, dpa, len);
+            break;
+        default:
+            error_setg(errp, "DC event type not handled yet");
+            break;
+        }
+        list = list->next;
+    }
 }
 
 void qmp_cxl_add_dynamic_capacity_event(const char *path,
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 01a5eaca48..1f85c88017 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
     uint64_t block_size;
     uint32_t dsmadhandle;
     uint8_t flags;
+    unsigned long *blk_bitmap;
 } CXLDCD_Region;
 
 struct CXLType3Dev {
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response
       [not found]   ` <CGME20230725183957uscas1p2a076b6f7b694d2e632a0b8025ec331d7@uscas1p2.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-07 11:42         ` Jonathan Cameron
  2023-09-08 13:00       ` Jørgen Hansen
  0 siblings, 2 replies; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Per CXL spec 3.0, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 253 ++++++++++++++++++++++++++++++++++++
 include/hw/cxl/cxl_device.h |   3 +-
 2 files changed, 255 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 3d25a9697e..1e4944da95 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,8 @@ enum {
     DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
         #define GET_DC_CONFIG          0x0
         #define GET_DYN_CAP_EXT_LIST   0x1
+        #define ADD_DYN_CAP_RSP        0x2
+        #define RELEASE_DYN_CAP        0x3
     PHYSICAL_SWITCH = 0x51
         #define IDENTIFY_SWITCH_DEVICE      0x0
 };
@@ -1086,6 +1088,251 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether the bits at addr between [nr, nr+size) are all set,
+ * return 1 if all 1s, else return 0
+ */
+static inline int test_bits(const unsigned long *addr, int nr, int size)
+{
+    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
+
+    return (res >= nr + size) ? 1 : 0;
+}
+
+/*
+ * Find dynamic capacity region id based on dpa range [dpa, dpa+len)
+ */
+static uint8_t find_region_id(struct CXLType3Dev *dev, uint64_t dpa,
+        uint64_t len)
+{
+    int8_t i = dev->dc.num_regions - 1;
+
+    while (i > 0 && dpa < dev->dc.regions[i].base) {
+        i--;
+    }
+
+    if (dpa < dev->dc.regions[i].base
+            || dpa + len > dev->dc.regions[i].base + dev->dc.regions[i].len) {
+        return dev->dc.num_regions;
+    }
+
+    return i;
+}
+
+static void insert_extent_to_extent_list(CXLDCDExtentList *list, uint64_t dpa,
+        uint64_t len, uint8_t *tag, uint16_t shared_seq)
+{
+    CXLDCD_Extent *extent;
+    extent = g_new0(CXLDCD_Extent, 1);
+    extent->start_dpa = dpa;
+    extent->len = len;
+    if (tag) {
+        memcpy(extent->tag, tag, 0x10);
+    } else {
+        memset(extent->tag, 0, 0x10);
+    }
+    extent->shared_seq = shared_seq;
+
+    QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+typedef struct updated_dc_extent_list_in_pl {
+    uint32_t num_entries_updated;
+    uint8_t rsvd[4];
+    struct { /* r3.0: Table 8-130 */
+        uint64_t start_dpa;
+        uint64_t len;
+        uint8_t rsvd[8];
+    } QEMU_PACKED updated_entries[];
+} QEMU_PACKED updated_dc_extent_list_in_pl;
+
+/*
+ * This function only checks the input extent list against itself.
+ */
+static CXLRetCode detect_malformed_extent_list(CXLType3Dev *dev,
+        const updated_dc_extent_list_in_pl *in)
+{
+    unsigned long *blk_bitmap;
+    uint64_t min_block_size = dev->dc.regions[0].block_size;
+    struct CXLDCD_Region *region = &dev->dc.regions[0];
+    uint32_t i;
+    uint64_t dpa, len;
+    uint8_t rid;
+    CXLRetCode ret;
+
+    for (i = 1; i < dev->dc.num_regions; i++) {
+        region = &dev->dc.regions[i];
+        if (min_block_size > region->block_size) {
+            min_block_size = region->block_size;
+        }
+    }
+
+    blk_bitmap = bitmap_new((region->len + region->base
+                - dev->dc.regions[0].base) / min_block_size);
+
+    for (i = 0; i < in->num_entries_updated; i++) {
+        dpa = in->updated_entries[i].start_dpa;
+        len = in->updated_entries[i].len;
+
+        rid = find_region_id(dev, dpa, len);
+        if (rid == dev->dc.num_regions) {
+            ret = CXL_MBOX_INVALID_PA;
+            goto out;
+        }
+
+        region = &dev->dc.regions[rid];
+        if (dpa % region->block_size || len % region->block_size) {
+            ret = CXL_MBOX_INVALID_EXTENT_LIST;
+            goto out;
+        }
+        /* the dpa range already covered by some other extents in the list */
+        if (test_bits(blk_bitmap, dpa / min_block_size, len / min_block_size)) {
+            ret = CXL_MBOX_INVALID_EXTENT_LIST;
+            goto out;
+        }
+        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
+   }
+
+    ret = CXL_MBOX_SUCCESS;
+
+out:
+    g_free(blk_bitmap);
+    return ret;
+}
+
+/*
+ * cxl spec 3.0: 8.2.9.8.9.3
+ * Add Dynamic Capacity Response (opcode 4802h)
+ * Assume an extent is added only after the response is processed successfully
+ * TODO: for better extent list validation, a better solution would be
+ * maintaining a pending extent list and use it to verify the extent list in
+ * the response.
+ */
+static CXLRetCode cmd_dcd_add_dyn_cap_rsp(struct cxl_cmd *cmd,
+        CXLDeviceState *cxl_dstate, uint16_t *len_unused)
+{
+    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
+    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+            cxl_dstate);
+    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
+    CXLDCD_Extent *ent;
+    uint32_t i;
+    uint64_t dpa, len;
+    CXLRetCode ret;
+ 
+    if (in->num_entries_updated == 0) {
+        ret = CXL_MBOX_SUCCESS;
+        goto out;
+    }
+
+    ret = detect_malformed_extent_list(ct3d, in);
+    if (ret != CXL_MBOX_SUCCESS) {
+        goto out;
+    }
+
+    for (i = 0; i < in->num_entries_updated; i++) {
+        dpa = in->updated_entries[i].start_dpa;
+        len = in->updated_entries[i].len;
+
+        /*
+         * Check if the DPA range of the to-be-added extent overlaps with
+         * existing extent list maintained by the device.
+         */
+        QTAILQ_FOREACH(ent, extent_list, node) {
+            if (ent->start_dpa == dpa && ent->len == len) {
+                ret = CXL_MBOX_INVALID_PA;
+                goto out;
+            } else if (ent->start_dpa <= dpa
+                    && dpa + len <= ent->start_dpa + ent->len) {
+                ret = CXL_MBOX_INVALID_PA;
+                goto out;
+            } else if ((dpa < ent->start_dpa + ent->len
+                        && dpa + len > ent->start_dpa + ent->len)
+                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
+                ret = CXL_MBOX_INVALID_PA;
+                goto out;
+            }
+        }
+
+        /*
+         * TODO: add a pending extent list based on event log record and verify
+         * the input response
+         */
+
+        insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
+    }
+    ret = CXL_MBOX_SUCCESS;
+
+out:
+    return ret;
+}
+
+/*
+ * Spec 3.0: 8.2.9.8.9.4
+ * Release Dynamic Capacity (opcode 4803h)
+ **/
+static CXLRetCode cmd_dcd_release_dyn_cap(struct cxl_cmd *cmd,
+        CXLDeviceState *cxl_dstate,
+        uint16_t *len_unused)
+{
+    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
+    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
+            cxl_dstate);
+    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
+    CXLDCD_Extent *ent;
+    uint32_t i;
+    uint64_t dpa, len;
+    CXLRetCode ret;
+
+    if (in->num_entries_updated == 0) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    ret = detect_malformed_extent_list(ct3d, in);
+    if (ret != CXL_MBOX_SUCCESS) {
+        return ret;
+    }
+
+    for (i = 0; i < in->num_entries_updated; i++) {
+        dpa = in->updated_entries[i].start_dpa;
+        len = in->updated_entries[i].len;
+
+        QTAILQ_FOREACH(ent, extent_list, node) {
+            if (ent->start_dpa == dpa && ent->len == len) {
+                break;
+            } else if (ent->start_dpa < dpa
+                    && dpa + len <= ent->start_dpa + ent->len) {
+                /* remove partial extent */
+                uint64_t len1 = dpa - ent->start_dpa;
+                uint64_t len2 = ent->start_dpa + ent->len - dpa - len;
+
+                if (len1) {
+                    insert_extent_to_extent_list(extent_list, ent->start_dpa,
+                            len1, NULL, 0);
+                }
+                if (len2) {
+                    insert_extent_to_extent_list(extent_list, dpa + len, len2,
+                            NULL, 0);
+                }
+                break;
+            } else if ((dpa < ent->start_dpa + ent->len
+                        && dpa + len > ent->start_dpa + ent->len)
+                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa))
+                return CXL_MBOX_INVALID_EXTENT_LIST;
+        }
+
+        if (ent) {
+            QTAILQ_REMOVE(extent_list, ent, node);
+            g_free(ent);
+        } else {
+            /* Try to remove a non-existing extent */
+            return CXL_MBOX_INVALID_PA;
+        }
+    }
+
+    return CXL_MBOX_SUCCESS;
+}
+ 
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1129,6 +1376,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
     [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
         "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
         8, 0 },
+    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
+        "ADD_DCD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
+        ~0, IMMEDIATE_DATA_CHANGE },
+    [DCD_CONFIG][RELEASE_DYN_CAP] = {
+        "RELEASE_DCD_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
+        ~0, IMMEDIATE_DATA_CHANGE },
 };
 
 static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 3a338b3b37..01a5eaca48 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -130,7 +130,8 @@ typedef enum {
     CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
     CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
     CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
-    CXL_MBOX_MAX = 0x17
+    CXL_MBOX_INVALID_EXTENT_LIST = 0x1E, /* cxl r3.0: Table 8-34*/
+    CXL_MBOX_MAX = 0x1F
 } CXLRetCode;
 
 struct cxl_cmd;
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
       [not found]   ` <CGME20230725183957uscas1p1eeb8e8eccc6c00b460d183027642374b@uscas1p1.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-07-26 12:53       ` Nathan Fontenot
  2023-08-04 16:36         ` Jonathan Cameron via
  0 siblings, 2 replies; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Add a (file/memory backed) host backend; all the dynamic capacity regions
share a single, large enough host backend. Set up an address space for
DC regions to support read/write operations to dynamic capacity for DCD.

With this change, the following support is added:
1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
   memory backend for dynamic capacity;
2. add an address space for dynamic capacity for read/write support;
3. create cdat entries for each dynamic capacity region;
4. fix dvsec range registers to include DC regions.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  |  19 +++-
 hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 185 insertions(+), 41 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index dd5ea95af8..0511b8e6f7 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -388,9 +388,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
         char fw_rev4[0x10];
     } QEMU_PACKED *fw_info;
     QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
+    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
     if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+        (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -531,7 +533,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
     CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 
     if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -566,9 +569,11 @@ static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
         uint64_t next_pmem;
     } QEMU_PACKED *part_info = (void *)cmd->payload;
     QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
     if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -880,7 +885,13 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
     struct clear_poison_pl *in = (void *)cmd->payload;
 
     dpa = ldq_le_p(&in->dpa);
-    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+    if (dpa + CXL_CACHE_LINE_SIZE >= cxl_dstate->static_mem_size
+            && ct3d->dc.num_regions == 0) {
+        return CXL_MBOX_INVALID_PA;
+    }
+
+    if (ct3d->dc.num_regions && dpa + CXL_CACHE_LINE_SIZE >=
+            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
         return CXL_MBOX_INVALID_PA;
     }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b29bb2309a..76bbd9f785 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -20,6 +20,7 @@
 #include "hw/pci/spdm.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -33,8 +34,8 @@ enum {
 };
 
 static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-                                         int dsmad_handle, MemoryRegion *mr,
-                                         bool is_pmem, uint64_t dpa_base)
+        int dsmad_handle, uint8_t flags,
+        uint64_t dpa_base, uint64_t size)
 {
     g_autofree CDATDsmas *dsmas = NULL;
     g_autofree CDATDslbis *dslbis0 = NULL;
@@ -53,9 +54,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
             .length = sizeof(*dsmas),
         },
         .DSMADhandle = dsmad_handle,
-        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+        .flags = flags,
         .DPA_base = dpa_base,
-        .DPA_length = memory_region_size(mr),
+        .DPA_length = size,
     };
 
     /* For now, no memory side cache, plausiblish numbers */
@@ -137,9 +138,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
          * NV: Reserved - the non volatile from DSMAS matters
          * V: EFI_MEMORY_SP
          */
-        .EFI_memory_type_attr = is_pmem ? 2 : 1,
+        .EFI_memory_type_attr = flags ? 2 : 1,
         .DPA_offset = 0,
-        .DPA_length = memory_region_size(mr),
+        .DPA_length = size,
     };
 
     /* Header always at start of structure */
@@ -158,21 +159,28 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
     g_autofree CDATSubHeader **table = NULL;
     CXLType3Dev *ct3d = priv;
     MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+    MemoryRegion *dc_mr = NULL;
     int dsmad_handle = 0;
     int cur_ent = 0;
     int len = 0;
     int rc, i;
+    uint64_t vmr_size = 0, pmr_size = 0;
 
-    if (!ct3d->hostpmem && !ct3d->hostvmem) {
+    if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
         return 0;
     }
 
+    if (ct3d->hostpmem && ct3d->hostvmem && ct3d->dc.host_dc) {
+        warn_report("The device has static ram and pmem and dynamic capacity");
+    }
+
     if (ct3d->hostvmem) {
         volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
         if (!volatile_mr) {
             return -EINVAL;
         }
         len += CT3_CDAT_NUM_ENTRIES;
+        vmr_size = volatile_mr->size;
     }
 
     if (ct3d->hostpmem) {
@@ -181,6 +189,19 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
             return -EINVAL;
         }
         len += CT3_CDAT_NUM_ENTRIES;
+        pmr_size = nonvolatile_mr->size;
+    }
+
+    if (ct3d->dc.num_regions) {
+        if (ct3d->dc.host_dc) {
+            dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+            if (!dc_mr) {
+                return -EINVAL;
+            }
+            len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
+        } else {
+            return -EINVAL;
+        }
     }
 
     table = g_malloc0(len * sizeof(*table));
@@ -190,8 +211,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
 
     /* Now fill them in */
     if (volatile_mr) {
-        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
-                                           false, 0);
+        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
+                0, 0, vmr_size);
         if (rc < 0) {
             return rc;
         }
@@ -200,14 +221,37 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
 
     if (nonvolatile_mr) {
         rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-                                           nonvolatile_mr, true,
-                                           (volatile_mr ?
-                                            memory_region_size(volatile_mr) : 0));
+                CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);
         if (rc < 0) {
             goto error_cleanup;
         }
         cur_ent += CT3_CDAT_NUM_ENTRIES;
     }
+
+    if (dc_mr) {
+        uint64_t region_base = vmr_size + pmr_size;
+
+        /*
+         * Currently we create cdat entries for each region, should we only
+         * create dsmas table instead??
+         * We assume all dc regions are non-volatile for now.
+         *
+         */
+        for (i = 0; i < ct3d->dc.num_regions; i++) {
+            rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
+                    , dsmad_handle++
+                    , CDAT_DSMAS_FLAG_NV | CDAT_DSMAS_FLAG_DYNAMIC_CAP
+                    , region_base, ct3d->dc.regions[i].len);
+            if (rc < 0) {
+                goto error_cleanup;
+            }
+            ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
+
+            cur_ent += CT3_CDAT_NUM_ENTRIES;
+            region_base += ct3d->dc.regions[i].len;
+        }
+    }
+
     assert(len == cur_ent);
 
     *cdat_table = g_steal_pointer(&table);
@@ -435,11 +479,24 @@ static void build_dvsecs(CXLType3Dev *ct3d)
             range2_size_hi = ct3d->hostpmem->size >> 32;
             range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
                              (ct3d->hostpmem->size & 0xF0000000);
+        } else if (ct3d->dc.host_dc) {
+            range2_size_hi = ct3d->dc.host_dc->size >> 32;
+            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                             (ct3d->dc.host_dc->size & 0xF0000000);
         }
-    } else {
+    } else if (ct3d->hostpmem) {
         range1_size_hi = ct3d->hostpmem->size >> 32;
         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
                          (ct3d->hostpmem->size & 0xF0000000);
+        if (ct3d->dc.host_dc) {
+            range2_size_hi = ct3d->dc.host_dc->size >> 32;
+            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                             (ct3d->dc.host_dc->size & 0xF0000000);
+        }
+    } else {
+        range1_size_hi = ct3d->dc.host_dc->size >> 32;
+        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+            (ct3d->dc.host_dc->size & 0xF0000000);
     }
 
     dvsec = (uint8_t *)&(CXLDVSECDevice){
@@ -708,7 +765,8 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
 }
 
 /*
- * Create a dc region to test "Get Dynamic Capacity Configuration" command.
+ * Create dc regions.
+ * TODO: region parameters are hard coded, may need to change in the future.
  */
 static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 {
@@ -739,7 +797,8 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
     DeviceState *ds = DEVICE(ct3d);
 
-    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
+    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
+            && !ct3d->dc.num_regions) {
         error_setg(errp, "at least one memdev property must be set");
         return false;
     } else if (ct3d->hostmem && ct3d->hostpmem) {
@@ -807,6 +866,50 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         return false;
     }
 
+    ct3d->dc.total_capacity = 0;
+    if (ct3d->dc.host_dc) {
+        MemoryRegion *dc_mr;
+        char *dc_name;
+        uint64_t total_region_size = 0;
+        int i;
+
+        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+        if (!dc_mr) {
+            error_setg(errp, "dynamic capacity must have backing device");
+            return false;
+        }
+        /* FIXME: set dc as nonvolatile for now */
+        memory_region_set_nonvolatile(dc_mr, true);
+        memory_region_set_enabled(dc_mr, true);
+        host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
+        if (ds->id) {
+            dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
+        } else {
+            dc_name = g_strdup("cxl-dcd-dpa-dc-space");
+        }
+        address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
+
+        for (i = 0; i < ct3d->dc.num_regions; i++) {
+            total_region_size += ct3d->dc.regions[i].len;
+        }
+        /* Make sure the host backend is large enough to cover all dc range */
+        if (total_region_size > memory_region_size(dc_mr)) {
+            error_setg(errp,
+                "too small host backend size, increase to %lu MiB or more",
+                total_region_size / 1024 / 1024);
+            return false;
+        }
+
+        if (dc_mr->size % CXL_CAPACITY_MULTIPLIER != 0) {
+            error_setg(errp, "DC region size is unaligned to %lx",
+                    CXL_CAPACITY_MULTIPLIER);
+            return false;
+        }
+
+        ct3d->dc.total_capacity = total_region_size;
+        g_free(dc_name);
+    }
+
     return true;
 }
 
@@ -916,6 +1019,9 @@ err_release_cdat:
 err_free_special_ops:
     g_free(regs->special_ops);
 err_address_space_free:
+    if (ct3d->dc.host_dc) {
+        address_space_destroy(&ct3d->dc.host_dc_as);
+    }
     if (ct3d->hostpmem) {
         address_space_destroy(&ct3d->hostpmem_as);
     }
@@ -935,6 +1041,9 @@ static void ct3_exit(PCIDevice *pci_dev)
     cxl_doe_cdat_release(cxl_cstate);
     spdm_sock_fini(ct3d->doe_spdm.socket);
     g_free(regs->special_ops);
+    if (ct3d->dc.host_dc) {
+        address_space_destroy(&ct3d->dc.host_dc_as);
+    }
     if (ct3d->hostpmem) {
         address_space_destroy(&ct3d->hostpmem_as);
     }
@@ -999,16 +1108,24 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
                                        AddressSpace **as,
                                        uint64_t *dpa_offset)
 {
-    MemoryRegion *vmr = NULL, *pmr = NULL;
+    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
+    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
 
     if (ct3d->hostvmem) {
         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+        vmr_size = memory_region_size(vmr);
     }
     if (ct3d->hostpmem) {
         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+        pmr_size = memory_region_size(pmr);
+    }
+    if (ct3d->dc.host_dc) {
+        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+        /* Do we want dc_size to be dc_mr->size or not?? */
+        dc_size = ct3d->dc.total_capacity;
     }
 
-    if (!vmr && !pmr) {
+    if (!vmr && !pmr && !dc_mr) {
         return -ENODEV;
     }
 
@@ -1016,19 +1133,19 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
         return -EINVAL;
     }
 
-    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
+    if ((*dpa_offset >= vmr_size + pmr_size + dc_size) ||
+       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
         return -EINVAL;
     }
 
-    if (vmr) {
-        if (*dpa_offset < memory_region_size(vmr)) {
-            *as = &ct3d->hostvmem_as;
-        } else {
-            *as = &ct3d->hostpmem_as;
-            *dpa_offset -= memory_region_size(vmr);
-        }
-    } else {
+    if (*dpa_offset < vmr_size) {
+        *as = &ct3d->hostvmem_as;
+    } else if (*dpa_offset < vmr_size + pmr_size) {
         *as = &ct3d->hostpmem_as;
+        *dpa_offset -= vmr_size;
+    } else {
+        *as = &ct3d->dc.host_dc_as;
+        *dpa_offset -= (vmr_size + pmr_size);
     }
 
     return 0;
@@ -1101,6 +1218,8 @@ static Property ct3_props[] = {
     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
     DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
     DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
+    DEFINE_PROP_LINK("nonvolatile-dc-memdev", CXLType3Dev, dc.host_dc,
+                    TYPE_MEMORY_BACKEND, HostMemoryBackend *),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1167,33 +1286,43 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
 
 static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
 {
-    MemoryRegion *vmr = NULL, *pmr = NULL;
+    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
     AddressSpace *as;
+    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
 
     if (ct3d->hostvmem) {
         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+        vmr_size = memory_region_size(vmr);
     }
     if (ct3d->hostpmem) {
         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+        pmr_size = memory_region_size(pmr);
     }
+    if (ct3d->dc.host_dc) {
+        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
+        dc_size = ct3d->dc.total_capacity;
+     }
 
-    if (!vmr && !pmr) {
+    if (!vmr && !pmr && !dc_mr) {
         return false;
     }
 
-    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
+    if (dpa_offset >= vmr_size + pmr_size + dc_size) {
+        return false;
+    }
+    if (dpa_offset + CXL_CACHE_LINE_SIZE >= vmr_size + pmr_size
+            && ct3d->dc.num_regions == 0) {
         return false;
     }
 
-    if (vmr) {
-        if (dpa_offset < memory_region_size(vmr)) {
-            as = &ct3d->hostvmem_as;
-        } else {
-            as = &ct3d->hostpmem_as;
-            dpa_offset -= memory_region_size(vmr);
-        }
-    } else {
+    if (dpa_offset < vmr_size) {
+        as = &ct3d->hostvmem_as;
+    } else if (dpa_offset < vmr_size + pmr_size) {
         as = &ct3d->hostpmem_as;
+        dpa_offset -= vmr->size;
+    } else {
+        as = &ct3d->dc.host_dc_as;
+        dpa_offset -= (vmr_size + pmr_size);
     }
 
     address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data,
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 503c344326..1c99b05a66 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -427,6 +427,10 @@ struct CXLType3Dev {
     uint64_t poison_list_overflow_ts;
 
     struct dynamic_capacity {
+        HostMemoryBackend *host_dc;
+        AddressSpace host_dc_as;
+        uint64_t total_capacity; /* 256M aligned */
+
         uint8_t num_regions; /* 0-8 regions */
         struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
     } dc;
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [Qemu PATCH v2 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents
       [not found]   ` <CGME20230725183957uscas1p1ebf676c30d21896d1fd7f9b652250449@uscas1p1.samsung.com>
@ 2023-07-25 18:39     ` Fan Ni
  2023-08-07 10:35         ` Jonathan Cameron via
  0 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-07-25 18:39 UTC (permalink / raw)
  To: qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan, Fan Ni

From: Fan Ni <nifan@outlook.com>

Since fabric manager emulation is not supported yet, the change implements
the functions to add/release dynamic capacity extents as QMP interfaces.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MB long)
to region 0 (starting at dpa offsets 0 and 128MB) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity-event",
  "arguments": {
      "path": "/machine/peripheral/cxl-dcd0",
      "extents": [
      {
          "region-id": 0,
          "dpa": 0,
          "len": 128
      },
      {
          "region-id": 0,
          "dpa": 128,
          "len": 128
      }
      ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MB from region 0
(starting at dpa offset 128MB) looks like below:

{ "execute": "cxl-release-dynamic-capacity-event",
  "arguments": {
      "path": "/machine/peripheral/cxl-dcd0",
      "extents": [
      {
          "region-id": 0,
          "dpa": 128,
          "len": 128
      }
      ]
  }
}

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/mem/cxl_type3.c          | 145 ++++++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3_stubs.c    |   6 ++
 include/hw/cxl/cxl_events.h |  16 ++++
 qapi/cxl.json               |  49 ++++++++++++
 4 files changed, 216 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index f1170b8047..41a828598a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1817,6 +1817,151 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
     }
 }
 
+static const QemuUUID dynamic_capacity_uuid = {
+    .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
+            0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
+};
+
+/*
+ * cxl r3.0: Table 8-47
+ * 00h: add capacity
+ * 01h: release capacity
+ * 02h: forced capacity release
+ * 03h: region configuration updated
+ * 04h: Add capacity response
+ * 05h: capacity released
+ */
+enum DC_Event_Type {
+    DC_EVENT_ADD_CAPACITY,
+    DC_EVENT_RELEASE_CAPACITY,
+    DC_EVENT_FORCED_RELEASE_CAPACITY,
+    DC_EVENT_REGION_CONFIG_UPDATED,
+    DC_EVENT_ADD_CAPACITY_RSP,
+    DC_EVENT_CAPACITY_RELEASED,
+    DC_EVENT_NUM
+};
+
+#define MEM_BLK_SIZE_MB 128
+static void qmp_cxl_process_dynamic_capacity_event(const char *path,
+        CxlEventLog log, enum DC_Event_Type type,
+        uint16_t hid, CXLDCExtentRecordList *records, Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    CXLEventDynamicCapacity dCap;
+    CXLEventRecordHdr *hdr = &dCap.hdr;
+    CXLDeviceState *cxlds;
+    CXLType3Dev *dcd;
+    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
+    uint32_t num_extents = 0;
+    CXLDCExtentRecordList *list = records;
+    CXLDCExtent_raw *extents;
+    uint64_t dpa, len;
+    uint8_t rid = 0;
+    int i;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path not point to a valid CXL type3 device");
+        return;
+    }
+
+    dcd = CXL_TYPE3(obj);
+    cxlds = &dcd->cxl_dstate;
+    memset(&dCap, 0, sizeof(dCap));
+
+    if (!dcd->dc.num_regions) {
+        error_setg(errp, "No dynamic capacity support from the device");
+        return;
+    }
+
+    while (list) {
+        dpa = list->value->dpa * 1024 * 1024;
+        len = list->value->len * 1024 * 1024;
+        rid = list->value->region_id;
+
+        if (rid >= dcd->dc.num_regions) {
+            error_setg(errp, "region id is too large");
+            return;
+        }
+
+        if (dpa % dcd->dc.regions[rid].block_size
+                || len % dcd->dc.regions[rid].block_size) {
+            error_setg(errp, "dpa or len is not aligned to region block size");
+            return;
+        }
+
+        if (dpa + len > dcd->dc.regions[rid].decode_len * 256 * 1024 * 1024) {
+            error_setg(errp, "extent range is beyond the region end");
+            return;
+        }
+
+        num_extents++;
+        list = list->next;
+    }
+
+    i = 0;
+    list = records;
+    extents = g_new0(CXLDCExtent_raw, num_extents);
+    while (list) {
+        dpa = list->value->dpa * 1024 * 1024;
+        len = list->value->len * 1024 * 1024;
+        rid = list->value->region_id;
+
+        extents[i].start_dpa = dpa + dcd->dc.regions[rid].base;
+        extents[i].len = len;
+        memset(extents[i].tag, 0, 0x10);
+        extents[i].shared_seq = 0;
+
+        list = list->next;
+        i++;
+    }
+
+    /*
+     * 8.2.9.1.5
+     * All Dynamic Capacity event records shall set the Event Record
+     * Severity field in the Common Event Record Format to Informational
+     * Event. All Dynamic Capacity related events shall be logged in the
+     * Dynamic Capacity Event Log.
+     */
+    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
+            cxl_device_get_timestamp(&dcd->cxl_dstate));
+
+    dCap.type = type;
+    stw_le_p(&dCap.host_id, hid);
+    /* only valid for DC_REGION_CONFIG_UPDATED event */
+    dCap.updated_region_id = rid;
+    for (i = 0; i < num_extents; i++) {
+        memcpy(&dCap.dynamic_capacity_extent, &extents[i]
+                , sizeof(CXLDCExtent_raw));
+
+        if (cxl_event_insert(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP,
+                    (CXLEventRecordRaw *)&dCap)) {
+            cxl_event_irq_assert(dcd);
+        }
+    }
+
+    g_free(extents);
+}
+
+void qmp_cxl_add_dynamic_capacity_event(const char *path,
+        struct CXLDCExtentRecordList  *records,
+        Error **errp)
+{
+   qmp_cxl_process_dynamic_capacity_event(path, CXL_EVENT_LOG_INFORMATIONAL,
+           DC_EVENT_ADD_CAPACITY, 0, records, errp);
+}
+
+void qmp_cxl_release_dynamic_capacity_event(const char *path,
+        struct CXLDCExtentRecordList  *records,
+        Error **errp)
+{
+    qmp_cxl_process_dynamic_capacity_event(path, CXL_EVENT_LOG_INFORMATIONAL,
+            DC_EVENT_RELEASE_CAPACITY, 0, records, errp);
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index f3e4a9fa72..482229f3bd 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -56,3 +56,9 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
 {
     error_setg(errp, "CXL Type 3 support is not compiled in");
 }
+
+void qmp_cxl_add_dynamic_capacity_event(const char *path,
+        struct CXLDCExtentRecordList  *records, Error **errp) {}
+
+void qmp_cxl_release_dynamic_capacity_event(const char *path,
+        struct CXLDCExtentRecordList  *records, Error **errp) {}
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index 089ba2091f..3baf745f8d 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -165,4 +165,20 @@ typedef struct CXLEventMemoryModule {
     uint8_t reserved[0x3d];
 } QEMU_PACKED CXLEventMemoryModule;
 
+/*
+ * Dynamic Capacity Event Record
+ * CXL Rev 3.0 Section 8.2.9.2.1.5: Table 8-47
+ * All fields little endian.
+ */
+typedef struct CXLEventDynamicCapacity {
+    CXLEventRecordHdr hdr;
+    uint8_t type;
+    uint8_t reserved1;
+    uint16_t host_id;
+    uint8_t updated_region_id;
+    uint8_t reserved2[3];
+    uint8_t dynamic_capacity_extent[0x28]; /* defined in cxl_device.h */
+    uint8_t reserved[0x20];
+} QEMU_PACKED CXLEventDynamicCapacity;
+
 #endif /* CXL_EVENTS_H */
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 05c560cfe5..fb04ec4c41 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -369,3 +369,52 @@
 ##
 {'command': 'cxl-inject-correctable-error',
  'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
+
+##
+# @CXLDCExtentRecord:
+#
+# Record of a single extent to add/release
+#
+# @region-id: id of the region containing the extent to add/release
+# @dpa: start dpa (in MiB) of the extent, relative to region base address
+# @len: extent size (in MiB)
+#
+# Since: 8.0
+##
+{ 'struct': 'CXLDCExtentRecord',
+  'data': {
+      'region-id': 'uint8',
+      'dpa':'uint64',
+      'len': 'uint64'
+  }
+}
+
+##
+# @cxl-add-dynamic-capacity-event:
+#
+# Command to add dynamic capacity extent event
+#
+# @path: CXL DCD canonical QOM path
+# @extents: Extents to add
+#
+##
+{ 'command': 'cxl-add-dynamic-capacity-event',
+  'data': { 'path': 'str',
+            'extents': [ 'CXLDCExtentRecord' ]
+           }
+}
+
+##
+# @cxl-release-dynamic-capacity-event:
+#
+# Command to release dynamic capacity extent event
+#
+# @path: CXL DCD canonical QOM path
+# @extents: Extents to release
+#
+##
+{ 'command': 'cxl-release-dynamic-capacity-event',
+  'data': { 'path': 'str',
+            'extents': [ 'CXLDCExtentRecord' ]
+           }
+}
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
  2023-07-25 18:39     ` [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions Fan Ni
@ 2023-07-26 12:53       ` Nathan Fontenot
  2023-07-26 16:17         ` nifan
  2023-08-04 16:36         ` Jonathan Cameron via
  1 sibling, 1 reply; 48+ messages in thread
From: Nathan Fontenot @ 2023-07-26 12:53 UTC (permalink / raw)
  To: Fan Ni, qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On 7/25/23 13:39, Fan Ni wrote:
> From: Fan Ni <nifan@outlook.com>
> 
> Add (file/memory backed) host backend, all the dynamic capacity regions
> will share a single, large enough host backend. Set up address space for
> DC regions to support read/write operations to dynamic capacity for DCD.
> 
> With the change, following supports are added:
> 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
>    memory backend for dynamic capacity;
> 2. add namespace for dynamic capacity for read/write support;
> 3. create cdat entries for each dynamic capacity region;
> 4. fix dvsec range registers to include DC regions.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
>  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
>  include/hw/cxl/cxl_device.h |   4 +
>  3 files changed, 185 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index dd5ea95af8..0511b8e6f7 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -388,9 +388,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
>          char fw_rev4[0x10];
>      } QEMU_PACKED *fw_info;
>      QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
> +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>  
>      if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> -        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
> +        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
> +        (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -531,7 +533,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
>  
>      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -566,9 +569,11 @@ static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
>          uint64_t next_pmem;
>      } QEMU_PACKED *part_info = (void *)cmd->payload;
>      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>  
>      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -880,7 +885,13 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      struct clear_poison_pl *in = (void *)cmd->payload;
>  
>      dpa = ldq_le_p(&in->dpa);
> -    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
> +    if (dpa + CXL_CACHE_LINE_SIZE >= cxl_dstate->static_mem_size
> +            && ct3d->dc.num_regions == 0) {
> +        return CXL_MBOX_INVALID_PA;
> +    }
> +
> +    if (ct3d->dc.num_regions && dpa + CXL_CACHE_LINE_SIZE >=
> +            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
>          return CXL_MBOX_INVALID_PA;
>      }
>  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index b29bb2309a..76bbd9f785 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -20,6 +20,7 @@
>  #include "hw/pci/spdm.h"
>  
>  #define DWORD_BYTE 4
> +#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
>  
>  /* Default CDAT entries for a memory region */
>  enum {
> @@ -33,8 +34,8 @@ enum {
>  };
>  
>  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> -                                         int dsmad_handle, MemoryRegion *mr,
> -                                         bool is_pmem, uint64_t dpa_base)
> +        int dsmad_handle, uint8_t flags,
> +        uint64_t dpa_base, uint64_t size)
>  {
>      g_autofree CDATDsmas *dsmas = NULL;
>      g_autofree CDATDslbis *dslbis0 = NULL;
> @@ -53,9 +54,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>              .length = sizeof(*dsmas),
>          },
>          .DSMADhandle = dsmad_handle,
> -        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> +        .flags = flags,
>          .DPA_base = dpa_base,
> -        .DPA_length = memory_region_size(mr),
> +        .DPA_length = size,
>      };
>  
>      /* For now, no memory side cache, plausiblish numbers */
> @@ -137,9 +138,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>           * NV: Reserved - the non volatile from DSMAS matters
>           * V: EFI_MEMORY_SP
>           */
> -        .EFI_memory_type_attr = is_pmem ? 2 : 1,
> +        .EFI_memory_type_attr = flags ? 2 : 1,
>          .DPA_offset = 0,
> -        .DPA_length = memory_region_size(mr),
> +        .DPA_length = size,
>      };
>  
>      /* Header always at start of structure */
> @@ -158,21 +159,28 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>      g_autofree CDATSubHeader **table = NULL;
>      CXLType3Dev *ct3d = priv;
>      MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
> +    MemoryRegion *dc_mr = NULL;
>      int dsmad_handle = 0;
>      int cur_ent = 0;
>      int len = 0;
>      int rc, i;
> +    uint64_t vmr_size = 0, pmr_size = 0;
>  
> -    if (!ct3d->hostpmem && !ct3d->hostvmem) {
> +    if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
>          return 0;
>      }
>  
> +    if (ct3d->hostpmem && ct3d->hostvmem && ct3d->dc.host_dc) {
> +        warn_report("The device has static ram and pmem and dynamic capacity");
> +    }
> +
>      if (ct3d->hostvmem) {
>          volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
>          if (!volatile_mr) {
>              return -EINVAL;
>          }
>          len += CT3_CDAT_NUM_ENTRIES;
> +        vmr_size = volatile_mr->size;
>      }
>  
>      if (ct3d->hostpmem) {
> @@ -181,6 +189,19 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>              return -EINVAL;
>          }
>          len += CT3_CDAT_NUM_ENTRIES;
> +        pmr_size = nonvolatile_mr->size;
> +    }
> +
> +    if (ct3d->dc.num_regions) {
> +        if (ct3d->dc.host_dc) {
> +            dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +            if (!dc_mr) {
> +                return -EINVAL;
> +            }
> +            len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
> +        } else {
> +            return -EINVAL;
> +        }
>      }
>  
>      table = g_malloc0(len * sizeof(*table));
> @@ -190,8 +211,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  
>      /* Now fill them in */
>      if (volatile_mr) {
> -        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> -                                           false, 0);
> +        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
> +                0, 0, vmr_size);
>          if (rc < 0) {
>              return rc;
>          }
> @@ -200,14 +221,37 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  
>      if (nonvolatile_mr) {
>          rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> -                                           nonvolatile_mr, true,
> -                                           (volatile_mr ?
> -                                            memory_region_size(volatile_mr) : 0));
> +                CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);
>          if (rc < 0) {
>              goto error_cleanup;
>          }
>          cur_ent += CT3_CDAT_NUM_ENTRIES;
>      }
> +
> +    if (dc_mr) {
> +        uint64_t region_base = vmr_size + pmr_size;
> +
> +        /*
> +         * Currently we create cdat entries for each region, should we only
> +         * create dsmas table instead??
> +         * We assume all dc regions are non-volatile for now.
> +         *
> +         */
> +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> +            rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
> +                    , dsmad_handle++
> +                    , CDAT_DSMAS_FLAG_NV | CDAT_DSMAS_FLAG_DYNAMIC_CAP
> +                    , region_base, ct3d->dc.regions[i].len);
> +            if (rc < 0) {
> +                goto error_cleanup;
> +            }
> +            ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
> +
> +            cur_ent += CT3_CDAT_NUM_ENTRIES;
> +            region_base += ct3d->dc.regions[i].len;
> +        }
> +    }
> +
>      assert(len == cur_ent);
>  
>      *cdat_table = g_steal_pointer(&table);
> @@ -435,11 +479,24 @@ static void build_dvsecs(CXLType3Dev *ct3d)
>              range2_size_hi = ct3d->hostpmem->size >> 32;
>              range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
>                               (ct3d->hostpmem->size & 0xF0000000);
> +        } else if (ct3d->dc.host_dc) {
> +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->dc.host_dc->size & 0xF0000000);
>          }
> -    } else {
> +    } else if (ct3d->hostpmem) {
>          range1_size_hi = ct3d->hostpmem->size >> 32;
>          range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
>                           (ct3d->hostpmem->size & 0xF0000000);
> +        if (ct3d->dc.host_dc) {
> +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->dc.host_dc->size & 0xF0000000);
> +        }
> +    } else {
> +        range1_size_hi = ct3d->dc.host_dc->size >> 32;
> +        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +            (ct3d->dc.host_dc->size & 0xF0000000);
>      }
>  
>      dvsec = (uint8_t *)&(CXLDVSECDevice){
> @@ -708,7 +765,8 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>  }
>  
>  /*
> - * Create a dc region to test "Get Dynamic Capacity Configuration" command.
> + * Create dc regions.
> + * TODO: region parameters are hard coded, may need to change in the future.
>   */
>  static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>  {
> @@ -739,7 +797,8 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
>  
> -    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> +    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
> +            && !ct3d->dc.num_regions) {
>          error_setg(errp, "at least one memdev property must be set");
>          return false;
>      } else if (ct3d->hostmem && ct3d->hostpmem) {
> @@ -807,6 +866,50 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          return false;
>      }
>  
> +    ct3d->dc.total_capacity = 0;
> +    if (ct3d->dc.host_dc) {
> +        MemoryRegion *dc_mr;
> +        char *dc_name;
> +        uint64_t total_region_size = 0;
> +        int i;
> +
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        if (!dc_mr) {
> +            error_setg(errp, "dynamic capacity must have backing device");
> +            return false;
> +        }
> +        /* FIXME: set dc as nonvolatile for now */
> +        memory_region_set_nonvolatile(dc_mr, true);
> +        memory_region_set_enabled(dc_mr, true);
> +        host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
> +        if (ds->id) {
> +            dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
> +        } else {
> +            dc_name = g_strdup("cxl-dcd-dpa-dc-space");
> +        }
> +        address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
> +
> +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> +            total_region_size += ct3d->dc.regions[i].len;
> +        }
> +        /* Make sure the host backend is large enough to cover all dc range */
> +        if (total_region_size > memory_region_size(dc_mr)) {
> +            error_setg(errp,
> +                "too small host backend size, increase to %lu MiB or more",
> +                total_region_size / 1024 / 1024);
> +            return false;
> +        }
> +
> +        if (dc_mr->size % CXL_CAPACITY_MULTIPLIER != 0) {
> +            error_setg(errp, "DC region size is unaligned to %lx",
> +                    CXL_CAPACITY_MULTIPLIER);
> +            return false;
> +        }
> +
> +        ct3d->dc.total_capacity = total_region_size;
> +        g_free(dc_name);

Shouldn't dc_name also be freed in the two places above where you return
false?

I think you could just free it right after the call to address_space_init().

-Nathan

> +    }
> +
>      return true;
>  }
>  
> @@ -916,6 +1019,9 @@ err_release_cdat:
>  err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
> +    if (ct3d->dc.host_dc) {
> +        address_space_destroy(&ct3d->dc.host_dc_as);
> +    }
>      if (ct3d->hostpmem) {
>          address_space_destroy(&ct3d->hostpmem_as);
>      }
> @@ -935,6 +1041,9 @@ static void ct3_exit(PCIDevice *pci_dev)
>      cxl_doe_cdat_release(cxl_cstate);
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
> +    if (ct3d->dc.host_dc) {
> +        address_space_destroy(&ct3d->dc.host_dc_as);
> +    }
>      if (ct3d->hostpmem) {
>          address_space_destroy(&ct3d->hostpmem_as);
>      }
> @@ -999,16 +1108,24 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>                                         AddressSpace **as,
>                                         uint64_t *dpa_offset)
>  {
> -    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
>  
>      if (ct3d->hostvmem) {
>          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        vmr_size = memory_region_size(vmr);
>      }
>      if (ct3d->hostpmem) {
>          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        pmr_size = memory_region_size(pmr);
> +    }
> +    if (ct3d->dc.host_dc) {
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        /* Do we want dc_size to be dc_mr->size or not?? */
> +        dc_size = ct3d->dc.total_capacity;
>      }
>  
> -    if (!vmr && !pmr) {
> +    if (!vmr && !pmr && !dc_mr) {
>          return -ENODEV;
>      }
>  
> @@ -1016,19 +1133,19 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          return -EINVAL;
>      }
>  
> -    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
> +    if ((*dpa_offset >= vmr_size + pmr_size + dc_size) ||
> +       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
>          return -EINVAL;
>      }
>  
> -    if (vmr) {
> -        if (*dpa_offset < memory_region_size(vmr)) {
> -            *as = &ct3d->hostvmem_as;
> -        } else {
> -            *as = &ct3d->hostpmem_as;
> -            *dpa_offset -= memory_region_size(vmr);
> -        }
> -    } else {
> +    if (*dpa_offset < vmr_size) {
> +        *as = &ct3d->hostvmem_as;
> +    } else if (*dpa_offset < vmr_size + pmr_size) {
>          *as = &ct3d->hostpmem_as;
> +        *dpa_offset -= vmr_size;
> +    } else {
> +        *as = &ct3d->dc.host_dc_as;
> +        *dpa_offset -= (vmr_size + pmr_size);
>      }
>  
>      return 0;
> @@ -1101,6 +1218,8 @@ static Property ct3_props[] = {
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
>      DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
> +    DEFINE_PROP_LINK("nonvolatile-dc-memdev", CXLType3Dev, dc.host_dc,
> +                    TYPE_MEMORY_BACKEND, HostMemoryBackend *),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -1167,33 +1286,43 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>  
>  static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
>  {
> -    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
>      AddressSpace *as;
> +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
>  
>      if (ct3d->hostvmem) {
>          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        vmr_size = memory_region_size(vmr);
>      }
>      if (ct3d->hostpmem) {
>          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        pmr_size = memory_region_size(pmr);
>      }
> +    if (ct3d->dc.host_dc) {
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        dc_size = ct3d->dc.total_capacity;
> +     }
>  
> -    if (!vmr && !pmr) {
> +    if (!vmr && !pmr && !dc_mr) {
>          return false;
>      }
>  
> -    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
> +    if (dpa_offset >= vmr_size + pmr_size + dc_size) {
> +        return false;
> +    }
> +    if (dpa_offset + CXL_CACHE_LINE_SIZE >= vmr_size + pmr_size
> +            && ct3d->dc.num_regions == 0) {
>          return false;
>      }
>  
> -    if (vmr) {
> -        if (dpa_offset < memory_region_size(vmr)) {
> -            as = &ct3d->hostvmem_as;
> -        } else {
> -            as = &ct3d->hostpmem_as;
> -            dpa_offset -= memory_region_size(vmr);
> -        }
> -    } else {
> +    if (dpa_offset < vmr_size) {
> +        as = &ct3d->hostvmem_as;
> +    } else if (dpa_offset < vmr_size + pmr_size) {
>          as = &ct3d->hostpmem_as;
> +        dpa_offset -= vmr->size;
> +    } else {
> +        as = &ct3d->dc.host_dc_as;
> +        dpa_offset -= (vmr_size + pmr_size);
>      }
>  
>      address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 503c344326..1c99b05a66 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -427,6 +427,10 @@ struct CXLType3Dev {
>      uint64_t poison_list_overflow_ts;
>  
>      struct dynamic_capacity {
> +        HostMemoryBackend *host_dc;
> +        AddressSpace host_dc_as;
> +        uint64_t total_capacity; /* 256M aligned */
> +
>          uint8_t num_regions; /* 0-8 regions */
>          struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
>      } dc;

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
  2023-07-26 12:53       ` Nathan Fontenot
@ 2023-07-26 16:17         ` nifan
  0 siblings, 0 replies; 48+ messages in thread
From: nifan @ 2023-07-26 16:17 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Fan Ni, qemu-devel, jonathan.cameron, linux-cxl, gregory.price,
	hchkuo, cbrowy, ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung

The 07/26/2023 07:53, Nathan Fontenot wrote:
> On 7/25/23 13:39, Fan Ni wrote:
> > From: Fan Ni <nifan@outlook.com>
> > 
> > Add (file/memory backed) host backend, all the dynamic capacity regions
> > will share a single, large enough host backend. Set up address space for
> > DC regions to support read/write operations to dynamic capacity for DCD.
> > 
> > With the change, following supports are added:
> > 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
> >    memory backend for dynamic capacity;
> > 2. add namespace for dynamic capacity for read/write support;
> > 3. create cdat entries for each dynamic capacity region;
> > 4. fix dvsec range registers to include DC regions.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > ---
> >  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
> >  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
> >  include/hw/cxl/cxl_device.h |   4 +
> >  3 files changed, 185 insertions(+), 41 deletions(-)
> > 
> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> > index dd5ea95af8..0511b8e6f7 100644
> > --- a/hw/cxl/cxl-mailbox-utils.c
> > +++ b/hw/cxl/cxl-mailbox-utils.c
> > @@ -388,9 +388,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
> >          char fw_rev4[0x10];
> >      } QEMU_PACKED *fw_info;
> >      QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
> > +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
> >  
> >      if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> > -        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
> > +        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
> > +        (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> >  
> > @@ -531,7 +533,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
> >      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> >  
> >      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> > +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> >  
> > @@ -566,9 +569,11 @@ static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
> >          uint64_t next_pmem;
> >      } QEMU_PACKED *part_info = (void *)cmd->payload;
> >      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> > +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
> >  
> >      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> > +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> > +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> >  
> > @@ -880,7 +885,13 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
> >      struct clear_poison_pl *in = (void *)cmd->payload;
> >  
> >      dpa = ldq_le_p(&in->dpa);
> > -    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
> > +    if (dpa + CXL_CACHE_LINE_SIZE >= cxl_dstate->static_mem_size
> > +            && ct3d->dc.num_regions == 0) {
> > +        return CXL_MBOX_INVALID_PA;
> > +    }
> > +
> > +    if (ct3d->dc.num_regions && dpa + CXL_CACHE_LINE_SIZE >=
> > +            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
> >          return CXL_MBOX_INVALID_PA;
> >      }
> >  
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index b29bb2309a..76bbd9f785 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -20,6 +20,7 @@
> >  #include "hw/pci/spdm.h"
> >  
> >  #define DWORD_BYTE 4
> > +#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
> >  
> >  /* Default CDAT entries for a memory region */
> >  enum {
> > @@ -33,8 +34,8 @@ enum {
> >  };
> >  
> >  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> > -                                         int dsmad_handle, MemoryRegion *mr,
> > -                                         bool is_pmem, uint64_t dpa_base)
> > +        int dsmad_handle, uint8_t flags,
> > +        uint64_t dpa_base, uint64_t size)
> >  {
> >      g_autofree CDATDsmas *dsmas = NULL;
> >      g_autofree CDATDslbis *dslbis0 = NULL;
> > @@ -53,9 +54,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> >              .length = sizeof(*dsmas),
> >          },
> >          .DSMADhandle = dsmad_handle,
> > -        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> > +        .flags = flags,
> >          .DPA_base = dpa_base,
> > -        .DPA_length = memory_region_size(mr),
> > +        .DPA_length = size,
> >      };
> >  
> >      /* For now, no memory side cache, plausiblish numbers */
> > @@ -137,9 +138,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> >           * NV: Reserved - the non volatile from DSMAS matters
> >           * V: EFI_MEMORY_SP
> >           */
> > -        .EFI_memory_type_attr = is_pmem ? 2 : 1,
> > +        .EFI_memory_type_attr = flags ? 2 : 1,
> >          .DPA_offset = 0,
> > -        .DPA_length = memory_region_size(mr),
> > +        .DPA_length = size,
> >      };
> >  
> >      /* Header always at start of structure */
> > @@ -158,21 +159,28 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >      g_autofree CDATSubHeader **table = NULL;
> >      CXLType3Dev *ct3d = priv;
> >      MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
> > +    MemoryRegion *dc_mr = NULL;
> >      int dsmad_handle = 0;
> >      int cur_ent = 0;
> >      int len = 0;
> >      int rc, i;
> > +    uint64_t vmr_size = 0, pmr_size = 0;
> >  
> > -    if (!ct3d->hostpmem && !ct3d->hostvmem) {
> > +    if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
> >          return 0;
> >      }
> >  
> > +    if (ct3d->hostpmem && ct3d->hostvmem && ct3d->dc.host_dc) {
> > +        warn_report("The device has static ram and pmem and dynamic capacity");
> > +    }
> > +
> >      if (ct3d->hostvmem) {
> >          volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
> >          if (!volatile_mr) {
> >              return -EINVAL;
> >          }
> >          len += CT3_CDAT_NUM_ENTRIES;
> > +        vmr_size = volatile_mr->size;
> >      }
> >  
> >      if (ct3d->hostpmem) {
> > @@ -181,6 +189,19 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >              return -EINVAL;
> >          }
> >          len += CT3_CDAT_NUM_ENTRIES;
> > +        pmr_size = nonvolatile_mr->size;
> > +    }
> > +
> > +    if (ct3d->dc.num_regions) {
> > +        if (ct3d->dc.host_dc) {
> > +            dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +            if (!dc_mr) {
> > +                return -EINVAL;
> > +            }
> > +            len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
> > +        } else {
> > +            return -EINVAL;
> > +        }
> >      }
> >  
> >      table = g_malloc0(len * sizeof(*table));
> > @@ -190,8 +211,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >  
> >      /* Now fill them in */
> >      if (volatile_mr) {
> > -        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> > -                                           false, 0);
> > +        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
> > +                0, 0, vmr_size);
> >          if (rc < 0) {
> >              return rc;
> >          }
> > @@ -200,14 +221,37 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
> >  
> >      if (nonvolatile_mr) {
> >          rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> > -                                           nonvolatile_mr, true,
> > -                                           (volatile_mr ?
> > -                                            memory_region_size(volatile_mr) : 0));
> > +                CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);
> >          if (rc < 0) {
> >              goto error_cleanup;
> >          }
> >          cur_ent += CT3_CDAT_NUM_ENTRIES;
> >      }
> > +
> > +    if (dc_mr) {
> > +        uint64_t region_base = vmr_size + pmr_size;
> > +
> > +        /*
> > +         * Currently we create cdat entries for each region, should we only
> > +         * create dsmas table instead??
> > +         * We assume all dc regions are non-volatile for now.
> > +         *
> > +         */
> > +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +            rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
> > +                    , dsmad_handle++
> > +                    , CDAT_DSMAS_FLAG_NV | CDAT_DSMAS_FLAG_DYNAMIC_CAP
> > +                    , region_base, ct3d->dc.regions[i].len);
> > +            if (rc < 0) {
> > +                goto error_cleanup;
> > +            }
> > +            ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
> > +
> > +            cur_ent += CT3_CDAT_NUM_ENTRIES;
> > +            region_base += ct3d->dc.regions[i].len;
> > +        }
> > +    }
> > +
> >      assert(len == cur_ent);
> >  
> >      *cdat_table = g_steal_pointer(&table);
> > @@ -435,11 +479,24 @@ static void build_dvsecs(CXLType3Dev *ct3d)
> >              range2_size_hi = ct3d->hostpmem->size >> 32;
> >              range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> >                               (ct3d->hostpmem->size & 0xF0000000);
> > +        } else if (ct3d->dc.host_dc) {
> > +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> > +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> > +                             (ct3d->dc.host_dc->size & 0xF0000000);
> >          }
> > -    } else {
> > +    } else if (ct3d->hostpmem) {
> >          range1_size_hi = ct3d->hostpmem->size >> 32;
> >          range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> >                           (ct3d->hostpmem->size & 0xF0000000);
> > +        if (ct3d->dc.host_dc) {
> > +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> > +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> > +                             (ct3d->dc.host_dc->size & 0xF0000000);
> > +        }
> > +    } else {
> > +        range1_size_hi = ct3d->dc.host_dc->size >> 32;
> > +        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> > +            (ct3d->dc.host_dc->size & 0xF0000000);
> >      }
> >  
> >      dvsec = (uint8_t *)&(CXLDVSECDevice){
> > @@ -708,7 +765,8 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
> >  }
> >  
> >  /*
> > - * Create a dc region to test "Get Dynamic Capacity Configuration" command.
> > + * Create dc regions.
> > + * TODO: region parameters are hard coded, may need to change in the future.
> >   */
> >  static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >  {
> > @@ -739,7 +797,8 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> >  
> > -    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> > +    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
> > +            && !ct3d->dc.num_regions) {
> >          error_setg(errp, "at least one memdev property must be set");
> >          return false;
> >      } else if (ct3d->hostmem && ct3d->hostpmem) {
> > @@ -807,6 +866,50 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >          return false;
> >      }
> >  
> > +    ct3d->dc.total_capacity = 0;
> > +    if (ct3d->dc.host_dc) {
> > +        MemoryRegion *dc_mr;
> > +        char *dc_name;
> > +        uint64_t total_region_size = 0;
> > +        int i;
> > +
> > +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +        if (!dc_mr) {
> > +            error_setg(errp, "dynamic capacity must have backing device");
> > +            return false;
> > +        }
> > +        /* FIXME: set dc as nonvolatile for now */
> > +        memory_region_set_nonvolatile(dc_mr, true);
> > +        memory_region_set_enabled(dc_mr, true);
> > +        host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
> > +        if (ds->id) {
> > +            dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
> > +        } else {
> > +            dc_name = g_strdup("cxl-dcd-dpa-dc-space");
> > +        }
> > +        address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
> > +
> > +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +            total_region_size += ct3d->dc.regions[i].len;
> > +        }
> > +        /* Make sure the host backend is large enough to cover all dc range */
> > +        if (total_region_size > memory_region_size(dc_mr)) {
> > +            error_setg(errp,
> > +                "too small host backend size, increase to %lu MiB or more",
> > +                total_region_size / 1024 / 1024);
> > +            return false;
> > +        }
> > +
> > +        if (dc_mr->size % CXL_CAPACITY_MULTIPLIER != 0) {
> > +            error_setg(errp, "DC region size is unaligned to %lx",
> > +                    CXL_CAPACITY_MULTIPLIER);
> > +            return false;
> > +        }
> > +
> > +        ct3d->dc.total_capacity = total_region_size;
> > +        g_free(dc_name);
> 
> Shouldn't dc_name also be freed in the two places above where you return
> false?
> 
> I think you could just free it after the call to address_space_init().
> 
> -Nathan

Makes sense. Will fix in the next version. Thanks.

-Fan

> 
> > +    }
> > +
> >      return true;
> >  }
> >  
> > @@ -916,6 +1019,9 @@ err_release_cdat:
> >  err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> > +    if (ct3d->dc.host_dc) {
> > +        address_space_destroy(&ct3d->dc.host_dc_as);
> > +    }
> >      if (ct3d->hostpmem) {
> >          address_space_destroy(&ct3d->hostpmem_as);
> >      }
> > @@ -935,6 +1041,9 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      cxl_doe_cdat_release(cxl_cstate);
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> > +    if (ct3d->dc.host_dc) {
> > +        address_space_destroy(&ct3d->dc.host_dc_as);
> > +    }
> >      if (ct3d->hostpmem) {
> >          address_space_destroy(&ct3d->hostpmem_as);
> >      }
> > @@ -999,16 +1108,24 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >                                         AddressSpace **as,
> >                                         uint64_t *dpa_offset)
> >  {
> > -    MemoryRegion *vmr = NULL, *pmr = NULL;
> > +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> > +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
> >  
> >      if (ct3d->hostvmem) {
> >          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> > +        vmr_size = memory_region_size(vmr);
> >      }
> >      if (ct3d->hostpmem) {
> >          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> > +        pmr_size = memory_region_size(pmr);
> > +    }
> > +    if (ct3d->dc.host_dc) {
> > +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +        /* Do we want dc_size to be dc_mr->size or not?? */
> > +        dc_size = ct3d->dc.total_capacity;
> >      }
> >  
> > -    if (!vmr && !pmr) {
> > +    if (!vmr && !pmr && !dc_mr) {
> >          return -ENODEV;
> >      }
> >  
> > @@ -1016,19 +1133,19 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          return -EINVAL;
> >      }
> >  
> > -    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
> > +    if ((*dpa_offset >= vmr_size + pmr_size + dc_size) ||
> > +       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
> >          return -EINVAL;
> >      }
> >  
> > -    if (vmr) {
> > -        if (*dpa_offset < memory_region_size(vmr)) {
> > -            *as = &ct3d->hostvmem_as;
> > -        } else {
> > -            *as = &ct3d->hostpmem_as;
> > -            *dpa_offset -= memory_region_size(vmr);
> > -        }
> > -    } else {
> > +    if (*dpa_offset < vmr_size) {
> > +        *as = &ct3d->hostvmem_as;
> > +    } else if (*dpa_offset < vmr_size + pmr_size) {
> >          *as = &ct3d->hostpmem_as;
> > +        *dpa_offset -= vmr_size;
> > +    } else {
> > +        *as = &ct3d->dc.host_dc_as;
> > +        *dpa_offset -= (vmr_size + pmr_size);
> >      }
> >  
> >      return 0;
> > @@ -1101,6 +1218,8 @@ static Property ct3_props[] = {
> >      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
> >      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
> >      DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
> > +    DEFINE_PROP_LINK("nonvolatile-dc-memdev", CXLType3Dev, dc.host_dc,
> > +                    TYPE_MEMORY_BACKEND, HostMemoryBackend *),
> >      DEFINE_PROP_END_OF_LIST(),
> >  };
> >  
> > @@ -1167,33 +1286,43 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
> >  
> >  static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
> >  {
> > -    MemoryRegion *vmr = NULL, *pmr = NULL;
> > +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> >      AddressSpace *as;
> > +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
> >  
> >      if (ct3d->hostvmem) {
> >          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> > +        vmr_size = memory_region_size(vmr);
> >      }
> >      if (ct3d->hostpmem) {
> >          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> > +        pmr_size = memory_region_size(pmr);
> >      }
> > +    if (ct3d->dc.host_dc) {
> > +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> > +        dc_size = ct3d->dc.total_capacity;
> > +     }
> >  
> > -    if (!vmr && !pmr) {
> > +    if (!vmr && !pmr && !dc_mr) {
> >          return false;
> >      }
> >  
> > -    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
> > +    if (dpa_offset >= vmr_size + pmr_size + dc_size) {
> > +        return false;
> > +    }
> > +    if (dpa_offset + CXL_CACHE_LINE_SIZE >= vmr_size + pmr_size
> > +            && ct3d->dc.num_regions == 0) {
> >          return false;
> >      }
> >  
> > -    if (vmr) {
> > -        if (dpa_offset < memory_region_size(vmr)) {
> > -            as = &ct3d->hostvmem_as;
> > -        } else {
> > -            as = &ct3d->hostpmem_as;
> > -            dpa_offset -= memory_region_size(vmr);
> > -        }
> > -    } else {
> > +    if (dpa_offset < vmr_size) {
> > +        as = &ct3d->hostvmem_as;
> > +    } else if (dpa_offset < vmr_size + pmr_size) {
> >          as = &ct3d->hostpmem_as;
> > +        dpa_offset -= vmr->size;
> > +    } else {
> > +        as = &ct3d->dc.host_dc_as;
> > +        dpa_offset -= (vmr_size + pmr_size);
> >      }
> >  
> >      address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 503c344326..1c99b05a66 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -427,6 +427,10 @@ struct CXLType3Dev {
> >      uint64_t poison_list_overflow_ts;
> >  
> >      struct dynamic_capacity {
> > +        HostMemoryBackend *host_dc;
> > +        AddressSpace host_dc_as;
> > +        uint64_t total_capacity; /* 256M aligned */
> > +
> >          uint8_t num_regions; /* 0-8 regions */
> >          struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
> >      } dc;

-- 
Fan Ni <nifan@outlook.com>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command
  2023-07-25 18:39     ` [Qemu PATCH v2 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command Fan Ni
@ 2023-08-04 14:19         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-04 14:19 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Based on CXL spec 3.0 Table 8-94 (Identify Memory Device Output
> Payload), dynamic capacity event log size should be part of
> output of the Identify command.
> Add dc_event_log_size to the output payload for the host to get the info.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index ad7a6116e4..b013e30314 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -21,6 +21,8 @@
>  #include "sysemu/hostmem.h"
>  
>  #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
> +/* Experimental value: dynamic capacity event log size */

It's got to have some value.  Not sure why this is 'experimental'.
As such I would drop the comment as not helpful.

> +#define CXL_DC_EVENT_LOG_SIZE 8
>  
>  /*
>   * How to add a new command, example. The command set FOO, with cmd BAR.
> @@ -519,8 +521,9 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>          uint16_t inject_poison_limit;
>          uint8_t poison_caps;
>          uint8_t qos_telemetry_caps;
> +        uint16_t dc_event_log_size;
>      } QEMU_PACKED *id;
> -    QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
> +    QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
>  
>      CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> @@ -543,6 +546,7 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>      st24_le_p(id->poison_list_max_mer, 256);
>      /* No limit - so limited by main poison record limit */
>      stw_le_p(&id->inject_poison_limit, 0);
> +    stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
>  
>      *len = sizeof(*id);
>      return CXL_MBOX_SUCCESS;


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command
@ 2023-08-04 14:19         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-04 14:19 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Based on CXL spec 3.0 Table 8-94 (Identify Memory Device Output
> Payload), dynamic capacity event log size should be part of
> output of the Identify command.
> Add dc_event_log_size to the output payload for the host to get the info.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index ad7a6116e4..b013e30314 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -21,6 +21,8 @@
>  #include "sysemu/hostmem.h"
>  
>  #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
> +/* Experimental value: dynamic capacity event log size */

It's got to have some value.  Not sure why this is 'experimental'.
As such I would drop the comment as not helpful.

> +#define CXL_DC_EVENT_LOG_SIZE 8
>  
>  /*
>   * How to add a new command, example. The command set FOO, with cmd BAR.
> @@ -519,8 +521,9 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>          uint16_t inject_poison_limit;
>          uint8_t poison_caps;
>          uint8_t qos_telemetry_caps;
> +        uint16_t dc_event_log_size;
>      } QEMU_PACKED *id;
> -    QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
> +    QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
>  
>      CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> @@ -543,6 +546,7 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>      st24_le_p(id->poison_list_max_mer, 256);
>      /* No limit - so limited by main poison record limit */
>      stw_le_p(&id->inject_poison_limit, 0);
> +    stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
>  
>      *len = sizeof(*id);
>      return CXL_MBOX_SUCCESS;



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support
  2023-07-25 18:39     ` [Qemu PATCH v2 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support Fan Ni
@ 2023-08-04 15:24         ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-04 15:24 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
You should fix the author on these to match the SoB

git commit --amend --author="Fan Ni <fan.ni@samsung.com>" fixes them.

I'm trying to pick these up for my tree as I review them (where they aren't
invasive).  So I'll fix this stuff whilst doing so.

I'm applying on top of the mailbox rework.  Whilst some of that
may take a while to land I think we can move the generalization code
forwards fairly soon and that changes the command functions etc to not
assume as much.

> 
> Per cxl spec 3.0, add dynamic capacity region representative based on
> Table 8-126 and extend the cxl type3 device definition to include dc region
> information. Also, based on info in 8.2.9.8.9.1, add 'Get Dynamic Capacity
> Configuration' mailbox support.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c  | 72 +++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          |  6 ++++
>  include/hw/cxl/cxl_device.h | 17 +++++++++
>  3 files changed, 95 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index b013e30314..0fe9f3eb5d 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -81,6 +81,8 @@ enum {
>          #define GET_POISON_LIST        0x0
>          #define INJECT_POISON          0x1
>          #define CLEAR_POISON           0x2
> +    DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/

I'd not document the section here (as otherwise this list will get very messy).

> +        #define GET_DC_CONFIG          0x0
>      PHYSICAL_SWITCH = 0x51
>          #define IDENTIFY_SWITCH_DEVICE      0x0
>  };
> @@ -939,6 +941,71 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.1
> + * Get Dynamic Capacity Configuration

Here is a good place for documentation.  I'm trying to slowly standardize these
references though (might do a blanket fixup if I get time next week).
My current preference is:

 * CXL r3.0 section 8.2.9.8.9.1: Get Dynamic Capacity Configuration
 * (Opcode 4800h)

Long winded but should avoid us being forced to update this for CXL.next when
it's released (and it becomes hard to get old specs)

> + **/
> +static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len)
> +{
> +    struct get_dyn_cap_config_in_pl {
> +        uint8_t region_cnt;
> +        uint8_t start_region_id;
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_config_out_pl {
> +        uint8_t num_regions;
> +        uint8_t rsvd1[7];
> +        struct {
> +            uint64_t base;
> +            uint64_t decode_len;
> +            uint64_t region_len;
> +            uint64_t block_size;
> +            uint32_t dsmadhandle;
> +            uint8_t flags;
> +            uint8_t rsvd2[3];
> +        } QEMU_PACKED records[];
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_config_in_pl *in = (void *)cmd->payload;
> +    struct get_dyn_cap_config_out_pl *out = (void *)cmd->payload;

I've reworked this stuff to take into account the changes in the cci
reworking patch set. That changes the parameter etc., but it's not too hard
to rebase this.

> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    uint16_t record_count = 0, i;
> +    uint16_t out_pl_len;
> +    uint8_t start_region_id = in->start_region_id;
> +
> +    if (start_region_id >= ct3d->dc.num_regions) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
> +            in->region_cnt);
> +
> +    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> +    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> +
> +    memset(out, 0, out_pl_len);
> +    out->num_regions = record_count;
> +    for (i = 0; i < record_count; i++) {
> +        stq_le_p(&out->records[i].base,
> +                ct3d->dc.regions[start_region_id + i].base);
> +        stq_le_p(&out->records[i].decode_len,
> +                ct3d->dc.regions[start_region_id + i].decode_len);
> +        stq_le_p(&out->records[i].region_len,
> +                ct3d->dc.regions[start_region_id + i].len);
> +        stq_le_p(&out->records[i].block_size,
> +                ct3d->dc.regions[start_region_id + i].block_size);
> +        stl_le_p(&out->records[i].dsmadhandle,
> +                ct3d->dc.regions[start_region_id + i].dsmadhandle);
> +        out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
> +    }
> +
> +    *len = out_pl_len;
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -977,6 +1044,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>          cmd_media_inject_poison, 8, 0 },
>      [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
>          cmd_media_clear_poison, 72, 0 },
> +    [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
> +        cmd_dcd_get_dyn_cap_config, 2, 0 },
>  };
>  
>  static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> @@ -1164,6 +1233,9 @@ void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate, bool switch_cci)
>      }
>      for (int set = 0; set < 256; set++) {
>          for (int cmd = 0; cmd < 256; cmd++) {
> +            if (!cxl_dstate->is_dcd && set == DCD_CONFIG) {
> +                continue;
> +            }
Hmm. This doesn't work any more, as at the level of this function we now have
a cci rather than the device state.

I've tried dropping in a version of Gregory's dynamic command registration
so we only register the DCD commands if we have enabled them.

>              if (cxl_dstate->cxl_cmd_set[set][cmd].handler) {
>                  struct cxl_cmd *c = &cxl_dstate->cxl_cmd_set[set][cmd];
>                  struct cel_log *log =
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 862107c5ef..4d68824dfe 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1046,6 +1046,12 @@ static void ct3d_reset(DeviceState *dev)
>      uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
>      uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
> +    if (ct3d->dc.num_regions) {
> +        ct3d->cxl_dstate.is_dcd = true;
> +    } else {
> +        ct3d->cxl_dstate.is_dcd = false;
> +    }
> +
>      cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
>      cxl_device_register_init_common(&ct3d->cxl_dstate);
>  }
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index cd7f28dba8..dae39da438 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -212,6 +212,7 @@ typedef struct cxl_device_state {
>      uint64_t mem_size;
>      uint64_t pmem_size;
>      uint64_t vmem_size;
> +    bool is_dcd;
>  
>      struct cxl_cmd (*cxl_cmd_set)[256];
>      CPMUState cpmu[CXL_NUM_CPMU_INSTANCES];
> @@ -382,6 +383,17 @@ typedef struct CXLPoison {
>  typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
>  #define CXL_POISON_LIST_LIMIT 256
>  
> +#define DCD_MAX_REGION_NUM 8
> +
> +typedef struct CXLDCD_Region {
> +    uint64_t base;
> +    uint64_t decode_len; /* in multiples of 256MB */
> +    uint64_t len;
> +    uint64_t block_size;
> +    uint32_t dsmadhandle;
> +    uint8_t flags;
> +} CXLDCD_Region;

Whilst we aren't always strictly in line with QEMU naming conventions,
to match local style we should at least have CXLDCDRegion.

> +
>  struct CXLType3Dev {
>      /* Private */
>      PCIDevice parent_obj;
> @@ -413,6 +425,11 @@ struct CXLType3Dev {
>      unsigned int poison_list_cnt;
>      bool poison_list_overflowed;
>      uint64_t poison_list_overflow_ts;
> +
> +    struct dynamic_capacity {
> +        uint8_t num_regions; /* 0-8 regions */
> +        struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
Typedef above, so
	   CXLDCDRegion regions[...]

> +    } dc;
>  };
>  
>  #define TYPE_CXL_TYPE3 "cxl-type3"



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support
@ 2023-08-04 15:24         ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-04 15:24 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
You should fix the author on these to match the SoB

git commit --amend --author="Fan Ni <fan.ni@samsung.com>" fixes them.

I'm trying to pick these up for my tree as I review them (where they aren't
invasive).  So I'll fix this stuff whilst doing so.

I'm applying on top of the mailbox rework.  Whilst some of that
may take a while to land I think we can move the generalization code
forwards fairly soon and that changes the command functions etc to not
assume as much.

> 
> Per cxl spec 3.0, add dynamic capacity region representative based on
> Table 8-126 and extend the cxl type3 device definition to include dc region
> information. Also, based on info in 8.2.9.8.9.1, add 'Get Dynamic Capacity
> Configuration' mailbox support.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c  | 72 +++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          |  6 ++++
>  include/hw/cxl/cxl_device.h | 17 +++++++++
>  3 files changed, 95 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index b013e30314..0fe9f3eb5d 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -81,6 +81,8 @@ enum {
>          #define GET_POISON_LIST        0x0
>          #define INJECT_POISON          0x1
>          #define CLEAR_POISON           0x2
> +    DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/

I'd not document the section here (as otherwise this list will get very messy).

> +        #define GET_DC_CONFIG          0x0
>      PHYSICAL_SWITCH = 0x51
>          #define IDENTIFY_SWITCH_DEVICE      0x0
>  };
> @@ -939,6 +941,71 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.1
> + * Get Dynamic Capacity Configuration

Here is a good place for documentation.  I'm trying to slowly standardize these
references though (might do a blanket fixup if I get time next week).
My current preference is:

 * CXL r3.0 section 8.2.9.8.9.1: Get Dynamic Capacity Configuration
 * (Opcode 4800h)

Long winded but should avoid us being forced to update this for CXL.next when
it's released (and it becomes hard to get old specs)

> + **/
> +static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len)
> +{
> +    struct get_dyn_cap_config_in_pl {
> +        uint8_t region_cnt;
> +        uint8_t start_region_id;
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_config_out_pl {
> +        uint8_t num_regions;
> +        uint8_t rsvd1[7];
> +        struct {
> +            uint64_t base;
> +            uint64_t decode_len;
> +            uint64_t region_len;
> +            uint64_t block_size;
> +            uint32_t dsmadhandle;
> +            uint8_t flags;
> +            uint8_t rsvd2[3];
> +        } QEMU_PACKED records[];
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_config_in_pl *in = (void *)cmd->payload;
> +    struct get_dyn_cap_config_out_pl *out = (void *)cmd->payload;

I've reworked this stuff to take into account the changes in the cci
reworking patch set. Changes the parameter etc but not too hard to 
rebase this.

> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    uint16_t record_count = 0, i;
> +    uint16_t out_pl_len;
> +    uint8_t start_region_id = in->start_region_id;
> +
> +    if (start_region_id >= ct3d->dc.num_regions) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
> +            in->region_cnt);
> +
> +    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> +    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> +
> +    memset(out, 0, out_pl_len);
> +    out->num_regions = record_count;
> +    for (i = 0; i < record_count; i++) {
> +        stq_le_p(&out->records[i].base,
> +                ct3d->dc.regions[start_region_id + i].base);
> +        stq_le_p(&out->records[i].decode_len,
> +                ct3d->dc.regions[start_region_id + i].decode_len);
> +        stq_le_p(&out->records[i].region_len,
> +                ct3d->dc.regions[start_region_id + i].len);
> +        stq_le_p(&out->records[i].block_size,
> +                ct3d->dc.regions[start_region_id + i].block_size);
> +        stl_le_p(&out->records[i].dsmadhandle,
> +                ct3d->dc.regions[start_region_id + i].dsmadhandle);
> +        out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
> +    }
> +
> +    *len = out_pl_len;
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -977,6 +1044,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>          cmd_media_inject_poison, 8, 0 },
>      [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
>          cmd_media_clear_poison, 72, 0 },
> +    [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
> +        cmd_dcd_get_dyn_cap_config, 2, 0 },
>  };
>  
>  static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> @@ -1164,6 +1233,9 @@ void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate, bool switch_cci)
>      }
>      for (int set = 0; set < 256; set++) {
>          for (int cmd = 0; cmd < 256; cmd++) {
> +            if (!cxl_dstate->is_dcd && set == DCD_CONFIG) {
> +                continue;
> +            }
Hmm. This doesn't work any more as at the level of this function we now have
a cci rather than the device state.

I've tried dropping in a version of Gregory's dynamic command registration
so we only register the DCD commands if we have enabled them.

>              if (cxl_dstate->cxl_cmd_set[set][cmd].handler) {
>                  struct cxl_cmd *c = &cxl_dstate->cxl_cmd_set[set][cmd];
>                  struct cel_log *log =
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 862107c5ef..4d68824dfe 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1046,6 +1046,12 @@ static void ct3d_reset(DeviceState *dev)
>      uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
>      uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
> +    if (ct3d->dc.num_regions) {
> +        ct3d->cxl_dstate.is_dcd = true;
> +    } else {
> +        ct3d->cxl_dstate.is_dcd = false;
> +    }
> +
>      cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
>      cxl_device_register_init_common(&ct3d->cxl_dstate);
>  }
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index cd7f28dba8..dae39da438 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -212,6 +212,7 @@ typedef struct cxl_device_state {
>      uint64_t mem_size;
>      uint64_t pmem_size;
>      uint64_t vmem_size;
> +    bool is_dcd;
>  
>      struct cxl_cmd (*cxl_cmd_set)[256];
>      CPMUState cpmu[CXL_NUM_CPMU_INSTANCES];
> @@ -382,6 +383,17 @@ typedef struct CXLPoison {
>  typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
>  #define CXL_POISON_LIST_LIMIT 256
>  
> +#define DCD_MAX_REGION_NUM 8
> +
> +typedef struct CXLDCD_Region {
> +    uint64_t base;
> +    uint64_t decode_len; /* in multiples of 256MB */
> +    uint64_t len;
> +    uint64_t block_size;
> +    uint32_t dsmadhandle;
> +    uint8_t flags;
> +} CXLDCD_Region;

Whilst we aren't always strictly in line with QEMU naming conventions,
to match local style we should at least have CXLDCDRegion.

> +
>  struct CXLType3Dev {
>      /* Private */
>      PCIDevice parent_obj;
> @@ -413,6 +425,11 @@ struct CXLType3Dev {
>      unsigned int poison_list_cnt;
>      bool poison_list_overflowed;
>      uint64_t poison_list_overflow_ts;
> +
> +    struct dynamic_capacity {
> +        uint8_t num_regions; /* 0-8 regions */
> +        struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
Typedef above, so
	   CXLDCDRegion regions[...]

> +    } dc;
>  };
>  
>  #define TYPE_CXL_TYPE3 "cxl-type3"


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices
  2023-07-25 18:39     ` [Qemu PATCH v2 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices Fan Ni
@ 2023-08-04 15:27         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-04 15:27 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
> pmem capacity, preparing for the introduction of dynamic capacity to support
> dynamic capacity devices.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Looks good.  I've picked this up with the author change.

Note that just because I've started picking these up, doesn't
mean you can't keep changing them, but if you start from where my
gitlab.com/jic23/qemu tree is that will make life easier given
we have a lot of stuff in flight.

Jonathan

p.s. that assumes I've actually pushed the result of this out before
you get back to it!


> ---
>  hw/cxl/cxl-mailbox-utils.c  | 5 +++--
>  hw/mem/cxl_type3.c          | 8 ++++----
>  include/hw/cxl/cxl_device.h | 2 +-
>  3 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 0fe9f3eb5d..dd5ea95af8 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -540,7 +540,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>  
>      snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
>  
> -    stq_le_p(&id->total_capacity, cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
> +    stq_le_p(&id->total_capacity,
> +            cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
>      stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
>      stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
>      stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
> @@ -879,7 +880,7 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      struct clear_poison_pl *in = (void *)cmd->payload;
>  
>      dpa = ldq_le_p(&in->dpa);
> -    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
> +    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
>          return CXL_MBOX_INVALID_PA;
>      }
>  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 4d68824dfe..3d7acffcb7 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -748,7 +748,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          }
>          address_space_init(&ct3d->hostvmem_as, vmr, v_name);
>          ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
> -        ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
> +        ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
>          g_free(v_name);
>      }
>  
> @@ -771,7 +771,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          }
>          address_space_init(&ct3d->hostpmem_as, pmr, p_name);
>          ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
> -        ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
> +        ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
>          g_free(p_name);
>      }
>  
> @@ -984,7 +984,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          return -EINVAL;
>      }
>  
> -    if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
> +    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
>          return -EINVAL;
>      }
>  
> @@ -1148,7 +1148,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
>          return false;
>      }
>  
> -    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
> +    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
>          return false;
>      }
>  
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index dae39da438..503c344326 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -209,7 +209,7 @@ typedef struct cxl_device_state {
>      } timestamp;
>  
>      /* memory region size, HDM */
> -    uint64_t mem_size;
> +    uint64_t static_mem_size;
>      uint64_t pmem_size;
>      uint64_t vmem_size;
>      bool is_dcd;


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices
@ 2023-08-04 15:27         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-04 15:27 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
> pmem capacity, preparing for the introduction of dynamic capacity to support
> dynamic capacity devices.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Looks good.  I've picked this up with the author change.

Note that just because I've started picking these up, doesn't
mean you can't keep changing them, but if you start from where my
gitlab.com/jic23/qemu tree is that will make life easier given
we have a lot of stuff in flight.

Jonathan

p.s. that assumes I've actually pushed the result of this out before
you get back to it!


> ---
>  hw/cxl/cxl-mailbox-utils.c  | 5 +++--
>  hw/mem/cxl_type3.c          | 8 ++++----
>  include/hw/cxl/cxl_device.h | 2 +-
>  3 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 0fe9f3eb5d..dd5ea95af8 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -540,7 +540,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>  
>      snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
>  
> -    stq_le_p(&id->total_capacity, cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
> +    stq_le_p(&id->total_capacity,
> +            cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
>      stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
>      stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
>      stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
> @@ -879,7 +880,7 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      struct clear_poison_pl *in = (void *)cmd->payload;
>  
>      dpa = ldq_le_p(&in->dpa);
> -    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
> +    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
>          return CXL_MBOX_INVALID_PA;
>      }
>  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 4d68824dfe..3d7acffcb7 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -748,7 +748,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          }
>          address_space_init(&ct3d->hostvmem_as, vmr, v_name);
>          ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
> -        ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
> +        ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
>          g_free(v_name);
>      }
>  
> @@ -771,7 +771,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          }
>          address_space_init(&ct3d->hostpmem_as, pmr, p_name);
>          ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
> -        ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
> +        ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
>          g_free(p_name);
>      }
>  
> @@ -984,7 +984,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          return -EINVAL;
>      }
>  
> -    if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
> +    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
>          return -EINVAL;
>      }
>  
> @@ -1148,7 +1148,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
>          return false;
>      }
>  
> -    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
> +    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
>          return false;
>      }
>  
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index dae39da438..503c344326 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -209,7 +209,7 @@ typedef struct cxl_device_state {
>      } timestamp;
>  
>      /* memory region size, HDM */
> -    uint64_t mem_size;
> +    uint64_t static_mem_size;
>      uint64_t pmem_size;
>      uint64_t vmem_size;
>      bool is_dcd;



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 4/9] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices
  2023-07-25 18:39     ` [Qemu PATCH v2 4/9] hw/mem/cxl_type3: Add support to create DC regions to " Fan Ni
@ 2023-08-04 15:55         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-04 15:55 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> With the change, when setting up memory for type3 memory device, we can
> create DC regions
> A property 'num-dc-regions' is added to ct3_props to allow users to pass the
> number of DC regions to create. To make it easier, other region parameters
> like region base, length, and block size are hard coded. If needed,
> these parameters can be added easily.

Longer term I think we need to have an interface based on one or more
memory backends.  Gets fiddly if we allow live configuration of the regions
but for static regions it should be easy and look like the vmem and pmem
already in place.

This is good for testing in the meantime.

> 
> With the change, we can create DC regions with proper kernel side
> support as below:
> 
> region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
> echo $region> /sys/bus/cxl/devices/decoder0.0/create_dc_region
> echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
> echo 1 > /sys/bus/cxl/devices/$region/interleave_ways
> 
> echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
> echo 0x40000000 >/sys/bus/cxl/devices/decoder2.0/dpa_size
> 
> echo 0x40000000 > /sys/bus/cxl/devices/$region/size
> echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
> echo 1 > /sys/bus/cxl/devices/$region/commit
> echo $region > /sys/bus/cxl/drivers/cxl_region/bind
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/mem/cxl_type3.c | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 3d7acffcb7..b29bb2309a 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -707,6 +707,34 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>      }
>  }
>  
> +/*
> + * Create a dc region to test "Get Dynamic Capacity Configuration" command.
> + */
> +static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    uint64_t region_base = (ct3d->hostvmem ? ct3d->hostvmem->size : 0)
> +        + (ct3d->hostpmem ? ct3d->hostpmem->size : 0);

This is getting hard to read. Perhaps a longhand version with if statements would be easier?

	uint64_t region_base = 0;

	if (ct3d->hostvmem) {
		region_base += ct3d->hostvmem->size;
	}
etc.


> +    uint64_t region_len = (uint64_t)2 * 1024 * 1024 * 1024;

Use GiB and MiB from include/qemu/units.h as appropriate.

> +    uint64_t decode_len = 4; /* 4*256MB */
> +    uint64_t blk_size = 2 * 1024 * 1024;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        region->base = region_base;
> +        region->decode_len = decode_len;
> +        region->len = region_len;
> +        region->block_size = blk_size;
> +        /* dsmad_handle is set when creating cdat table entries */
> +        region->flags = 0;
> +
> +        region_base += region->len;
> +    }
> +
> +    return 0;
> +}
> +
>  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
> @@ -775,6 +803,10 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          g_free(p_name);
>      }
>  
> +    if (cxl_create_dc_regions(ct3d)) {
> +        return false;
> +    }
> +
>      return true;
>  }
>  
> @@ -1068,6 +1100,7 @@ static Property ct3_props[] = {
>      DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
> +    DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 4/9] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices
@ 2023-08-04 15:55         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-04 15:55 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:55 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> With the change, when setting up memory for type3 memory device, we can
> create DC regions
> A property 'num-dc-regions' is added to ct3_props to allow users to pass the
> number of DC regions to create. To make it easier, other region parameters
> like region base, length, and block size are hard coded. If needed,
> these parameters can be added easily.

Longer term I think we need to have an interface based on one or more
memory backends.  Gets fiddly if we allow live configuration of the regions
but for static regions it should be easy and look like the vmem and pmem
already in place.

This is good for testing in the meantime.

> 
> With the change, we can create DC regions with proper kernel side
> support as below:
> 
> region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
> echo $region> /sys/bus/cxl/devices/decoder0.0/create_dc_region
> echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
> echo 1 > /sys/bus/cxl/devices/$region/interleave_ways
> 
> echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
> echo 0x40000000 >/sys/bus/cxl/devices/decoder2.0/dpa_size
> 
> echo 0x40000000 > /sys/bus/cxl/devices/$region/size
> echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
> echo 1 > /sys/bus/cxl/devices/$region/commit
> echo $region > /sys/bus/cxl/drivers/cxl_region/bind
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/mem/cxl_type3.c | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 3d7acffcb7..b29bb2309a 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -707,6 +707,34 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>      }
>  }
>  
> +/*
> + * Create a dc region to test "Get Dynamic Capacity Configuration" command.
> + */
> +static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    uint64_t region_base = (ct3d->hostvmem ? ct3d->hostvmem->size : 0)
> +        + (ct3d->hostpmem ? ct3d->hostpmem->size : 0);

This is getting hard to read. Perhaps a longhand version with if statements would be easier?

	uint64_t region_base = 0;

	if (ct3d->hostvmem) {
		region_base += ct3d->hostvmem->size;
	}
etc.


> +    uint64_t region_len = (uint64_t)2 * 1024 * 1024 * 1024;

Use GiB and MiB from include/qemu/units.h as appropriate.

> +    uint64_t decode_len = 4; /* 4*256MB */
> +    uint64_t blk_size = 2 * 1024 * 1024;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        region->base = region_base;
> +        region->decode_len = decode_len;
> +        region->len = region_len;
> +        region->block_size = blk_size;
> +        /* dsmad_handle is set when creating cdat table entries */
> +        region->flags = 0;
> +
> +        region_base += region->len;
> +    }
> +
> +    return 0;
> +}
> +
>  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
> @@ -775,6 +803,10 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          g_free(p_name);
>      }
>  
> +    if (cxl_create_dc_regions(ct3d)) {
> +        return false;
> +    }
> +
>      return true;
>  }
>  
> @@ -1068,6 +1100,7 @@ static Property ct3_props[] = {
>      DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
> +    DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
  2023-07-25 18:39     ` [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions Fan Ni
@ 2023-08-04 16:36         ` Jonathan Cameron via
  2023-08-04 16:36         ` Jonathan Cameron via
  1 sibling, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-04 16:36 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Add (file/memory backed) host backend, all the dynamic capacity regions
> will share a single, large enough host backend. Set up address space for
> DC regions to support read/write operations to dynamic capacity for DCD.
> 
> With the change, following supports are added:
> 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
>    memory backend for dynamic capacity;
> 2. add namespace for dynamic capacity for read/write support;
> 3. create cdat entries for each dynamic capacity region;
> 4. fix dvsec range registers to include DC regions.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Hi Fan,

I'm not sure if we want to do all regions backed by one memory backend
or one backend each.  It will become complex when some are shared
(e.g. what Gregory is working on).

A few questions inline.  In particular there are subtle changes to
existing handling that are either bug fixes (in which case they need
to be sent first) or bugs / have no effect and shouldn't be in here.


> ---
>  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
>  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
>  include/hw/cxl/cxl_device.h |   4 +
>  3 files changed, 185 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index dd5ea95af8..0511b8e6f7 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -388,9 +388,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
>          char fw_rev4[0x10];
>      } QEMU_PACKED *fw_info;
>      QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
> +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>  
>      if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> -        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
> +        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
> +        (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -531,7 +533,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
>  
>      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -566,9 +569,11 @@ static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
>          uint64_t next_pmem;
>      } QEMU_PACKED *part_info = (void *)cmd->payload;
>      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>  
>      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -880,7 +885,13 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      struct clear_poison_pl *in = (void *)cmd->payload;
>  
>      dpa = ldq_le_p(&in->dpa);
> -    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
> +    if (dpa + CXL_CACHE_LINE_SIZE >= cxl_dstate->static_mem_size

If there is already a bug here we should pull it out. If not I can't
see why the >= change is here.  

> +            && ct3d->dc.num_regions == 0) {
> +        return CXL_MBOX_INVALID_PA;
> +    }
> +
> +    if (ct3d->dc.num_regions && dpa + CXL_CACHE_LINE_SIZE >=
> +            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
>          return CXL_MBOX_INVALID_PA;
>      }
>  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index b29bb2309a..76bbd9f785 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -20,6 +20,7 @@
>  #include "hw/pci/spdm.h"
>  
>  #define DWORD_BYTE 4
> +#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
>  
>  /* Default CDAT entries for a memory region */
>  enum {
> @@ -33,8 +34,8 @@ enum {
>  };
>  
>  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> -                                         int dsmad_handle, MemoryRegion *mr,
> -                                         bool is_pmem, uint64_t dpa_base)
> +        int dsmad_handle, uint8_t flags,
> +        uint64_t dpa_base, uint64_t size)
>  {
>      g_autofree CDATDsmas *dsmas = NULL;
>      g_autofree CDATDslbis *dslbis0 = NULL;
> @@ -53,9 +54,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>              .length = sizeof(*dsmas),
>          },
>          .DSMADhandle = dsmad_handle,
> -        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> +        .flags = flags,
>          .DPA_base = dpa_base,
> -        .DPA_length = memory_region_size(mr),
> +        .DPA_length = size,
>      };
>  
>      /* For now, no memory side cache, plausiblish numbers */
> @@ -137,9 +138,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>           * NV: Reserved - the non volatile from DSMAS matters
>           * V: EFI_MEMORY_SP
>           */
> -        .EFI_memory_type_attr = is_pmem ? 2 : 1,
> +        .EFI_memory_type_attr = flags ? 2 : 1,

This doesn't look good.  Previously we used a boolean to control
this; now you are using flags, which contains other things?

I don't see the flags expanding that much more, so instead of
this I'd just change the function to take two booleans.
is_pmem, is_dynamic

>          .DPA_offset = 0,
> -        .DPA_length = memory_region_size(mr),
> +        .DPA_length = size,
>      };
>  
>      /* Header always at start of structure */
> @@ -158,21 +159,28 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>      g_autofree CDATSubHeader **table = NULL;
>      CXLType3Dev *ct3d = priv;
>      MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
> +    MemoryRegion *dc_mr = NULL;
>      int dsmad_handle = 0;
>      int cur_ent = 0;
>      int len = 0;
>      int rc, i;
> +    uint64_t vmr_size = 0, pmr_size = 0;
>  
> -    if (!ct3d->hostpmem && !ct3d->hostvmem) {
> +    if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
>          return 0;
>      }
>  
> +    if (ct3d->hostpmem && ct3d->hostvmem && ct3d->dc.host_dc) {
> +        warn_report("The device has static ram and pmem and dynamic capacity");
> +    }
> +
>      if (ct3d->hostvmem) {
>          volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
>          if (!volatile_mr) {
>              return -EINVAL;
>          }
>          len += CT3_CDAT_NUM_ENTRIES;
> +        vmr_size = volatile_mr->size;
>      }
>  
>      if (ct3d->hostpmem) {
> @@ -181,6 +189,19 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>              return -EINVAL;
>          }
>          len += CT3_CDAT_NUM_ENTRIES;
> +        pmr_size = nonvolatile_mr->size;
> +    }
> +
> +    if (ct3d->dc.num_regions) {
> +        if (ct3d->dc.host_dc) {
> +            dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +            if (!dc_mr) {
> +                return -EINVAL;
> +            }
> +            len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
> +        } else {
> +            return -EINVAL;
> +        }
>      }
>  
>      table = g_malloc0(len * sizeof(*table));
> @@ -190,8 +211,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  
>      /* Now fill them in */
>      if (volatile_mr) {
> -        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> -                                           false, 0);
> +        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
> +                0, 0, vmr_size);
>          if (rc < 0) {
>              return rc;
>          }
> @@ -200,14 +221,37 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  
>      if (nonvolatile_mr) {
>          rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> -                                           nonvolatile_mr, true,
> -                                           (volatile_mr ?
> -                                            memory_region_size(volatile_mr) : 0));
> +                CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);

These lines don't end up that long, so I'd prefer to keep aligned 
with the brackets where we can do so and stay under 80 chars.

>          if (rc < 0) {
>              goto error_cleanup;
>          }
>          cur_ent += CT3_CDAT_NUM_ENTRIES;
>      }
> +
> +    if (dc_mr) {
> +        uint64_t region_base = vmr_size + pmr_size;
> +
> +        /*
> +         * Currently we create cdat entries for each region, should we only
> +         * create dsmas table instead??
> +         * We assume all dc regions are non-volatile for now.
> +         *
> +         */
> +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> +            rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
> +                    , dsmad_handle++
> +                    , CDAT_DSMAS_FLAG_NV | CDAT_DSMAS_FLAG_DYNAMIC_CAP
> +                    , region_base, ct3d->dc.regions[i].len);

Formatting should have those commas at the end of lines, not at the start of the next ones.

> +            if (rc < 0) {
> +                goto error_cleanup;
> +            }
> +            ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
> +
> +            cur_ent += CT3_CDAT_NUM_ENTRIES;
> +            region_base += ct3d->dc.regions[i].len;
> +        }
> +    }
> +
>      assert(len == cur_ent);
>  
>      *cdat_table = g_steal_pointer(&table);
> @@ -435,11 +479,24 @@ static void build_dvsecs(CXLType3Dev *ct3d)
>              range2_size_hi = ct3d->hostpmem->size >> 32;
>              range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
>                               (ct3d->hostpmem->size & 0xF0000000);
> +        } else if (ct3d->dc.host_dc) {
> +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->dc.host_dc->size & 0xF0000000);
>          }
> -    } else {
> +    } else if (ct3d->hostpmem) {
>          range1_size_hi = ct3d->hostpmem->size >> 32;
>          range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
>                           (ct3d->hostpmem->size & 0xF0000000);
> +        if (ct3d->dc.host_dc) {
> +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->dc.host_dc->size & 0xF0000000);
> +        }
> +    } else {
> +        range1_size_hi = ct3d->dc.host_dc->size >> 32;
> +        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +            (ct3d->dc.host_dc->size & 0xF0000000);
>      }
I think we concluded in that other thread that DCD doesn't belong in here
at all?  I'll leave it for now though.

>  
>      dvsec = (uint8_t *)&(CXLDVSECDevice){
> @@ -708,7 +765,8 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>  }
>  
>  /*
> - * Create a dc region to test "Get Dynamic Capacity Configuration" command.
> + * Create dc regions.
> + * TODO: region parameters are hard coded, may need to change in the future.
>   */
>  static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>  {
> @@ -739,7 +797,8 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
>  
> -    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> +    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
> +            && !ct3d->dc.num_regions) {
>          error_setg(errp, "at least one memdev property must be set");
>          return false;
>      } else if (ct3d->hostmem && ct3d->hostpmem) {
> @@ -807,6 +866,50 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          return false;
>      }
>  
> +    ct3d->dc.total_capacity = 0;
> +    if (ct3d->dc.host_dc) {
> +        MemoryRegion *dc_mr;
> +        char *dc_name;
> +        uint64_t total_region_size = 0;
> +        int i;
> +
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        if (!dc_mr) {
> +            error_setg(errp, "dynamic capacity must have backing device");
> +            return false;
> +        }
> +        /* FIXME: set dc as nonvolatile for now */

Yup. This is where we need to think about the interface, or decide
that no one cares about PMEM DCD on the basis that it's a pain to deal
with and I think some stuff is still not well defined in the spec.

> +        memory_region_set_nonvolatile(dc_mr, true);
> +        memory_region_set_enabled(dc_mr, true);
> +        host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
> +        if (ds->id) {
> +            dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
> +        } else {
> +            dc_name = g_strdup("cxl-dcd-dpa-dc-space");
> +        }
> +        address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
> +
> +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> +            total_region_size += ct3d->dc.regions[i].len;
> +        }
> +        /* Make sure the host backend is large enough to cover all dc range */
> +        if (total_region_size > memory_region_size(dc_mr)) {
> +            error_setg(errp,
> +                "too small host backend size, increase to %lu MiB or more",
> +                total_region_size / 1024 / 1024);

Use "/ MiB" here.

> +            return false;
> +        }
> +
> +        if (dc_mr->size % CXL_CAPACITY_MULTIPLIER != 0) {
> +            error_setg(errp, "DC region size is unaligned to %lx",
> +                    CXL_CAPACITY_MULTIPLIER);
> +            return false;
> +        }
> +
> +        ct3d->dc.total_capacity = total_region_size;
> +        g_free(dc_name);
> +    }
> +
>      return true;
>  }
>  
> @@ -916,6 +1019,9 @@ err_release_cdat:
>  err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
> +    if (ct3d->dc.host_dc) {
> +        address_space_destroy(&ct3d->dc.host_dc_as);
> +    }
>      if (ct3d->hostpmem) {
>          address_space_destroy(&ct3d->hostpmem_as);
>      }
> @@ -935,6 +1041,9 @@ static void ct3_exit(PCIDevice *pci_dev)
>      cxl_doe_cdat_release(cxl_cstate);
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
> +    if (ct3d->dc.host_dc) {
> +        address_space_destroy(&ct3d->dc.host_dc_as);
> +    }
>      if (ct3d->hostpmem) {
>          address_space_destroy(&ct3d->hostpmem_as);
>      }
> @@ -999,16 +1108,24 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>                                         AddressSpace **as,
>                                         uint64_t *dpa_offset)
>  {
> -    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
>  
>      if (ct3d->hostvmem) {
>          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        vmr_size = memory_region_size(vmr);
>      }
>      if (ct3d->hostpmem) {
>          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        pmr_size = memory_region_size(pmr);
> +    }
> +    if (ct3d->dc.host_dc) {
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        /* Do we want dc_size to be dc_mr->size or not?? */

yes, I think we do. No need for qemu to care about remapping and complex
allocation strategies.  The host can't tell if we are doing that or not
anyway.

> +        dc_size = ct3d->dc.total_capacity;
>      }
>  
> -    if (!vmr && !pmr) {
> +    if (!vmr && !pmr && !dc_mr) {
>          return -ENODEV;
>      }
>  
> @@ -1016,19 +1133,19 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          return -EINVAL;
>      }
>  
> -    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
> +    if ((*dpa_offset >= vmr_size + pmr_size + dc_size) ||
> +       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
>          return -EINVAL;
>      }
>  
> -    if (vmr) {
> -        if (*dpa_offset < memory_region_size(vmr)) {
> -            *as = &ct3d->hostvmem_as;
> -        } else {
> -            *as = &ct3d->hostpmem_as;
> -            *dpa_offset -= memory_region_size(vmr);
> -        }
> -    } else {
> +    if (*dpa_offset < vmr_size) {
> +        *as = &ct3d->hostvmem_as;
> +    } else if (*dpa_offset < vmr_size + pmr_size) {
>          *as = &ct3d->hostpmem_as;
> +        *dpa_offset -= vmr_size;
> +    } else {
> +        *as = &ct3d->dc.host_dc_as;
> +        *dpa_offset -= (vmr_size + pmr_size);
>      }
>  
>      return 0;
> @@ -1101,6 +1218,8 @@ static Property ct3_props[] = {
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
>      DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
> +    DEFINE_PROP_LINK("nonvolatile-dc-memdev", CXLType3Dev, dc.host_dc,
> +                    TYPE_MEMORY_BACKEND, HostMemoryBackend *),

I think we will want a more adaptable interface for this, but I'll apply with this for now
so we have something to iterate on.

>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -1167,33 +1286,43 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>  
>  static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
>  {
> -    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
>      AddressSpace *as;
> +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
>  
>      if (ct3d->hostvmem) {
>          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        vmr_size = memory_region_size(vmr);
>      }
>      if (ct3d->hostpmem) {
>          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        pmr_size = memory_region_size(pmr);
>      }
> +    if (ct3d->dc.host_dc) {
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        dc_size = ct3d->dc.total_capacity;
> +     }
>  
> -    if (!vmr && !pmr) {
> +    if (!vmr && !pmr && !dc_mr) {
>          return false;
>      }
>  
> -    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
> +    if (dpa_offset >= vmr_size + pmr_size + dc_size) {

What is reasoning behind not having offset + cacheline size here?
DC blocks are multiples of CXL_CACHE_LINE_SIZE anyway.


> +        return false;
> +    }
> +    if (dpa_offset + CXL_CACHE_LINE_SIZE >= vmr_size + pmr_size
> +            && ct3d->dc.num_regions == 0) {

This is getting messy - we have dc_size set above on the basis
of one condition, and this check is based on num_regions.

Need to only allow backed regions to keep this simpler.

>          return false;
>      }


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
@ 2023-08-04 16:36         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-04 16:36 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Add (file/memory backed) host backend, all the dynamic capacity regions
> will share a single, large enough host backend. Set up address space for
> DC regions to support read/write operations to dynamic capacity for DCD.
> 
> With the change, following supports are added:
> 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
>    memory backend for dynamic capacity;
> 2. add namespace for dynamic capacity for read/write support;
> 3. create cdat entries for each dynamic capacity region;
> 4. fix dvsec range registers to include DC regions.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Hi Fan,

I'm not sure if we want to do all regions backed by one memory backend
or one backend each.  It will become complex when some are shared
(e.g. what Gregory is working on).

A few questions inline.  In particular there are subtle changes to
existing handling that are either bug fixes (in which case they need
to be sent first) or bugs / have no effect and shouldn't be in here.


> ---
>  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
>  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
>  include/hw/cxl/cxl_device.h |   4 +
>  3 files changed, 185 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index dd5ea95af8..0511b8e6f7 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -388,9 +388,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
>          char fw_rev4[0x10];
>      } QEMU_PACKED *fw_info;
>      QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
> +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>  
>      if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> -        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
> +        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
> +        (ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -531,7 +533,8 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
>      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
>  
>      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -566,9 +569,11 @@ static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
>          uint64_t next_pmem;
>      } QEMU_PACKED *part_info = (void *)cmd->payload;
>      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> +    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>  
>      if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> -        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> @@ -880,7 +885,13 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
>      struct clear_poison_pl *in = (void *)cmd->payload;
>  
>      dpa = ldq_le_p(&in->dpa);
> -    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
> +    if (dpa + CXL_CACHE_LINE_SIZE >= cxl_dstate->static_mem_size

If there is already a bug here we should pull it out. If not I can't
see why the >= change is here.  

> +            && ct3d->dc.num_regions == 0) {
> +        return CXL_MBOX_INVALID_PA;
> +    }
> +
> +    if (ct3d->dc.num_regions && dpa + CXL_CACHE_LINE_SIZE >=
> +            cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
>          return CXL_MBOX_INVALID_PA;
>      }
>  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index b29bb2309a..76bbd9f785 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -20,6 +20,7 @@
>  #include "hw/pci/spdm.h"
>  
>  #define DWORD_BYTE 4
> +#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
>  
>  /* Default CDAT entries for a memory region */
>  enum {
> @@ -33,8 +34,8 @@ enum {
>  };
>  
>  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> -                                         int dsmad_handle, MemoryRegion *mr,
> -                                         bool is_pmem, uint64_t dpa_base)
> +        int dsmad_handle, uint8_t flags,
> +        uint64_t dpa_base, uint64_t size)
>  {
>      g_autofree CDATDsmas *dsmas = NULL;
>      g_autofree CDATDslbis *dslbis0 = NULL;
> @@ -53,9 +54,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>              .length = sizeof(*dsmas),
>          },
>          .DSMADhandle = dsmad_handle,
> -        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> +        .flags = flags,
>          .DPA_base = dpa_base,
> -        .DPA_length = memory_region_size(mr),
> +        .DPA_length = size,
>      };
>  
>      /* For now, no memory side cache, plausiblish numbers */
> @@ -137,9 +138,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>           * NV: Reserved - the non volatile from DSMAS matters
>           * V: EFI_MEMORY_SP
>           */
> -        .EFI_memory_type_attr = is_pmem ? 2 : 1,
> +        .EFI_memory_type_attr = flags ? 2 : 1,

This doesn't look good.  Previously we used a boolean to control
this; now you are using flags, which contains other things?

I don't see the flags expanding that much more, so instead of
this I'd just change the function to take two booleans.
is_pmem, is_dynamic

>          .DPA_offset = 0,
> -        .DPA_length = memory_region_size(mr),
> +        .DPA_length = size,
>      };
>  
>      /* Header always at start of structure */
> @@ -158,21 +159,28 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>      g_autofree CDATSubHeader **table = NULL;
>      CXLType3Dev *ct3d = priv;
>      MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
> +    MemoryRegion *dc_mr = NULL;
>      int dsmad_handle = 0;
>      int cur_ent = 0;
>      int len = 0;
>      int rc, i;
> +    uint64_t vmr_size = 0, pmr_size = 0;
>  
> -    if (!ct3d->hostpmem && !ct3d->hostvmem) {
> +    if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
>          return 0;
>      }
>  
> +    if (ct3d->hostpmem && ct3d->hostvmem && ct3d->dc.host_dc) {
> +        warn_report("The device has static ram and pmem and dynamic capacity");
> +    }
> +
>      if (ct3d->hostvmem) {
>          volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
>          if (!volatile_mr) {
>              return -EINVAL;
>          }
>          len += CT3_CDAT_NUM_ENTRIES;
> +        vmr_size = volatile_mr->size;
>      }
>  
>      if (ct3d->hostpmem) {
> @@ -181,6 +189,19 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>              return -EINVAL;
>          }
>          len += CT3_CDAT_NUM_ENTRIES;
> +        pmr_size = nonvolatile_mr->size;
> +    }
> +
> +    if (ct3d->dc.num_regions) {
> +        if (ct3d->dc.host_dc) {
> +            dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +            if (!dc_mr) {
> +                return -EINVAL;
> +            }
> +            len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
> +        } else {
> +            return -EINVAL;
> +        }
>      }
>  
>      table = g_malloc0(len * sizeof(*table));
> @@ -190,8 +211,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  
>      /* Now fill them in */
>      if (volatile_mr) {
> -        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> -                                           false, 0);
> +        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++,
> +                0, 0, vmr_size);
>          if (rc < 0) {
>              return rc;
>          }
> @@ -200,14 +221,37 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  
>      if (nonvolatile_mr) {
>          rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> -                                           nonvolatile_mr, true,
> -                                           (volatile_mr ?
> -                                            memory_region_size(volatile_mr) : 0));
> +                CDAT_DSMAS_FLAG_NV, vmr_size, pmr_size);

These lines don't end up that long, so I'd prefer to keep aligned 
with the brackets where we can do so and stay under 80 chars.

>          if (rc < 0) {
>              goto error_cleanup;
>          }
>          cur_ent += CT3_CDAT_NUM_ENTRIES;
>      }
> +
> +    if (dc_mr) {
> +        uint64_t region_base = vmr_size + pmr_size;
> +
> +        /*
> +         * Currently we create cdat entries for each region, should we only
> +         * create dsmas table instead??
> +         * We assume all dc regions are non-volatile for now.
> +         *
> +         */
> +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> +            rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent])
> +                    , dsmad_handle++
> +                    , CDAT_DSMAS_FLAG_NV | CDAT_DSMAS_FLAG_DYNAMIC_CAP
> +                    , region_base, ct3d->dc.regions[i].len);

Formatting should have those commas at the end of lines, not at the start of the next ones.

> +            if (rc < 0) {
> +                goto error_cleanup;
> +            }
> +            ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
> +
> +            cur_ent += CT3_CDAT_NUM_ENTRIES;
> +            region_base += ct3d->dc.regions[i].len;
> +        }
> +    }
> +
>      assert(len == cur_ent);
>  
>      *cdat_table = g_steal_pointer(&table);
> @@ -435,11 +479,24 @@ static void build_dvsecs(CXLType3Dev *ct3d)
>              range2_size_hi = ct3d->hostpmem->size >> 32;
>              range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
>                               (ct3d->hostpmem->size & 0xF0000000);
> +        } else if (ct3d->dc.host_dc) {
> +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->dc.host_dc->size & 0xF0000000);
>          }
> -    } else {
> +    } else if (ct3d->hostpmem) {
>          range1_size_hi = ct3d->hostpmem->size >> 32;
>          range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
>                           (ct3d->hostpmem->size & 0xF0000000);
> +        if (ct3d->dc.host_dc) {
> +            range2_size_hi = ct3d->dc.host_dc->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->dc.host_dc->size & 0xF0000000);
> +        }
> +    } else {
> +        range1_size_hi = ct3d->dc.host_dc->size >> 32;
> +        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +            (ct3d->dc.host_dc->size & 0xF0000000);
>      }
I think we concluded in that other thread that DCD doesn't belong in here
at all?  I'll leave it for now though.

>  
>      dvsec = (uint8_t *)&(CXLDVSECDevice){
> @@ -708,7 +765,8 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
>  }
>  
>  /*
> - * Create a dc region to test "Get Dynamic Capacity Configuration" command.
> + * Create dc regions.
> + * TODO: region parameters are hard coded, may need to change in the future.
>   */
>  static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>  {
> @@ -739,7 +797,8 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
>  
> -    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> +    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
> +            && !ct3d->dc.num_regions) {
>          error_setg(errp, "at least one memdev property must be set");
>          return false;
>      } else if (ct3d->hostmem && ct3d->hostpmem) {
> @@ -807,6 +866,50 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          return false;
>      }
>  
> +    ct3d->dc.total_capacity = 0;
> +    if (ct3d->dc.host_dc) {
> +        MemoryRegion *dc_mr;
> +        char *dc_name;
> +        uint64_t total_region_size = 0;
> +        int i;
> +
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        if (!dc_mr) {
> +            error_setg(errp, "dynamic capacity must have backing device");
> +            return false;
> +        }
> +        /* FIXME: set dc as nonvolatile for now */

Yup. This is where we need to think about the interface, or decide
that no one cares about PMEM DCD on the basis that it's a pain to deal with
and I think some stuff is still not well defined in the spec.

> +        memory_region_set_nonvolatile(dc_mr, true);
> +        memory_region_set_enabled(dc_mr, true);
> +        host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
> +        if (ds->id) {
> +            dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
> +        } else {
> +            dc_name = g_strdup("cxl-dcd-dpa-dc-space");
> +        }
> +        address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
> +
> +        for (i = 0; i < ct3d->dc.num_regions; i++) {
> +            total_region_size += ct3d->dc.regions[i].len;
> +        }
> +        /* Make sure the host backend is large enough to cover all dc range */
> +        if (total_region_size > memory_region_size(dc_mr)) {
> +            error_setg(errp,
> +                "too small host backend size, increase to %lu MiB or more",
> +                total_region_size / 1024 / 1024);

/ MiB

> +            return false;
> +        }
> +
> +        if (dc_mr->size % CXL_CAPACITY_MULTIPLIER != 0) {
> +            error_setg(errp, "DC region size is unaligned to %lx",
> +                    CXL_CAPACITY_MULTIPLIER);
> +            return false;
> +        }
> +
> +        ct3d->dc.total_capacity = total_region_size;
> +        g_free(dc_name);
> +    }
> +
>      return true;
>  }
>  
> @@ -916,6 +1019,9 @@ err_release_cdat:
>  err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
> +    if (ct3d->dc.host_dc) {
> +        address_space_destroy(&ct3d->dc.host_dc_as);
> +    }
>      if (ct3d->hostpmem) {
>          address_space_destroy(&ct3d->hostpmem_as);
>      }
> @@ -935,6 +1041,9 @@ static void ct3_exit(PCIDevice *pci_dev)
>      cxl_doe_cdat_release(cxl_cstate);
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
> +    if (ct3d->dc.host_dc) {
> +        address_space_destroy(&ct3d->dc.host_dc_as);
> +    }
>      if (ct3d->hostpmem) {
>          address_space_destroy(&ct3d->hostpmem_as);
>      }
> @@ -999,16 +1108,24 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>                                         AddressSpace **as,
>                                         uint64_t *dpa_offset)
>  {
> -    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
> +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
>  
>      if (ct3d->hostvmem) {
>          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        vmr_size = memory_region_size(vmr);
>      }
>      if (ct3d->hostpmem) {
>          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        pmr_size = memory_region_size(pmr);
> +    }
> +    if (ct3d->dc.host_dc) {
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        /* Do we want dc_size to be dc_mr->size or not?? */

yes, I think we do. No need for qemu to care about remapping and complex
allocation strategies.  The host can't tell if we are doing that or not
anyway.

> +        dc_size = ct3d->dc.total_capacity;
>      }
>  
> -    if (!vmr && !pmr) {
> +    if (!vmr && !pmr && !dc_mr) {
>          return -ENODEV;
>      }
>  
> @@ -1016,19 +1133,19 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          return -EINVAL;
>      }
>  
> -    if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
> +    if ((*dpa_offset >= vmr_size + pmr_size + dc_size) ||
> +       (*dpa_offset >= vmr_size + pmr_size && ct3d->dc.num_regions == 0)) {
>          return -EINVAL;
>      }
>  
> -    if (vmr) {
> -        if (*dpa_offset < memory_region_size(vmr)) {
> -            *as = &ct3d->hostvmem_as;
> -        } else {
> -            *as = &ct3d->hostpmem_as;
> -            *dpa_offset -= memory_region_size(vmr);
> -        }
> -    } else {
> +    if (*dpa_offset < vmr_size) {
> +        *as = &ct3d->hostvmem_as;
> +    } else if (*dpa_offset < vmr_size + pmr_size) {
>          *as = &ct3d->hostpmem_as;
> +        *dpa_offset -= vmr_size;
> +    } else {
> +        *as = &ct3d->dc.host_dc_as;
> +        *dpa_offset -= (vmr_size + pmr_size);
>      }
>  
>      return 0;
> @@ -1101,6 +1218,8 @@ static Property ct3_props[] = {
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
>      DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
> +    DEFINE_PROP_LINK("nonvolatile-dc-memdev", CXLType3Dev, dc.host_dc,
> +                    TYPE_MEMORY_BACKEND, HostMemoryBackend *),

I think we will want a more adaptable interface for this, but I'll apply with this for now
so we have something to iterate on.

>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -1167,33 +1286,43 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>  
>  static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
>  {
> -    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
>      AddressSpace *as;
> +    uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
>  
>      if (ct3d->hostvmem) {
>          vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        vmr_size = memory_region_size(vmr);
>      }
>      if (ct3d->hostpmem) {
>          pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        pmr_size = memory_region_size(pmr);
>      }
> +    if (ct3d->dc.host_dc) {
> +        dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
> +        dc_size = ct3d->dc.total_capacity;
> +     }
>  
> -    if (!vmr && !pmr) {
> +    if (!vmr && !pmr && !dc_mr) {
>          return false;
>      }
>  
> -    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
> +    if (dpa_offset >= vmr_size + pmr_size + dc_size) {

What is the reasoning behind not having offset + cacheline size here?
DC blocks are multiples of CXL_CACHE_LINE_SIZE anyway.


> +        return false;
> +    }
> +    if (dpa_offset + CXL_CACHE_LINE_SIZE >= vmr_size + pmr_size
> +            && ct3d->dc.num_regions == 0) {

This is getting messy - dc_size is set above on the basis of one
condition (host_dc being present), while this check is keyed on num_regions.

We need to only allow backed regions to keep this simpler.

>          return false;
>      }



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
  2023-08-04 16:36         ` Jonathan Cameron via
  (?)
@ 2023-08-04 18:07         ` Gregory Price
  2023-08-07 12:10             ` Jonathan Cameron via
  -1 siblings, 1 reply; 48+ messages in thread
From: Gregory Price @ 2023-08-04 18:07 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Fan Ni, qemu-devel, linux-cxl, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Fri, Aug 04, 2023 at 05:36:23PM +0100, Jonathan Cameron wrote:
> On Tue, 25 Jul 2023 18:39:56 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
> 
> > From: Fan Ni <nifan@outlook.com>
> > 
> > Add (file/memory backed) host backend, all the dynamic capacity regions
> > will share a single, large enough host backend. Set up address space for
> > DC regions to support read/write operations to dynamic capacity for DCD.
> > 
> > With the change, following supports are added:
> > 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
> >    memory backend for dynamic capacity;
> > 2. add namespace for dynamic capacity for read/write support;
> > 3. create cdat entries for each dynamic capacity region;
> > 4. fix dvsec range registers to include DC regions.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> Hi Fan,
> 
> I'm not sure if we want to do all regions backed by one memory backend
> or one backend each.  It will become complex when some are shared
> (e.g. what Gregory is working on).

I thought about this briefly when I implemented the original volatile
support due to the potential for partitioning. We landed on, IIRC,
2 backends (1 for volatile, 1 for non-volatile).

The reality, though, is the driver (presently) does not have a good way
to create more than 1 dax per memdev, and in practice with real devices
we see that this just tends to be the case: 1 dax per device.  So unless
that's going to change, ever having more than 1 backend will just be
unused complexity.

To me, this is a good example of "maybe piling everything into the core
ct3d is going to get ugly fast".  Maybe it would be better to do
something similar to the CCI interface and allow for overriding the
other functions as well.

just a thought.  I apologize for not engaging with the DCD patch set,
conferences have been keeping me busier than expected.  I plan on
putting it through the grinder this month.

> 
> A few questions inline.  In particular there are subtle changes to
> existing handling that are either bug fixes (in which case they need
> to be sent first) or bugs / have no effect and shouldn't be in here.
> 
> 
> > ---
> >  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
> >  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
> >  include/hw/cxl/cxl_device.h |   4 +
> >  3 files changed, 185 insertions(+), 41 deletions(-)
> > 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-07-25 18:39     ` [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions Fan Ni
@ 2023-08-07  8:53         ` Jonathan Cameron via
  2023-08-30 12:08       ` Jørgen Hansen
  1 sibling, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-07  8:53 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Not all dpa range in the dc regions is valid to access until an extent
> covering the range has been added. Add a bitmap for each region to
> record whether a dc block in the region has been backed by dc extent.
> For the bitmap, a bit in the bitmap represents a dc block. When a dc
> extent is added, all the bits of the blocks in the extent will be set,
> which will be cleared when the extent is released.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Hi Fan,

A few of the bits of feedback apply broadly across the series.  Given I'm
rebasing this anyway to give myself something to test, I'll tidy things up
(feel free to disagree with and revert any changes!)
and push a tree out in the next day or two.  I'll message when I've done so.

Jonathan

> ---
>  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
>  include/hw/cxl/cxl_device.h |   1 +
>  2 files changed, 156 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 41a828598a..51943a36fc 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>          /* dsmad_handle is set when creating cdat table entries */
>          region->flags = 0;
>  
> +        region->blk_bitmap = bitmap_new(region->len / region->block_size);

In common with many allocators in QEMU, if this fails it calls abort()
internally, so there is no need to handle potential errors.

> +        if (!region->blk_bitmap) {
> +            break;
> +        }
> +
>          region_base += region->len;
>      }
> +
> +    if (i < ct3d->dc.num_regions) {
> +        while (--i >= 0) {
> +            g_free(ct3d->dc.regions[i].blk_bitmap);
> +        }
> +        return -1;
> +    }
> +
>      QTAILQ_INIT(&ct3d->dc.extents);
>  
>      return 0;
>  }
>  
> +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        g_free(region->blk_bitmap);
> +    }
> +}
> +
>  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
> @@ -1021,6 +1045,7 @@ err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
>      if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>          address_space_destroy(&ct3d->dc.host_dc_as);
>      }
>      if (ct3d->hostpmem) {
> @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
>      if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>          address_space_destroy(&ct3d->dc.host_dc_as);
>      }
>      if (ct3d->hostpmem) {
> @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
>      }
>  }
>  
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be backed and
> + * accessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     **/
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> +            len / region->block_size);
> +}
> +
> +/*
> + * This function check whether a dpa range [dpa, dpa + len) has been backed
> + * with dc extents, used when validating read/write to dc regions
> + */
> +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return false;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> +}
> +
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> + * inaccessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    bitmap_clear(region->blk_bitmap, nr, nbits);
> +}
> +
>  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
>  {
>      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          *as = &ct3d->hostpmem_as;
>          *dpa_offset -= vmr_size;
>      } else {
> +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> +            return -ENODEV;
> +        }
> +
>          *as = &ct3d->dc.host_dc_as;
>          *dpa_offset -= (vmr_size + pmr_size);
>      }
> @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
>      }
>  
>      g_free(extents);
> +
> +    /* Another choice is to do the set/clear after getting mailbox response*/
> +    list = records;
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        switch (type) {
> +        case DC_EVENT_ADD_CAPACITY:
> +            set_region_block_backed(dcd, dpa, len);
> +            break;
> +        case DC_EVENT_RELEASE_CAPACITY:
> +            clear_region_block_backed(dcd, dpa, len);
> +            break;
> +        default:
> +            error_setg(errp, "DC event type not handled yet");
> +            break;
> +        }
> +        list = list->next;
> +    }
>  }
>  
>  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 01a5eaca48..1f85c88017 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
>      uint64_t block_size;
>      uint32_t dsmadhandle;
>      uint8_t flags;
> +    unsigned long *blk_bitmap;
>  } CXLDCD_Region;
>  
>  struct CXLType3Dev {


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
@ 2023-08-07  8:53         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-07  8:53 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Not all dpa range in the dc regions is valid to access until an extent
> covering the range has been added. Add a bitmap for each region to
> record whether a dc block in the region has been backed by dc extent.
> For the bitmap, a bit in the bitmap represents a dc block. When a dc
> extent is added, all the bits of the blocks in the extent will be set,
> which will be cleared when the extent is released.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Hi Fan,

A few of the bits of feedback apply broadly across the series.  Given I'm
rebasing this anyway to give myself something to test, I'll tidy things up
(feel free to disagree with and revert any changes!)
and push a tree out in the next day or two.  I'll message when I've done so.

Jonathan

> ---
>  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
>  include/hw/cxl/cxl_device.h |   1 +
>  2 files changed, 156 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 41a828598a..51943a36fc 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>          /* dsmad_handle is set when creating cdat table entries */
>          region->flags = 0;
>  
> +        region->blk_bitmap = bitmap_new(region->len / region->block_size);

In common with many allocators in QEMU, if this fails it calls abort()
internally, so there is no need to handle potential errors.

> +        if (!region->blk_bitmap) {
> +            break;
> +        }
> +
>          region_base += region->len;
>      }
> +
> +    if (i < ct3d->dc.num_regions) {
> +        while (--i >= 0) {
> +            g_free(ct3d->dc.regions[i].blk_bitmap);
> +        }
> +        return -1;
> +    }
> +
>      QTAILQ_INIT(&ct3d->dc.extents);
>  
>      return 0;
>  }
>  
> +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        g_free(region->blk_bitmap);
> +    }
> +}
> +
>  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
> @@ -1021,6 +1045,7 @@ err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
>      if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>          address_space_destroy(&ct3d->dc.host_dc_as);
>      }
>      if (ct3d->hostpmem) {
> @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
>      if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>          address_space_destroy(&ct3d->dc.host_dc_as);
>      }
>      if (ct3d->hostpmem) {
> @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
>      }
>  }
>  
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be backed and
> + * accessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     **/
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> +            len / region->block_size);
> +}
> +
> +/*
> + * This function check whether a dpa range [dpa, dpa + len) has been backed
> + * with dc extents, used when validating read/write to dc regions
> + */
> +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return false;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> +}
> +
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> + * inaccessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    bitmap_clear(region->blk_bitmap, nr, nbits);
> +}
> +
>  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
>  {
>      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          *as = &ct3d->hostpmem_as;
>          *dpa_offset -= vmr_size;
>      } else {
> +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> +            return -ENODEV;
> +        }
> +
>          *as = &ct3d->dc.host_dc_as;
>          *dpa_offset -= (vmr_size + pmr_size);
>      }
> @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
>      }
>  
>      g_free(extents);
> +
> +    /* Another choice is to do the set/clear after getting mailbox response*/
> +    list = records;
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        switch (type) {
> +        case DC_EVENT_ADD_CAPACITY:
> +            set_region_block_backed(dcd, dpa, len);
> +            break;
> +        case DC_EVENT_RELEASE_CAPACITY:
> +            clear_region_block_backed(dcd, dpa, len);
> +            break;
> +        default:
> +            error_setg(errp, "DC event type not handled yet");
> +            break;
> +        }
> +        list = list->next;
> +    }
>  }
>  
>  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 01a5eaca48..1f85c88017 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
>      uint64_t block_size;
>      uint32_t dsmadhandle;
>      uint8_t flags;
> +    unsigned long *blk_bitmap;
>  } CXLDCD_Region;
>  
>  struct CXLType3Dev {



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-08-07  8:53         ` Jonathan Cameron via
@ 2023-08-07  9:37           ` Jonathan Cameron via
  -1 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-07  9:37 UTC (permalink / raw)
  To: Jonathan Cameron via
  Cc: Jonathan Cameron, Fan Ni, linux-cxl, gregory.price, hchkuo,
	cbrowy, ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Mon, 7 Aug 2023 09:53:42 +0100
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> On Tue, 25 Jul 2023 18:39:56 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
> 
> > From: Fan Ni <nifan@outlook.com>
> > 
> > Not all dpa range in the dc regions is valid to access until an extent
> > covering the range has been added. Add a bitmap for each region to
> > record whether a dc block in the region has been backed by dc extent.
> > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > extent is added, all the bits of the blocks in the extent will be set,
> > which will be cleared when the extent is released.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> Hi Fan,
> 
> A few of the bits of feedback apply broadly across the series.  Given I'm
> rebasing this anyway to give myself something to test I'll tidy things up
> (feel free to disagree with and revert any changes !) 
> and push a tree out in next day or two.  I'll message when I've done so.
> 
> Jonathan
> 

I'll review here but note I've changed all this in my tree anyway 
unless I specifically add questions etc.

> > ---
> >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> >  include/hw/cxl/cxl_device.h |   1 +
> >  2 files changed, 156 insertions(+)
> > 
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 41a828598a..51943a36fc 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >          /* dsmad_handle is set when creating cdat table entries */
> >          region->flags = 0;
> >  
> > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> 
> In common with many allocators in qemu if this fails it calls abort()
> internally so no need to handle potential errors.
> 
> > +        if (!region->blk_bitmap) {
> > +            break;
> > +        }
> > +
> >          region_base += region->len;
> >      }
> > +
> > +    if (i < ct3d->dc.num_regions) {
> > +        while (--i >= 0) {
> > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > +        }
> > +        return -1;
> > +    }
> > +
> >      QTAILQ_INIT(&ct3d->dc.extents);
> >  
> >      return 0;
> >  }
> >  
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    int i;
> > +    struct CXLDCD_Region *region;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        g_free(region->blk_bitmap);
> > +    }
> > +}
> > +
> >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      }
> >  }
> >  
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > + * accessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,

I'd prefer all functions to be in the ct3 namespace.

> > +        uint64_t len)
> > +{
> > +    int i;

A large chunk of stuff here is repeated as it is just finding the
relevant region.  Pulled out to a ct3_find_dc_region() utility function.

> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     **/
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > +            len / region->block_size);
> > +}
> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > +}
> > +
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > + * inaccessible, this happens when a dc extent is added and accepted by the
> > + * host.
Second part of the comment is wrong (cut and paste fun ;)

> > + */
> > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > +}
> > +
> >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> >  {
> >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          *as = &ct3d->hostpmem_as;
> >          *dpa_offset -= vmr_size;
> >      } else {
> > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > +            return -ENODEV;
> > +        }
> > +
> >          *as = &ct3d->dc.host_dc_as;
> >          *dpa_offset -= (vmr_size + pmr_size);
> >      }
> > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> >      }
> >  
> >      g_free(extents);
> > +
> > +    /* Another choice is to do the set/clear after getting mailbox response*/

I haven't changed this yet - but it needs to be done on host acceptance, not on
the QMP command. We also need to validate it - so keep a record of what has
been offered and not yet accepted.  Unfortunately that probably doubles the bitmaps :(

I've updated the comment to reflect this.
> > +    list = records;
> > +    while (list) {
> > +        dpa = list->value->dpa * 1024 * 1024;
* MiB
> > +        len = list->value->len * 1024 * 1024;
> > +        rid = list->value->region_id;
> > +
> > +        switch (type) {
> > +        case DC_EVENT_ADD_CAPACITY:
> > +            set_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        case DC_EVENT_RELEASE_CAPACITY:
> > +            clear_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        default:
> > +            error_setg(errp, "DC event type not handled yet");
> > +            break;
> > +        }
> > +        list = list->next;
> > +    }
> >  }
> >  
> >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 01a5eaca48..1f85c88017 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> >      uint64_t block_size;
> >      uint32_t dsmadhandle;
> >      uint8_t flags;
> > +    unsigned long *blk_bitmap;
> >  } CXLDCD_Region;
> >  
> >  struct CXLType3Dev {  
> 
> 
> 


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
@ 2023-08-07  9:37           ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-07  9:37 UTC (permalink / raw)
  To: Jonathan Cameron via
  Cc: Jonathan Cameron, Fan Ni, linux-cxl, gregory.price, hchkuo,
	cbrowy, ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Mon, 7 Aug 2023 09:53:42 +0100
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> On Tue, 25 Jul 2023 18:39:56 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
> 
> > From: Fan Ni <nifan@outlook.com>
> > 
> > Not all dpa range in the dc regions is valid to access until an extent
> > covering the range has been added. Add a bitmap for each region to
> > record whether a dc block in the region has been backed by dc extent.
> > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > extent is added, all the bits of the blocks in the extent will be set,
> > which will be cleared when the extent is released.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> Hi Fan,
> 
> A few of the bits of feedback apply broadly across the series.  Given I'm
> rebasing this anyway to give myself something to test I'll tidy things up
> (feel free to disagree with and revert any changes !) 
> and push a tree out in next day or two.  I'll message when I've done so.
> 
> Jonathan
> 

I'll review here but note I've changed all this in my tree anyway 
unless I specifically add questions etc.

> > ---
> >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> >  include/hw/cxl/cxl_device.h |   1 +
> >  2 files changed, 156 insertions(+)
> > 
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 41a828598a..51943a36fc 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >          /* dsmad_handle is set when creating cdat table entries */
> >          region->flags = 0;
> >  
> > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> 
> In common with many allocators in qemu if this fails it calls abort()
> internally so no need to handle potential errors.
> 
> > +        if (!region->blk_bitmap) {
> > +            break;
> > +        }
> > +
> >          region_base += region->len;
> >      }
> > +
> > +    if (i < ct3d->dc.num_regions) {
> > +        while (--i >= 0) {
> > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > +        }
> > +        return -1;
> > +    }
> > +
> >      QTAILQ_INIT(&ct3d->dc.extents);
> >  
> >      return 0;
> >  }
> >  
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    int i;
> > +    struct CXLDCD_Region *region;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        g_free(region->blk_bitmap);
> > +    }
> > +}
> > +
> >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      }
> >  }
> >  
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > + * accessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,

I'd prefer all functions to be in the ct3 namespace.

> > +        uint64_t len)
> > +{
> > +    int i;

A large chunk of stuff here is repeated as it is just finding the
relevant region.  Pulled out to a ct3_find_dc_region() utility function.

> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     **/
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > +            len / region->block_size);
> > +}
> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > +}
> > +
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > + * inaccessible, this happens when a dc extent is added and accepted by the
> > + * host.
Second part of the comment is wrong (cut and paste fun ;)

> > + */
> > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > +}
> > +
> >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> >  {
> >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          *as = &ct3d->hostpmem_as;
> >          *dpa_offset -= vmr_size;
> >      } else {
> > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > +            return -ENODEV;
> > +        }
> > +
> >          *as = &ct3d->dc.host_dc_as;
> >          *dpa_offset -= (vmr_size + pmr_size);
> >      }
> > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> >      }
> >  
> >      g_free(extents);
> > +
> > +    /* Another choice is to do the set/clear after getting mailbox response*/

I haven't changed this yet - but it needs to be done on host acceptance, not on
the QMP command. We also need to validate it - so keep a record of what has
been offered and not yet accepted.  Unfortunately that probably doubles the bitmaps :(

I've updated the comment to reflect this.
> > +    list = records;
> > +    while (list) {
> > +        dpa = list->value->dpa * 1024 * 1024;
* MiB
> > +        len = list->value->len * 1024 * 1024;
> > +        rid = list->value->region_id;
> > +
> > +        switch (type) {
> > +        case DC_EVENT_ADD_CAPACITY:
> > +            set_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        case DC_EVENT_RELEASE_CAPACITY:
> > +            clear_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        default:
> > +            error_setg(errp, "DC event type not handled yet");
> > +            break;
> > +        }
> > +        list = list->next;
> > +    }
> >  }
> >  
> >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 01a5eaca48..1f85c88017 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> >      uint64_t block_size;
> >      uint32_t dsmadhandle;
> >      uint8_t flags;
> > +    unsigned long *blk_bitmap;
> >  } CXLDCD_Region;
> >  
> >  struct CXLType3Dev {  
> 
> 
> 



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents
  2023-07-25 18:39     ` [Qemu PATCH v2 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents Fan Ni
@ 2023-08-07 10:35         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-07 10:35 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Since fabric manager emulation is not supported yet, the change implements
> the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> 1. Add dynamic capacity extents:
> 
> For example, the command to add two continuous extents (each is 128MB long)
> to region 0 (starting at dpa offset 0 and 128MB) looks like below:
> 
> { "execute": "qmp_capabilities" }
> 
> { "execute": "cxl-add-dynamic-capacity-event",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "extents": [
>       {
>           "region-id": 0,
>           "dpa": 0,
>           "len": 128
>       },
>       {
>           "region-id": 0,
>           "dpa": 128,
>           "len": 128
>       }
>       ]
>   }
> }
> 
> 2. Release dynamic capacity extents:
> 
> For example, the command to release an extent of size 128MB from region 0
> (starting at dpa offset 128MB) look like below:
> 
> { "execute": "cxl-release-dynamic-capacity-event",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "extents": [
>       {
>           "region-id": 0,
>           "dpa": 128,
>           "len": 128
>       }
>       ]
>   }
> }
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/mem/cxl_type3.c          | 145 ++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3_stubs.c    |   6 ++
>  include/hw/cxl/cxl_events.h |  16 ++++
>  qapi/cxl.json               |  49 ++++++++++++
>  4 files changed, 216 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index f1170b8047..41a828598a 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1817,6 +1817,151 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
>      }
>  }
>  
> +static const QemuUUID dynamic_capacity_uuid = {
> +    .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
> +            0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
> +};
> +
> +/*
> + * cxl r3.0: Table 8-47
> + * 00h: add capacity
> + * 01h: release capacity
> + * 02h: forced capacity release
> + * 03h: region configuration updated
> + * 04h: Add capacity response
> + * 05h: capacity released

If we explicitly set the values in the enum below then this
comment only adds the useful reference. Hence I've done that
and updated reference to my preferred format.
Also moved the reference up a few lines so it covers the
UUID as well.

> + */
> +enum DC_Event_Type {
> +    DC_EVENT_ADD_CAPACITY,
> +    DC_EVENT_RELEASE_CAPACITY,
> +    DC_EVENT_FORCED_RELEASE_CAPACITY,
> +    DC_EVENT_REGION_CONFIG_UPDATED,
> +    DC_EVENT_ADD_CAPACITY_RSP,
> +    DC_EVENT_CAPACITY_RELEASED,
> +    DC_EVENT_NUM
> +};
> +
> +#define MEM_BLK_SIZE_MB 128
> +static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> +        CxlEventLog log, enum DC_Event_Type type,
> +        uint16_t hid, CXLDCExtentRecordList *records, Error **errp)
> +{
> +    Object *obj = object_resolve_path(path, NULL);
> +    CXLEventDynamicCapacity dCap;
> +    CXLEventRecordHdr *hdr = &dCap.hdr;
> +    CXLDeviceState *cxlds;
> +    CXLType3Dev *dcd;
> +    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> +    uint32_t num_extents = 0;
> +    CXLDCExtentRecordList *list = records;
For consistency (as we reset this for second pass) I've moved the
setting of this down to just above the first loop.

> +    CXLDCExtent_raw *extents;
> +    uint64_t dpa, len;
> +    uint8_t rid = 0;

> +    int i;
> +
> +    if (!obj) {
> +        error_setg(errp, "Unable to resolve path");
> +        return;
> +    }
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        error_setg(errp, "Path not point to a valid CXL type3 device");
> +        return;
> +    }
> +
> +    dcd = CXL_TYPE3(obj);
> +    cxlds = &dcd->cxl_dstate;

Only used once so I've moved it inline.

> +    memset(&dCap, 0, sizeof(dCap));

Can use dCap = {}; It's packed so no holes to be covered by the memset.
As a side note, I'd have done this after the next check if we were doing
it explicitly.  With the {} approach we can rely on compiler to optimize
when it is done.


> +
> +    if (!dcd->dc.num_regions) {
> +        error_setg(errp, "No dynamic capacity support from the device");
> +        return;
> +    }
> +
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;

MiB

> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        if (rid >= dcd->dc.num_regions) {
> +            error_setg(errp, "region id is too large");
> +            return;
> +        }
> +
> +        if (dpa % dcd->dc.regions[rid].block_size
> +                || len % dcd->dc.regions[rid].block_size) {
> +            error_setg(errp, "dpa or len is not aligned to region block size");
> +            return;
> +        }
> +
> +        if (dpa + len > dcd->dc.regions[rid].decode_len * 256 * 1024 * 1024) {
> +            error_setg(errp, "extent range is beyond the region end");
> +            return;
> +        }
> +
> +        num_extents++;
> +        list = list->next;
> +    }
> +
> +    i = 0;
> +    list = records;
> +    extents = g_new0(CXLDCExtent_raw, num_extents);
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;

MiB

> +        rid = list->value->region_id;
> +
> +        extents[i].start_dpa = dpa + dcd->dc.regions[rid].base;
> +        extents[i].len = len;
> +        memset(extents[i].tag, 0, 0x10);

I'd suggest we add a tag sooner rather than later. Can make it optional
and default to zero though.  Note I'm not making that change whilst rebasing
this.

> +        extents[i].shared_seq = 0;
> +
> +        list = list->next;
> +        i++;
> +    }
> +
> +    /*
> +     * 8.2.9.1.5
> +     * All Dynamic Capacity event records shall set the Event Record
> +     * Severity field in the Common Event Record Format to Informational
> +     * Event. All Dynamic Capacity related events shall be logged in the
> +     * Dynamic Capacity Event Log.
> +     */
> +    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
> +            cxl_device_get_timestamp(&dcd->cxl_dstate));
> +
> +    dCap.type = type;
> +    stw_le_p(&dCap.host_id, hid);
> +    /* only valid for DC_REGION_CONFIG_UPDATED event */
> +    dCap.updated_region_id = rid;
> +    for (i = 0; i < num_extents; i++) {
> +        memcpy(&dCap.dynamic_capacity_extent, &extents[i]
> +                , sizeof(CXLDCExtent_raw));
> +
> +        if (cxl_event_insert(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP,
> +                    (CXLEventRecordRaw *)&dCap)) {
> +            cxl_event_irq_assert(dcd);
> +        }
> +    }
> +
> +    g_free(extents);
Can use g_autofree given lifetime of this can be governed by the scope.
Qemu code does this a lot - it's just starting to sneak into the kernel
as well and makes this sort of handling much nicer as they end up more
or less looking like they are on the stack ;)

> +}
> +
> +void qmp_cxl_add_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records,
Don't need the struct as there is a typedef autocreated from the qmp
schema stuff.

> +        Error **errp)
Where it doesn't go over 80 chars, prefer aligned to one space after the (

> +{
> +   qmp_cxl_process_dynamic_capacity_event(path, CXL_EVENT_LOG_INFORMATIONAL,
> +           DC_EVENT_ADD_CAPACITY, 0, records, errp);
> +}
> +
> +void qmp_cxl_release_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records,
> +        Error **errp)
> +{
> +    qmp_cxl_process_dynamic_capacity_event(path, CXL_EVENT_LOG_INFORMATIONAL,
> +            DC_EVENT_RELEASE_CAPACITY, 0, records, errp);
> +}
> +
>  static void ct3_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> index f3e4a9fa72..482229f3bd 100644
> --- a/hw/mem/cxl_type3_stubs.c
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -56,3 +56,9 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
>  {
>      error_setg(errp, "CXL Type 3 support is not compiled in");
>  }
> +
> +void qmp_cxl_add_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records, Error **errp) {}

Good to have the error prints as done for the other cases.

> +
> +void qmp_cxl_release_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records, Error **errp) {}
> diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
> index 089ba2091f..3baf745f8d 100644
> --- a/include/hw/cxl/cxl_events.h
> +++ b/include/hw/cxl/cxl_events.h
> @@ -165,4 +165,20 @@ typedef struct CXLEventMemoryModule {
>      uint8_t reserved[0x3d];
>  } QEMU_PACKED CXLEventMemoryModule;
>  
> +/*
> + * Dynamic Capacity Event Record
> + * CXL Rev 3.0 Section 8.2.9.2.1.5: Table 8-47
> + * All fields little endian.
> + */
> +typedef struct CXLEventDynamicCapacity {
> +    CXLEventRecordHdr hdr;
> +    uint8_t type;
> +    uint8_t reserved1;
> +    uint16_t host_id;
> +    uint8_t updated_region_id;
> +    uint8_t reserved2[3];
> +    uint8_t dynamic_capacity_extent[0x28]; /* defined in cxl_device.h */
> +    uint8_t reserved[0x20];
> +} QEMU_PACKED CXLEventDynamicCapacity;
> +
>  #endif /* CXL_EVENTS_H */
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 05c560cfe5..fb04ec4c41 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -369,3 +369,52 @@
>  ##
>  {'command': 'cxl-inject-correctable-error',
>   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> +
> +##
> +# @CXLDCExtentRecord:
> +#
> +# Record of a single extent to add/release
> +#
> +# @region-id: id of the region where the extent to add/release
> +# @dpa: start dpa (in MiB) of the extent, related to region base address
> +# @len: extent size (in MiB)
> +#
> +# Since: 8.0
> +##
> +{ 'struct': 'CXLDCExtentRecord',
> +  'data': {
> +      'region-id': 'uint8',
> +      'dpa':'uint64',
> +      'len': 'uint64'
> +  }
> +}
> +
> +##
> +# @cxl-add-dynamic-capacity-event:
In later patches this is going to add the capacity - the event is
just part of it. So I've renamed to simply cxl-add-dynamic-capacity
and added a bit about it 'starting the add capacity flow'.

> +#
> +# Command to add dynamic capacity extent event
> +#
> +# @path: CXL DCD canonical QOM path
> +# @extents: Extents to add

Added a highly speculative (and optimistic) Since: 8.2
as hopefully we can remember to update them.  I'm thinking this
is at least 9.0 material but you never know! :)

> +#
> +##
> +{ 'command': 'cxl-add-dynamic-capacity-event',
> +  'data': { 'path': 'str',
> +            'extents': [ 'CXLDCExtentRecord' ]
> +           }
> +}
> +
> +##
> +# @cxl-release-dynamic-capacity-event:
> +#
> +# Command to release dynamic capacity extent event
> +#
> +# @path: CXL DCD canonical QOM path
> +# @extents: Extents to release
> +#
> +##
> +{ 'command': 'cxl-release-dynamic-capacity-event',
> +  'data': { 'path': 'str',
> +            'extents': [ 'CXLDCExtentRecord' ]
> +           }
> +}


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents
@ 2023-08-07 10:35         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-07 10:35 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Since fabric manager emulation is not supported yet, the change implements
> the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> 1. Add dynamic capacity extents:
> 
> For example, the command to add two continuous extents (each is 128MB long)
> to region 0 (starting at dpa offset 0 and 128MB) looks like below:
> 
> { "execute": "qmp_capabilities" }
> 
> { "execute": "cxl-add-dynamic-capacity-event",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "extents": [
>       {
>           "region-id": 0,
>           "dpa": 0,
>           "len": 128
>       },
>       {
>           "region-id": 0,
>           "dpa": 128,
>           "len": 128
>       }
>       ]
>   }
> }
> 
> 2. Release dynamic capacity extents:
> 
> For example, the command to release an extent of size 128MB from region 0
> (starting at dpa offset 128MB) look like below:
> 
> { "execute": "cxl-release-dynamic-capacity-event",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "extents": [
>       {
>           "region-id": 0,
>           "dpa": 128,
>           "len": 128
>       }
>       ]
>   }
> }
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>  hw/mem/cxl_type3.c          | 145 ++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3_stubs.c    |   6 ++
>  include/hw/cxl/cxl_events.h |  16 ++++
>  qapi/cxl.json               |  49 ++++++++++++
>  4 files changed, 216 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index f1170b8047..41a828598a 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1817,6 +1817,151 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
>      }
>  }
>  
> +static const QemuUUID dynamic_capacity_uuid = {
> +    .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
> +            0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
> +};
> +
> +/*
> + * cxl r3.0: Table 8-47
> + * 00h: add capacity
> + * 01h: release capacity
> + * 02h: forced capacity release
> + * 03h: region configuration updated
> + * 04h: Add capacity response
> + * 05h: capacity released

If we explicitly set the values in the enum below then this
comment only adds the useful reference. Hence I've done that
and updated reference to my preferred format.
Also moved the reference up a few lines so it covers the
UUID as well.

> + */
> +enum DC_Event_Type {
> +    DC_EVENT_ADD_CAPACITY,
> +    DC_EVENT_RELEASE_CAPACITY,
> +    DC_EVENT_FORCED_RELEASE_CAPACITY,
> +    DC_EVENT_REGION_CONFIG_UPDATED,
> +    DC_EVENT_ADD_CAPACITY_RSP,
> +    DC_EVENT_CAPACITY_RELEASED,
> +    DC_EVENT_NUM
> +};
> +
> +#define MEM_BLK_SIZE_MB 128
> +static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> +        CxlEventLog log, enum DC_Event_Type type,
> +        uint16_t hid, CXLDCExtentRecordList *records, Error **errp)
> +{
> +    Object *obj = object_resolve_path(path, NULL);
> +    CXLEventDynamicCapacity dCap;
> +    CXLEventRecordHdr *hdr = &dCap.hdr;
> +    CXLDeviceState *cxlds;
> +    CXLType3Dev *dcd;
> +    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> +    uint32_t num_extents = 0;
> +    CXLDCExtentRecordList *list = records;
For consistency (as we reset this for second pass) I've moved the
setting of this down to just above the first loop.

> +    CXLDCExtent_raw *extents;
> +    uint64_t dpa, len;
> +    uint8_t rid = 0;

> +    int i;
> +
> +    if (!obj) {
> +        error_setg(errp, "Unable to resolve path");
> +        return;
> +    }
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        error_setg(errp, "Path not point to a valid CXL type3 device");
> +        return;
> +    }
> +
> +    dcd = CXL_TYPE3(obj);
> +    cxlds = &dcd->cxl_dstate;

Only used once so I've moved it inline.

> +    memset(&dCap, 0, sizeof(dCap));

Can use dCap = {}; It's packed so no holes to be covered by the memset.
As a side note, I'd have done this after the next check if we were doing
it explicitly.  With the {} approach we can rely on compiler to optimize
when it is done.


> +
> +    if (!dcd->dc.num_regions) {
> +        error_setg(errp, "No dynamic capacity support from the device");
> +        return;
> +    }
> +
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;

MiB

> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        if (rid >= dcd->dc.num_regions) {
> +            error_setg(errp, "region id is too large");
> +            return;
> +        }
> +
> +        if (dpa % dcd->dc.regions[rid].block_size
> +                || len % dcd->dc.regions[rid].block_size) {
> +            error_setg(errp, "dpa or len is not aligned to region block size");
> +            return;
> +        }
> +
> +        if (dpa + len > dcd->dc.regions[rid].decode_len * 256 * 1024 * 1024) {
> +            error_setg(errp, "extent range is beyond the region end");
> +            return;
> +        }
> +
> +        num_extents++;
> +        list = list->next;
> +    }
> +
> +    i = 0;
> +    list = records;
> +    extents = g_new0(CXLDCExtent_raw, num_extents);
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;

MiB

> +        rid = list->value->region_id;
> +
> +        extents[i].start_dpa = dpa + dcd->dc.regions[rid].base;
> +        extents[i].len = len;
> +        memset(extents[i].tag, 0, 0x10);

I'd suggest we add a tag sooner rather than later. Can make it optional
and default to zero though.  Note I'm not making that change whilst rebasing
this.

> +        extents[i].shared_seq = 0;
> +
> +        list = list->next;
> +        i++;
> +    }
> +
> +    /*
> +     * 8.2.9.1.5
> +     * All Dynamic Capacity event records shall set the Event Record
> +     * Severity field in the Common Event Record Format to Informational
> +     * Event. All Dynamic Capacity related events shall be logged in the
> +     * Dynamic Capacity Event Log.
> +     */
> +    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
> +            cxl_device_get_timestamp(&dcd->cxl_dstate));
> +
> +    dCap.type = type;
> +    stw_le_p(&dCap.host_id, hid);
> +    /* only valid for DC_REGION_CONFIG_UPDATED event */
> +    dCap.updated_region_id = rid;
> +    for (i = 0; i < num_extents; i++) {
> +        memcpy(&dCap.dynamic_capacity_extent, &extents[i]
> +                , sizeof(CXLDCExtent_raw));
> +
> +        if (cxl_event_insert(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP,
> +                    (CXLEventRecordRaw *)&dCap)) {
> +            cxl_event_irq_assert(dcd);
> +        }
> +    }
> +
> +    g_free(extents);
Can use g_autofree given lifetime of this can be governed by the scope.
Qemu code does this a lot - it's just starting to sneak into the kernel
as well and makes this sort of handling much nicer as they end up more
or less looking like they are on the stack ;)

> +}
> +
> +void qmp_cxl_add_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records,
Don't need the struct as there is a typedef autocreated from the qmp
schema stuff.

> +        Error **errp)
Where it doesn't go over 80 chars, prefer aligned to one space after the (

> +{
> +   qmp_cxl_process_dynamic_capacity_event(path, CXL_EVENT_LOG_INFORMATIONAL,
> +           DC_EVENT_ADD_CAPACITY, 0, records, errp);
> +}
> +
> +void qmp_cxl_release_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records,
> +        Error **errp)
> +{
> +    qmp_cxl_process_dynamic_capacity_event(path, CXL_EVENT_LOG_INFORMATIONAL,
> +            DC_EVENT_RELEASE_CAPACITY, 0, records, errp);
> +}
> +
>  static void ct3_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> index f3e4a9fa72..482229f3bd 100644
> --- a/hw/mem/cxl_type3_stubs.c
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -56,3 +56,9 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
>  {
>      error_setg(errp, "CXL Type 3 support is not compiled in");
>  }
> +
> +void qmp_cxl_add_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records, Error **errp) {}

Good to have the error prints as done for the other cases.

> +
> +void qmp_cxl_release_dynamic_capacity_event(const char *path,
> +        struct CXLDCExtentRecordList  *records, Error **errp) {}
> diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
> index 089ba2091f..3baf745f8d 100644
> --- a/include/hw/cxl/cxl_events.h
> +++ b/include/hw/cxl/cxl_events.h
> @@ -165,4 +165,20 @@ typedef struct CXLEventMemoryModule {
>      uint8_t reserved[0x3d];
>  } QEMU_PACKED CXLEventMemoryModule;
>  
> +/*
> + * Dynamic Capacity Event Record
> + * CXL Rev 3.0 Section 8.2.9.2.1.5: Table 8-47
> + * All fields little endian.
> + */
> +typedef struct CXLEventDynamicCapacity {
> +    CXLEventRecordHdr hdr;
> +    uint8_t type;
> +    uint8_t reserved1;
> +    uint16_t host_id;
> +    uint8_t updated_region_id;
> +    uint8_t reserved2[3];
> +    uint8_t dynamic_capacity_extent[0x28]; /* defined in cxl_device.h */
> +    uint8_t reserved[0x20];
> +} QEMU_PACKED CXLEventDynamicCapacity;
> +
>  #endif /* CXL_EVENTS_H */
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 05c560cfe5..fb04ec4c41 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -369,3 +369,52 @@
>  ##
>  {'command': 'cxl-inject-correctable-error',
>   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> +
> +##
> +# @CXLDCExtentRecord:
> +#
> +# Record of a single extent to add/release
> +#
> +# @region-id: id of the region where the extent to add/release
> +# @dpa: start dpa (in MiB) of the extent, related to region base address
> +# @len: extent size (in MiB)
> +#
> +# Since: 8.0
> +##
> +{ 'struct': 'CXLDCExtentRecord',
> +  'data': {
> +      'region-id': 'uint8',
> +      'dpa':'uint64',
> +      'len': 'uint64'
> +  }
> +}
> +
> +##
> +# @cxl-add-dynamic-capacity-event:
In later patches this is going to add the capacity - the event is
just part of it. So I've renamed to simply cxl-add-dynamic-capacity
and added a bit about it 'starting the add capacity flow'.

> +#
> +# Command to add dynamic capacity extent event
> +#
> +# @path: CXL DCD canonical QOM path
> +# @extents: Extents to add

Added a highly speculative (and optimistic) Since: 8.2
as hopefully we can remember to update them.  I'm thinking this
is at least 9.0 material but you never know! :)

> +#
> +##
> +{ 'command': 'cxl-add-dynamic-capacity-event',
> +  'data': { 'path': 'str',
> +            'extents': [ 'CXLDCExtentRecord' ]
> +           }
> +}
> +
> +##
> +# @cxl-release-dynamic-capacity-event:
> +#
> +# Command to release dynamic capacity extent event
> +#
> +# @path: CXL DCD canonical QOM path
> +# @extents: Extents to release
> +#
> +##
> +{ 'command': 'cxl-release-dynamic-capacity-event',
> +  'data': { 'path': 'str',
> +            'extents': [ 'CXLDCExtentRecord' ]
> +           }
> +}



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response
  2023-07-25 18:39     ` [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response Fan Ni
@ 2023-08-07 11:42         ` Jonathan Cameron
  2023-09-08 13:00       ` Jørgen Hansen
  1 sibling, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-07 11:42 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Per CXL spec 3.0, two mailbox commands are implemented:
> Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
> Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

I'm reviewing these backwards mostly because I'm tidying them up in that
order (makes rebases easier).  Hence some of this only makes sense in light of
patch 9 comments!

In my rebase of this, I've made some changes that are non trivial
so definitely want you to look at them.

I also left what I think is a nasty bug.  If we get extents
added next to each other they aren't fused, so a release extent that
covers more than one will fail.  As far as I can tell that's a valid,
if weird, corner case.

Jonathan


> ---
>  hw/cxl/cxl-mailbox-utils.c  | 253 ++++++++++++++++++++++++++++++++++++
>  include/hw/cxl/cxl_device.h |   3 +-
>  2 files changed, 255 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 3d25a9697e..1e4944da95 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -84,6 +84,8 @@ enum {
>      DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
>          #define GET_DC_CONFIG          0x0
>          #define GET_DYN_CAP_EXT_LIST   0x1
> +        #define ADD_DYN_CAP_RSP        0x2
> +        #define RELEASE_DYN_CAP        0x3
>      PHYSICAL_SWITCH = 0x51
>          #define IDENTIFY_SWITCH_DEVICE      0x0
>  };
> @@ -1086,6 +1088,251 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * Check whether the bits at addr between [nr, nr+size) are all set,
> + * return 1 if all 1s, else return 0
> + */
> +static inline int test_bits(const unsigned long *addr, int nr, int size)
bool

Also, let the compiler make decisions on inlining.

Hmm. Documentation lacking on find_next_zero_bit() but I think it returns
the size if there aren't any so this should be fine.

> +{
> +    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
> +
> +    return (res >= nr + size) ? 1 : 0;
> +}
> +
> +/*
> + * Find dynamic capacity region id based on dpa range [dpa, dpa+len)
> + */
> +static uint8_t find_region_id(struct CXLType3Dev *dev, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int8_t i = dev->dc.num_regions - 1;
> +
> +    while (i > 0 && dpa < dev->dc.regions[i].base) {
> +        i--;
> +    }
This is another search function, similar to the one I factored out when applying
patch 9.  I'll pull that function back here instead of having it in patch 9
+ rename it to be more in keeping with functions in this file.

The handling is a little different (NULL or the region pointer), and that also
simplifies the code around where it is used a little.

> +
> +    if (dpa < dev->dc.regions[i].base
> +            || dpa + len > dev->dc.regions[i].base + dev->dc.regions[i].len) {
> +        return dev->dc.num_regions;
> +    }
> +
> +    return i;
> +}
> +
> +static void insert_extent_to_extent_list(CXLDCDExtentList *list, uint64_t dpa,
> +        uint64_t len, uint8_t *tag, uint16_t shared_seq)
> +{
> +    CXLDCD_Extent *extent;
> +    extent = g_new0(CXLDCD_Extent, 1);
> +    extent->start_dpa = dpa;
> +    extent->len = len;
> +    if (tag) {
> +        memcpy(extent->tag, tag, 0x10);
> +    } else {
> +        memset(extent->tag, 0, 0x10);
> +    }
> +    extent->shared_seq = shared_seq;
> +
> +    QTAILQ_INSERT_TAIL(list, extent, node);
> +}
> +
Added a reference to both Table 8-129 and Table 8-131 here

> +typedef struct updated_dc_extent_list_in_pl {
> +    uint32_t num_entries_updated;
> +    uint8_t rsvd[4];
> +    struct { /* r3.0: Table 8-130 */

I reformatted this and added name of table. Makes it easier
to find in CXL rN.0

> +        uint64_t start_dpa;
> +        uint64_t len;
> +        uint8_t rsvd[8];
> +    } QEMU_PACKED updated_entries[];
> +} QEMU_PACKED updated_dc_extent_list_in_pl;
> +
> +/*
> + * The function only check the input extent list against itself.

I haven't added any info here yet, but feels like this function needs
comments on what those checks are doing.

> + */
> +static CXLRetCode detect_malformed_extent_list(CXLType3Dev *dev,

prefixed with cxl_ to keep everything that is easy to do namespaced.

> +        const updated_dc_extent_list_in_pl *in)
> +{
> +    unsigned long *blk_bitmap;
> +    uint64_t min_block_size = dev->dc.regions[0].block_size;

If we use UINT64_MAX then anything seen will be less than it and
we can just loop over all regions.

> +    struct CXLDCD_Region *region = &dev->dc.regions[0];
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    uint8_t rid;
> +    CXLRetCode ret;
> +
> +    for (i = 1; i < dev->dc.num_regions; i++) {
> +        region = &dev->dc.regions[i];
> +        if (min_block_size > region->block_size) {
> +            min_block_size = region->block_size;
> +        }
	min_block_size = MIN(min_block_size, region->block_size);
> +    }
> +
> +    blk_bitmap = bitmap_new((region->len + region->base

This is tricky to read as it relies on a side effect of the loop above.
Better to use
	&dev->dc.regions[dev->dc.num_regions - 1]
explicitly via a
CXLDCDregion *lastregion = &dev->dc.regions[dev->dc.num_regions - 1]
though I also renamed dev to ct3d to make the type more obvious.



> +                - dev->dc.regions[0].base) / min_block_size);
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        rid = find_region_id(dev, dpa, len);
rid is never needed here as such - just a check on whether the
region exists. Hence use the find function previously pulled out of patch 9.

> +        if (rid == dev->dc.num_regions) {
> +            ret = CXL_MBOX_INVALID_PA;
> +            goto out;
> +        }
> +
> +        region = &dev->dc.regions[rid];
> +        if (dpa % region->block_size || len % region->block_size) {
> +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> +            goto out;
> +        }
> +        /* the dpa range already covered by some other extents in the list */
> +        if (test_bits(blk_bitmap, dpa / min_block_size, len / min_block_size)) {
> +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> +            goto out;
> +        }
> +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> +   }
> +
> +    ret = CXL_MBOX_SUCCESS;
> +
> +out:

Great place for a g_autofree magic pointer as then can return above without any
manual cleanup.

> +    g_free(blk_bitmap);
> +    return ret;
> +}
> +
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.3
> + * Add Dynamic Capacity Response (opcode 4802h)
> + * Assume an extent is added only after the response is processed successfully
> + * TODO: for better extent list validation, a better solution would be
> + * maintaining a pending extent list and use it to verify the extent list in
> + * the response.
> + */
> +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate, uint16_t *len_unused)
> +{
> +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> + 
> +    if (in->num_entries_updated == 0) {
> +        ret = CXL_MBOX_SUCCESS;
> +        goto out;

I always prefer direct returns as it makes the flows easier to follow.
(don't need to go see what cleanup is there - in this case none!)

> +    }
> +
> +    ret = detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        goto out;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        /*
> +         * Check if the DPA range of the to-be-added extent overlaps with
> +         * existing extent list maintained by the device.
> +         */
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (ent->start_dpa == dpa && ent->len == len) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            } else if (ent->start_dpa <= dpa
> +                    && dpa + len <= ent->start_dpa + ent->len) {

I think this second one always incorporates the first case.
I haven't changed this yet though...  Added a todo note.

> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            } else if ((dpa < ent->start_dpa + ent->len
> +                        && dpa + len > ent->start_dpa + ent->len)
> +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            }
> +        }
> +
> +        /*
> +         * TODO: add a pending extent list based on event log record and verify
> +         * the input response
> +         */
> +
> +        insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> +    }
> +    ret = CXL_MBOX_SUCCESS;
> +
> +out:
> +    return ret;
> +}
> +
> +/*
> + * Spec 3.0: 8.2.9.8.9.4
> + * Release Dynamic Capacity (opcode 4803h)
> + **/
> +static CXLRetCode cmd_dcd_release_dyn_cap(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len_unused)
> +{
> +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> +
> +    if (in->num_entries_updated == 0) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    ret = detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (ent->start_dpa == dpa && ent->len == len) {
> +                break;
> +            } else if (ent->start_dpa < dpa
<=  We want to do the partial but at start case.
Please check my logic on this! I'll test it later, but for now I've
made the change.

We should just let this code run even in the exact match case
as len1 == len2 == 0 and math is cheap.


> +                    && dpa + len <= ent->start_dpa + ent->len) {

Not commented on it though I've tidied this up in other places...
Local style is to have the boolean operator on the end of the line before
not the start of the next one.

> +                /* remove partial extent */
> +                uint64_t len1 = dpa - ent->start_dpa;
> +                uint64_t len2 = ent->start_dpa + ent->len - dpa - len;
> +
> +                if (len1) {
> +                    insert_extent_to_extent_list(extent_list, ent->start_dpa,
> +                            len1, NULL, 0);
> +                }
> +                if (len2) {
> +                    insert_extent_to_extent_list(extent_list, dpa + len, len2,
> +                            NULL, 0);
> +                }
> +                break;
> +            } else if ((dpa < ent->start_dpa + ent->len
> +                        && dpa + len > ent->start_dpa + ent->len)
> +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa))

This is rejecting attempt to remove a superset. Fair enough but does this code
fuse neighbouring extents? Left for now.


> +                return CXL_MBOX_INVALID_EXTENT_LIST;
> +        }
> +
> +        if (ent) {
> +            QTAILQ_REMOVE(extent_list, ent, node);
> +            g_free(ent);
> +        } else {
> +            /* Try to remove a non-existing extent */
> +            return CXL_MBOX_INVALID_PA;
> +        }
> +    }
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> + 
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1129,6 +1376,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>      [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
>          "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
>          8, 0 },
> +    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
> +        "ADD_DCD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
> +        ~0, IMMEDIATE_DATA_CHANGE },
> +    [DCD_CONFIG][RELEASE_DYN_CAP] = {
> +        "RELEASE_DCD_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
> +        ~0, IMMEDIATE_DATA_CHANGE },
>  };
>  
>  static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 3a338b3b37..01a5eaca48 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -130,7 +130,8 @@ typedef enum {
>      CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
>      CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
>      CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
> -    CXL_MBOX_MAX = 0x17
> +    CXL_MBOX_INVALID_EXTENT_LIST = 0x1E, /* cxl r3.0: Table 8-34*/
> +    CXL_MBOX_MAX = 0x1F
>  } CXLRetCode;
>  
>  struct cxl_cmd;



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response
@ 2023-08-07 11:42         ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-07 11:42 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Per CXL spec 3.0, two mailbox commands are implemented:
> Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
> Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

I'm reviewing these backwards mostly because I'm tidying them up in that
order (makes rebases easier).  Hence some of this only makes sense in light of
patch 9 comments!

In my rebase of this, I've made some changes that are non trivial
so definitely want you to look at them.

I also left what I think is a nasty bug.  If we get extents
added next to each other they aren't fused, so a release extent that
covers more than one will fail.  As far as I can tell that's a valid,
if weird, corner case.

Jonathan


> ---
>  hw/cxl/cxl-mailbox-utils.c  | 253 ++++++++++++++++++++++++++++++++++++
>  include/hw/cxl/cxl_device.h |   3 +-
>  2 files changed, 255 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 3d25a9697e..1e4944da95 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -84,6 +84,8 @@ enum {
>      DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
>          #define GET_DC_CONFIG          0x0
>          #define GET_DYN_CAP_EXT_LIST   0x1
> +        #define ADD_DYN_CAP_RSP        0x2
> +        #define RELEASE_DYN_CAP        0x3
>      PHYSICAL_SWITCH = 0x51
>          #define IDENTIFY_SWITCH_DEVICE      0x0
>  };
> @@ -1086,6 +1088,251 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * Check whether the bits at addr between [nr, nr+size) are all set,
> + * return 1 if all 1s, else return 0
> + */
> +static inline int test_bits(const unsigned long *addr, int nr, int size)
bool

Also, let the compiler make decisions on inlining.

Hmm. Documentation lacking on find_next_zero_bit() but I think it returns
the size if there aren't any so this should be fine.

> +{
> +    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
> +
> +    return (res >= nr + size) ? 1 : 0;
> +}
> +
> +/*
> + * Find dynamic capacity region id based on dpa range [dpa, dpa+len)
> + */
> +static uint8_t find_region_id(struct CXLType3Dev *dev, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int8_t i = dev->dc.num_regions - 1;
> +
> +    while (i > 0 && dpa < dev->dc.regions[i].base) {
> +        i--;
> +    }
This is another search function, similar to the one I factored out when applying
patch 9.  I'll pull that function back here instead of having it in patch 9
+ rename it to be more in keeping with functions in this file.

The handling is a little different (NULL or the region pointer), and that also
simplifies the code around where it is used a little.

> +
> +    if (dpa < dev->dc.regions[i].base
> +            || dpa + len > dev->dc.regions[i].base + dev->dc.regions[i].len) {
> +        return dev->dc.num_regions;
> +    }
> +
> +    return i;
> +}
> +
> +static void insert_extent_to_extent_list(CXLDCDExtentList *list, uint64_t dpa,
> +        uint64_t len, uint8_t *tag, uint16_t shared_seq)
> +{
> +    CXLDCD_Extent *extent;
> +    extent = g_new0(CXLDCD_Extent, 1);
> +    extent->start_dpa = dpa;
> +    extent->len = len;
> +    if (tag) {
> +        memcpy(extent->tag, tag, 0x10);
> +    } else {
> +        memset(extent->tag, 0, 0x10);
> +    }
> +    extent->shared_seq = shared_seq;
> +
> +    QTAILQ_INSERT_TAIL(list, extent, node);
> +}
> +
Added a reference to both Table 8-129 and Table 8-131 here

> +typedef struct updated_dc_extent_list_in_pl {
> +    uint32_t num_entries_updated;
> +    uint8_t rsvd[4];
> +    struct { /* r3.0: Table 8-130 */

I reformatted this and added name of table. Makes it easier
to find in CXL rN.0

> +        uint64_t start_dpa;
> +        uint64_t len;
> +        uint8_t rsvd[8];
> +    } QEMU_PACKED updated_entries[];
> +} QEMU_PACKED updated_dc_extent_list_in_pl;
> +
> +/*
> + * The function only check the input extent list against itself.

I haven't added any info here yet, but feels like this function needs
comments on what those checks are doing.

> + */
> +static CXLRetCode detect_malformed_extent_list(CXLType3Dev *dev,

prefixed with cxl_ to keep everything that is easy to do namespaced.

> +        const updated_dc_extent_list_in_pl *in)
> +{
> +    unsigned long *blk_bitmap;
> +    uint64_t min_block_size = dev->dc.regions[0].block_size;

If we use UINT64_MAX then anything seen will be less than it and
we can just loop over all regions.

> +    struct CXLDCD_Region *region = &dev->dc.regions[0];
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    uint8_t rid;
> +    CXLRetCode ret;
> +
> +    for (i = 1; i < dev->dc.num_regions; i++) {
> +        region = &dev->dc.regions[i];
> +        if (min_block_size > region->block_size) {
> +            min_block_size = region->block_size;
> +        }
	min_block_size = MIN(min_block_size, region->block_size);
> +    }
> +
> +    blk_bitmap = bitmap_new((region->len + region->base

This is tricky to read as it relies on a side effect of the loop above.
Better to use
	&dev->dc.regions[dev->dc.num_regions - 1]
explicitly via a
CXLDCDregion *lastregion = &dev->dc.regions[dev->dc.num_regions - 1]
though I also renamed dev to ct3d to make the type more obvious.



> +                - dev->dc.regions[0].base) / min_block_size);
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        rid = find_region_id(dev, dpa, len);
rid is never needed here as such - just a check on whether the
region exists. Hence use the find function previously pulled out of patch 9.

> +        if (rid == dev->dc.num_regions) {
> +            ret = CXL_MBOX_INVALID_PA;
> +            goto out;
> +        }
> +
> +        region = &dev->dc.regions[rid];
> +        if (dpa % region->block_size || len % region->block_size) {
> +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> +            goto out;
> +        }
> +        /* the dpa range already covered by some other extents in the list */
> +        if (test_bits(blk_bitmap, dpa / min_block_size, len / min_block_size)) {
> +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> +            goto out;
> +        }
> +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> +   }
> +
> +    ret = CXL_MBOX_SUCCESS;
> +
> +out:

Great place for a g_autofree magic pointer, as then we can return directly above
without any manual cleanup.

> +    g_free(blk_bitmap);
> +    return ret;
> +}
> +
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.3
> + * Add Dynamic Capacity Response (opcode 4802h)
> + * Assume an extent is added only after the response is processed successfully
> + * TODO: for better extent list validation, a better solution would be
> + * maintaining a pending extent list and use it to verify the extent list in
> + * the response.
> + */
> +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate, uint16_t *len_unused)
> +{
> +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> + 
> +    if (in->num_entries_updated == 0) {
> +        ret = CXL_MBOX_SUCCESS;
> +        goto out;

I always prefer direct returns as it makes the flows easier to follow.
(don't need to go see what cleanup is there - in this case none!)

> +    }
> +
> +    ret = detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        goto out;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        /*
> +         * Check if the DPA range of the to-be-added extent overlaps with
> +         * existing extent list maintained by the device.
> +         */
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (ent->start_dpa == dpa && ent->len == len) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            } else if (ent->start_dpa <= dpa
> +                    && dpa + len <= ent->start_dpa + ent->len) {

I think this second one always incorporates the first case.
I haven't changed this yet though...  Added a todo note.

> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            } else if ((dpa < ent->start_dpa + ent->len
> +                        && dpa + len > ent->start_dpa + ent->len)
> +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            }
> +        }
> +
> +        /*
> +         * TODO: add a pending extent list based on event log record and verify
> +         * the input response
> +         */
> +
> +        insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> +    }
> +    ret = CXL_MBOX_SUCCESS;
> +
> +out:
> +    return ret;
> +}
> +
> +/*
> + * Spec 3.0: 8.2.9.8.9.4
> + * Release Dynamic Capacity (opcode 4803h)
> + **/
> +static CXLRetCode cmd_dcd_release_dyn_cap(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len_unused)
> +{
> +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> +
> +    if (in->num_entries_updated == 0) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    ret = detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (ent->start_dpa == dpa && ent->len == len) {
> +                break;
> +            } else if (ent->start_dpa < dpa
This should be <=. We want to handle the partial release at the start of an extent.
Please check my logic on this! I'll test it later, but for now I've
made the change.

We should just let this code run even in the exact match case
as len1 == len2 == 0 and math is cheap.


> +                    && dpa + len <= ent->start_dpa + ent->len) {

Not commented on it though I've tidied this up in other places...
Local style is to have the boolean operator on the end of the line before
not the start of the next one.

> +                /* remove partial extent */
> +                uint64_t len1 = dpa - ent->start_dpa;
> +                uint64_t len2 = ent->start_dpa + ent->len - dpa - len;
> +
> +                if (len1) {
> +                    insert_extent_to_extent_list(extent_list, ent->start_dpa,
> +                            len1, NULL, 0);
> +                }
> +                if (len2) {
> +                    insert_extent_to_extent_list(extent_list, dpa + len, len2,
> +                            NULL, 0);
> +                }
> +                break;
> +            } else if ((dpa < ent->start_dpa + ent->len
> +                        && dpa + len > ent->start_dpa + ent->len)
> +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa))

This is rejecting attempt to remove a superset. Fair enough but does this code
fuse neighbouring extents? Left for now.


> +                return CXL_MBOX_INVALID_EXTENT_LIST;
> +        }
> +
> +        if (ent) {
> +            QTAILQ_REMOVE(extent_list, ent, node);
> +            g_free(ent);
> +        } else {
> +            /* Try to remove a non-existing extent */
> +            return CXL_MBOX_INVALID_PA;
> +        }
> +    }
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> + 
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1129,6 +1376,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>      [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
>          "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
>          8, 0 },
> +    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
> +        "ADD_DCD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
> +        ~0, IMMEDIATE_DATA_CHANGE },
> +    [DCD_CONFIG][RELEASE_DYN_CAP] = {
> +        "RELEASE_DCD_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
> +        ~0, IMMEDIATE_DATA_CHANGE },
>  };
>  
>  static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 3a338b3b37..01a5eaca48 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -130,7 +130,8 @@ typedef enum {
>      CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
>      CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
>      CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
> -    CXL_MBOX_MAX = 0x17
> +    CXL_MBOX_INVALID_EXTENT_LIST = 0x1E, /* cxl r3.0: Table 8-34*/
> +    CXL_MBOX_MAX = 0x1F
>  } CXLRetCode;
>  
>  struct cxl_cmd;


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support
  2023-07-25 18:39     ` [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support Fan Ni
@ 2023-08-07 11:55         ` Jonathan Cameron via
  2023-09-08 13:12       ` Jørgen Hansen
  1 sibling, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-07 11:55 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Add dynamic capacity extent list representative to the definition of
> CXLType3Dev and add get DC extent list mailbox command per
> CXL.spec.3.0:.8.2.9.8.9.2.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

A couple of general name format changes. Otherwise LGTM

Jonathan

> ---
>  hw/cxl/cxl-mailbox-utils.c  | 71 +++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          |  1 +
>  include/hw/cxl/cxl_device.h | 23 ++++++++++++
>  3 files changed, 95 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 0511b8e6f7..3d25a9697e 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -83,6 +83,7 @@ enum {
>          #define CLEAR_POISON           0x2
>      DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
>          #define GET_DC_CONFIG          0x0
> +        #define GET_DYN_CAP_EXT_LIST   0x1
>      PHYSICAL_SWITCH = 0x51
>          #define IDENTIFY_SWITCH_DEVICE      0x0
>  };
> @@ -1018,6 +1019,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.2
> + * Get Dynamic Capacity Extent List (Opcode 4810h)
> + */
> +static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len)
> +{
> +    struct get_dyn_cap_ext_list_in_pl {
> +        uint32_t extent_cnt;
> +        uint32_t start_extent_id;
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_ext_list_out_pl {
> +        uint32_t count;
> +        uint32_t total_extents;
> +        uint32_t generation_num;
> +        uint8_t rsvd[4];
> +        CXLDCExtent_raw records[];
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_ext_list_in_pl *in = (void *)cmd->payload;
> +    struct get_dyn_cap_ext_list_out_pl *out = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    uint16_t record_count = 0, i = 0, record_done = 0;
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint16_t out_pl_len;
> +    uint32_t start_extent_id = in->start_extent_id;
> +
> +    if (start_extent_id > ct3d->dc.total_extent_count) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    record_count = MIN(in->extent_cnt,
> +            ct3d->dc.total_extent_count - start_extent_id);
> +
> +    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> +    /* May need more processing here in the future */
> +    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> +
> +    memset(out, 0, out_pl_len);
> +    stl_le_p(&out->count, record_count);
> +    stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
> +    stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
> +
> +    if (record_count > 0) {
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (i++ < start_extent_id) {
> +                continue;
> +            }
> +            stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
> +            stq_le_p(&out->records[record_done].len, ent->len);
> +            memcpy(&out->records[record_done].tag, ent->tag, 0x10);
> +            stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
> +            record_done++;
> +            if (record_done == record_count) {
> +                break;
> +            }
> +        }
> +    }
> +
> +    *len = out_pl_len;
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1058,6 +1126,9 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>          cmd_media_clear_poison, 72, 0 },
>      [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
>          cmd_dcd_get_dyn_cap_config, 2, 0 },
> +    [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
> +        "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
> +        8, 0 },
>  };
>  
>  static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 76bbd9f785..f1170b8047 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -789,6 +789,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>  
>          region_base += region->len;
>      }
> +    QTAILQ_INIT(&ct3d->dc.extents);
>  
>      return 0;
>  }
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 1c99b05a66..3a338b3b37 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -385,6 +385,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
>  
>  #define DCD_MAX_REGION_NUM 8
>  
> +typedef struct CXLDCD_Extent_raw {

Renamed to match QEMU naming conventions (more or less)
CXLDCDExtentRaw

> +    uint64_t start_dpa;
> +    uint64_t len;
> +    uint8_t tag[0x10];
> +    uint16_t shared_seq;
> +    uint8_t rsvd[0x6];
> +} QEMU_PACKED CXLDCExtent_raw;
> +
> +typedef struct CXLDCD_Extent {

CXLDCDExtent

> +    uint64_t start_dpa;
> +    uint64_t len;
> +    uint8_t tag[0x10];
> +    uint16_t shared_seq;
> +    uint8_t rsvd[0x6];
> +
> +    QTAILQ_ENTRY(CXLDCD_Extent) node;
> +} CXLDCD_Extent;
> +typedef QTAILQ_HEAD(, CXLDCD_Extent) CXLDCDExtentList;
> +
>  typedef struct CXLDCD_Region {
>      uint64_t base;
>      uint64_t decode_len; /* in multiples of 256MB */
> @@ -433,6 +452,10 @@ struct CXLType3Dev {
>  
>          uint8_t num_regions; /* 0-8 regions */
>          struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
> +        CXLDCDExtentList extents;
> +
> +        uint32_t total_extent_count;
> +        uint32_t ext_list_gen_seq;
>      } dc;
>  };
>  


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support
@ 2023-08-07 11:55         ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-07 11:55 UTC (permalink / raw)
  To: Fan Ni
  Cc: qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Add dynamic capacity extent list representative to the definition of
> CXLType3Dev and add get DC extent list mailbox command per
> CXL.spec.3.0:.8.2.9.8.9.2.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

A couple of general name format changes. Otherwise LGTM

Jonathan

> ---
>  hw/cxl/cxl-mailbox-utils.c  | 71 +++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          |  1 +
>  include/hw/cxl/cxl_device.h | 23 ++++++++++++
>  3 files changed, 95 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 0511b8e6f7..3d25a9697e 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -83,6 +83,7 @@ enum {
>          #define CLEAR_POISON           0x2
>      DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
>          #define GET_DC_CONFIG          0x0
> +        #define GET_DYN_CAP_EXT_LIST   0x1
>      PHYSICAL_SWITCH = 0x51
>          #define IDENTIFY_SWITCH_DEVICE      0x0
>  };
> @@ -1018,6 +1019,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.2
> + * Get Dynamic Capacity Extent List (Opcode 4810h)
> + */
> +static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len)
> +{
> +    struct get_dyn_cap_ext_list_in_pl {
> +        uint32_t extent_cnt;
> +        uint32_t start_extent_id;
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_ext_list_out_pl {
> +        uint32_t count;
> +        uint32_t total_extents;
> +        uint32_t generation_num;
> +        uint8_t rsvd[4];
> +        CXLDCExtent_raw records[];
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_ext_list_in_pl *in = (void *)cmd->payload;
> +    struct get_dyn_cap_ext_list_out_pl *out = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    uint16_t record_count = 0, i = 0, record_done = 0;
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint16_t out_pl_len;
> +    uint32_t start_extent_id = in->start_extent_id;
> +
> +    if (start_extent_id > ct3d->dc.total_extent_count) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    record_count = MIN(in->extent_cnt,
> +            ct3d->dc.total_extent_count - start_extent_id);
> +
> +    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> +    /* May need more processing here in the future */
> +    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> +
> +    memset(out, 0, out_pl_len);
> +    stl_le_p(&out->count, record_count);
> +    stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
> +    stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
> +
> +    if (record_count > 0) {
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (i++ < start_extent_id) {
> +                continue;
> +            }
> +            stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
> +            stq_le_p(&out->records[record_done].len, ent->len);
> +            memcpy(&out->records[record_done].tag, ent->tag, 0x10);
> +            stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
> +            record_done++;
> +            if (record_done == record_count) {
> +                break;
> +            }
> +        }
> +    }
> +
> +    *len = out_pl_len;
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1058,6 +1126,9 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>          cmd_media_clear_poison, 72, 0 },
>      [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
>          cmd_dcd_get_dyn_cap_config, 2, 0 },
> +    [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
> +        "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
> +        8, 0 },
>  };
>  
>  static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 76bbd9f785..f1170b8047 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -789,6 +789,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>  
>          region_base += region->len;
>      }
> +    QTAILQ_INIT(&ct3d->dc.extents);
>  
>      return 0;
>  }
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 1c99b05a66..3a338b3b37 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -385,6 +385,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
>  
>  #define DCD_MAX_REGION_NUM 8
>  
> +typedef struct CXLDCD_Extent_raw {

Renamed to match QEMU naming conventions (more or less)
CXLDCDExtentRaw

> +    uint64_t start_dpa;
> +    uint64_t len;
> +    uint8_t tag[0x10];
> +    uint16_t shared_seq;
> +    uint8_t rsvd[0x6];
> +} QEMU_PACKED CXLDCExtent_raw;
> +
> +typedef struct CXLDCD_Extent {

CXLDCDExtent

> +    uint64_t start_dpa;
> +    uint64_t len;
> +    uint8_t tag[0x10];
> +    uint16_t shared_seq;
> +    uint8_t rsvd[0x6];
> +
> +    QTAILQ_ENTRY(CXLDCD_Extent) node;
> +} CXLDCD_Extent;
> +typedef QTAILQ_HEAD(, CXLDCD_Extent) CXLDCDExtentList;
> +
>  typedef struct CXLDCD_Region {
>      uint64_t base;
>      uint64_t decode_len; /* in multiples of 256MB */
> @@ -433,6 +452,10 @@ struct CXLType3Dev {
>  
>          uint8_t num_regions; /* 0-8 regions */
>          struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
> +        CXLDCDExtentList extents;
> +
> +        uint32_t total_extent_count;
> +        uint32_t ext_list_gen_seq;
>      } dc;
>  };
>  



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
  2023-08-04 18:07         ` Gregory Price
@ 2023-08-07 12:10             ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-07 12:10 UTC (permalink / raw)
  To: Gregory Price
  Cc: Fan Ni, qemu-devel, linux-cxl, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Fri, 4 Aug 2023 14:07:55 -0400
Gregory Price <gregory.price@memverge.com> wrote:

> On Fri, Aug 04, 2023 at 05:36:23PM +0100, Jonathan Cameron wrote:
> > On Tue, 25 Jul 2023 18:39:56 +0000
> > Fan Ni <fan.ni@samsung.com> wrote:
> >   
> > > From: Fan Ni <nifan@outlook.com>
> > > 
> > > Add (file/memory backed) host backend, all the dynamic capacity regions
> > > will share a single, large enough host backend. Set up address space for
> > > DC regions to support read/write operations to dynamic capacity for DCD.
> > > 
> > > With the change, following supports are added:
> > > 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
> > >    memory backend for dynamic capacity;
> > > 2. add namespace for dynamic capacity for read/write support;
> > > 3. create cdat entries for each dynamic capacity region;
> > > 4. fix dvsec range registers to include DC regions.
> > > 
> > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > Hi Fan,
> > 
> > I'm not sure if we want to do all regions backed by one memory backend
> > or one backend each.  It will become complex when some are shared
> > (e.g. what Gregory is working on).  
> 
> I thought about this briefly when i implemented the original volatile
> support due to the potential for partitioning. We landed on, iirc, 
> 2 backends (1 for volatile, 1 for non-volatile).
> 
> The reality, though, is the driver (presently) does not have a good way
> to create more than 1 dax per memdev, and in practice with real devices
> we see that this just tends to be the case: 1 dax per device.  So unless
> that's going to change, ever having more than 1 backend will just be
> unused complexity.

I'm not sure how this will turn out.  I guess we play with what Fan has
done here and see if it ever ends up mattering!

> 
> To me, this is a good example of "maybe piling everything into the core
> ct3d is going to get ugly fast".  Maybe it would be better to do
> something similar to the CCI interface and allow for overriding the
> other functions as well.

In general I agree - but DCD is going to be a fairly standard facility
so for this one I think it'll end up either in ct3d or in the MHD / MLD
generalizations of that. For now I'm still thinking a normal type 3 device
is an MHD or MLD with a limited feature set - so easier to just turn things
off in one of those than do it as additions.   Now I'm not sure if
we end up with a MHD MLD with a lot of options in the end - probably still
as the ct3d but with a default where most stuff is turned off.

Ultimately I want that super device to be maintainable. That may mean
breaking the functionality up, but I don't yet think that means going
the simple + extend model.

> 
> just a thought.  I apologize for not engaging with the DCD patch set,
> conferences have been keeping me busier than expected.  I plan on
> putting it through the grinder this month.

No problem. Definitely some testing needed here so great to have
some more of that when you get to it.  I think most of the issues
will occur when the kernel isn't doing its normal flows. So weird
add and remove sequences that Linux may never use, but which we should
emulate the handling for correctly.

> 
> > 
> > A few questions inline.  In particular there are subtle changes to
> > existing handling that are either bug fixes (in which case they need
> > to be sent first) or bugs / have no effect and shouldn't be in here.
> > 
> >   
> > > ---
> > >  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
> > >  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
> > >  include/hw/cxl/cxl_device.h |   4 +
> > >  3 files changed, 185 insertions(+), 41 deletions(-)
> > >   


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions
@ 2023-08-07 12:10             ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-07 12:10 UTC (permalink / raw)
  To: Gregory Price
  Cc: Fan Ni, qemu-devel, linux-cxl, hchkuo, cbrowy, ira.weiny,
	dan.j.williams, Adam Manzanares, dave, nmtadam.samsung, nifan

On Fri, 4 Aug 2023 14:07:55 -0400
Gregory Price <gregory.price@memverge.com> wrote:

> On Fri, Aug 04, 2023 at 05:36:23PM +0100, Jonathan Cameron wrote:
> > On Tue, 25 Jul 2023 18:39:56 +0000
> > Fan Ni <fan.ni@samsung.com> wrote:
> >   
> > > From: Fan Ni <nifan@outlook.com>
> > > 
> > > Add (file/memory backed) host backend, all the dynamic capacity regions
> > > will share a single, large enough host backend. Set up address space for
> > > DC regions to support read/write operations to dynamic capacity for DCD.
> > > 
> > > With the change, following supports are added:
> > > 1. add a new property to type3 device "nonvolatile-dc-memdev" to point to host
> > >    memory backend for dynamic capacity;
> > > 2. add namespace for dynamic capacity for read/write support;
> > > 3. create cdat entries for each dynamic capacity region;
> > > 4. fix dvsec range registers to include DC regions.
> > > 
> > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > Hi Fan,
> > 
> > I'm not sure if we want to do all regions backed by one memory backend
> > or one backend each.  It will become complex when some are shared
> > (e.g. what Gregory is working on).  
> 
> I thought about this briefly when i implemented the original volatile
> support due to the potential for partitioning. We landed on, iirc, 
> 2 backends (1 for volatile, 1 for non-volatile).
> 
> The reality, though, is the driver (presently) does not have a good way
> to create more than 1 dax per memdev, and in practice with real devices
> we see that this just tends to be the case: 1 dax per device.  So unless
> that's going to change, ever having more than 1 backend will just be
> unused complexity.

I'm not sure how this will turn out.  I guess we play with what Fan has
done here and see if it ever ends up mattering!

> 
> To me, this is a good example of "maybe piling everything into the core
> ct3d is going to get ugly fast".  Maybe it would be better to do
> something similar to the CCI interface and allow for overriding the
> other functions as well.

In general I agree - but DCD is going to be a fairly standard facility
so for this one I think it'll end up either in ct3d or in the MHD / MLD
generalizations of that. For now I'm still thinking a normal type 3 device
is an MHD or MLD with a limited feature set - so easier to just turn things
off in one of those than do it as additions.   Now I'm not sure if
we end up with a MHD MLD with a lot of options in the end - probably still
as the ct3d but with a default where most stuff is turned off.

Ultimately I want that super device to be maintainable. That may mean
breaking the functionality up, but I don't yet think that means going
the simple + extend model.

> 
> just a thought.  I apologize for not engaging with the DCD patch set,
> conferences have been keeping me busier than expected.  I plan on
> putting it through the grinder this month.

No problem. Definitely some testing needed here so great to have
some more of that when you get to it.  I think most of the issues
will occur when the kernel isn't doing its normal flows. So weird
add and remove sequences that Linux may never use, but which we should
emulate the handling for correctly.

> 
> > 
> > A few questions inline.  In particular there are subtle changes to
> > existing handling that are either bug fixes (in which case they need
> > to be sent first) or bugs / have no effect and shouldn't be in here.
> > 
> >   
> > > ---
> > >  hw/cxl/cxl-mailbox-utils.c  |  19 +++-
> > >  hw/mem/cxl_type3.c          | 203 +++++++++++++++++++++++++++++-------
> > >  include/hw/cxl/cxl_device.h |   4 +
> > >  3 files changed, 185 insertions(+), 41 deletions(-)
> > >   



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-08-07  8:53         ` Jonathan Cameron via
  (?)
  (?)
@ 2023-08-24 20:49         ` Fan Ni
  2023-08-25 11:42             ` Jonathan Cameron via
  -1 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-08-24 20:49 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> On Tue, 25 Jul 2023 18:39:56 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
>
> > From: Fan Ni <nifan@outlook.com>
> >
> > Not all dpa range in the dc regions is valid to access until an extent
> > covering the range has been added. Add a bitmap for each region to
> > record whether a dc block in the region has been backed by dc extent.
> > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > extent is added, all the bits of the blocks in the extent will be set,
> > which will be cleared when the extent is released.
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> Hi Fan,
>
> A few of the bits of feedback apply broadly across the series.  Given I'm
> rebasing this anyway to give myself something to test I'll tidy things up
> (feel free to disagree with and revert any changes !)
> and push a tree out in next day or two.  I'll message when I've done so.
>
> Jonathan

Hi Jonathan,
I tried DCD with your branch "cxl-2023-08-07", and noticed the
following,
1. You made some changes to the bitmap functionality; now it is only
used to validate extents when adding/releasing dc extents. My original
thought in adding the bitmap was to 1) validate extents for extent
add/release as you do; 2) add validation when doing read/write to the dc
regions since some address regions may not have a valid extent added yet.
Do you think 2) is not necessary?

2. Your change introduced a bug in the code.
https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
ct3d->dc.num_regions should be ct3d->dc.num_regions-1.

Thanks,
Fan

>
> > ---
> >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> >  include/hw/cxl/cxl_device.h |   1 +
> >  2 files changed, 156 insertions(+)
> >
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 41a828598a..51943a36fc 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >          /* dsmad_handle is set when creating cdat table entries */
> >          region->flags = 0;
> >
> > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);
>
> In common with many allocators in qemu if this fails it calls abort()
> internally so no need to handle potential errors.
>
> > +        if (!region->blk_bitmap) {
> > +            break;
> > +        }
> > +
> >          region_base += region->len;
> >      }
> > +
> > +    if (i < ct3d->dc.num_regions) {
> > +        while (--i >= 0) {
> > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > +        }
> > +        return -1;
> > +    }
> > +
> >      QTAILQ_INIT(&ct3d->dc.extents);
> >
> >      return 0;
> >  }
> >
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    int i;
> > +    struct CXLDCD_Region *region;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        g_free(region->blk_bitmap);
> > +    }
> > +}
> > +
> >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      }
> >  }
> >
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > + * accessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     **/
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > +            len / region->block_size);
> > +}
> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > +}
> > +
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > + * inaccessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > +}
> > +
> >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> >  {
> >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          *as = &ct3d->hostpmem_as;
> >          *dpa_offset -= vmr_size;
> >      } else {
> > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > +            return -ENODEV;
> > +        }
> > +
> >          *as = &ct3d->dc.host_dc_as;
> >          *dpa_offset -= (vmr_size + pmr_size);
> >      }
> > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> >      }
> >
> >      g_free(extents);
> > +
> > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > +    list = records;
> > +    while (list) {
> > +        dpa = list->value->dpa * 1024 * 1024;
> > +        len = list->value->len * 1024 * 1024;
> > +        rid = list->value->region_id;
> > +
> > +        switch (type) {
> > +        case DC_EVENT_ADD_CAPACITY:
> > +            set_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        case DC_EVENT_RELEASE_CAPACITY:
> > +            clear_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        default:
> > +            error_setg(errp, "DC event type not handled yet");
> > +            break;
> > +        }
> > +        list = list->next;
> > +    }
> >  }
> >
> >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 01a5eaca48..1f85c88017 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> >      uint64_t block_size;
> >      uint32_t dsmadhandle;
> >      uint8_t flags;
> > +    unsigned long *blk_bitmap;
> >  } CXLDCD_Region;
> >
> >  struct CXLType3Dev {
>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-08-24 20:49         ` Fan Ni
@ 2023-08-25 11:42             ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-25 11:42 UTC (permalink / raw)
  To: Fan Ni
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Thu, 24 Aug 2023 13:49:00 -0700
Fan Ni <fan.ni@gmx.us> wrote:

> On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> > On Tue, 25 Jul 2023 18:39:56 +0000
> > Fan Ni <fan.ni@samsung.com> wrote:
> >  
> > > From: Fan Ni <nifan@outlook.com>
> > >
> > > Not all dpa range in the dc regions is valid to access until an extent
> > > covering the range has been added. Add a bitmap for each region to
> > > record whether a dc block in the region has been backed by dc extent.
> > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > extent is added, all the bits of the blocks in the extent will be set,
> > > which will be cleared when the extent is released.
> > >
> > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > Hi Fan,
> >
> > A few of the bits of feedback apply broadly across the series.  Given I'm
> > rebasing this anyway to give myself something to test I'll tidy things up
> > (feel free to disagree with and revert any changes !)
> > and push a tree out in next day or two.  I'll message when I've done so.
> >
> > Jonathan  
> 
> Hi Jonathan,
> I tried DCD with your branch "cxl-2023-08-07", and noticed the
> following,
> 1. You made some changes to the bitmap functionality, now it is only
> used to validate extents when adding/releasing dc extents. My original
> thought of adding the bitmap is to 1) validating extents for extent
> add/release as you do; 2) Add validating when doing read/write to the dc
> regions since some address region may not have valid extent added yet.
> Do you think 2) is not necessary?

Change wasn't intentional. I probably just messed up the rebase!

> 
> 2. Your change introduced a bug in the code.
> https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> ct3d->dc.num_regions should be ct3d->dc.num_regions-1.
Thanks.  Given I might forget about it, if you want to incorporate that in
your next version that would be great. I might remember to fix it in the meantime!

Jonathan

> 
> Thanks,
> Fan
> 
> >  
> > > ---
> > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > >  include/hw/cxl/cxl_device.h |   1 +
> > >  2 files changed, 156 insertions(+)
> > >
> > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > index 41a828598a..51943a36fc 100644
> > > --- a/hw/mem/cxl_type3.c
> > > +++ b/hw/mem/cxl_type3.c
> > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > >          /* dsmad_handle is set when creating cdat table entries */
> > >          region->flags = 0;
> > >
> > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> >
> > In common with many allocators in qemu if this fails it calls abort()
> > internally so no need to handle potential errors.
> >  
> > > +        if (!region->blk_bitmap) {
> > > +            break;
> > > +        }
> > > +
> > >          region_base += region->len;
> > >      }
> > > +
> > > +    if (i < ct3d->dc.num_regions) {
> > > +        while (--i >= 0) {
> > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > +        }
> > > +        return -1;
> > > +    }
> > > +
> > >      QTAILQ_INIT(&ct3d->dc.extents);
> > >
> > >      return 0;
> > >  }
> > >
> > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > +{
> > > +    int i;
> > > +    struct CXLDCD_Region *region;
> > > +
> > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        g_free(region->blk_bitmap);
> > > +    }
> > > +}
> > > +
> > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > >  {
> > >      DeviceState *ds = DEVICE(ct3d);
> > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > >      g_free(regs->special_ops);
> > >  err_address_space_free:
> > >      if (ct3d->dc.host_dc) {
> > > +        cxl_destroy_dc_regions(ct3d);
> > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > >      }
> > >      if (ct3d->hostpmem) {
> > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > >      g_free(regs->special_ops);
> > >      if (ct3d->dc.host_dc) {
> > > +        cxl_destroy_dc_regions(ct3d);
> > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > >      }
> > >      if (ct3d->hostpmem) {
> > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > >      }
> > >  }
> > >
> > > +/*
> > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > + * accessible, this happens when a dc extent is added and accepted by the
> > > + * host.
> > > + */
> > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     **/
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > +            len / region->block_size);
> > > +}
> > > +
> > > +/*
> > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > + * with dc extents, used when validating read/write to dc regions
> > > + */
> > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +    uint64_t nbits;
> > > +    long nr;
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return false;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     */
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    nr = (dpa - region->base) / region->block_size;
> > > +    nbits = len / region->block_size;
> > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > +}
> > > +
> > > +/*
> > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > + * host.
> > > + */
> > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +    uint64_t nbits;
> > > +    long nr;
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     */
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    nr = (dpa - region->base) / region->block_size;
> > > +    nbits = len / region->block_size;
> > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > +}
> > > +
> > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > >  {
> > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > >          *as = &ct3d->hostpmem_as;
> > >          *dpa_offset -= vmr_size;
> > >      } else {
> > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > +            return -ENODEV;
> > > +        }
> > > +
> > >          *as = &ct3d->dc.host_dc_as;
> > >          *dpa_offset -= (vmr_size + pmr_size);
> > >      }
> > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > >      }
> > >
> > >      g_free(extents);
> > > +
> > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > +    list = records;
> > > +    while (list) {
> > > +        dpa = list->value->dpa * 1024 * 1024;
> > > +        len = list->value->len * 1024 * 1024;
> > > +        rid = list->value->region_id;
> > > +
> > > +        switch (type) {
> > > +        case DC_EVENT_ADD_CAPACITY:
> > > +            set_region_block_backed(dcd, dpa, len);
> > > +            break;
> > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > +            clear_region_block_backed(dcd, dpa, len);
> > > +            break;
> > > +        default:
> > > +            error_setg(errp, "DC event type not handled yet");
> > > +            break;
> > > +        }
> > > +        list = list->next;
> > > +    }
> > >  }
> > >
> > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > index 01a5eaca48..1f85c88017 100644
> > > --- a/include/hw/cxl/cxl_device.h
> > > +++ b/include/hw/cxl/cxl_device.h
> > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > >      uint64_t block_size;
> > >      uint32_t dsmadhandle;
> > >      uint8_t flags;
> > > +    unsigned long *blk_bitmap;
> > >  } CXLDCD_Region;
> > >
> > >  struct CXLType3Dev {  
> >  


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
@ 2023-08-25 11:42             ` Jonathan Cameron via
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-25 11:42 UTC (permalink / raw)
  To: Fan Ni
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Thu, 24 Aug 2023 13:49:00 -0700
Fan Ni <fan.ni@gmx.us> wrote:

> On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> > On Tue, 25 Jul 2023 18:39:56 +0000
> > Fan Ni <fan.ni@samsung.com> wrote:
> >  
> > > From: Fan Ni <nifan@outlook.com>
> > >
> > > Not all dpa range in the dc regions is valid to access until an extent
> > > covering the range has been added. Add a bitmap for each region to
> > > record whether a dc block in the region has been backed by dc extent.
> > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > extent is added, all the bits of the blocks in the extent will be set,
> > > which will be cleared when the extent is released.
> > >
> > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > Hi Fan,
> >
> > A few of the bits of feedback apply broadly across the series.  Given I'm
> > rebasing this anyway to give myself something to test I'll tidy things up
> > (feel free to disagree with and revert any changes !)
> > and push a tree out in next day or two.  I'll message when I've done so.
> >
> > Jonathan  
> 
> Hi Jonathan,
> I tried DCD with your branch "cxl-2023-08-07", and noticed the
> following,
> 1. You made some changes to the bitmap functionality, now it is only
> used to validate extents when adding/releasing dc extents. My original
> thought of adding the bitmap is to 1) validating extents for extent
> add/release as you do; 2) Add validating when doing read/write to the dc
> regions since some address region may not have valid extent added yet.
> Do you think 2) is not necessary?

Change wasn't intentional. I probably just messed up the rebase!

> 
> 2. Your change introduced a bug in the code.
> https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> ct3d->dc.num_regions should be ct3d->dc.num_regions-1.
Thanks.  Given I might forget about it, if you want to incorporate that in
your next version that would be great. I might remember to fix it in the meantime!

Jonathan

> 
> Thanks,
> Fan
> 
> >  
> > > ---
> > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > >  include/hw/cxl/cxl_device.h |   1 +
> > >  2 files changed, 156 insertions(+)
> > >
> > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > index 41a828598a..51943a36fc 100644
> > > --- a/hw/mem/cxl_type3.c
> > > +++ b/hw/mem/cxl_type3.c
> > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > >          /* dsmad_handle is set when creating cdat table entries */
> > >          region->flags = 0;
> > >
> > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> >
> > In common with many allocators in qemu if this fails it calls abort()
> > internally so no need to handle potential errors.
> >  
> > > +        if (!region->blk_bitmap) {
> > > +            break;
> > > +        }
> > > +
> > >          region_base += region->len;
> > >      }
> > > +
> > > +    if (i < ct3d->dc.num_regions) {
> > > +        while (--i >= 0) {
> > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > +        }
> > > +        return -1;
> > > +    }
> > > +
> > >      QTAILQ_INIT(&ct3d->dc.extents);
> > >
> > >      return 0;
> > >  }
> > >
> > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > +{
> > > +    int i;
> > > +    struct CXLDCD_Region *region;
> > > +
> > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        g_free(region->blk_bitmap);
> > > +    }
> > > +}
> > > +
> > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > >  {
> > >      DeviceState *ds = DEVICE(ct3d);
> > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > >      g_free(regs->special_ops);
> > >  err_address_space_free:
> > >      if (ct3d->dc.host_dc) {
> > > +        cxl_destroy_dc_regions(ct3d);
> > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > >      }
> > >      if (ct3d->hostpmem) {
> > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > >      g_free(regs->special_ops);
> > >      if (ct3d->dc.host_dc) {
> > > +        cxl_destroy_dc_regions(ct3d);
> > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > >      }
> > >      if (ct3d->hostpmem) {
> > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > >      }
> > >  }
> > >
> > > +/*
> > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > + * accessible, this happens when a dc extent is added and accepted by the
> > > + * host.
> > > + */
> > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     **/
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > +            len / region->block_size);
> > > +}
> > > +
> > > +/*
> > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > + * with dc extents, used when validating read/write to dc regions
> > > + */
> > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +    uint64_t nbits;
> > > +    long nr;
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return false;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     */
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    nr = (dpa - region->base) / region->block_size;
> > > +    nbits = len / region->block_size;
> > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > +}
> > > +
> > > +/*
> > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > + * host.
> > > + */
> > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +    uint64_t nbits;
> > > +    long nr;
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     */
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    nr = (dpa - region->base) / region->block_size;
> > > +    nbits = len / region->block_size;
> > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > +}
> > > +
> > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > >  {
> > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > >          *as = &ct3d->hostpmem_as;
> > >          *dpa_offset -= vmr_size;
> > >      } else {
> > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > +            return -ENODEV;
> > > +        }
> > > +
> > >          *as = &ct3d->dc.host_dc_as;
> > >          *dpa_offset -= (vmr_size + pmr_size);
> > >      }
> > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > >      }
> > >
> > >      g_free(extents);
> > > +
> > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > +    list = records;
> > > +    while (list) {
> > > +        dpa = list->value->dpa * 1024 * 1024;
> > > +        len = list->value->len * 1024 * 1024;
> > > +        rid = list->value->region_id;
> > > +
> > > +        switch (type) {
> > > +        case DC_EVENT_ADD_CAPACITY:
> > > +            set_region_block_backed(dcd, dpa, len);
> > > +            break;
> > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > +            clear_region_block_backed(dcd, dpa, len);
> > > +            break;
> > > +        default:
> > > +            error_setg(errp, "DC event type not handled yet");
> > > +            break;
> > > +        }
> > > +        list = list->next;
> > > +    }
> > >  }
> > >
> > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > index 01a5eaca48..1f85c88017 100644
> > > --- a/include/hw/cxl/cxl_device.h
> > > +++ b/include/hw/cxl/cxl_device.h
> > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > >      uint64_t block_size;
> > >      uint32_t dsmadhandle;
> > >      uint8_t flags;
> > > +    unsigned long *blk_bitmap;
> > >  } CXLDCD_Region;
> > >
> > >  struct CXLType3Dev {  
> >  



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-08-25 11:42             ` Jonathan Cameron via
  (?)
@ 2023-08-25 16:34             ` Fan Ni
  2023-08-30 15:04                 ` Jonathan Cameron
  -1 siblings, 1 reply; 48+ messages in thread
From: Fan Ni @ 2023-08-25 16:34 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Fri, Aug 25, 2023 at 12:42:56PM +0100, Jonathan Cameron wrote:
> On Thu, 24 Aug 2023 13:49:00 -0700
> Fan Ni <fan.ni@gmx.us> wrote:
>
> > On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> > > On Tue, 25 Jul 2023 18:39:56 +0000
> > > Fan Ni <fan.ni@samsung.com> wrote:
> > >
> > > > From: Fan Ni <nifan@outlook.com>
> > > >
> > > > Not all dpa range in the dc regions is valid to access until an extent
> > > > covering the range has been added. Add a bitmap for each region to
> > > > record whether a dc block in the region has been backed by dc extent.
> > > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > > extent is added, all the bits of the blocks in the extent will be set,
> > > > which will be cleared when the extent is released.
> > > >
> > > > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > > Hi Fan,
> > >
> > > A few of the bits of feedback apply broadly across the series.  Given I'm
> > > rebasing this anyway to give myself something to test I'll tidy things up
> > > (feel free to disagree with and revert any changes !)
> > > and push a tree out in next day or two.  I'll message when I've done so.
> > >
> > > Jonathan
> >
> > Hi Jonathan,
> > I tried DCD with your branch "cxl-2023-08-07", and noticed the
> > following,
> > 1. You made some changes to the bitmap functionality, now it is only
> > used to validate extents when adding/releasing dc extents. My original
> > thought of adding the bitmap is to 1) validating extents for extent
> > add/release as you do; 2) Add validating when doing read/write to the dc
> > regions since some address region may not have valid extent added yet.
> > Do you think 2) is not necessary?
>
> Change wasn't intentional. I probably just messed up the rebase!

Just double checked the code. The logic is still there, but in another
patch in the series, so no issue and ignore my previous question.
Sorry for the confusion.

>
> >
> > 2. Your change introduced a bug in the code.
> > https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> > ct3d->dc.num_regions should be ct3d->dc.num_regions-1.
> Thanks.  Given I might forget about about it, if you want to incorporate that in
> your next version that would be great. I might remember to fix it in the meantime!
>
> Jonathan
>

My code does not have this. It seems you added the lastregion variable
to record the last region, while I use the following logic to iterate
the regions and record last region automatically while collecting
min_block_size.

+    for (i = 1; i < dev->dc.num_regions; i++) {
+        region = &dev->dc.regions[i];
+        if (min_block_size > region->block_size) {
+            min_block_size = region->block_size;
+        }
+    }
+
+    blk_bitmap = bitmap_new((region->len + region->base
+                - dev->dc.regions[0].base) / min_block_size);


Fan

> >
> > Thanks,
> > Fan
> >
> > >
> > > > ---
> > > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > > >  include/hw/cxl/cxl_device.h |   1 +
> > > >  2 files changed, 156 insertions(+)
> > > >
> > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > index 41a828598a..51943a36fc 100644
> > > > --- a/hw/mem/cxl_type3.c
> > > > +++ b/hw/mem/cxl_type3.c
> > > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > > >          /* dsmad_handle is set when creating cdat table entries */
> > > >          region->flags = 0;
> > > >
> > > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);
> > >
> > > In common with many allocators in qemu if this fails it calls abort()
> > > internally so no need to handle potential errors.
> > >
> > > > +        if (!region->blk_bitmap) {
> > > > +            break;
> > > > +        }
> > > > +
> > > >          region_base += region->len;
> > > >      }
> > > > +
> > > > +    if (i < ct3d->dc.num_regions) {
> > > > +        while (--i >= 0) {
> > > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > > +        }
> > > > +        return -1;
> > > > +    }
> > > > +
> > > >      QTAILQ_INIT(&ct3d->dc.extents);
> > > >
> > > >      return 0;
> > > >  }
> > > >
> > > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > > +{
> > > > +    int i;
> > > > +    struct CXLDCD_Region *region;
> > > > +
> > > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        g_free(region->blk_bitmap);
> > > > +    }
> > > > +}
> > > > +
> > > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > >  {
> > > >      DeviceState *ds = DEVICE(ct3d);
> > > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > > >      g_free(regs->special_ops);
> > > >  err_address_space_free:
> > > >      if (ct3d->dc.host_dc) {
> > > > +        cxl_destroy_dc_regions(ct3d);
> > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > >      }
> > > >      if (ct3d->hostpmem) {
> > > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > > >      g_free(regs->special_ops);
> > > >      if (ct3d->dc.host_dc) {
> > > > +        cxl_destroy_dc_regions(ct3d);
> > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > >      }
> > > >      if (ct3d->hostpmem) {
> > > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > >      }
> > > >  }
> > > >
> > > > +/*
> > > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > > + * accessible, this happens when a dc extent is added and accepted by the
> > > > + * host.
> > > > + */
> > > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > +        uint64_t len)
> > > > +{
> > > > +    int i;
> > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > +
> > > > +    if (dpa < region->base
> > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > +        return;
> > > > +
> > > > +    /*
> > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > +     * Region 7 for the highest DPA.
> > > > +     * So we check from the last region to find where the dpa belongs.
> > > > +     * access across multiple regions is not allowed.
> > > > +     **/
> > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        if (dpa >= region->base) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > > +            len / region->block_size);
> > > > +}
> > > > +
> > > > +/*
> > > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > > + * with dc extents, used when validating read/write to dc regions
> > > > + */
> > > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > +        uint64_t len)
> > > > +{
> > > > +    int i;
> > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > +    uint64_t nbits;
> > > > +    long nr;
> > > > +
> > > > +    if (dpa < region->base
> > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > +        return false;
> > > > +
> > > > +    /*
> > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > +     * Region 7 for the highest DPA.
> > > > +     * So we check from the last region to find where the dpa belongs.
> > > > +     * access across multiple regions is not allowed.
> > > > +     */
> > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        if (dpa >= region->base) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    nr = (dpa - region->base) / region->block_size;
> > > > +    nbits = len / region->block_size;
> > > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > > +}
> > > > +
> > > > +/*
> > > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > > + * host.
> > > > + */
> > > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > +        uint64_t len)
> > > > +{
> > > > +    int i;
> > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > +    uint64_t nbits;
> > > > +    long nr;
> > > > +
> > > > +    if (dpa < region->base
> > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > +        return;
> > > > +
> > > > +    /*
> > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > +     * Region 7 for the highest DPA.
> > > > +     * So we check from the last region to find where the dpa belongs.
> > > > +     * access across multiple regions is not allowed.
> > > > +     */
> > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        if (dpa >= region->base) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    nr = (dpa - region->base) / region->block_size;
> > > > +    nbits = len / region->block_size;
> > > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > > +}
> > > > +
> > > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > > >  {
> > > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > > >          *as = &ct3d->hostpmem_as;
> > > >          *dpa_offset -= vmr_size;
> > > >      } else {
> > > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > > +            return -ENODEV;
> > > > +        }
> > > > +
> > > >          *as = &ct3d->dc.host_dc_as;
> > > >          *dpa_offset -= (vmr_size + pmr_size);
> > > >      }
> > > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > > >      }
> > > >
> > > >      g_free(extents);
> > > > +
> > > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > > +    list = records;
> > > > +    while (list) {
> > > > +        dpa = list->value->dpa * 1024 * 1024;
> > > > +        len = list->value->len * 1024 * 1024;
> > > > +        rid = list->value->region_id;
> > > > +
> > > > +        switch (type) {
> > > > +        case DC_EVENT_ADD_CAPACITY:
> > > > +            set_region_block_backed(dcd, dpa, len);
> > > > +            break;
> > > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > > +            clear_region_block_backed(dcd, dpa, len);
> > > > +            break;
> > > > +        default:
> > > > +            error_setg(errp, "DC event type not handled yet");
> > > > +            break;
> > > > +        }
> > > > +        list = list->next;
> > > > +    }
> > > >  }
> > > >
> > > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > index 01a5eaca48..1f85c88017 100644
> > > > --- a/include/hw/cxl/cxl_device.h
> > > > +++ b/include/hw/cxl/cxl_device.h
> > > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > > >      uint64_t block_size;
> > > >      uint32_t dsmadhandle;
> > > >      uint8_t flags;
> > > > +    unsigned long *blk_bitmap;
> > > >  } CXLDCD_Region;
> > > >
> > > >  struct CXLType3Dev {
> > >
>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-07-25 18:39     ` [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions Fan Ni
  2023-08-07  8:53         ` Jonathan Cameron via
@ 2023-08-30 12:08       ` Jørgen Hansen
  2023-08-30 15:37           ` Jonathan Cameron
  1 sibling, 1 reply; 48+ messages in thread
From: Jørgen Hansen @ 2023-08-30 12:08 UTC (permalink / raw)
  To: Fan Ni, qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On 7/25/23 20:39, Fan Ni wrote:
> From: Fan Ni <nifan@outlook.com>
> 
> Not all dpa range in the dc regions is valid to access until an extent
> covering the range has been added. Add a bitmap for each region to
> record whether a dc block in the region has been backed by dc extent.
> For the bitmap, a bit in the bitmap represents a dc block. When a dc
> extent is added, all the bits of the blocks in the extent will be set,
> which will be cleared when the extent is released.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

Hi Fan,

Great to see this being implemented. I've been playing around with it 
for a bit, and ran into the issue mentioned below.

> ---
>   hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
>   include/hw/cxl/cxl_device.h |   1 +
>   2 files changed, 156 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 41a828598a..51943a36fc 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>           /* dsmad_handle is set when creating cdat table entries */
>           region->flags = 0;
> 
> +        region->blk_bitmap = bitmap_new(region->len / region->block_size);
> +        if (!region->blk_bitmap) {
> +            break;
> +        }
> +
>           region_base += region->len;
>       }
> +
> +    if (i < ct3d->dc.num_regions) {
> +        while (--i >= 0) {
> +            g_free(ct3d->dc.regions[i].blk_bitmap);
> +        }
> +        return -1;
> +    }
> +
>       QTAILQ_INIT(&ct3d->dc.extents);
> 
>       return 0;
>   }
> 
> +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        g_free(region->blk_bitmap);
> +    }
> +}
> +
>   static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>   {
>       DeviceState *ds = DEVICE(ct3d);
> @@ -1021,6 +1045,7 @@ err_free_special_ops:
>       g_free(regs->special_ops);
>   err_address_space_free:
>       if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>           address_space_destroy(&ct3d->dc.host_dc_as);
>       }
>       if (ct3d->hostpmem) {
> @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
>       spdm_sock_fini(ct3d->doe_spdm.socket);
>       g_free(regs->special_ops);
>       if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>           address_space_destroy(&ct3d->dc.host_dc_as);
>       }
>       if (ct3d->hostpmem) {
> @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
>       }
>   }
> 
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be backed and
> + * accessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     **/
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> +            len / region->block_size);
> +}
> +
> +/*
> + * This function check whether a dpa range [dpa, dpa + len) has been backed
> + * with dc extents, used when validating read/write to dc regions
> + */
> +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return false;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;

> +    nbits = len / region->block_size;
> +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;

The 2nd parameter to find_next_zero_bit is the length of the bitmap, so 
shouldn't this be something like (also considering that len is the 
read/write len, and will be smaller than the region block size):

   nbits = DIV_ROUND_UP(len, region->block_size); 
 

   return find_next_zero_bit(region->blk_bitmap, nbits + nr, nr) ==
          nbits + nr;

> +}
> +
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> + * inaccessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    bitmap_clear(region->blk_bitmap, nr, nbits);
> +}
> +
>   static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
>   {
>       uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>           *as = &ct3d->hostpmem_as;
>           *dpa_offset -= vmr_size;
>       } else {
> +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> +            return -ENODEV;
> +        }
> +
>           *as = &ct3d->dc.host_dc_as;
>           *dpa_offset -= (vmr_size + pmr_size);
>       }
> @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
>       }
> 
>       g_free(extents);
> +
> +    /* Another choice is to do the set/clear after getting mailbox response*/
> +    list = records;
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        switch (type) {
> +        case DC_EVENT_ADD_CAPACITY:
> +            set_region_block_backed(dcd, dpa, len);
> +            break;
> +        case DC_EVENT_RELEASE_CAPACITY:
> +            clear_region_block_backed(dcd, dpa, len);
> +            break;
> +        default:
> +            error_setg(errp, "DC event type not handled yet");
> +            break;
> +        }
> +        list = list->next;
> +    }
>   }
> 
>   void qmp_cxl_add_dynamic_capacity_event(const char *path,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 01a5eaca48..1f85c88017 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
>       uint64_t block_size;
>       uint32_t dsmadhandle;
>       uint8_t flags;
> +    unsigned long *blk_bitmap;
>   } CXLDCD_Region;
> 
>   struct CXLType3Dev {
> --
> 2.25.1

Thanks,
Jorgen

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-08-25 16:34             ` Fan Ni
@ 2023-08-30 15:04                 ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-30 15:04 UTC (permalink / raw)
  To: Fan Ni
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Fri, 25 Aug 2023 09:34:50 -0700
Fan Ni <fan.ni@gmx.us> wrote:

> On Fri, Aug 25, 2023 at 12:42:56PM +0100, Jonathan Cameron wrote:
> > On Thu, 24 Aug 2023 13:49:00 -0700
> > Fan Ni <fan.ni@gmx.us> wrote:
> >  
> > > On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:  
> > > > On Tue, 25 Jul 2023 18:39:56 +0000
> > > > Fan Ni <fan.ni@samsung.com> wrote:
> > > >  
> > > > > From: Fan Ni <nifan@outlook.com>
> > > > >
> > > > > Not all dpa range in the dc regions is valid to access until an extent
> > > > > covering the range has been added. Add a bitmap for each region to
> > > > > record whether a dc block in the region has been backed by dc extent.
> > > > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > > > extent is added, all the bits of the blocks in the extent will be set,
> > > > > which will be cleared when the extent is released.
> > > > >
> > > > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > > > Hi Fan,
> > > >
> > > > A few of the bits of feedback apply broadly across the series.  Given I'm
> > > > rebasing this anyway to give myself something to test I'll tidy things up
> > > > (feel free to disagree with and revert any changes !)
> > > > and push a tree out in next day or two.  I'll message when I've done so.
> > > >
> > > > Jonathan  
> > >
> > > Hi Jonathan,
> > > I tried DCD with your branch "cxl-2023-08-07", and noticed the
> > > following,
> > > 1. You made some changes to the bitmap functionality, now it is only
> > > used to validate extents when adding/releasing dc extents. My original
> > > thought of adding the bitmap is to 1) validating extents for extent
> > > add/release as you do; 2) Add validating when doing read/write to the dc
> > > regions since some address region may not have valid extent added yet.
> > > Do you think 2) is not necessary?  
> >
> > Change wasn't intentional. I probably just messed up the rebase!  
> 
> Just double checked the code. The logic is still there, but in another
> patch in the series, so no issue and ignore my previous question.
> Sorry for the confusion.
> 
> >  
> > >
> > > 2. Your change introduced a bug in the code.
> > > https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> > > ct3d->dc.num_regions should be ct3d->dc.num_regions-1.  
> > Thanks.  Given I might forget about about it, if you want to incorporate that in
> > your next version that would be great. I might remember to fix it in the meantime!
Oops. I'll tidy that up in my tree.
> >
> > Jonathan
> >  
> 
> My code does not have this. It seems you added the lastregion variable
> to record the last region, while I use the following logic to iterate
> the regions and record last region automatically while collecting
> min_block_size.
> 
> +    for (i = 1; i < dev->dc.num_regions; i++) {
> +        region = &dev->dc.regions[i];
> +        if (min_block_size > region->block_size) {
> +            min_block_size = region->block_size;
> +        }
> +    }
> +
> +    blk_bitmap = bitmap_new((region->len + region->base
> +                - dev->dc.regions[0].base) / min_block_size);
Understood.  I found that hard to read (see review of patch 7).
I then messed up the cleanup as you've noted.

Jonathan
> 
> 
> Fan
> 
> > >
> > > Thanks,
> > > Fan
> > >  
> > > >  
> > > > > ---
> > > > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > > > >  include/hw/cxl/cxl_device.h |   1 +
> > > > >  2 files changed, 156 insertions(+)
> > > > >
> > > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > > index 41a828598a..51943a36fc 100644
> > > > > --- a/hw/mem/cxl_type3.c
> > > > > +++ b/hw/mem/cxl_type3.c
> > > > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > > > >          /* dsmad_handle is set when creating cdat table entries */
> > > > >          region->flags = 0;
> > > > >
> > > > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> > > >
> > > > In common with many allocators in qemu if this fails it calls abort()
> > > > internally so no need to handle potential errors.
> > > >  
> > > > > +        if (!region->blk_bitmap) {
> > > > > +            break;
> > > > > +        }
> > > > > +
> > > > >          region_base += region->len;
> > > > >      }
> > > > > +
> > > > > +    if (i < ct3d->dc.num_regions) {
> > > > > +        while (--i >= 0) {
> > > > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > > > +        }
> > > > > +        return -1;
> > > > > +    }
> > > > > +
> > > > >      QTAILQ_INIT(&ct3d->dc.extents);
> > > > >
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > > > +{
> > > > > +    int i;
> > > > > +    struct CXLDCD_Region *region;
> > > > > +
> > > > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        g_free(region->blk_bitmap);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > > >  {
> > > > >      DeviceState *ds = DEVICE(ct3d);
> > > > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > > > >      g_free(regs->special_ops);
> > > > >  err_address_space_free:
> > > > >      if (ct3d->dc.host_dc) {
> > > > > +        cxl_destroy_dc_regions(ct3d);
> > > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > > >      }
> > > > >      if (ct3d->hostpmem) {
> > > > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > > > >      g_free(regs->special_ops);
> > > > >      if (ct3d->dc.host_dc) {
> > > > > +        cxl_destroy_dc_regions(ct3d);
> > > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > > >      }
> > > > >      if (ct3d->hostpmem) {
> > > > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > > >      }
> > > > >  }
> > > > >
> > > > > +/*
> > > > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > > > + * accessible, this happens when a dc extent is added and accepted by the
> > > > > + * host.
> > > > > + */
> > > > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     **/
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > > > +            len / region->block_size);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > > > + * with dc extents, used when validating read/write to dc regions
> > > > > + */
> > > > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +    uint64_t nbits;
> > > > > +    long nr;
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     */
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    nr = (dpa - region->base) / region->block_size;
> > > > > +    nbits = len / region->block_size;
> > > > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > > > + * host.
> > > > > + */
> > > > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +    uint64_t nbits;
> > > > > +    long nr;
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     */
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    nr = (dpa - region->base) / region->block_size;
> > > > > +    nbits = len / region->block_size;
> > > > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > > > +}
> > > > > +
> > > > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > > > >  {
> > > > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > > > >          *as = &ct3d->hostpmem_as;
> > > > >          *dpa_offset -= vmr_size;
> > > > >      } else {
> > > > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > > > +            return -ENODEV;
> > > > > +        }
> > > > > +
> > > > >          *as = &ct3d->dc.host_dc_as;
> > > > >          *dpa_offset -= (vmr_size + pmr_size);
> > > > >      }
> > > > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > > > >      }
> > > > >
> > > > >      g_free(extents);
> > > > > +
> > > > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > > > +    list = records;
> > > > > +    while (list) {
> > > > > +        dpa = list->value->dpa * 1024 * 1024;
> > > > > +        len = list->value->len * 1024 * 1024;
> > > > > +        rid = list->value->region_id;
> > > > > +
> > > > > +        switch (type) {
> > > > > +        case DC_EVENT_ADD_CAPACITY:
> > > > > +            set_region_block_backed(dcd, dpa, len);
> > > > > +            break;
> > > > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > > > +            clear_region_block_backed(dcd, dpa, len);
> > > > > +            break;
> > > > > +        default:
> > > > > +            error_setg(errp, "DC event type not handled yet");
> > > > > +            break;
> > > > > +        }
> > > > > +        list = list->next;
> > > > > +    }
> > > > >  }
> > > > >
> > > > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > > index 01a5eaca48..1f85c88017 100644
> > > > > --- a/include/hw/cxl/cxl_device.h
> > > > > +++ b/include/hw/cxl/cxl_device.h
> > > > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > > > >      uint64_t block_size;
> > > > >      uint32_t dsmadhandle;
> > > > >      uint8_t flags;
> > > > > +    unsigned long *blk_bitmap;
> > > > >  } CXLDCD_Region;
> > > > >
> > > > >  struct CXLType3Dev {  
> > > >  
> >  



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
@ 2023-08-30 15:04                 ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-30 15:04 UTC (permalink / raw)
  To: Fan Ni
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Fri, 25 Aug 2023 09:34:50 -0700
Fan Ni <fan.ni@gmx.us> wrote:

> On Fri, Aug 25, 2023 at 12:42:56PM +0100, Jonathan Cameron wrote:
> > On Thu, 24 Aug 2023 13:49:00 -0700
> > Fan Ni <fan.ni@gmx.us> wrote:
> >  
> > > On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:  
> > > > On Tue, 25 Jul 2023 18:39:56 +0000
> > > > Fan Ni <fan.ni@samsung.com> wrote:
> > > >  
> > > > > From: Fan Ni <nifan@outlook.com>
> > > > >
> > > > > Not all dpa range in the dc regions is valid to access until an extent
> > > > > covering the range has been added. Add a bitmap for each region to
> > > > > record whether a dc block in the region has been backed by dc extent.
> > > > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > > > extent is added, all the bits of the blocks in the extent will be set,
> > > > > which will be cleared when the extent is released.
> > > > >
> > > > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > > > Hi Fan,
> > > >
> > > > A few of the bits of feedback apply broadly across the series.  Given I'm
> > > > rebasing this anyway to give myself something to test I'll tidy things up
> > > > (feel free to disagree with and revert any changes !)
> > > > and push a tree out in next day or two.  I'll message when I've done so.
> > > >
> > > > Jonathan  
> > >
> > > Hi Jonathan,
> > > I tried DCD with your branch "cxl-2023-08-07", and noticed the
> > > following,
> > > 1. You made some changes to the bitmap functionality, now it is only
> > > used to validate extents when adding/releasing dc extents. My original
> > > thought of adding the bitmap is to 1) validating extents for extent
> > > add/release as you do; 2) Add validating when doing read/write to the dc
> > > regions since some address region may not have valid extent added yet.
> > > Do you think 2) is not necessary?  
> >
> > Change wasn't intentional. I probably just messed up the rebase!  
> 
> Just double checked the code. The logic is still there, but in another
> patch in the series, so no issue and ignore my previous question.
> Sorry for the confusion.
> 
> >  
> > >
> > > 2. Your change introduced a bug in the code.
> > > https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> > > ct3d->dc.num_regions should be ct3d->dc.num_regions-1.  
> > > Thanks.  Given I might forget about it, if you want to incorporate that in
> > your next version that would be great. I might remember to fix it in the meantime!
Oops. I'll tidy that up in my tree.
> >
> > Jonathan
> >  
> 
> My code does not have this. It seems you added the lastregion variable
> to record the last region, while I use the following logic to iterate
> the regions and record last region automatically while collecting
> min_block_size.
> 
> +    for (i = 1; i < dev->dc.num_regions; i++) {
> +        region = &dev->dc.regions[i];
> +        if (min_block_size > region->block_size) {
> +            min_block_size = region->block_size;
> +        }
> +    }
> +
> +    blk_bitmap = bitmap_new((region->len + region->base
> +                - dev->dc.regions[0].base) / min_block_size);
Understood.  I found that hard to read (see review of patch 7).
I then messed up the cleanup as you've noted.

Jonathan
> 
> 
> Fan
> 
> > >
> > > Thanks,
> > > Fan
> > >  
> > > >  
> > > > > ---
> > > > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > > > >  include/hw/cxl/cxl_device.h |   1 +
> > > > >  2 files changed, 156 insertions(+)
> > > > >
> > > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > > index 41a828598a..51943a36fc 100644
> > > > > --- a/hw/mem/cxl_type3.c
> > > > > +++ b/hw/mem/cxl_type3.c
> > > > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > > > >          /* dsmad_handle is set when creating cdat table entries */
> > > > >          region->flags = 0;
> > > > >
> > > > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> > > >
> > > > In common with many allocators in qemu if this fails it calls abort()
> > > > internally so no need to handle potential errors.
> > > >  
> > > > > +        if (!region->blk_bitmap) {
> > > > > +            break;
> > > > > +        }
> > > > > +
> > > > >          region_base += region->len;
> > > > >      }
> > > > > +
> > > > > +    if (i < ct3d->dc.num_regions) {
> > > > > +        while (--i >= 0) {
> > > > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > > > +        }
> > > > > +        return -1;
> > > > > +    }
> > > > > +
> > > > >      QTAILQ_INIT(&ct3d->dc.extents);
> > > > >
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > > > +{
> > > > > +    int i;
> > > > > +    struct CXLDCD_Region *region;
> > > > > +
> > > > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        g_free(region->blk_bitmap);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > > >  {
> > > > >      DeviceState *ds = DEVICE(ct3d);
> > > > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > > > >      g_free(regs->special_ops);
> > > > >  err_address_space_free:
> > > > >      if (ct3d->dc.host_dc) {
> > > > > +        cxl_destroy_dc_regions(ct3d);
> > > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > > >      }
> > > > >      if (ct3d->hostpmem) {
> > > > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > > > >      g_free(regs->special_ops);
> > > > >      if (ct3d->dc.host_dc) {
> > > > > +        cxl_destroy_dc_regions(ct3d);
> > > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > > >      }
> > > > >      if (ct3d->hostpmem) {
> > > > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > > >      }
> > > > >  }
> > > > >
> > > > > +/*
> > > > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > > > + * accessible, this happens when a dc extent is added and accepted by the
> > > > > + * host.
> > > > > + */
> > > > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     **/
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > > > +            len / region->block_size);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > > > + * with dc extents, used when validating read/write to dc regions
> > > > > + */
> > > > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +    uint64_t nbits;
> > > > > +    long nr;
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     */
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    nr = (dpa - region->base) / region->block_size;
> > > > > +    nbits = len / region->block_size;
> > > > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > > > + * host.
> > > > > + */
> > > > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +    uint64_t nbits;
> > > > > +    long nr;
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     */
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    nr = (dpa - region->base) / region->block_size;
> > > > > +    nbits = len / region->block_size;
> > > > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > > > +}
> > > > > +
> > > > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > > > >  {
> > > > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > > > >          *as = &ct3d->hostpmem_as;
> > > > >          *dpa_offset -= vmr_size;
> > > > >      } else {
> > > > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > > > +            return -ENODEV;
> > > > > +        }
> > > > > +
> > > > >          *as = &ct3d->dc.host_dc_as;
> > > > >          *dpa_offset -= (vmr_size + pmr_size);
> > > > >      }
> > > > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > > > >      }
> > > > >
> > > > >      g_free(extents);
> > > > > +
> > > > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > > > +    list = records;
> > > > > +    while (list) {
> > > > > +        dpa = list->value->dpa * 1024 * 1024;
> > > > > +        len = list->value->len * 1024 * 1024;
> > > > > +        rid = list->value->region_id;
> > > > > +
> > > > > +        switch (type) {
> > > > > +        case DC_EVENT_ADD_CAPACITY:
> > > > > +            set_region_block_backed(dcd, dpa, len);
> > > > > +            break;
> > > > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > > > +            clear_region_block_backed(dcd, dpa, len);
> > > > > +            break;
> > > > > +        default:
> > > > > +            error_setg(errp, "DC event type not handled yet");
> > > > > +            break;
> > > > > +        }
> > > > > +        list = list->next;
> > > > > +    }
> > > > >  }
> > > > >
> > > > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > > index 01a5eaca48..1f85c88017 100644
> > > > > --- a/include/hw/cxl/cxl_device.h
> > > > > +++ b/include/hw/cxl/cxl_device.h
> > > > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > > > >      uint64_t block_size;
> > > > >      uint32_t dsmadhandle;
> > > > >      uint8_t flags;
> > > > > +    unsigned long *blk_bitmap;
> > > > >  } CXLDCD_Region;
> > > > >
> > > > >  struct CXLType3Dev {  
> > > >  
> >  


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
  2023-08-30 12:08       ` Jørgen Hansen
@ 2023-08-30 15:37           ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron via @ 2023-08-30 15:37 UTC (permalink / raw)
  To: Jørgen Hansen
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan


> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;  
> 
> > +    nbits = len / region->block_size;
Oops. len is probably always smaller than block_size (len is typically 8 or
less), so nbits is always 0.  It should be 1 in those cases.

> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;  
> 
> The 2nd parameter to find_next_zero_bit is the length of the bitmap, so 
> shouldn't this be something like (also considering that len is the 
> read/write len, and will be smaller than the region block size):
> 
>    nbits = DIV_ROUND_UP(len, region->block_size);

>  
> 
>    return find_next_zero_bit(region->blk_bitmap, nbits + nr, nr) ==
>           nbits + nr;

Agreed with your suggestion. I'll carry that in my forward port of this
series for now and update my tree at gitlab.com/jic23/qemu a bit later
today; the branch will probably be cxl-2023-08-30.

Jonathan



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions
@ 2023-08-30 15:37           ` Jonathan Cameron
  0 siblings, 0 replies; 48+ messages in thread
From: Jonathan Cameron @ 2023-08-30 15:37 UTC (permalink / raw)
  To: Jørgen Hansen
  Cc: Fan Ni, qemu-devel, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan


> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;  
> 
> > +    nbits = len / region->block_size;
Oops. len is probably always smaller than block_size (len is typically 8 or
less), so nbits is always 0.  It should be 1 in those cases.

> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;  
> 
> The 2nd parameter to find_next_zero_bit is the length of the bitmap, so 
> shouldn't this be something like (also considering that len is the 
> read/write len, and will be smaller than the region block size):
> 
>    nbits = DIV_ROUND_UP(len, region->block_size);

>  
> 
>    return find_next_zero_bit(region->blk_bitmap, nbits + nr, nr) ==
>           nbits + nr;

Agreed with your suggestion. I'll carry that in my forward port of this
series for now and update my tree at gitlab.com/jic23/qemu a bit later
today; the branch will probably be cxl-2023-08-30.

Jonathan


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response
  2023-07-25 18:39     ` [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response Fan Ni
  2023-08-07 11:42         ` Jonathan Cameron
@ 2023-09-08 13:00       ` Jørgen Hansen
  2023-09-08 17:19         ` Fan Ni
  1 sibling, 1 reply; 48+ messages in thread
From: Jørgen Hansen @ 2023-09-08 13:00 UTC (permalink / raw)
  To: Fan Ni, qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On 7/25/23 20:39, Fan Ni wrote:
> From: Fan Ni <nifan@outlook.com>
> 
> Per CXL spec 3.0, two mailbox commands are implemented:
> Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
> Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>   hw/cxl/cxl-mailbox-utils.c  | 253 ++++++++++++++++++++++++++++++++++++
>   include/hw/cxl/cxl_device.h |   3 +-
>   2 files changed, 255 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 3d25a9697e..1e4944da95 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -84,6 +84,8 @@ enum {
>       DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
>           #define GET_DC_CONFIG          0x0
>           #define GET_DYN_CAP_EXT_LIST   0x1
> +        #define ADD_DYN_CAP_RSP        0x2
> +        #define RELEASE_DYN_CAP        0x3
>       PHYSICAL_SWITCH = 0x51
>           #define IDENTIFY_SWITCH_DEVICE      0x0
>   };
> @@ -1086,6 +1088,251 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
>       return CXL_MBOX_SUCCESS;
>   }
> 
> +/*
> + * Check whether the bits at addr between [nr, nr+size) are all set,
> + * return 1 if all 1s, else return 0
> + */
> +static inline int test_bits(const unsigned long *addr, int nr, int size)
> +{
> +    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
> +
> +    return (res >= nr + size) ? 1 : 0;
> +}
> +
> +/*
> + * Find dynamic capacity region id based on dpa range [dpa, dpa+len)
> + */
> +static uint8_t find_region_id(struct CXLType3Dev *dev, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int8_t i = dev->dc.num_regions - 1;
> +
> +    while (i > 0 && dpa < dev->dc.regions[i].base) {
> +        i--;
> +    }
> +
> +    if (dpa < dev->dc.regions[i].base
> +            || dpa + len > dev->dc.regions[i].base + dev->dc.regions[i].len) {
> +        return dev->dc.num_regions;
> +    }
> +
> +    return i;
> +}
> +
> +static void insert_extent_to_extent_list(CXLDCDExtentList *list, uint64_t dpa,
> +        uint64_t len, uint8_t *tag, uint16_t shared_seq)
> +{
> +    CXLDCD_Extent *extent;
> +    extent = g_new0(CXLDCD_Extent, 1);
> +    extent->start_dpa = dpa;
> +    extent->len = len;
> +    if (tag) {
> +        memcpy(extent->tag, tag, 0x10);
> +    } else {
> +        memset(extent->tag, 0, 0x10);
> +    }
> +    extent->shared_seq = shared_seq;
> +
> +    QTAILQ_INSERT_TAIL(list, extent, node);
> +}
> +
> +typedef struct updated_dc_extent_list_in_pl {
> +    uint32_t num_entries_updated;
> +    uint8_t rsvd[4];
> +    struct { /* r3.0: Table 8-130 */
> +        uint64_t start_dpa;
> +        uint64_t len;
> +        uint8_t rsvd[8];
> +    } QEMU_PACKED updated_entries[];
> +} QEMU_PACKED updated_dc_extent_list_in_pl;
> +
> +/*
> + * The function only check the input extent list against itself.
> + */
> +static CXLRetCode detect_malformed_extent_list(CXLType3Dev *dev,
> +        const updated_dc_extent_list_in_pl *in)
> +{
> +    unsigned long *blk_bitmap;
> +    uint64_t min_block_size = dev->dc.regions[0].block_size;
> +    struct CXLDCD_Region *region = &dev->dc.regions[0];
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    uint8_t rid;
> +    CXLRetCode ret;
> +
> +    for (i = 1; i < dev->dc.num_regions; i++) {
> +        region = &dev->dc.regions[i];
> +        if (min_block_size > region->block_size) {
> +            min_block_size = region->block_size;
> +        }
> +    }
> +
> +    blk_bitmap = bitmap_new((region->len + region->base
> +                - dev->dc.regions[0].base) / min_block_size);
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        rid = find_region_id(dev, dpa, len);
> +        if (rid == dev->dc.num_regions) {
> +            ret = CXL_MBOX_INVALID_PA;
> +            goto out;
> +        }
> +
> +        region = &dev->dc.regions[rid];
> +        if (dpa % region->block_size || len % region->block_size) {
> +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> +            goto out;
> +        }

Hi,

The bitmap uses the dc region 0 base address as its baseline, so the dpa
needs to be adjusted by that base before it is checked against the
bitmap, e.g.,

+        dpa -= dev->dc.regions[0].base;

Thanks,
Jorgen

> +        /* the dpa range already covered by some other extents in the list */
> +        if (test_bits(blk_bitmap, dpa / min_block_size, len / min_block_size)) {
> +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> +            goto out;
> +        }
> +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> +   }
> +
> +    ret = CXL_MBOX_SUCCESS;
> +
> +out:
> +    g_free(blk_bitmap);
> +    return ret;
> +}
> +
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.3
> + * Add Dynamic Capacity Response (opcode 4802h)
> + * Assume an extent is added only after the response is processed successfully
> + * TODO: for better extent list validation, a better solution would be
> + * maintaining a pending extent list and use it to verify the extent list in
> + * the response.
> + */
> +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate, uint16_t *len_unused)
> +{
> +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> +
> +    if (in->num_entries_updated == 0) {
> +        ret = CXL_MBOX_SUCCESS;
> +        goto out;
> +    }
> +
> +    ret = detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        goto out;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        /*
> +         * Check if the DPA range of the to-be-added extent overlaps with
> +         * existing extent list maintained by the device.
> +         */
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (ent->start_dpa == dpa && ent->len == len) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            } else if (ent->start_dpa <= dpa
> +                    && dpa + len <= ent->start_dpa + ent->len) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            } else if ((dpa < ent->start_dpa + ent->len
> +                        && dpa + len > ent->start_dpa + ent->len)
> +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> +                ret = CXL_MBOX_INVALID_PA;
> +                goto out;
> +            }
> +        }
> +
> +        /*
> +         * TODO: add a pending extent list based on event log record and verify
> +         * the input response
> +         */
> +
> +        insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> +    }
> +    ret = CXL_MBOX_SUCCESS;
> +
> +out:
> +    return ret;
> +}
> +
> +/*
> + * Spec 3.0: 8.2.9.8.9.4
> + * Release Dynamic Capacity (opcode 4803h)
> + **/
> +static CXLRetCode cmd_dcd_release_dyn_cap(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len_unused)
> +{
> +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> +
> +    if (in->num_entries_updated == 0) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    ret = detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (ent->start_dpa == dpa && ent->len == len) {
> +                break;
> +            } else if (ent->start_dpa < dpa
> +                    && dpa + len <= ent->start_dpa + ent->len) {
> +                /* remove partial extent */
> +                uint64_t len1 = dpa - ent->start_dpa;
> +                uint64_t len2 = ent->start_dpa + ent->len - dpa - len;
> +
> +                if (len1) {
> +                    insert_extent_to_extent_list(extent_list, ent->start_dpa,
> +                            len1, NULL, 0);
> +                }
> +                if (len2) {
> +                    insert_extent_to_extent_list(extent_list, dpa + len, len2,
> +                            NULL, 0);
> +                }
> +                break;
> +            } else if ((dpa < ent->start_dpa + ent->len
> +                        && dpa + len > ent->start_dpa + ent->len)
> +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa))
> +                return CXL_MBOX_INVALID_EXTENT_LIST;
> +        }
> +
> +        if (ent) {
> +            QTAILQ_REMOVE(extent_list, ent, node);
> +            g_free(ent);
> +        } else {
> +            /* Try to remove a non-existing extent */
> +            return CXL_MBOX_INVALID_PA;
> +        }
> +    }
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>   #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>   #define IMMEDIATE_DATA_CHANGE (1 << 2)
>   #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1129,6 +1376,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>       [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
>           "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
>           8, 0 },
> +    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
> +        "ADD_DCD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
> +        ~0, IMMEDIATE_DATA_CHANGE },
> +    [DCD_CONFIG][RELEASE_DYN_CAP] = {
> +        "RELEASE_DCD_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
> +        ~0, IMMEDIATE_DATA_CHANGE },
>   };
> 
>   static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 3a338b3b37..01a5eaca48 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -130,7 +130,8 @@ typedef enum {
>       CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
>       CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
>       CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
> -    CXL_MBOX_MAX = 0x17
> +    CXL_MBOX_INVALID_EXTENT_LIST = 0x1E, /* cxl r3.0: Table 8-34*/
> +    CXL_MBOX_MAX = 0x1F
>   } CXLRetCode;
> 
>   struct cxl_cmd;
> --
> 2.25.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support
  2023-07-25 18:39     ` [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support Fan Ni
  2023-08-07 11:55         ` Jonathan Cameron via
@ 2023-09-08 13:12       ` Jørgen Hansen
  2023-09-08 17:12         ` Fan Ni
  1 sibling, 1 reply; 48+ messages in thread
From: Jørgen Hansen @ 2023-09-08 13:12 UTC (permalink / raw)
  To: Fan Ni, qemu-devel
  Cc: jonathan.cameron, linux-cxl, gregory.price, hchkuo, cbrowy,
	ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On 7/25/23 20:39, Fan Ni wrote:
> From: Fan Ni <nifan@outlook.com>
> 
> Add dynamic capacity extent list representative to the definition of
> CXLType3Dev and add get DC extent list mailbox command per
> CXL.spec.3.0:.8.2.9.8.9.2.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
>   hw/cxl/cxl-mailbox-utils.c  | 71 +++++++++++++++++++++++++++++++++++++
>   hw/mem/cxl_type3.c          |  1 +
>   include/hw/cxl/cxl_device.h | 23 ++++++++++++
>   3 files changed, 95 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 0511b8e6f7..3d25a9697e 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -83,6 +83,7 @@ enum {
>           #define CLEAR_POISON           0x2
>       DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
>           #define GET_DC_CONFIG          0x0
> +        #define GET_DYN_CAP_EXT_LIST   0x1
>       PHYSICAL_SWITCH = 0x51
>           #define IDENTIFY_SWITCH_DEVICE      0x0
>   };
> @@ -1018,6 +1019,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
>       return CXL_MBOX_SUCCESS;
>   }
> 
> +/*
> + * cxl spec 3.0: 8.2.9.8.9.2
> + * Get Dynamic Capacity Extent List (Opcode 4810h)
> + */
> +static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
> +        CXLDeviceState *cxl_dstate,
> +        uint16_t *len)
> +{
> +    struct get_dyn_cap_ext_list_in_pl {
> +        uint32_t extent_cnt;
> +        uint32_t start_extent_id;
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_ext_list_out_pl {
> +        uint32_t count;
> +        uint32_t total_extents;
> +        uint32_t generation_num;
> +        uint8_t rsvd[4];
> +        CXLDCExtent_raw records[];
> +    } QEMU_PACKED;
> +
> +    struct get_dyn_cap_ext_list_in_pl *in = (void *)cmd->payload;
> +    struct get_dyn_cap_ext_list_out_pl *out = (void *)cmd->payload;
> +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> +            cxl_dstate);
> +    uint16_t record_count = 0, i = 0, record_done = 0;
> +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCD_Extent *ent;
> +    uint16_t out_pl_len;
> +    uint32_t start_extent_id = in->start_extent_id;
> +
> +    if (start_extent_id > ct3d->dc.total_extent_count) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    record_count = MIN(in->extent_cnt,
> +            ct3d->dc.total_extent_count - start_extent_id);
> +
> +    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> +    /* May need more processing here in the future */
> +    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> +
> +    memset(out, 0, out_pl_len);
> +    stl_le_p(&out->count, record_count);
> +    stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
> +    stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
> +
> +    if (record_count > 0) {
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            if (i++ < start_extent_id) {
> +                continue;
> +            }
> +            stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
> +            stq_le_p(&out->records[record_done].len, ent->len);
> +            memcpy(&out->records[record_done].tag, ent->tag, 0x10);
> +            stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
> +            record_done++;
> +            if (record_done == record_count) {
> +                break;
> +            }
> +        }
> +    }
> +
> +    *len = out_pl_len;
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>   #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>   #define IMMEDIATE_DATA_CHANGE (1 << 2)
>   #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1058,6 +1126,9 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
>           cmd_media_clear_poison, 72, 0 },
>       [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
>           cmd_dcd_get_dyn_cap_config, 2, 0 },
> +    [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
> +        "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
> +        8, 0 },
>   };
> 
>   static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 76bbd9f785..f1170b8047 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -789,6 +789,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> 
>           region_base += region->len;
>       }
> +    QTAILQ_INIT(&ct3d->dc.extents);
> 
>       return 0;
>   }
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 1c99b05a66..3a338b3b37 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -385,6 +385,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
> 
>   #define DCD_MAX_REGION_NUM 8
> 
> +typedef struct CXLDCD_Extent_raw {
> +    uint64_t start_dpa;
> +    uint64_t len;
> +    uint8_t tag[0x10];
> +    uint16_t shared_seq;
> +    uint8_t rsvd[0x6];
> +} QEMU_PACKED CXLDCExtent_raw;
> +
> +typedef struct CXLDCD_Extent {
> +    uint64_t start_dpa;
> +    uint64_t len;
> +    uint8_t tag[0x10];
> +    uint16_t shared_seq;
> +    uint8_t rsvd[0x6];
> +
> +    QTAILQ_ENTRY(CXLDCD_Extent) node;
> +} CXLDCD_Extent;
> +typedef QTAILQ_HEAD(, CXLDCD_Extent) CXLDCDExtentList;
> +
>   typedef struct CXLDCD_Region {
>       uint64_t base;
>       uint64_t decode_len; /* in multiples of 256MB */
> @@ -433,6 +452,10 @@ struct CXLType3Dev {
> 
>           uint8_t num_regions; /* 0-8 regions */
>           struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
> +        CXLDCDExtentList extents;
> +
> +        uint32_t total_extent_count;

Hi,

I don't see total_extent_count being updated anywhere. Shouldn't this be 
adjusted as part of cmd_dcd_add_dyn_cap_rsp()/cmd_dcd_release_dyn_cap()?

Thanks,
Jorgen


> +        uint32_t ext_list_gen_seq;
>       } dc;
>   };
> 
> --
> 2.25.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support
  2023-09-08 13:12       ` Jørgen Hansen
@ 2023-09-08 17:12         ` Fan Ni
  0 siblings, 0 replies; 48+ messages in thread
From: Fan Ni @ 2023-09-08 17:12 UTC (permalink / raw)
  To: Jørgen Hansen
  Cc: Fan Ni, qemu-devel, jonathan.cameron, linux-cxl, gregory.price,
	hchkuo, cbrowy, ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Fri, Sep 08, 2023 at 01:12:45PM +0000, Jørgen Hansen wrote:
> On 7/25/23 20:39, Fan Ni wrote:
> > From: Fan Ni <nifan@outlook.com>
> >
> > Add dynamic capacity extent list representative to the definition of
> > CXLType3Dev and add get DC extent list mailbox command per
> > CXL.spec.3.0:.8.2.9.8.9.2.
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > ---
> >   hw/cxl/cxl-mailbox-utils.c  | 71 +++++++++++++++++++++++++++++++++++++
> >   hw/mem/cxl_type3.c          |  1 +
> >   include/hw/cxl/cxl_device.h | 23 ++++++++++++
> >   3 files changed, 95 insertions(+)
> >
> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> > index 0511b8e6f7..3d25a9697e 100644
> > --- a/hw/cxl/cxl-mailbox-utils.c
> > +++ b/hw/cxl/cxl-mailbox-utils.c
> > @@ -83,6 +83,7 @@ enum {
> >           #define CLEAR_POISON           0x2
> >       DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
> >           #define GET_DC_CONFIG          0x0
> > +        #define GET_DYN_CAP_EXT_LIST   0x1
> >       PHYSICAL_SWITCH = 0x51
> >           #define IDENTIFY_SWITCH_DEVICE      0x0
> >   };
> > @@ -1018,6 +1019,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(struct cxl_cmd *cmd,
> >       return CXL_MBOX_SUCCESS;
> >   }
> >
> > +/*
> > + * cxl spec 3.0: 8.2.9.8.9.2
> > + * Get Dynamic Capacity Extent List (Opcode 4810h)
> > + */
> > +static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
> > +        CXLDeviceState *cxl_dstate,
> > +        uint16_t *len)
> > +{
> > +    struct get_dyn_cap_ext_list_in_pl {
> > +        uint32_t extent_cnt;
> > +        uint32_t start_extent_id;
> > +    } QEMU_PACKED;
> > +
> > +    struct get_dyn_cap_ext_list_out_pl {
> > +        uint32_t count;
> > +        uint32_t total_extents;
> > +        uint32_t generation_num;
> > +        uint8_t rsvd[4];
> > +        CXLDCExtent_raw records[];
> > +    } QEMU_PACKED;
> > +
> > +    struct get_dyn_cap_ext_list_in_pl *in = (void *)cmd->payload;
> > +    struct get_dyn_cap_ext_list_out_pl *out = (void *)cmd->payload;
> > +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> > +            cxl_dstate);
> > +    uint16_t record_count = 0, i = 0, record_done = 0;
> > +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCD_Extent *ent;
> > +    uint16_t out_pl_len;
> > +    uint32_t start_extent_id = in->start_extent_id;
> > +
> > +    if (start_extent_id > ct3d->dc.total_extent_count) {
> > +        return CXL_MBOX_INVALID_INPUT;
> > +    }
> > +
> > +    record_count = MIN(in->extent_cnt,
> > +            ct3d->dc.total_extent_count - start_extent_id);
> > +
> > +    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> > +    /* May need more processing here in the future */
> > +    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> > +
> > +    memset(out, 0, out_pl_len);
> > +    stl_le_p(&out->count, record_count);
> > +    stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
> > +    stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
> > +
> > +    if (record_count > 0) {
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> > +            if (i++ < start_extent_id) {
> > +                continue;
> > +            }
> > +            stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
> > +            stq_le_p(&out->records[record_done].len, ent->len);
> > +            memcpy(&out->records[record_done].tag, ent->tag, 0x10);
> > +            stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
> > +            record_done++;
> > +            if (record_done == record_count) {
> > +                break;
> > +            }
> > +        }
> > +    }
> > +
> > +    *len = out_pl_len;
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> >   #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
> >   #define IMMEDIATE_DATA_CHANGE (1 << 2)
> >   #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> > @@ -1058,6 +1126,9 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
> >           cmd_media_clear_poison, 72, 0 },
> >       [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
> >           cmd_dcd_get_dyn_cap_config, 2, 0 },
> > +    [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
> > +        "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
> > +        8, 0 },
> >   };
> >
> >   static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 76bbd9f785..f1170b8047 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -789,6 +789,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >
> >           region_base += region->len;
> >       }
> > +    QTAILQ_INIT(&ct3d->dc.extents);
> >
> >       return 0;
> >   }
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 1c99b05a66..3a338b3b37 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -385,6 +385,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
> >
> >   #define DCD_MAX_REGION_NUM 8
> >
> > +typedef struct CXLDCD_Extent_raw {
> > +    uint64_t start_dpa;
> > +    uint64_t len;
> > +    uint8_t tag[0x10];
> > +    uint16_t shared_seq;
> > +    uint8_t rsvd[0x6];
> > +} QEMU_PACKED CXLDCExtent_raw;
> > +
> > +typedef struct CXLDCD_Extent {
> > +    uint64_t start_dpa;
> > +    uint64_t len;
> > +    uint8_t tag[0x10];
> > +    uint16_t shared_seq;
> > +    uint8_t rsvd[0x6];
> > +
> > +    QTAILQ_ENTRY(CXLDCD_Extent) node;
> > +} CXLDCD_Extent;
> > +typedef QTAILQ_HEAD(, CXLDCD_Extent) CXLDCDExtentList;
> > +
> >   typedef struct CXLDCD_Region {
> >       uint64_t base;
> >       uint64_t decode_len; /* in multiples of 256MB */
> > @@ -433,6 +452,10 @@ struct CXLType3Dev {
> >
> >           uint8_t num_regions; /* 0-8 regions */
> >           struct CXLDCD_Region regions[DCD_MAX_REGION_NUM];
> > +        CXLDCDExtentList extents;
> > +
> > +        uint32_t total_extent_count;
>
> Hi,
>
> I don't see total_extent_count being updated anywhere. Shouldn't this be
> adjusted as part of cmd_dcd_add_dyn_cap_rsp()/cmd_dcd_release_dyn_cap()?
>
> Thanks,
> Jorgen

Good catch. Thanks Jorgen, will fix in the next version.

Fan

>
>
> > +        uint32_t ext_list_gen_seq;
> >       } dc;
> >   };
> >
> > --
> > 2.25.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response
  2023-09-08 13:00       ` Jørgen Hansen
@ 2023-09-08 17:19         ` Fan Ni
  0 siblings, 0 replies; 48+ messages in thread
From: Fan Ni @ 2023-09-08 17:19 UTC (permalink / raw)
  To: Jørgen Hansen
  Cc: Fan Ni, qemu-devel, jonathan.cameron, linux-cxl, gregory.price,
	hchkuo, cbrowy, ira.weiny, dan.j.williams, Adam Manzanares, dave,
	nmtadam.samsung, nifan

On Fri, Sep 08, 2023 at 01:00:16PM +0000, Jørgen Hansen wrote:
> On 7/25/23 20:39, Fan Ni wrote:
> > From: Fan Ni <nifan@outlook.com>
> >
> > Per CXL spec 3.0, two mailbox commands are implemented:
> > Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
> > Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > ---
> >   hw/cxl/cxl-mailbox-utils.c  | 253 ++++++++++++++++++++++++++++++++++++
> >   include/hw/cxl/cxl_device.h |   3 +-
> >   2 files changed, 255 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> > index 3d25a9697e..1e4944da95 100644
> > --- a/hw/cxl/cxl-mailbox-utils.c
> > +++ b/hw/cxl/cxl-mailbox-utils.c
> > @@ -84,6 +84,8 @@ enum {
> >       DCD_CONFIG  = 0x48, /*r3.0: 8.2.9.8.9*/
> >           #define GET_DC_CONFIG          0x0
> >           #define GET_DYN_CAP_EXT_LIST   0x1
> > +        #define ADD_DYN_CAP_RSP        0x2
> > +        #define RELEASE_DYN_CAP        0x3
> >       PHYSICAL_SWITCH = 0x51
> >           #define IDENTIFY_SWITCH_DEVICE      0x0
> >   };
> > @@ -1086,6 +1088,251 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(struct cxl_cmd *cmd,
> >       return CXL_MBOX_SUCCESS;
> >   }
> >
> > +/*
> > + * Check whether the bits at addr between [nr, nr+size) are all set,
> > + * return 1 if all 1s, else return 0
> > + */
> > +static inline int test_bits(const unsigned long *addr, int nr, int size)
> > +{
> > +    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
> > +
> > +    return (res >= nr + size) ? 1 : 0;
> > +}
> > +
> > +/*
> > + * Find dynamic capacity region id based on dpa range [dpa, dpa+len)
> > + */
> > +static uint8_t find_region_id(struct CXLType3Dev *dev, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int8_t i = dev->dc.num_regions - 1;
> > +
> > +    while (i > 0 && dpa < dev->dc.regions[i].base) {
> > +        i--;
> > +    }
> > +
> > +    if (dpa < dev->dc.regions[i].base
> > +            || dpa + len > dev->dc.regions[i].base + dev->dc.regions[i].len) {
> > +        return dev->dc.num_regions;
> > +    }
> > +
> > +    return i;
> > +}
> > +
> > +static void insert_extent_to_extent_list(CXLDCDExtentList *list, uint64_t dpa,
> > +        uint64_t len, uint8_t *tag, uint16_t shared_seq)
> > +{
> > +    CXLDCD_Extent *extent;
> > +    extent = g_new0(CXLDCD_Extent, 1);
> > +    extent->start_dpa = dpa;
> > +    extent->len = len;
> > +    if (tag) {
> > +        memcpy(extent->tag, tag, 0x10);
> > +    } else {
> > +        memset(extent->tag, 0, 0x10);
> > +    }
> > +    extent->shared_seq = shared_seq;
> > +
> > +    QTAILQ_INSERT_TAIL(list, extent, node);
> > +}
> > +
> > +typedef struct updated_dc_extent_list_in_pl {
> > +    uint32_t num_entries_updated;
> > +    uint8_t rsvd[4];
> > +    struct { /* r3.0: Table 8-130 */
> > +        uint64_t start_dpa;
> > +        uint64_t len;
> > +        uint8_t rsvd[8];
> > +    } QEMU_PACKED updated_entries[];
> > +} QEMU_PACKED updated_dc_extent_list_in_pl;
> > +
> > +/*
> > + * The function only check the input extent list against itself.
> > + */
> > +static CXLRetCode detect_malformed_extent_list(CXLType3Dev *dev,
> > +        const updated_dc_extent_list_in_pl *in)
> > +{
> > +    unsigned long *blk_bitmap;
> > +    uint64_t min_block_size = dev->dc.regions[0].block_size;
> > +    struct CXLDCD_Region *region = &dev->dc.regions[0];
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    uint8_t rid;
> > +    CXLRetCode ret;
> > +
> > +    for (i = 1; i < dev->dc.num_regions; i++) {
> > +        region = &dev->dc.regions[i];
> > +        if (min_block_size > region->block_size) {
> > +            min_block_size = region->block_size;
> > +        }
> > +    }
> > +
> > +    blk_bitmap = bitmap_new((region->len + region->base
> > +                - dev->dc.regions[0].base) / min_block_size);
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        rid = find_region_id(dev, dpa, len);
> > +        if (rid == dev->dc.num_regions) {
> > +            ret = CXL_MBOX_INVALID_PA;
> > +            goto out;
> > +        }
> > +
> > +        region = &dev->dc.regions[rid];
> > +        if (dpa % region->block_size || len % region->block_size) {
> > +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> > +            goto out;
> > +        }
>
> Hi,
>
> The bitmap uses the dc region 0 base address as the baseline, so when
> checking the dpa against the bitmap it needs to be adjusted for that
> before the bitmap checks, e.g.,
>
> +        dpa -= dev->dc.regions[0].base;
>
> Thanks,
> Jorgen

Makes sense. Will fix. Thanks.

Fan
>
> > +        /* the dpa range already covered by some other extents in the list */
> > +        if (test_bits(blk_bitmap, dpa / min_block_size, len / min_block_size)) {
> > +            ret = CXL_MBOX_INVALID_EXTENT_LIST;
> > +            goto out;
> > +        }
> > +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> > +   }
> > +
> > +    ret = CXL_MBOX_SUCCESS;
> > +
> > +out:
> > +    g_free(blk_bitmap);
> > +    return ret;
> > +}
> > +
> > +/*
> > + * cxl spec 3.0: 8.2.9.8.9.3
> > + * Add Dynamic Capacity Response (opcode 4802h)
> > + * Assume an extent is added only after the response is processed successfully
> > + * TODO: for better extent list validation, a better solution would be
> > + * maintaining a pending extent list and use it to verify the extent list in
> > + * the response.
> > + */
> > +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(struct cxl_cmd *cmd,
> > +        CXLDeviceState *cxl_dstate, uint16_t *len_unused)
> > +{
> > +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> > +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> > +            cxl_dstate);
> > +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCD_Extent *ent;
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    CXLRetCode ret;
> > +
> > +    if (in->num_entries_updated == 0) {
> > +        ret = CXL_MBOX_SUCCESS;
> > +        goto out;
> > +    }
> > +
> > +    ret = detect_malformed_extent_list(ct3d, in);
> > +    if (ret != CXL_MBOX_SUCCESS) {
> > +        goto out;
> > +    }
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        /*
> > +         * Check if the DPA range of the to-be-added extent overlaps with
> > +         * existing extent list maintained by the device.
> > +         */
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> > +            if (ent->start_dpa == dpa && ent->len == len) {
> > +                ret = CXL_MBOX_INVALID_PA;
> > +                goto out;
> > +            } else if (ent->start_dpa <= dpa
> > +                    && dpa + len <= ent->start_dpa + ent->len) {
> > +                ret = CXL_MBOX_INVALID_PA;
> > +                goto out;
> > +            } else if ((dpa < ent->start_dpa + ent->len
> > +                        && dpa + len > ent->start_dpa + ent->len)
> > +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> > +                ret = CXL_MBOX_INVALID_PA;
> > +                goto out;
> > +            }
> > +        }
> > +
> > +        /*
> > +         * TODO: add a pending extent list based on event log record and verify
> > +         * the input response
> > +         */
> > +
> > +        insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> > +    }
> > +    ret = CXL_MBOX_SUCCESS;
> > +
> > +out:
> > +    return ret;
> > +}
> > +
> > +/*
> > + * Spec 3.0: 8.2.9.8.9.4
> > + * Release Dynamic Capacity (opcode 4803h)
> > + **/
> > +static CXLRetCode cmd_dcd_release_dyn_cap(struct cxl_cmd *cmd,
> > +        CXLDeviceState *cxl_dstate,
> > +        uint16_t *len_unused)
> > +{
> > +    updated_dc_extent_list_in_pl *in = (void *)cmd->payload;
> > +    struct CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev,
> > +            cxl_dstate);
> > +    CXLDCDExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCD_Extent *ent;
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    CXLRetCode ret;
> > +
> > +    if (in->num_entries_updated == 0) {
> > +        return CXL_MBOX_INVALID_INPUT;
> > +    }
> > +
> > +    ret = detect_malformed_extent_list(ct3d, in);
> > +    if (ret != CXL_MBOX_SUCCESS) {
> > +        return ret;
> > +    }
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> > +            if (ent->start_dpa == dpa && ent->len == len) {
> > +                break;
> > +            } else if (ent->start_dpa < dpa
> > +                    && dpa + len <= ent->start_dpa + ent->len) {
> > +                /* remove partial extent */
> > +                uint64_t len1 = dpa - ent->start_dpa;
> > +                uint64_t len2 = ent->start_dpa + ent->len - dpa - len;
> > +
> > +                if (len1) {
> > +                    insert_extent_to_extent_list(extent_list, ent->start_dpa,
> > +                            len1, NULL, 0);
> > +                }
> > +                if (len2) {
> > +                    insert_extent_to_extent_list(extent_list, dpa + len, len2,
> > +                            NULL, 0);
> > +                }
> > +                break;
> > +            } else if ((dpa < ent->start_dpa + ent->len
> > +                        && dpa + len > ent->start_dpa + ent->len)
> > +                    || (dpa < ent->start_dpa && dpa + len > ent->start_dpa))
> > +                return CXL_MBOX_INVALID_EXTENT_LIST;
> > +        }
> > +
> > +        if (ent) {
> > +            QTAILQ_REMOVE(extent_list, ent, node);
> > +            g_free(ent);
> > +        } else {
> > +            /* Try to remove a non-existing extent */
> > +            return CXL_MBOX_INVALID_PA;
> > +        }
> > +    }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> >   #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
> >   #define IMMEDIATE_DATA_CHANGE (1 << 2)
> >   #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> > @@ -1129,6 +1376,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
> >       [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
> >           "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
> >           8, 0 },
> > +    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
> > +        "ADD_DCD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
> > +        ~0, IMMEDIATE_DATA_CHANGE },
> > +    [DCD_CONFIG][RELEASE_DYN_CAP] = {
> > +        "RELEASE_DCD_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
> > +        ~0, IMMEDIATE_DATA_CHANGE },
> >   };
> >
> >   static struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 3a338b3b37..01a5eaca48 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -130,7 +130,8 @@ typedef enum {
> >       CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
> >       CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
> >       CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
> > -    CXL_MBOX_MAX = 0x17
> > +    CXL_MBOX_INVALID_EXTENT_LIST = 0x1E, /* cxl r3.0: Table 8-34*/
> > +    CXL_MBOX_MAX = 0x1F
> >   } CXLRetCode;
> >
> >   struct cxl_cmd;
> > --
> > 2.25.1

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2023-09-08 17:21 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20230725183956uscas1p154e945516c2a4091479f4906d7652648@uscas1p1.samsung.com>
2023-07-25 18:39 ` [Qemu PATCH v2 0/9] Enabling DCD emulation support in Qemu Fan Ni
     [not found]   ` <CGME20230725183956uscas1p17a64ec512cdf5b9348451926d6f0b224@uscas1p1.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command Fan Ni
2023-08-04 14:19       ` Jonathan Cameron
2023-08-04 14:19         ` Jonathan Cameron via
     [not found]   ` <CGME20230725183956uscas1p153242eb4b12cb9cb6529476b4e9058c4@uscas1p1.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices Fan Ni
2023-08-04 15:27       ` Jonathan Cameron
2023-08-04 15:27         ` Jonathan Cameron via
     [not found]   ` <CGME20230725183956uscas1p2008fba59779b70405c74d28a30e4fbaa@uscas1p2.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 4/9] hw/mem/cxl_type3: Add support to create DC regions to " Fan Ni
2023-08-04 15:55       ` Jonathan Cameron
2023-08-04 15:55         ` Jonathan Cameron via
     [not found]   ` <CGME20230725183956uscas1p296403063c710f4b546d4fec7650915c4@uscas1p2.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support Fan Ni
2023-08-04 15:24       ` Jonathan Cameron via
2023-08-04 15:24         ` Jonathan Cameron
     [not found]   ` <CGME20230725183957uscas1p2a076b6f7b694d2e632a0b8025ec331d7@uscas1p2.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response Fan Ni
2023-08-07 11:42       ` Jonathan Cameron via
2023-08-07 11:42         ` Jonathan Cameron
2023-09-08 13:00       ` Jørgen Hansen
2023-09-08 17:19         ` Fan Ni
     [not found]   ` <CGME20230725183957uscas1p1eeb8e8eccc6c00b460d183027642374b@uscas1p1.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions Fan Ni
2023-07-26 12:53       ` Nathan Fontenot
2023-07-26 16:17         ` nifan
2023-08-04 16:36       ` Jonathan Cameron
2023-08-04 16:36         ` Jonathan Cameron via
2023-08-04 18:07         ` Gregory Price
2023-08-07 12:10           ` Jonathan Cameron
2023-08-07 12:10             ` Jonathan Cameron via
     [not found]   ` <CGME20230725183957uscas1p28b38d294f90b97f99769466cc533b4de@uscas1p2.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support Fan Ni
2023-08-07 11:55       ` Jonathan Cameron
2023-08-07 11:55         ` Jonathan Cameron via
2023-09-08 13:12       ` Jørgen Hansen
2023-09-08 17:12         ` Fan Ni
     [not found]   ` <CGME20230725183957uscas1p1ebf676c30d21896d1fd7f9b652250449@uscas1p1.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents Fan Ni
2023-08-07 10:35       ` Jonathan Cameron
2023-08-07 10:35         ` Jonathan Cameron via
     [not found]   ` <CGME20230725183957uscas1p2ca5293c7229ab989ad1a2d95395436a6@uscas1p2.samsung.com>
2023-07-25 18:39     ` [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions Fan Ni
2023-08-07  8:53       ` Jonathan Cameron
2023-08-07  8:53         ` Jonathan Cameron via
2023-08-07  9:37         ` Jonathan Cameron
2023-08-07  9:37           ` Jonathan Cameron via
2023-08-24 20:49         ` Fan Ni
2023-08-25 11:42           ` Jonathan Cameron
2023-08-25 11:42             ` Jonathan Cameron via
2023-08-25 16:34             ` Fan Ni
2023-08-30 15:04               ` Jonathan Cameron via
2023-08-30 15:04                 ` Jonathan Cameron
2023-08-30 12:08       ` Jørgen Hansen
2023-08-30 15:37         ` Jonathan Cameron via
2023-08-30 15:37           ` Jonathan Cameron

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.