* [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support
@ 2016-03-15  4:38 Bharata B Rao
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
                   ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Bharata B Rao @ 2016-03-15  4:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, Bharata B Rao, mdroth, qemu-ppc, nfont, imammedo, david

This patchset adds memory hot removal support for PowerPC sPAPR.
This new version switches to the proposed "count-indexed" type of
hotplug identifier, which allows a number of LMBs to be hot removed
starting from a given DRC index.

This count-indexed hotplug identifier isn't yet part of PAPR.

Changes in v2
-------------
- Use count-indexed hotplug identifier type for LMB removal.

v1: https://lists.gnu.org/archive/html/qemu-ppc/2015-10/msg00163.html
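
For reference, a minimal usage sketch of the intended flow (the backend and
device IDs, sizes and slot counts below are only examples, not part of the
patches): boot the guest with spare memory slots, then plug and unplug a
pc-dimm from the monitor:

  qemu-system-ppc64 ... -m 4G,slots=32,maxmem=32G ...

  (qemu) object_add memory-backend-ram,id=mem1,size=1G
  (qemu) device_add pc-dimm,id=dimm1,memdev=mem1
  (qemu) device_del dimm1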

Bharata B Rao (2):
  spapr: Add DRC count indexed hotplug identifier type
  spapr: Memory hot-unplug support

 hw/ppc/spapr.c         | 93 +++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/ppc/spapr_drc.c     | 18 ++++++++++
 hw/ppc/spapr_events.c  | 57 +++++++++++++++++++++++--------
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 155 insertions(+), 15 deletions(-)

-- 
2.1.0

* [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type
  2016-03-15  4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao
@ 2016-03-15  4:38 ` Bharata B Rao
  2016-03-16  1:29   ` David Gibson
  2016-03-17 16:03   ` Michael Roth
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao
  2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth
  2 siblings, 2 replies; 33+ messages in thread
From: Bharata B Rao @ 2016-03-15  4:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, Bharata B Rao, mdroth, qemu-ppc, nfont, imammedo, david

Add support for DRC count indexed hotplug ID type which is primarily
needed for memory hot unplug. This type allows for specifying the
number of DRs that should be plugged/unplugged starting from a given
DRC index.

NOTE: This new hotplug identifier type is not yet part of PAPR.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr_events.c  | 57 +++++++++++++++++++++++++++++++++++++-------------
 include/hw/ppc/spapr.h |  2 ++
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 39f4682..5d1d13d 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -171,6 +171,16 @@ struct epow_log_full {
     struct rtas_event_log_v6_epow epow;
 } QEMU_PACKED;
 
+union drc_id {
+    uint32_t index;
+    uint32_t count;
+    struct count_index {
+        uint32_t index;
+        uint32_t count;
+    } count_index;
+    char name[1];
+} QEMU_PACKED;
+
 struct rtas_event_log_v6_hp {
 #define RTAS_LOG_V6_SECTION_ID_HOTPLUG              0x4850 /* HP */
     struct rtas_event_log_v6_section_header hdr;
@@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp {
 #define RTAS_LOG_V6_HP_ID_DRC_NAME                       1
 #define RTAS_LOG_V6_HP_ID_DRC_INDEX                      2
 #define RTAS_LOG_V6_HP_ID_DRC_COUNT                      3
+#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED              4
     uint8_t reserved;
-    union {
-        uint32_t index;
-        uint32_t count;
-        char name[1];
-    } drc;
+    union drc_id drc_id;
 } QEMU_PACKED;
 
 struct hp_log_full {
@@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
 
 static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
                                     sPAPRDRConnectorType drc_type,
-                                    uint32_t drc)
+                                    union drc_id *drc_id)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     struct hp_log_full *new_hp;
@@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
     }
 
     if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) {
-        hp->drc.count = cpu_to_be32(drc);
+        hp->drc_id.count = cpu_to_be32(drc_id->count);
     } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) {
-        hp->drc.index = cpu_to_be32(drc);
+        hp->drc_id.index = cpu_to_be32(drc_id->index);
+    } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) {
+        hp->drc_id.count_index.count = cpu_to_be32(drc_id->count_index.count);
+        hp->drc_id.count_index.index = cpu_to_be32(drc_id->count_index.index);
     }
 
     rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
@@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     sPAPRDRConnectorType drc_type = drck->get_type(drc);
-    uint32_t index = drck->get_index(drc);
+    union drc_id drc_id;
+    drc_id.index = drck->get_index(drc);
 
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
-                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index);
+                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
 }
 
 void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc)
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     sPAPRDRConnectorType drc_type = drck->get_type(drc);
-    uint32_t index = drck->get_index(drc);
+    union drc_id drc_id;
+    drc_id.index = drck->get_index(drc);
 
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
-                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index);
+                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
 void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
                                        uint32_t count)
 {
+    union drc_id drc_id;
+    drc_id.count = count;
+
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
-                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count);
+                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
 }
 
 void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
                                           uint32_t count)
 {
+    union drc_id drc_id;
+    drc_id.count = count;
+
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
-                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count);
+                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
+}
+
+void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
+                                               uint32_t count, uint32_t index)
+{
+    union drc_id drc_id;
+    drc_id.count_index.count = count;
+    drc_id.count_index.index = index;
+
+    spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
+                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
 static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 098d85d..f0c426b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -585,6 +585,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
                                        uint32_t count);
 void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
                                           uint32_t count);
+void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
+                                               uint32_t count, uint32_t index);
 
 /* rtas-configure-connector state */
 struct sPAPRConfigureConnectorState {
-- 
2.1.0

* [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-15  4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
@ 2016-03-15  4:38 ` Bharata B Rao
  2016-03-16  1:36   ` David Gibson
  2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth
  2 siblings, 1 reply; 33+ messages in thread
From: Bharata B Rao @ 2016-03-15  4:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: thuth, Bharata B Rao, mdroth, qemu-ppc, nfont, imammedo, david

Add support to hot remove pc-dimm memory devices.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c     | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/ppc/spapr_drc.c | 18 +++++++++++
 2 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 43708a2..cdf268a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2197,6 +2197,88 @@ out:
     error_propagate(errp, local_err);
 }
 
+typedef struct sPAPRDIMMState {
+    uint32_t nr_lmbs;
+} sPAPRDIMMState;
+
+static void spapr_lmb_release(DeviceState *dev, void *opaque)
+{
+    sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
+    HotplugHandler *hotplug_ctrl = NULL;
+
+    if (--ds->nr_lmbs) {
+        return;
+    }
+
+    g_free(ds);
+
+    /*
+     * Now that all the LMBs have been removed by the guest, call the
+     * pc-dimm unplug handler to clean up the pc-dimm device.
+     */
+    hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+}
+
+static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
+                           Error **errp)
+{
+    sPAPRDRConnector *drc;
+    sPAPRDRConnectorClass *drck;
+    uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+    int i;
+    sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState));
+    uint32_t start_index;
+
+    ds->nr_lmbs = nr_lmbs;
+    for (i = 0; i < nr_lmbs; i++) {
+        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                addr / SPAPR_MEMORY_BLOCK_SIZE);
+        g_assert(drc);
+
+        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+        drck->detach(drc, dev, spapr_lmb_release, ds, errp);
+        if (!i) {
+            start_index = drck->get_index(drc);
+        }
+        addr += SPAPR_MEMORY_BLOCK_SIZE;
+    }
+    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                              nr_lmbs, start_index);
+}
+
+static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                                Error **errp)
+{
+    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
+
+    pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
+    object_unparent(OBJECT(dev));
+}
+
+static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    Error *local_err = NULL;
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
+    uint64_t size = memory_region_size(mr);
+    uint64_t addr;
+
+    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    spapr_del_lmbs(dev, addr, size, &error_abort);
+out:
+    error_propagate(errp, local_err);
+}
+
 static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp)
 {
@@ -2244,7 +2326,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp)
 {
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        error_setg(errp, "Memory hot unplug not supported by sPAPR");
+        spapr_memory_unplug(hotplug_dev, dev, errp);
+    }
+}
+
+static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
+                                                DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        spapr_memory_unplug_request(hotplug_dev, dev, errp);
     }
 }
 
@@ -2293,6 +2383,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     hc->plug = spapr_machine_device_plug;
     hc->unplug = spapr_machine_device_unplug;
     mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
+    hc->unplug_request = spapr_machine_device_unplug_request;
 
     smc->dr_lmb_enabled = true;
     fwc->get_dev_path = spapr_get_fw_dev_path;
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index ef063c0..740b9d4 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "hw/ppc/spapr_drc.h"
+#include "hw/ppc/spapr.h"
 #include "qom/object.h"
 #include "hw/qdev.h"
 #include "qapi/visitor.h"
@@ -78,6 +79,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc,
         }
     }
 
+    /*
+     * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't
+     * belong to a DIMM device that is marked for removal.
+     *
+     * Currently the guest userspace tool drmgr that drives the memory
+     * hotplug/unplug will just try to remove a set of 'removable' LMBs
+     * in response to a hot unplug request that is based on drc-count.
+     * If the LMB being removed doesn't belong to a DIMM device that is
+     * actually being unplugged, fail the isolation request here.
+     */
+    if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) {
+        if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
+             !drc->awaiting_release) {
+            return RTAS_OUT_HW_ERROR;
+        }
+    }
+
     drc->isolation_state = state;
 
     if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
-- 
2.1.0

* Re: [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
@ 2016-03-16  1:29   ` David Gibson
  2016-03-17 16:03   ` Michael Roth
  1 sibling, 0 replies; 33+ messages in thread
From: David Gibson @ 2016-03-16  1:29 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo

On Tue, Mar 15, 2016 at 10:08:55AM +0530, Bharata B Rao wrote:
> Add support for DRC count indexed hotplug ID type which is primarily
> needed for memory hot unplug. This type allows for specifying the
> number of DRs that should be plugged/unplugged starting from a given
> DRC index.
> 
> NOTE: This new hotplug identifier type is not yet part of PAPR.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Looks correct, but obviously I won't apply until the change reaches
PAPR.


> ---
>  hw/ppc/spapr_events.c  | 57 +++++++++++++++++++++++++++++++++++++-------------
>  include/hw/ppc/spapr.h |  2 ++
>  2 files changed, 45 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 39f4682..5d1d13d 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -171,6 +171,16 @@ struct epow_log_full {
>      struct rtas_event_log_v6_epow epow;
>  } QEMU_PACKED;
>  
> +union drc_id {
> +    uint32_t index;
> +    uint32_t count;
> +    struct count_index {
> +        uint32_t index;
> +        uint32_t count;
> +    } count_index;
> +    char name[1];
> +} QEMU_PACKED;
> +
>  struct rtas_event_log_v6_hp {
>  #define RTAS_LOG_V6_SECTION_ID_HOTPLUG              0x4850 /* HP */
>      struct rtas_event_log_v6_section_header hdr;
> @@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp {
>  #define RTAS_LOG_V6_HP_ID_DRC_NAME                       1
>  #define RTAS_LOG_V6_HP_ID_DRC_INDEX                      2
>  #define RTAS_LOG_V6_HP_ID_DRC_COUNT                      3
> +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED              4
>      uint8_t reserved;
> -    union {
> -        uint32_t index;
> -        uint32_t count;
> -        char name[1];
> -    } drc;
> +    union drc_id drc_id;
>  } QEMU_PACKED;
>  
>  struct hp_log_full {
> @@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
>  
>  static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>                                      sPAPRDRConnectorType drc_type,
> -                                    uint32_t drc)
> +                                    union drc_id *drc_id)
>  {
>      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>      struct hp_log_full *new_hp;
> @@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>      }
>  
>      if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) {
> -        hp->drc.count = cpu_to_be32(drc);
> +        hp->drc_id.count = cpu_to_be32(drc_id->count);
>      } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) {
> -        hp->drc.index = cpu_to_be32(drc);
> +        hp->drc_id.index = cpu_to_be32(drc_id->index);
> +    } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) {
> +        hp->drc_id.count_index.count = cpu_to_be32(drc_id->count_index.count);
> +        hp->drc_id.count_index.index = cpu_to_be32(drc_id->count_index.index);
>      }
>  
>      rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
> @@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
>  {
>      sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>      sPAPRDRConnectorType drc_type = drck->get_type(drc);
> -    uint32_t index = drck->get_index(drc);
> +    union drc_id drc_id;
> +    drc_id.index = drck->get_index(drc);
>  
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
> -                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index);
> +                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
>  }
>  
>  void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc)
>  {
>      sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>      sPAPRDRConnectorType drc_type = drck->get_type(drc);
> -    uint32_t index = drck->get_index(drc);
> +    union drc_id drc_id;
> +    drc_id.index = drck->get_index(drc);
>  
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
> -                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index);
> +                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
>  void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
>                                         uint32_t count)
>  {
> +    union drc_id drc_id;
> +    drc_id.count = count;
> +
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
> -                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count);
> +                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
>  }
>  
>  void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
>                                            uint32_t count)
>  {
> +    union drc_id drc_id;
> +    drc_id.count = count;
> +
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
> -                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count);
> +                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
> +}
> +
> +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
> +                                               uint32_t count, uint32_t index)
> +{
> +    union drc_id drc_id;
> +    drc_id.count_index.count = count;
> +    drc_id.count_index.index = index;
> +
> +    spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
> +                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
>  static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 098d85d..f0c426b 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -585,6 +585,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
>                                         uint32_t count);
>  void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
>                                            uint32_t count);
> +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
> +                                               uint32_t count, uint32_t index);
>  
>  /* rtas-configure-connector state */
>  struct sPAPRConfigureConnectorState {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao
@ 2016-03-16  1:36   ` David Gibson
  2016-03-16  4:41     ` Bharata B Rao
  0 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2016-03-16  1:36 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo

On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> Add support to hot remove pc-dimm memory devices.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Looks correct, but again, needs to wait on the PAPR change.

Have you thought any further on the idea of sending an index message,
then a count message as an interim approach to fixing this without
requiring a PAPR change?
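
Purely as an illustration of that interim scheme (spapr_del_lmbs_interim is a
made-up name, and neither patch implements this), it could reuse the index-
and count-based events this series already has instead of the new
count-indexed one:

    /*
     * Hypothetical sketch only: detach the LMBs as spapr_del_lmbs() does,
     * then signal the guest with two events -- remove-by-index for the
     * first LMB, followed by remove-by-count for the rest.
     */
    static void spapr_del_lmbs_interim(DeviceState *dev, uint64_t addr,
                                       uint64_t size, Error **errp)
    {
        uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
        sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState));
        sPAPRDRConnector *first_drc = NULL;
        uint32_t i;

        ds->nr_lmbs = nr_lmbs;
        for (i = 0; i < nr_lmbs; i++) {
            sPAPRDRConnector *drc =
                spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
                                         addr / SPAPR_MEMORY_BLOCK_SIZE);
            sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);

            drck->detach(drc, dev, spapr_lmb_release, ds, errp);
            if (i == 0) {
                first_drc = drc;
            }
            addr += SPAPR_MEMORY_BLOCK_SIZE;
        }

        /* 1st message: pins down where the removal starts. */
        spapr_hotplug_req_remove_by_index(first_drc);
        /* 2nd message: asks the guest to continue from there for the rest. */
        spapr_hotplug_req_remove_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB,
                                          nr_lmbs - 1);
    }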

> ---
>  hw/ppc/spapr.c     | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/ppc/spapr_drc.c | 18 +++++++++++
>  2 files changed, 110 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 43708a2..cdf268a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2197,6 +2197,88 @@ out:
>      error_propagate(errp, local_err);
>  }
>  
> +typedef struct sPAPRDIMMState {
> +    uint32_t nr_lmbs;
> +} sPAPRDIMMState;
> +
> +static void spapr_lmb_release(DeviceState *dev, void *opaque)
> +{
> +    sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
> +    HotplugHandler *hotplug_ctrl = NULL;
> +
> +    if (--ds->nr_lmbs) {
> +        return;
> +    }
> +
> +    g_free(ds);
> +
> +    /*
> +     * Now that all the LMBs have been removed by the guest, call the
> +     * pc-dimm unplug handler to clean up the pc-dimm device.
> +     */
> +    hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
> +}
> +
> +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
> +                           Error **errp)
> +{
> +    sPAPRDRConnector *drc;
> +    sPAPRDRConnectorClass *drck;
> +    uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
> +    int i;
> +    sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState));
> +    uint32_t start_index;
> +
> +    ds->nr_lmbs = nr_lmbs;
> +    for (i = 0; i < nr_lmbs; i++) {
> +        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
> +                addr / SPAPR_MEMORY_BLOCK_SIZE);
> +        g_assert(drc);
> +
> +        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +        drck->detach(drc, dev, spapr_lmb_release, ds, errp);
> +        if (!i) {
> +            start_index = drck->get_index(drc);
> +        }
> +        addr += SPAPR_MEMORY_BLOCK_SIZE;
> +    }
> +    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
> +                                              nr_lmbs, start_index);
> +}
> +
> +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                                Error **errp)
> +{
> +    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
> +    PCDIMMDevice *dimm = PC_DIMM(dev);
> +    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> +    MemoryRegion *mr = ddc->get_memory_region(dimm);
> +
> +    pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
> +    object_unparent(OBJECT(dev));
> +}
> +
> +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
> +                                        DeviceState *dev, Error **errp)
> +{
> +    Error *local_err = NULL;
> +    PCDIMMDevice *dimm = PC_DIMM(dev);
> +    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> +    MemoryRegion *mr = ddc->get_memory_region(dimm);
> +    uint64_t size = memory_region_size(mr);
> +    uint64_t addr;
> +
> +    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
> +    if (local_err) {
> +        goto out;
> +    }
> +
> +    spapr_del_lmbs(dev, addr, size, &error_abort);
> +out:
> +    error_propagate(errp, local_err);
> +}
> +
>  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>                                        DeviceState *dev, Error **errp)
>  {
> @@ -2244,7 +2326,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
>                                        DeviceState *dev, Error **errp)
>  {
>      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> -        error_setg(errp, "Memory hot unplug not supported by sPAPR");
> +        spapr_memory_unplug(hotplug_dev, dev, errp);
> +    }
> +}
> +
> +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
> +                                                DeviceState *dev, Error **errp)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> +        spapr_memory_unplug_request(hotplug_dev, dev, errp);
>      }
>  }
>  
> @@ -2293,6 +2383,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      hc->plug = spapr_machine_device_plug;
>      hc->unplug = spapr_machine_device_unplug;
>      mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
> +    hc->unplug_request = spapr_machine_device_unplug_request;
>  
>      smc->dr_lmb_enabled = true;
>      fwc->get_dev_path = spapr_get_fw_dev_path;
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index ef063c0..740b9d4 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -12,6 +12,7 @@
>  
>  #include "qemu/osdep.h"
>  #include "hw/ppc/spapr_drc.h"
> +#include "hw/ppc/spapr.h"
>  #include "qom/object.h"
>  #include "hw/qdev.h"
>  #include "qapi/visitor.h"
> @@ -78,6 +79,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc,
>          }
>      }
>  
> +    /*
> +     * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't
> +     * belong to a DIMM device that is marked for removal.
> +     *
> +     * Currently the guest userspace tool drmgr that drives the memory
> +     * hotplug/unplug will just try to remove a set of 'removable' LMBs
> +     * in response to a hot unplug request that is based on drc-count.
> +     * If the LMB being removed doesn't belong to a DIMM device that is
> +     * actually being unplugged, fail the isolation request here.
> +     */
> +    if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) {
> +        if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
> +             !drc->awaiting_release) {
> +            return RTAS_OUT_HW_ERROR;
> +        }
> +    }
> +
>      drc->isolation_state = state;
>  
>      if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-16  1:36   ` David Gibson
@ 2016-03-16  4:41     ` Bharata B Rao
  2016-03-16  5:11       ` David Gibson
                         ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Bharata B Rao @ 2016-03-16  4:41 UTC (permalink / raw)
  To: David Gibson; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo

On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > Add support to hot remove pc-dimm memory devices.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Looks correct, but again, needs to wait on the PAPR change.
> 
> Have you thought any further on the idea of sending an index message,
> then a count message as an interim approach to fixing this without
> requiring a PAPR change?

Removal by index and removal by count are valid messages by themselves,
and drmgr would go ahead and start the removal in response to those
calls. IIUC, you are suggesting that we remove one LMB by index in
response to the 1st message and then remove (count - 1) LMBs starting
from where the removal left off after the previous message.

Since the same code base of powerpc-utils works on PowerVM too, I am not
sure if such an approach would impact PowerVM in any undesirable manner.
May be Nathan can clarify ?

I see that this can be done, but the changes in the drmgr code, especially the
code related to LMB list handling/removal, can be non-trivial. So I am not sure
the temporary approach is worth it here, and hence I feel it is better
to wait and do it the count-indexed way.

While we are here, I would also like to get some opinion on the real
need for memory unplug. Is there anything that memory unplug gives us
which memory ballooning (shrinking mem via ballooning) can't give ?

Regards,
Bharata.

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-16  4:41     ` Bharata B Rao
@ 2016-03-16  5:11       ` David Gibson
  2016-03-23  3:22       ` David Gibson
  2016-04-25  9:20       ` Igor Mammedov
  2 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2016-03-16  5:11 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo

On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote:
> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > > Add support to hot remove pc-dimm memory devices.
> > > 
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Looks correct, but again, needs to wait on the PAPR change.
> > 
> > Have you thought any further on the idea of sending an index message,
> > then a count message as an interim approach to fixing this without
> > requiring a PAPR change?
> 
> Removal by index and removal by count are valid messages by themselves
> and drmgr would go ahead and start the removal in reponse to those
> calls. IIUC, you are suggesting that lets remove one LMB by index in
> response to 1st message and remove (count -1) LMBs from where the last
> removal was done in the previous message.

That's right.

> Since the same code base of powerpc-utils works on PowerVM too, I am not
> sure if such an approach would impact PowerVM in any undesirable manner.
> May be Nathan can clarify ?

Ah..  My first guess would be that it's ok; since IIUC PowerVM doesn't
care where the LMBs are removed from, removing them starting from the
last place we removed something should be as good as anywhere.

But it's possible there's some issue I haven't considered.

> I see that this can be done, but the changes in drmgr code specially the
> code related to LMB list handling/removal can be non-trivial. So not sure
> if the temporary approach is all that worth here and hence I feel it is better
> to wait and do it the count-indexed way.

Ok.  It seems like it ought to be fairly straightforward, but I don't
know the drmgr code, so..

It would certainly be useful if Nathan could chime in on this.

> While we are here, I would also like to get some opinion on the real
> need for memory unplug. Is there anything that memory unplug gives us
> which memory ballooning (shrinking mem via ballooning) can't give ?

That's.. a good question.  I guess it means avoiding another interface
and a pseudo-device at least.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

* Re: [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
  2016-03-16  1:29   ` David Gibson
@ 2016-03-17 16:03   ` Michael Roth
  1 sibling, 0 replies; 33+ messages in thread
From: Michael Roth @ 2016-03-17 16:03 UTC (permalink / raw)
  To: Bharata B Rao, qemu-devel; +Cc: thuth, qemu-ppc, imammedo, nfont, david

Quoting Bharata B Rao (2016-03-14 23:38:55)
> Add support for DRC count indexed hotplug ID type which is primarily
> needed for memory hot unplug. This type allows for specifying the
> number of DRs that should be plugged/unplugged starting from a given
> DRC index.
> 
> NOTE: This new hotplug identifier type is not yet part of PAPR.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_events.c  | 57 +++++++++++++++++++++++++++++++++++++-------------
>  include/hw/ppc/spapr.h |  2 ++
>  2 files changed, 45 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 39f4682..5d1d13d 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -171,6 +171,16 @@ struct epow_log_full {
>      struct rtas_event_log_v6_epow epow;
>  } QEMU_PACKED;
> 
> +union drc_id {
> +    uint32_t index;
> +    uint32_t count;
> +    struct count_index {
> +        uint32_t index;
> +        uint32_t count;

The current version of the spec proposal is actually count followed by
index. I kind of wish it were in the opposite order, and it's probably
not too late to change this if there's a pressing reason, but that's how
things stand at the moment.

> +    } count_index;

> +    char name[1];
> +} QEMU_PACKED;
> +
>  struct rtas_event_log_v6_hp {
>  #define RTAS_LOG_V6_SECTION_ID_HOTPLUG              0x4850 /* HP */
>      struct rtas_event_log_v6_section_header hdr;
> @@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp {
>  #define RTAS_LOG_V6_HP_ID_DRC_NAME                       1
>  #define RTAS_LOG_V6_HP_ID_DRC_INDEX                      2
>  #define RTAS_LOG_V6_HP_ID_DRC_COUNT                      3
> +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED              4
>      uint8_t reserved;
> -    union {
> -        uint32_t index;
> -        uint32_t count;
> -        char name[1];
> -    } drc;
> +    union drc_id drc_id;
>  } QEMU_PACKED;
> 
>  struct hp_log_full {
> @@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
> 
>  static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>                                      sPAPRDRConnectorType drc_type,
> -                                    uint32_t drc)
> +                                    union drc_id *drc_id)
>  {
>      sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>      struct hp_log_full *new_hp;
> @@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>      }
> 
>      if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) {
> -        hp->drc.count = cpu_to_be32(drc);
> +        hp->drc_id.count = cpu_to_be32(drc_id->count);
>      } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) {
> -        hp->drc.index = cpu_to_be32(drc);
> +        hp->drc_id.index = cpu_to_be32(drc_id->index);
> +    } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) {
> +        hp->drc_id.count_index.count = cpu_to_be32(drc_id->count_index.count);
> +        hp->drc_id.count_index.index = cpu_to_be32(drc_id->count_index.index);
>      }
> 
>      rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
> @@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
>  {
>      sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>      sPAPRDRConnectorType drc_type = drck->get_type(drc);
> -    uint32_t index = drck->get_index(drc);
> +    union drc_id drc_id;

I'd rather we used 'union drc_id id' or something. Having the type name
and the variable name be identical is a little confusing.

> +    drc_id.index = drck->get_index(drc);
> 
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
> -                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index);
> +                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
>  }
> 
>  void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc)
>  {
>      sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>      sPAPRDRConnectorType drc_type = drck->get_type(drc);
> -    uint32_t index = drck->get_index(drc);
> +    union drc_id drc_id;
> +    drc_id.index = drck->get_index(drc);
> 
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
> -                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index);
> +                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
> 
>  void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
>                                         uint32_t count)
>  {
> +    union drc_id drc_id;
> +    drc_id.count = count;
> +
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
> -                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count);
> +                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
>  }
> 
>  void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
>                                            uint32_t count)
>  {
> +    union drc_id drc_id;
> +    drc_id.count = count;
> +
>      spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
> -                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count);
> +                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
> +}
> +
> +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
> +                                               uint32_t count, uint32_t index)
> +{
> +    union drc_id drc_id;
> +    drc_id.count_index.count = count;
> +    drc_id.count_index.index = index;
> +
> +    spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
> +                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
> 
>  static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 098d85d..f0c426b 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -585,6 +585,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
>                                         uint32_t count);
>  void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
>                                            uint32_t count);
> +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
> +                                               uint32_t count, uint32_t index);
> 
>  /* rtas-configure-connector state */
>  struct sPAPRConfigureConnectorState {
> -- 
> 2.1.0
> 

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-16  4:41     ` Bharata B Rao
  2016-03-16  5:11       ` David Gibson
@ 2016-03-23  3:22       ` David Gibson
  2016-03-24 14:15         ` Nathan Fontenot
  2016-04-25  9:20       ` Igor Mammedov
  2 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2016-03-23  3:22 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo

On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote:
> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > > Add support to hot remove pc-dimm memory devices.
> > > 
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Looks correct, but again, needs to wait on the PAPR change.
> > 
> > Have you thought any further on the idea of sending an index message,
> > then a count message as an interim approach to fixing this without
> > requiring a PAPR change?
> 
> Removal by index and removal by count are valid messages by themselves
> and drmgr would go ahead and start the removal in reponse to those
> calls. IIUC, you are suggesting that lets remove one LMB by index in
> response to 1st message and remove (count -1) LMBs from where the last
> removal was done in the previous message.

Yes, that's the idea.

> Since the same code base of powerpc-utils works on PowerVM too, I am not
> sure if such an approach would impact PowerVM in any undesirable manner.
> May be Nathan can clarify ?

Heard anything from Nathan?  I don't really see how it would be bad
under PowerVM.  Under PowerVM it generally doesn't matter which LMBs
you remove, right?  So removing the ones immediately after the last
one you removed should be as good a choice as any.

> I see that this can be done, but the changes in drmgr code specially the
> code related to LMB list handling/removal can be non-trivial. So not sure
> if the temporary approach is all that worth here and hence I feel it is better
> to wait and do it the count-indexed way.

Really?  drmgr is already scanning LMBs to find ones it can remove.
Seeding that scan with the last removed LMB shouldn't be too hard.

> While we are here, I would also like to get some opinion on the real
> need for memory unplug. Is there anything that memory unplug gives us
> which memory ballooning (shrinking mem via ballooning) can't give ?

Hmm.. that's an interesting question.  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-23  3:22       ` David Gibson
@ 2016-03-24 14:15         ` Nathan Fontenot
  2016-03-29  4:41           ` David Gibson
  0 siblings, 1 reply; 33+ messages in thread
From: Nathan Fontenot @ 2016-03-24 14:15 UTC (permalink / raw)
  To: David Gibson, Bharata B Rao; +Cc: imammedo, thuth, qemu-ppc, qemu-devel, mdroth

On 03/22/2016 10:22 PM, David Gibson wrote:
> On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote:
>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
>>>> Add support to hot remove pc-dimm memory devices.
>>>>
>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>>>
>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>
>>> Looks correct, but again, needs to wait on the PAPR change.
>>>
>>> Have you thought any further on the idea of sending an index message,
>>> then a count message as an interim approach to fixing this without
>>> requiring a PAPR change?
>>
>> Removal by index and removal by count are valid messages by themselves
>> and drmgr would go ahead and start the removal in reponse to those
>> calls. IIUC, you are suggesting that lets remove one LMB by index in
>> response to 1st message and remove (count -1) LMBs from where the last
>> removal was done in the previous message.
> 
> Yes, that's the idea.
> 
>> Since the same code base of powerpc-utils works on PowerVM too, I am not
>> sure if such an approach would impact PowerVM in any undesirable manner.
>> May be Nathan can clarify ?

The issue I see with this approach is that there is no way in the current
drmgr code to correlate the two memory remove requests. If I understand
what you are asking to do correctly, this would result in two separate
invocations of drmgr: the first would remove a specific LMB by index, with
that index then needing to be saved somewhere, and a second invocation
would then retrieve the index and remove count-1 LMBs.

Would there be anything tying these two requests together? or would we
assume that two requests received in this order are correlated?

What happens if another request comes in in between these two requests?
I see this as being a pretty rare possibility, but it is a possibility.

> 
> Heard anything from Nathan?  I don't really see how it would be bad
> under PowerVM.  Under PowerVM it generally doesn't matter which LMBs
> you remove, right?  So removing the ones immediately after the last
> one you removed should be as good a choice as any.

This shouldn't hurt anything for PowerVM systems. In general the only
time a specific LMB is specified for PowerVM systems is on memory guard
operations.

> 
>> I see that this can be done, but the changes in drmgr code specially the
>> code related to LMB list handling/removal can be non-trivial. So not sure
>> if the temporary approach is all that worth here and hence I feel it is better
>> to wait and do it the count-indexed way.
> 
> Really?  drmgr is already scanning LMBs to find ones it can remove.
> Seeding that scan with the last removed LMB shouldn't be too hard.

This shouldn't be difficult to implement in the drmgr code. We already
search a list of LMBs to find ones to remove; updating it to just return
the LMB with the next sequential index shouldn't be difficult.
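
Just to illustrate that seeding idea (the structure and function below are
invented for this sketch and are not drmgr's actual code):

    #include <stdint.h>
    #include <stddef.h>

    struct lmb_entry {
        uint32_t drc_index;          /* DRC index of this LMB */
        int is_removable;            /* candidate for removal? */
        struct lmb_entry *next;
    };

    /*
     * Hypothetical helper: instead of scanning the LMB list from the
     * beginning, start from the LMB that follows the one removed by the
     * preceding index-based request.
     */
    static struct lmb_entry *find_next_removable_lmb(struct lmb_entry *lmbs,
                                                     uint32_t last_removed)
    {
        struct lmb_entry *lmb;

        for (lmb = lmbs; lmb != NULL; lmb = lmb->next) {
            if (lmb->is_removable && lmb->drc_index > last_removed) {
                return lmb;
            }
        }
        return NULL;
    }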

-Nathan

> 
>> While we are here, I would also like to get some opinion on the real
>> need for memory unplug. Is there anything that memory unplug gives us
>> which memory ballooning (shrinking mem via ballooning) can't give ?
> 
> Hmm.. that's an interesting question.  
> 

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-24 14:15         ` Nathan Fontenot
@ 2016-03-29  4:41           ` David Gibson
  0 siblings, 0 replies; 33+ messages in thread
From: David Gibson @ 2016-03-29  4:41 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: thuth, mdroth, qemu-devel, qemu-ppc, Bharata B Rao, imammedo

On Thu, Mar 24, 2016 at 09:15:58AM -0500, Nathan Fontenot wrote:
> On 03/22/2016 10:22 PM, David Gibson wrote:
> > On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote:
> >> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> >>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> >>>> Add support to hot remove pc-dimm memory devices.
> >>>>
> >>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >>>
> >>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>
> >>> Looks correct, but again, needs to wait on the PAPR change.
> >>>
> >>> Have you thought any further on the idea of sending an index message,
> >>> then a count message as an interim approach to fixing this without
> >>> requiring a PAPR change?
> >>
> >> Removal by index and removal by count are valid messages by themselves
> >> and drmgr would go ahead and start the removal in reponse to those
> >> calls. IIUC, you are suggesting that lets remove one LMB by index in
> >> response to 1st message and remove (count -1) LMBs from where the last
> >> removal was done in the previous message.
> > 
> > Yes, that's the idea.
> > 
> >> Since the same code base of powerpc-utils works on PowerVM too, I am not
> >> sure if such an approach would impact PowerVM in any undesirable manner.
> >> May be Nathan can clarify ?
> 
> The issue I see with this approach is that there is no way in the current
> drmgr code to correlate the two memory remove requests. If I understand
> what you are asking to do correctly, this would result in two separate
> invocations of drmgr. The first to remove a specific LMB by index, this
> index then needing to be saved somewhere, then a second invocation that
> would retrieve the index and remove count-1 LMBs.

Ah.. yes.. I had forgotten that this would be two separate drmgr
invocations, and therefore we'd need a way to carry data between
them.  That does complicate this rather.

> Would there be anything tying these two requests together? or would we
> assume that two requests received in this order are correlated?

My assumption was that it would be based simply on order.

> What happens if another request comes in in between these two requests?
> I see this as being a pretty rare possibility, but it is a possibility.

I'm not sure it actually is possible under KVM - I think the qemu side
processes the requests synchronously.  I'm not 100% certain about that
though.

 The plan was that the qemu HV would not permit LMBs to be removed if
they're not the ones that are supposed to be removed, and so drmgr
would keep scanning until it finds the right ones.

So, even if the request order is jumbled, the behaviour should be
still technically correct - it could be *very* slow though as drmgr
might end up vacating (piece by piece) large areas of the guest's RAM
while it scans for the right LMBs to remove.

> > Heard anything from Nathan?  I don't really see how it would be bad
> > under PowerVM.  Under PowerVM it generally doesn't matter which LMBs
> > you remove, right?  So removing the ones immediately after the last
> > one you removed should be as good a choice as any.
> 
> This shouldn't hurt anything for PowerVM systems. In general the only
> time a specific LMB is specified for PowerVM systems is on memory guard
> operations.

Ok.

> >> I see that this can be done, but the changes in drmgr code specially the
> >> code related to LMB list handling/removal can be non-trivial. So not sure
> >> if the temporary approach is all that worth here and hence I feel it is better
> >> to wait and do it the count-indexed way.
> > 
> > Really?  drmgr is already scanning LMBs to find ones it can remove.
> > Seeding that scan with the last removed LMB shouldn't be too hard.
> 
> This shouldn't be difficult to implement in the drmgr code. We already
> search a list of LMBs to find ones to remove, updating to just return
> the LMB with the next sequential index shouldn't be difficult.
> 
> -Nathan
> 
> > 
> >> While we are here, I would also like to get some opinion on the real
> >> need for memory unplug. Is there anything that memory unplug gives us
> >> which memory ballooning (shrinking mem via ballooning) can't give ?
> > 
> > Hmm.. that's an interesting question.  
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-16  4:41     ` Bharata B Rao
  2016-03-16  5:11       ` David Gibson
  2016-03-23  3:22       ` David Gibson
@ 2016-04-25  9:20       ` Igor Mammedov
  2016-04-26  5:09         ` Bharata B Rao
  2 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-25  9:20 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: David Gibson, thuth, qemu-devel, mdroth, qemu-ppc, nfont

On Wed, 16 Mar 2016 10:11:54 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:  
> > > Add support to hot remove pc-dimm memory devices.
> > > 
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>  
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Looks correct, but again, needs to wait on the PAPR change.
[...]
> 
> While we are here, I would also like to get some opinion on the real
> need for memory unplug. Is there anything that memory unplug gives us
> which memory ballooning (shrinking mem via ballooning) can't give ?
Sure, ballooning can complement memory hotplug, but turning it on would
effectively reduce hotplug to ballooning, as it would enable overcommit
capability instead of the hard partitioning that pc-dimms provide. So one
could just use ballooning only and not bother with hotplug at all.

On the other hand, memory hotplug/unplug (at least on x86) tries
to model real hardware, thus removing the need for a paravirt ballooning
solution in favor of native guest support.

PS:
Guest wise, hot-unplug is currently not well supported in Linux,
i.e. it's not guaranteed that the guest will honor an unplug request,
as it may pin a DIMM by using it as non-migratable memory. So
there is some work to do on the guest side to make unplug more
reliable/guaranteed.

> 
> Regards,
> Bharata.
> 
> 

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-25  9:20       ` Igor Mammedov
@ 2016-04-26  5:09         ` Bharata B Rao
  2016-04-26  7:52           ` Igor Mammedov
  0 siblings, 1 reply; 33+ messages in thread
From: Bharata B Rao @ 2016-04-26  5:09 UTC (permalink / raw)
  To: Igor Mammedov; +Cc: David Gibson, thuth, qemu-devel, mdroth, qemu-ppc, nfont

On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
> On Wed, 16 Mar 2016 10:11:54 +0530
> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> 
> > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:  
> > > > Add support to hot remove pc-dimm memory devices.
> > > > 
> > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>  
> > > 
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > 
> > > Looks correct, but again, needs to wait on the PAPR change.
> [...]
> > 
> > While we are here, I would also like to get some opinion on the real
> > need for memory unplug. Is there anything that memory unplug gives us
> > which memory ballooning (shrinking mem via ballooning) can't give ?
> Sure ballooning can complement memory hotplug but turning it on would
> effectively reduce hotplug to balloning as it would enable overcommit
> capability instead of hard partitioning pc-dimms provides. So one
> could just use ballooning only and not bother with hotplug at all.
> 
> On the other hand memory hotplug/unplug (at least on x86) tries
> to model real hardware, thus removing need in paravirt ballooning
> solution in favor of native guest support.

Thanks for your views.

> 
> PS:
> Guest wise, currently hot-unplug is not well supported in linux,
> i.e. it's not guarantied that guest will honor unplug request
> as it may pin dimm by using it as a non migratable memory. So
> there is something to work on guest side to make unplug more
> reliable/guarantied.

In the above scenario where the guest doesn't allow removal of certain
parts of the DIMM memory, what is the expected behaviour as far as the QEMU
DIMM device is concerned? I seem to be running into this situation
very often with PowerPC memory unplug, where I am left with a DIMM device
that has only some of its memory blocks released. In this situation, I would
like to block further unplug requests on the same device, but QEMU seems
to allow more such unplug requests to come in via the monitor. So
qdev won't help me here? Should I detect such a condition from the
machine unplug() handler and take the required action?
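
One possible shape for such a guard, purely as a hypothetical sketch (the
spapr_pending_dimm_unplug() lookup is an invented helper, not something in
this series), would be to reject a repeated request up front in the
unplug_request path:

    /* Hypothetical sketch: refuse a second unplug request for a DIMM whose
     * LMBs are still being released by the guest. */
    static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
                                            DeviceState *dev, Error **errp)
    {
        if (spapr_pending_dimm_unplug(SPAPR_MACHINE(hotplug_dev), dev)) {
            error_setg(errp, "memory unplug already in progress for '%s'",
                       dev->id);
            return;
        }

        /* ... otherwise fall through to the existing spapr_del_lmbs() path */
    }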

On x86, if some pages are offlined and subsequently other pages can't
be offlined, then I see the full DIMM memory size remaining
with the guest. So I infer that on x86, QEMU memory unplug either
removes the full DIMM or nothing. Is that understanding correct?

Regards,
Bharata.

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-26  5:09         ` Bharata B Rao
@ 2016-04-26  7:52           ` Igor Mammedov
  2016-04-26 21:03             ` Michael Roth
  0 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-26  7:52 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: David Gibson, thuth, qemu-devel, mdroth, qemu-ppc, nfont

On Tue, 26 Apr 2016 10:39:23 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
> > On Wed, 16 Mar 2016 10:11:54 +0530
> > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> >   
> > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:  
> > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:    
> > > > > Add support to hot remove pc-dimm memory devices.
> > > > > 
> > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>    
> > > > 
> > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > 
> > > > Looks correct, but again, needs to wait on the PAPR change.  
> > [...]  
> > > 
> > > While we are here, I would also like to get some opinion on the real
> > > need for memory unplug. Is there anything that memory unplug gives us
> > > which memory ballooning (shrinking mem via ballooning) can't give ?  
> > Sure ballooning can complement memory hotplug but turning it on would
> > effectively reduce hotplug to balloning as it would enable overcommit
> > capability instead of hard partitioning pc-dimms provides. So one
> > could just use ballooning only and not bother with hotplug at all.
> > 
> > On the other hand memory hotplug/unplug (at least on x86) tries
> > to model real hardware, thus removing need in paravirt ballooning
> > solution in favor of native guest support.  
> 
> Thanks for your views.
> 
> > 
> > PS:
> > Guest wise, currently hot-unplug is not well supported in linux,
> > i.e. it's not guarantied that guest will honor unplug request
> > as it may pin dimm by using it as a non migratable memory. So
> > there is something to work on guest side to make unplug more
> > reliable/guarantied.  
> 
> In the above scenario where the guest doesn't allow removal of certain
> parts of DIMM memory, what is the expected behaviour as far as QEMU
> DIMM device is concerned ? I seem to be running into this situation
> very often with PowerPC mem unplug where I am left with a DIMM device
> that has only some memory blocks released. In this situation, I would like
> to block further unplug requests on the same device, but QEMU seems
> to allow more such unplug requests to come in via the monitor. So
> qdev won't help me here ? Should I detect such condition from the
> machine unplug() handler and take required action ?
I think offlining is the guest's task, along with recovering from an
inability to offline (i.e. offline everything + eject, or restore the original state).
QEMU does its job by notifying the guest which DIMM it wants to remove
and removing it when the guest asks for that (at least in the x86 world).

> 
> On x86, if some pages are offlined and subsequently other pages couldn't
> be offlined, then I see the full DIMM memory size remaining
> with the guest. So I infer that on x86, QEMU memory unplug either
> removes full DIMM or nothing. Is that understanding correct ?
I wouldn't bet that this is guaranteed behavior, but it should work that way.

> 
> Regards,
> Bharata.
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-26  7:52           ` Igor Mammedov
@ 2016-04-26 21:03             ` Michael Roth
  2016-04-27  6:54               ` Thomas Huth
                                 ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Michael Roth @ 2016-04-26 21:03 UTC (permalink / raw)
  To: Igor Mammedov, Bharata B Rao
  Cc: David Gibson, thuth, qemu-devel, qemu-ppc, nfont

Quoting Igor Mammedov (2016-04-26 02:52:36)
> On Tue, 26 Apr 2016 10:39:23 +0530
> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> 
> > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
> > > On Wed, 16 Mar 2016 10:11:54 +0530
> > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > >   
> > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:  
> > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:    
> > > > > > Add support to hot remove pc-dimm memory devices.
> > > > > > 
> > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>    
> > > > > 
> > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > 
> > > > > Looks correct, but again, needs to wait on the PAPR change.  
> > > [...]  
> > > > 
> > > > While we are here, I would also like to get some opinion on the real
> > > > need for memory unplug. Is there anything that memory unplug gives us
> > > > which memory ballooning (shrinking mem via ballooning) can't give ?  
> > > Sure ballooning can complement memory hotplug but turning it on would
> > > effectively reduce hotplug to balloning as it would enable overcommit
> > > capability instead of hard partitioning pc-dimms provides. So one
> > > could just use ballooning only and not bother with hotplug at all.
> > > 
> > > On the other hand memory hotplug/unplug (at least on x86) tries
> > > to model real hardware, thus removing need in paravirt ballooning
> > > solution in favor of native guest support.  
> > 
> > Thanks for your views.
> > 
> > > 
> > > PS:
> > > Guest wise, currently hot-unplug is not well supported in linux,
> > > i.e. it's not guarantied that guest will honor unplug request
> > > as it may pin dimm by using it as a non migratable memory. So
> > > there is something to work on guest side to make unplug more
> > > reliable/guarantied.  
> > 
> > In the above scenario where the guest doesn't allow removal of certain
> > parts of DIMM memory, what is the expected behaviour as far as QEMU
> > DIMM device is concerned ? I seem to be running into this situation
> > very often with PowerPC mem unplug where I am left with a DIMM device
> > that has only some memory blocks released. In this situation, I would like
> > to block further unplug requests on the same device, but QEMU seems
> > to allow more such unplug requests to come in via the monitor. So
> > qdev won't help me here ? Should I detect such condition from the
> > machine unplug() handler and take required action ?
> I think offlining is a guests task along with recovering from
> inability to offline (i.e. offline all + eject or restore original state).
> QUEM does it's job by notifying guest what dimm it wants to remove
> and removes it when guest asks it (at least in x86 world).

In the case of pseries, the DIMM abstraction isn't really exposed to
the guest, but rather the memory blocks we use to make the backing
memdev memory available to the guest. During unplug, the guest
completely releases these blocks back to QEMU, and if it can only
release a subset of what's requested it does not attempt to recover.
We can potentially change that behavior on the guest side, since
partially-freed DIMMs aren't currently useful on the host-side...

But, in the case of pseries, I wonder if it makes sense to maybe go
ahead and MADV_DONTNEED the ranges backing these released blocks so the
host can at least partially reclaim the memory from a partially
unplugged DIMM?
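
As a sketch of what that could look like on the host side (assuming, for
illustration, a 256 MB LMB granularity and a plain anonymous mapping standing
in for the DIMM's backing memdev; a file- or hugepage-backed memdev would need
more care, and the helper below is not QEMU code):

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define LMB_SIZE (256 * 1024 * 1024UL)   /* assumed LMB granularity */

/* Drop the host pages backing one released LMB. MADV_DONTNEED tells the
 * kernel the contents are disposable; the range stays mapped and refaults
 * as zero pages if it is ever touched again. */
static int reclaim_released_lmb(void *ram_base, unsigned int lmb_index)
{
    uint8_t *addr = (uint8_t *)ram_base + (uint64_t)lmb_index * LMB_SIZE;

    if (madvise(addr, LMB_SIZE, MADV_DONTNEED) < 0) {
        perror("madvise");
        return -1;
    }
    return 0;
}

int main(void)
{
    void *ram = mmap(NULL, 4 * LMB_SIZE, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ram == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    return reclaim_released_lmb(ram, 2) ? 1 : 0;   /* "guest released LMB 2" */
}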

> 
> > 
> > On x86, if some pages are offlined and subsequently other pages couldn't
> > be offlined, then I see the full DIMM memory size remaining
> > with the guest. So I infer that on x86, QEMU memory unplug either
> > removes full DIMM or nothing. Is that understanding correct ?
> I wouldn't bet that it's guarantied behavior but it should be this way.
> 
> > 
> > Regards,
> > Bharata.
> > 
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-26 21:03             ` Michael Roth
@ 2016-04-27  6:54               ` Thomas Huth
  2016-04-27 13:37               ` Igor Mammedov
  2016-04-29  3:24               ` David Gibson
  2 siblings, 0 replies; 33+ messages in thread
From: Thomas Huth @ 2016-04-27  6:54 UTC (permalink / raw)
  To: Michael Roth, Igor Mammedov, Bharata B Rao
  Cc: nfont, qemu-ppc, qemu-devel, David Gibson

On 26.04.2016 23:03, Michael Roth wrote:
> Quoting Igor Mammedov (2016-04-26 02:52:36)
>> On Tue, 26 Apr 2016 10:39:23 +0530
>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
>>
>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
>>>> On Wed, 16 Mar 2016 10:11:54 +0530
>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
>>>>   
>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:  
>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:    
>>>>>>> Add support to hot remove pc-dimm memory devices.
>>>>>>>
>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>    
>>>>>>
>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>>>>
>>>>>> Looks correct, but again, needs to wait on the PAPR change.  
>>>> [...]  
>>>>>
>>>>> While we are here, I would also like to get some opinion on the real
>>>>> need for memory unplug. Is there anything that memory unplug gives us
>>>>> which memory ballooning (shrinking mem via ballooning) can't give ?  
>>>> Sure ballooning can complement memory hotplug but turning it on would
>>>> effectively reduce hotplug to balloning as it would enable overcommit
>>>> capability instead of hard partitioning pc-dimms provides. So one
>>>> could just use ballooning only and not bother with hotplug at all.
>>>>
>>>> On the other hand memory hotplug/unplug (at least on x86) tries
>>>> to model real hardware, thus removing need in paravirt ballooning
>>>> solution in favor of native guest support.  
>>>
>>> Thanks for your views.
>>>
>>>>
>>>> PS:
>>>> Guest wise, currently hot-unplug is not well supported in linux,
>>>> i.e. it's not guarantied that guest will honor unplug request
>>>> as it may pin dimm by using it as a non migratable memory. So
>>>> there is something to work on guest side to make unplug more
>>>> reliable/guarantied.  
>>>
>>> In the above scenario where the guest doesn't allow removal of certain
>>> parts of DIMM memory, what is the expected behaviour as far as QEMU
>>> DIMM device is concerned ? I seem to be running into this situation
>>> very often with PowerPC mem unplug where I am left with a DIMM device
>>> that has only some memory blocks released. In this situation, I would like
>>> to block further unplug requests on the same device, but QEMU seems
>>> to allow more such unplug requests to come in via the monitor. So
>>> qdev won't help me here ? Should I detect such condition from the
>>> machine unplug() handler and take required action ?
>> I think offlining is a guests task along with recovering from
>> inability to offline (i.e. offline all + eject or restore original state).
>> QUEM does it's job by notifying guest what dimm it wants to remove
>> and removes it when guest asks it (at least in x86 world).
> 
> In the case of pseries, the DIMM abstraction isn't really exposed to
> the guest, but rather the memory blocks we use to make the backing
> memdev memory available to the guest. During unplug, the guest
> completely releases these blocks back to QEMU, and if it can only
> release a subset of what's requested it does not attempt to recover.
> We can potentially change that behavior on the guest side, since
> partially-freed DIMMs aren't currently useful on the host-side...
> 
> But, in the case of pseries, I wonder if it makes sense to maybe go
> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> host can at least partially reclaim the memory from a partially
> unplugged DIMM?

Sounds like this could be a good compromise.

 Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-26 21:03             ` Michael Roth
  2016-04-27  6:54               ` Thomas Huth
@ 2016-04-27 13:37               ` Igor Mammedov
  2016-04-27 13:59                 ` Thomas Huth
                                   ` (2 more replies)
  2016-04-29  3:24               ` David Gibson
  2 siblings, 3 replies; 33+ messages in thread
From: Igor Mammedov @ 2016-04-27 13:37 UTC (permalink / raw)
  To: Michael Roth
  Cc: Bharata B Rao, David Gibson, thuth, qemu-devel, qemu-ppc, nfont

On Tue, 26 Apr 2016 16:03:37 -0500
Michael Roth <mdroth@linux.vnet.ibm.com> wrote:

> Quoting Igor Mammedov (2016-04-26 02:52:36)
> > On Tue, 26 Apr 2016 10:39:23 +0530
> > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> >   
> > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:  
> > > > On Wed, 16 Mar 2016 10:11:54 +0530
> > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > >     
> > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:    
> > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:      
> > > > > > > Add support to hot remove pc-dimm memory devices.
> > > > > > > 
> > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>      
> > > > > > 
> > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > > 
> > > > > > Looks correct, but again, needs to wait on the PAPR change.    
> > > > [...]    
> > > > > 
> > > > > While we are here, I would also like to get some opinion on the real
> > > > > need for memory unplug. Is there anything that memory unplug gives us
> > > > > which memory ballooning (shrinking mem via ballooning) can't give ?    
> > > > Sure ballooning can complement memory hotplug but turning it on would
> > > > effectively reduce hotplug to balloning as it would enable overcommit
> > > > capability instead of hard partitioning pc-dimms provides. So one
> > > > could just use ballooning only and not bother with hotplug at all.
> > > > 
> > > > On the other hand memory hotplug/unplug (at least on x86) tries
> > > > to model real hardware, thus removing need in paravirt ballooning
> > > > solution in favor of native guest support.    
> > > 
> > > Thanks for your views.
> > >   
> > > > 
> > > > PS:
> > > > Guest wise, currently hot-unplug is not well supported in linux,
> > > > i.e. it's not guarantied that guest will honor unplug request
> > > > as it may pin dimm by using it as a non migratable memory. So
> > > > there is something to work on guest side to make unplug more
> > > > reliable/guarantied.    
> > > 
> > > In the above scenario where the guest doesn't allow removal of certain
> > > parts of DIMM memory, what is the expected behaviour as far as QEMU
> > > DIMM device is concerned ? I seem to be running into this situation
> > > very often with PowerPC mem unplug where I am left with a DIMM device
> > > that has only some memory blocks released. In this situation, I would like
> > > to block further unplug requests on the same device, but QEMU seems
> > > to allow more such unplug requests to come in via the monitor. So
> > > qdev won't help me here ? Should I detect such condition from the
> > > machine unplug() handler and take required action ?  
> > I think offlining is a guests task along with recovering from
> > inability to offline (i.e. offline all + eject or restore original state).
> > QUEM does it's job by notifying guest what dimm it wants to remove
> > and removes it when guest asks it (at least in x86 world).  
> 
> In the case of pseries, the DIMM abstraction isn't really exposed to
> the guest, but rather the memory blocks we use to make the backing
> memdev memory available to the guest. During unplug, the guest
> completely releases these blocks back to QEMU, and if it can only
> release a subset of what's requested it does not attempt to recover.
> We can potentially change that behavior on the guest side, since
> partially-freed DIMMs aren't currently useful on the host-side...
> 
> But, in the case of pseries, I wonder if it makes sense to maybe go
> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> host can at least partially reclaim the memory from a partially
> unplugged DIMM?
It's a little bit confusing: the user asked to remove the device, but it's still
there, just not completely usable/available.
What will happen when the user wants that memory plugged back?

It looks like reinventing ballooning;
maybe it would be better to disable unplug and use ballooning
to release some memory, until the guest is ready to unplug all or none of
the requested blocks?

> 
> >   
> > > 
> > > On x86, if some pages are offlined and subsequently other pages couldn't
> > > be offlined, then I see the full DIMM memory size remaining
> > > with the guest. So I infer that on x86, QEMU memory unplug either
> > > removes full DIMM or nothing. Is that understanding correct ?  
> > I wouldn't bet that it's guarantied behavior but it should be this way.
> >   
> > > 
> > > Regards,
> > > Bharata.
> > >   
> >   
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-27 13:37               ` Igor Mammedov
@ 2016-04-27 13:59                 ` Thomas Huth
  2016-04-27 14:34                   ` Igor Mammedov
  2016-04-27 14:24                 ` Bharata B Rao
  2016-04-29  3:28                 ` David Gibson
  2 siblings, 1 reply; 33+ messages in thread
From: Thomas Huth @ 2016-04-27 13:59 UTC (permalink / raw)
  To: Igor Mammedov, Michael Roth
  Cc: Bharata B Rao, David Gibson, qemu-devel, qemu-ppc, nfont

On 27.04.2016 15:37, Igor Mammedov wrote:
> On Tue, 26 Apr 2016 16:03:37 -0500
> Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> 
>> Quoting Igor Mammedov (2016-04-26 02:52:36)
>>> On Tue, 26 Apr 2016 10:39:23 +0530
>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
>>>   
>>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:  
>>>>> On Wed, 16 Mar 2016 10:11:54 +0530
>>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
>>>>>     
>>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:    
>>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:      
>>>>>>>> Add support to hot remove pc-dimm memory devices.
>>>>>>>>
>>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>      
>>>>>>>
>>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>>>>>
>>>>>>> Looks correct, but again, needs to wait on the PAPR change.    
>>>>> [...]    
>>>>>>
>>>>>> While we are here, I would also like to get some opinion on the real
>>>>>> need for memory unplug. Is there anything that memory unplug gives us
>>>>>> which memory ballooning (shrinking mem via ballooning) can't give ?    
>>>>> Sure ballooning can complement memory hotplug but turning it on would
>>>>> effectively reduce hotplug to balloning as it would enable overcommit
>>>>> capability instead of hard partitioning pc-dimms provides. So one
>>>>> could just use ballooning only and not bother with hotplug at all.
>>>>>
>>>>> On the other hand memory hotplug/unplug (at least on x86) tries
>>>>> to model real hardware, thus removing need in paravirt ballooning
>>>>> solution in favor of native guest support.    
>>>>
>>>> Thanks for your views.
>>>>   
>>>>>
>>>>> PS:
>>>>> Guest wise, currently hot-unplug is not well supported in linux,
>>>>> i.e. it's not guarantied that guest will honor unplug request
>>>>> as it may pin dimm by using it as a non migratable memory. So
>>>>> there is something to work on guest side to make unplug more
>>>>> reliable/guarantied.    
>>>>
>>>> In the above scenario where the guest doesn't allow removal of certain
>>>> parts of DIMM memory, what is the expected behaviour as far as QEMU
>>>> DIMM device is concerned ? I seem to be running into this situation
>>>> very often with PowerPC mem unplug where I am left with a DIMM device
>>>> that has only some memory blocks released. In this situation, I would like
>>>> to block further unplug requests on the same device, but QEMU seems
>>>> to allow more such unplug requests to come in via the monitor. So
>>>> qdev won't help me here ? Should I detect such condition from the
>>>> machine unplug() handler and take required action ?  
>>> I think offlining is a guests task along with recovering from
>>> inability to offline (i.e. offline all + eject or restore original state).
>>> QUEM does it's job by notifying guest what dimm it wants to remove
>>> and removes it when guest asks it (at least in x86 world).  
>>
>> In the case of pseries, the DIMM abstraction isn't really exposed to
>> the guest, but rather the memory blocks we use to make the backing
>> memdev memory available to the guest. During unplug, the guest
>> completely releases these blocks back to QEMU, and if it can only
>> release a subset of what's requested it does not attempt to recover.
>> We can potentially change that behavior on the guest side, since
>> partially-freed DIMMs aren't currently useful on the host-side...
>>
>> But, in the case of pseries, I wonder if it makes sense to maybe go
>> ahead and MADV_DONTNEED the ranges backing these released blocks so the
>> host can at least partially reclaim the memory from a partially
>> unplugged DIMM?
> It's a little bit confusing, one asked to remove device but it's still
> there but not completely usable/available.
> What will happen when user wants that memory plugged back?

As far as I've understood MADV_DONTNEED, you can use the memory again at
any time - just the previous contents will be gone, which is ok in this
case since the guest previously marked this area as unavailable.
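
A minimal demo of that behaviour for a private anonymous mapping - the range
stays usable after MADV_DONTNEED, but reads fault in zero pages, so the old
contents are gone:

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
    size_t len = 2 * 1024 * 1024;
    unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    memset(p, 0xaa, len);                 /* "guest" data */
    if (madvise(p, len, MADV_DONTNEED)) { /* host reclaims the pages */
        perror("madvise");
        return 1;
    }

    assert(p[0] == 0 && p[len - 1] == 0); /* reusable, but zero-filled */
    printf("range still accessible, previous contents discarded\n");
    return 0;
}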

 Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-27 13:37               ` Igor Mammedov
  2016-04-27 13:59                 ` Thomas Huth
@ 2016-04-27 14:24                 ` Bharata B Rao
  2016-04-29  3:28                 ` David Gibson
  2 siblings, 0 replies; 33+ messages in thread
From: Bharata B Rao @ 2016-04-27 14:24 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Michael Roth, David Gibson, thuth, qemu-devel, qemu-ppc, nfont

On Wed, Apr 27, 2016 at 03:37:05PM +0200, Igor Mammedov wrote:
> On Tue, 26 Apr 2016 16:03:37 -0500
> Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> 
> > Quoting Igor Mammedov (2016-04-26 02:52:36)
> > > On Tue, 26 Apr 2016 10:39:23 +0530
> > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > >   
> > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:  
> > > > > On Wed, 16 Mar 2016 10:11:54 +0530
> > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > > >     
> > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:    
> > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:      
> > > > > > > > Add support to hot remove pc-dimm memory devices.
> > > > > > > > 
> > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>      
> > > > > > > 
> > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > > > 
> > > > > > > Looks correct, but again, needs to wait on the PAPR change.    
> > > > > [...]    
> > > > > > 
> > > > > > While we are here, I would also like to get some opinion on the real
> > > > > > need for memory unplug. Is there anything that memory unplug gives us
> > > > > > which memory ballooning (shrinking mem via ballooning) can't give ?    
> > > > > Sure ballooning can complement memory hotplug but turning it on would
> > > > > effectively reduce hotplug to balloning as it would enable overcommit
> > > > > capability instead of hard partitioning pc-dimms provides. So one
> > > > > could just use ballooning only and not bother with hotplug at all.
> > > > > 
> > > > > On the other hand memory hotplug/unplug (at least on x86) tries
> > > > > to model real hardware, thus removing need in paravirt ballooning
> > > > > solution in favor of native guest support.    
> > > > 
> > > > Thanks for your views.
> > > >   
> > > > > 
> > > > > PS:
> > > > > Guest wise, currently hot-unplug is not well supported in linux,
> > > > > i.e. it's not guarantied that guest will honor unplug request
> > > > > as it may pin dimm by using it as a non migratable memory. So
> > > > > there is something to work on guest side to make unplug more
> > > > > reliable/guarantied.    
> > > > 
> > > > In the above scenario where the guest doesn't allow removal of certain
> > > > parts of DIMM memory, what is the expected behaviour as far as QEMU
> > > > DIMM device is concerned ? I seem to be running into this situation
> > > > very often with PowerPC mem unplug where I am left with a DIMM device
> > > > that has only some memory blocks released. In this situation, I would like
> > > > to block further unplug requests on the same device, but QEMU seems
> > > > to allow more such unplug requests to come in via the monitor. So
> > > > qdev won't help me here ? Should I detect such condition from the
> > > > machine unplug() handler and take required action ?  
> > > I think offlining is a guests task along with recovering from
> > > inability to offline (i.e. offline all + eject or restore original state).
> > > QUEM does it's job by notifying guest what dimm it wants to remove
> > > and removes it when guest asks it (at least in x86 world).  
> > 
> > In the case of pseries, the DIMM abstraction isn't really exposed to
> > the guest, but rather the memory blocks we use to make the backing
> > memdev memory available to the guest. During unplug, the guest
> > completely releases these blocks back to QEMU, and if it can only
> > release a subset of what's requested it does not attempt to recover.
> > We can potentially change that behavior on the guest side, since
> > partially-freed DIMMs aren't currently useful on the host-side...
> > 
> > But, in the case of pseries, I wonder if it makes sense to maybe go
> > ahead and MADV_DONTNEED the ranges backing these released blocks so the
> > host can at least partially reclaim the memory from a partially
> > unplugged DIMM?
> It's a little bit confusing, one asked to remove device but it's still
> there but not completely usable/available.
> What will happen when user wants that memory plugged back?

In the current patchset, the DIMM device still persists since some
blocks belonging to it aren't released yet. So it is not possible
to plug it back again.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-27 13:59                 ` Thomas Huth
@ 2016-04-27 14:34                   ` Igor Mammedov
  2016-04-27 19:07                     ` Michael Roth
  0 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-27 14:34 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Michael Roth, Bharata B Rao, David Gibson, qemu-devel, qemu-ppc, nfont

On Wed, 27 Apr 2016 15:59:52 +0200
Thomas Huth <thuth@redhat.com> wrote:

> On 27.04.2016 15:37, Igor Mammedov wrote:
> > On Tue, 26 Apr 2016 16:03:37 -0500
> > Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> >   
> >> Quoting Igor Mammedov (2016-04-26 02:52:36)  
> >>> On Tue, 26 Apr 2016 10:39:23 +0530
> >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> >>>     
> >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:    
> >>>>> On Wed, 16 Mar 2016 10:11:54 +0530
> >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> >>>>>       
> >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:      
> >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:        
> >>>>>>>> Add support to hot remove pc-dimm memory devices.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>        
> >>>>>>>
> >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>>>>>
> >>>>>>> Looks correct, but again, needs to wait on the PAPR change.      
> >>>>> [...]      
> >>>>>>
> >>>>>> While we are here, I would also like to get some opinion on the real
> >>>>>> need for memory unplug. Is there anything that memory unplug gives us
> >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ?      
> >>>>> Sure ballooning can complement memory hotplug but turning it on would
> >>>>> effectively reduce hotplug to balloning as it would enable overcommit
> >>>>> capability instead of hard partitioning pc-dimms provides. So one
> >>>>> could just use ballooning only and not bother with hotplug at all.
> >>>>>
> >>>>> On the other hand memory hotplug/unplug (at least on x86) tries
> >>>>> to model real hardware, thus removing need in paravirt ballooning
> >>>>> solution in favor of native guest support.      
> >>>>
> >>>> Thanks for your views.
> >>>>     
> >>>>>
> >>>>> PS:
> >>>>> Guest wise, currently hot-unplug is not well supported in linux,
> >>>>> i.e. it's not guarantied that guest will honor unplug request
> >>>>> as it may pin dimm by using it as a non migratable memory. So
> >>>>> there is something to work on guest side to make unplug more
> >>>>> reliable/guarantied.      
> >>>>
> >>>> In the above scenario where the guest doesn't allow removal of certain
> >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU
> >>>> DIMM device is concerned ? I seem to be running into this situation
> >>>> very often with PowerPC mem unplug where I am left with a DIMM device
> >>>> that has only some memory blocks released. In this situation, I would like
> >>>> to block further unplug requests on the same device, but QEMU seems
> >>>> to allow more such unplug requests to come in via the monitor. So
> >>>> qdev won't help me here ? Should I detect such condition from the
> >>>> machine unplug() handler and take required action ?    
> >>> I think offlining is a guests task along with recovering from
> >>> inability to offline (i.e. offline all + eject or restore original state).
> >>> QUEM does it's job by notifying guest what dimm it wants to remove
> >>> and removes it when guest asks it (at least in x86 world).    
> >>
> >> In the case of pseries, the DIMM abstraction isn't really exposed to
> >> the guest, but rather the memory blocks we use to make the backing
> >> memdev memory available to the guest. During unplug, the guest
> >> completely releases these blocks back to QEMU, and if it can only
> >> release a subset of what's requested it does not attempt to recover.
> >> We can potentially change that behavior on the guest side, since
> >> partially-freed DIMMs aren't currently useful on the host-side...
> >>
> >> But, in the case of pseries, I wonder if it makes sense to maybe go
> >> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> >> host can at least partially reclaim the memory from a partially
> >> unplugged DIMM?  
> > It's a little bit confusing, one asked to remove device but it's still
> > there but not completely usable/available.
> > What will happen when user wants that memory plugged back?  
> 
> As far as I've understood MADV_DONTNEED, you can use the memory again at
> any time - just the previous contents will be gone, which is ok in this
> case since the guest previously marked this area as unavailable.
If the host has given the returned memory to someone else, there might not be
enough resources to give it back (what would happen then I can't tell; maybe
the VM will stall or just get an exception).

Anyhow, I'd suggest ballooning if one needs partial unplug, and fixing
physical unplug to unplug the whole pc-dimm or none of it, instead of
turning the pc-dimm device model into some hybrid with the balloon device
and making users/mgmt even more confused.

> 
>  Thomas
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-27 14:34                   ` Igor Mammedov
@ 2016-04-27 19:07                     ` Michael Roth
  2016-04-28  7:55                       ` Igor Mammedov
  0 siblings, 1 reply; 33+ messages in thread
From: Michael Roth @ 2016-04-27 19:07 UTC (permalink / raw)
  To: Igor Mammedov, Thomas Huth
  Cc: Bharata B Rao, David Gibson, qemu-devel, qemu-ppc, nfont

Quoting Igor Mammedov (2016-04-27 09:34:53)
> On Wed, 27 Apr 2016 15:59:52 +0200
> Thomas Huth <thuth@redhat.com> wrote:
> 
> > On 27.04.2016 15:37, Igor Mammedov wrote:
> > > On Tue, 26 Apr 2016 16:03:37 -0500
> > > Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> > >   
> > >> Quoting Igor Mammedov (2016-04-26 02:52:36)  
> > >>> On Tue, 26 Apr 2016 10:39:23 +0530
> > >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > >>>     
> > >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:    
> > >>>>> On Wed, 16 Mar 2016 10:11:54 +0530
> > >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > >>>>>       
> > >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:      
> > >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:        
> > >>>>>>>> Add support to hot remove pc-dimm memory devices.
> > >>>>>>>>
> > >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>        
> > >>>>>>>
> > >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > >>>>>>>
> > >>>>>>> Looks correct, but again, needs to wait on the PAPR change.      
> > >>>>> [...]      
> > >>>>>>
> > >>>>>> While we are here, I would also like to get some opinion on the real
> > >>>>>> need for memory unplug. Is there anything that memory unplug gives us
> > >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ?      
> > >>>>> Sure ballooning can complement memory hotplug but turning it on would
> > >>>>> effectively reduce hotplug to balloning as it would enable overcommit
> > >>>>> capability instead of hard partitioning pc-dimms provides. So one
> > >>>>> could just use ballooning only and not bother with hotplug at all.
> > >>>>>
> > >>>>> On the other hand memory hotplug/unplug (at least on x86) tries
> > >>>>> to model real hardware, thus removing need in paravirt ballooning
> > >>>>> solution in favor of native guest support.      
> > >>>>
> > >>>> Thanks for your views.
> > >>>>     
> > >>>>>
> > >>>>> PS:
> > >>>>> Guest wise, currently hot-unplug is not well supported in linux,
> > >>>>> i.e. it's not guarantied that guest will honor unplug request
> > >>>>> as it may pin dimm by using it as a non migratable memory. So
> > >>>>> there is something to work on guest side to make unplug more
> > >>>>> reliable/guarantied.      
> > >>>>
> > >>>> In the above scenario where the guest doesn't allow removal of certain
> > >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU
> > >>>> DIMM device is concerned ? I seem to be running into this situation
> > >>>> very often with PowerPC mem unplug where I am left with a DIMM device
> > >>>> that has only some memory blocks released. In this situation, I would like
> > >>>> to block further unplug requests on the same device, but QEMU seems
> > >>>> to allow more such unplug requests to come in via the monitor. So
> > >>>> qdev won't help me here ? Should I detect such condition from the
> > >>>> machine unplug() handler and take required action ?    
> > >>> I think offlining is a guests task along with recovering from
> > >>> inability to offline (i.e. offline all + eject or restore original state).
> > >>> QUEM does it's job by notifying guest what dimm it wants to remove
> > >>> and removes it when guest asks it (at least in x86 world).    
> > >>
> > >> In the case of pseries, the DIMM abstraction isn't really exposed to
> > >> the guest, but rather the memory blocks we use to make the backing
> > >> memdev memory available to the guest. During unplug, the guest
> > >> completely releases these blocks back to QEMU, and if it can only
> > >> release a subset of what's requested it does not attempt to recover.
> > >> We can potentially change that behavior on the guest side, since
> > >> partially-freed DIMMs aren't currently useful on the host-side...
> > >>
> > >> But, in the case of pseries, I wonder if it makes sense to maybe go
> > >> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> > >> host can at least partially reclaim the memory from a partially
> > >> unplugged DIMM?  
> > > It's a little bit confusing, one asked to remove device but it's still
> > > there but not completely usable/available.
> > > What will happen when user wants that memory plugged back?  
> > 
> > As far as I've understood MADV_DONTNEED, you can use the memory again at
> > any time - just the previous contents will be gone, which is ok in this
> > case since the guest previously marked this area as unavailable.
> If host gave returned memory to someone else there might not be enough
> resources to give it back (what would happen I can't tell may be VM will
> stall or just get exception).

It's not really an issue for pseries, since once the LMB is released
it's totally gone as far as the guest is concerned, and there's no
way to plug it back in via the still-present DIMM until removal
completes at, say, reset time.

But, either way, I agree that if we intend to let the guest recover, it
should happen immediately upon being unable to satisfy the whole unplug, and
not at some future time.

> 
> Anyhow I'd suggest ballooning if one needs partial unplug and fix
> physical unplug to unplug whole pc-dimm or none instead of
> turning pc-dimm device model into some hybrid with balloon device
> and making users/mgmt even more confused.

That seems reasonable; I can see why recovering memory from partially
removed DIMMs overlaps a lot with the ballooning use case...

But I think that still leaves the question of how to make memory
unplug useful in practice: memory unplug seems quite
likely to fail in all-or-nothing scenarios. So if we expect
all-or-nothing removal in the guest, then it seems like some work
needs to be done in the balloon driver or elsewhere to provide the
sort of specificity management would need in order to determine whether a
DIMM has become fully unpluggable, and to let the guest make ballooning
decisions that complement eventual DIMM unplug more effectively.
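
As a rough guest-side illustration of that "specificity" question - i.e. how
one could learn whether the blocks behind a DIMM even look offline-able - a
tool could walk the sysfs memory-block entries and read their "removable"
hint. The attribute is only a heuristic, and mapping blocks back to a
particular DIMM/DRC is deliberately left out of this sketch:

#include <dirent.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *base = "/sys/devices/system/memory";
    DIR *d = opendir(base);
    struct dirent *e;

    if (!d) {
        perror(base);
        return 1;
    }
    while ((e = readdir(d)) != NULL) {
        char path[512];
        FILE *f;
        int removable;

        if (strncmp(e->d_name, "memory", 6) != 0) {
            continue;                  /* skip ".", "..", other attributes */
        }
        snprintf(path, sizeof(path), "%s/%s/removable", base, e->d_name);
        f = fopen(path, "r");
        if (!f) {
            continue;                  /* older kernels may lack the file */
        }
        if (fscanf(f, "%d", &removable) == 1) {
            printf("%s: %s\n", e->d_name,
                   removable ? "likely removable" : "not removable");
        }
        fclose(f);
    }
    closedir(d);
    return 0;
}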

> 
> > 
> >  Thomas
> > 
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-27 19:07                     ` Michael Roth
@ 2016-04-28  7:55                       ` Igor Mammedov
  0 siblings, 0 replies; 33+ messages in thread
From: Igor Mammedov @ 2016-04-28  7:55 UTC (permalink / raw)
  To: Michael Roth
  Cc: Thomas Huth, nfont, David Gibson, qemu-ppc, qemu-devel, Bharata B Rao

On Wed, 27 Apr 2016 14:07:10 -0500
Michael Roth <mdroth@linux.vnet.ibm.com> wrote:

> Quoting Igor Mammedov (2016-04-27 09:34:53)
> > On Wed, 27 Apr 2016 15:59:52 +0200
> > Thomas Huth <thuth@redhat.com> wrote:
> >   
> > > On 27.04.2016 15:37, Igor Mammedov wrote:  
> > > > On Tue, 26 Apr 2016 16:03:37 -0500
> > > > Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> > > >     
> > > >> Quoting Igor Mammedov (2016-04-26 02:52:36)    
> > > >>> On Tue, 26 Apr 2016 10:39:23 +0530
> > > >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > >>>       
> > > >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:      
> > > >>>>> On Wed, 16 Mar 2016 10:11:54 +0530
> > > >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > >>>>>         
> > > >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:        
> > > >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:          
> > > >>>>>>>> Add support to hot remove pc-dimm memory devices.
> > > >>>>>>>>
> > > >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>          
> > > >>>>>>>
> > > >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > >>>>>>>
> > > >>>>>>> Looks correct, but again, needs to wait on the PAPR change.        
> > > >>>>> [...]        
> > > >>>>>>
> > > >>>>>> While we are here, I would also like to get some opinion on the real
> > > >>>>>> need for memory unplug. Is there anything that memory unplug gives us
> > > >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ?        
> > > >>>>> Sure ballooning can complement memory hotplug but turning it on would
> > > >>>>> effectively reduce hotplug to balloning as it would enable overcommit
> > > >>>>> capability instead of hard partitioning pc-dimms provides. So one
> > > >>>>> could just use ballooning only and not bother with hotplug at all.
> > > >>>>>
> > > >>>>> On the other hand memory hotplug/unplug (at least on x86) tries
> > > >>>>> to model real hardware, thus removing need in paravirt ballooning
> > > >>>>> solution in favor of native guest support.        
> > > >>>>
> > > >>>> Thanks for your views.
> > > >>>>       
> > > >>>>>
> > > >>>>> PS:
> > > >>>>> Guest wise, currently hot-unplug is not well supported in linux,
> > > >>>>> i.e. it's not guarantied that guest will honor unplug request
> > > >>>>> as it may pin dimm by using it as a non migratable memory. So
> > > >>>>> there is something to work on guest side to make unplug more
> > > >>>>> reliable/guarantied.        
> > > >>>>
> > > >>>> In the above scenario where the guest doesn't allow removal of certain
> > > >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU
> > > >>>> DIMM device is concerned ? I seem to be running into this situation
> > > >>>> very often with PowerPC mem unplug where I am left with a DIMM device
> > > >>>> that has only some memory blocks released. In this situation, I would like
> > > >>>> to block further unplug requests on the same device, but QEMU seems
> > > >>>> to allow more such unplug requests to come in via the monitor. So
> > > >>>> qdev won't help me here ? Should I detect such condition from the
> > > >>>> machine unplug() handler and take required action ?      
> > > >>> I think offlining is a guests task along with recovering from
> > > >>> inability to offline (i.e. offline all + eject or restore original state).
> > > >>> QUEM does it's job by notifying guest what dimm it wants to remove
> > > >>> and removes it when guest asks it (at least in x86 world).      
> > > >>
> > > >> In the case of pseries, the DIMM abstraction isn't really exposed to
> > > >> the guest, but rather the memory blocks we use to make the backing
> > > >> memdev memory available to the guest. During unplug, the guest
> > > >> completely releases these blocks back to QEMU, and if it can only
> > > >> release a subset of what's requested it does not attempt to recover.
> > > >> We can potentially change that behavior on the guest side, since
> > > >> partially-freed DIMMs aren't currently useful on the host-side...
> > > >>
> > > >> But, in the case of pseries, I wonder if it makes sense to maybe go
> > > >> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> > > >> host can at least partially reclaim the memory from a partially
> > > >> unplugged DIMM?    
> > > > It's a little bit confusing, one asked to remove device but it's still
> > > > there but not completely usable/available.
> > > > What will happen when user wants that memory plugged back?    
> > > 
> > > As far as I've understood MADV_DONTNEED, you can use the memory again at
> > > any time - just the previous contents will be gone, which is ok in this
> > > case since the guest previously marked this area as unavailable.  
> > If host gave returned memory to someone else there might not be enough
> > resources to give it back (what would happen I can't tell may be VM will
> > stall or just get exception).  
> 
> It's not really an issue for pseries, since once the LMB is released
> it's totally gone as far as the guest is concerned, and there's no
> way to plug it back in via the still-present DIMM until removal
> completes after, say, reset time.
> 
> But, either way, I agree if we'll intend to let the guest recover, it
> would be immediately upon being unable to satisfy the whole unplug and
> not some future time.
> 
> > 
> > Anyhow I'd suggest ballooning if one needs partial unplug and fix
> > physical unplug to unplug whole pc-dimm or none instead of
> > turning pc-dimm device model into some hybrid with balloon device
> > and making users/mgmt even more confused.  
> 
> That seems reasonable, I can see why recovering memory from partially
> removed DIMMs overlaps a lot with the ballooning use case...
> 
> But I think that kind of leaves the question of how to make memory
> unplug useful in practice? In practice, memory unplug seems quite
> likely to fail in all-or-nothing scenarios. So if we expect
I'd work on improving the not-yet-mature native unplug support
on the guest side, making guaranteed unplug available. That would
benefit not only virt, which would be the first big consumer,
but physical systems as well. It would also allow dropping
ballooning support on the guest side in favor of the native solution.

> all-or-nothing removal in the guest, then it seems like some work
> needs to be done with the balloon driver or elsewhere to provide the
> sort of specificity management would need to know to determine if a
> DIMM has become fully unpluggable, and let the guest make ballooning
> decisions that complement eventual DIMM unplug more effectively.
Currently, using ballooning effectively bars pc-dimm unplug, as the
balloon driver pins all unused pages to itself. So using them
together might need some work on the ballooning side;
I can't tell how much, though, as I'm not familiar with ballooning
nor with how the kernel memory allocator works.


> >   
> > > 
> > >  Thomas
> > >   
> >   
> 
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-26 21:03             ` Michael Roth
  2016-04-27  6:54               ` Thomas Huth
  2016-04-27 13:37               ` Igor Mammedov
@ 2016-04-29  3:24               ` David Gibson
  2016-04-29  6:45                 ` Thomas Huth
  2 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2016-04-29  3:24 UTC (permalink / raw)
  To: Michael Roth
  Cc: Igor Mammedov, Bharata B Rao, thuth, qemu-devel, qemu-ppc, nfont

[-- Attachment #1: Type: text/plain, Size: 4160 bytes --]

On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:
> Quoting Igor Mammedov (2016-04-26 02:52:36)
> > On Tue, 26 Apr 2016 10:39:23 +0530
> > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > 
> > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
> > > > On Wed, 16 Mar 2016 10:11:54 +0530
> > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > >   
> > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:  
> > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:    
> > > > > > > Add support to hot remove pc-dimm memory devices.
> > > > > > > 
> > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>    
> > > > > > 
> > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > > 
> > > > > > Looks correct, but again, needs to wait on the PAPR change.  
> > > > [...]  
> > > > > 
> > > > > While we are here, I would also like to get some opinion on the real
> > > > > need for memory unplug. Is there anything that memory unplug gives us
> > > > > which memory ballooning (shrinking mem via ballooning) can't give ?  
> > > > Sure ballooning can complement memory hotplug but turning it on would
> > > > effectively reduce hotplug to balloning as it would enable overcommit
> > > > capability instead of hard partitioning pc-dimms provides. So one
> > > > could just use ballooning only and not bother with hotplug at all.
> > > > 
> > > > On the other hand memory hotplug/unplug (at least on x86) tries
> > > > to model real hardware, thus removing need in paravirt ballooning
> > > > solution in favor of native guest support.  
> > > 
> > > Thanks for your views.
> > > 
> > > > 
> > > > PS:
> > > > Guest wise, currently hot-unplug is not well supported in linux,
> > > > i.e. it's not guarantied that guest will honor unplug request
> > > > as it may pin dimm by using it as a non migratable memory. So
> > > > there is something to work on guest side to make unplug more
> > > > reliable/guarantied.  
> > > 
> > > In the above scenario where the guest doesn't allow removal of certain
> > > parts of DIMM memory, what is the expected behaviour as far as QEMU
> > > DIMM device is concerned ? I seem to be running into this situation
> > > very often with PowerPC mem unplug where I am left with a DIMM device
> > > that has only some memory blocks released. In this situation, I would like
> > > to block further unplug requests on the same device, but QEMU seems
> > > to allow more such unplug requests to come in via the monitor. So
> > > qdev won't help me here ? Should I detect such condition from the
> > > machine unplug() handler and take required action ?
> > I think offlining is a guests task along with recovering from
> > inability to offline (i.e. offline all + eject or restore original state).
> > QUEM does it's job by notifying guest what dimm it wants to remove
> > and removes it when guest asks it (at least in x86 world).
> 
> In the case of pseries, the DIMM abstraction isn't really exposed to
> the guest, but rather the memory blocks we use to make the backing
> memdev memory available to the guest. During unplug, the guest
> completely releases these blocks back to QEMU, and if it can only
> release a subset of what's requested it does not attempt to recover.
> We can potentially change that behavior on the guest side, since
> partially-freed DIMMs aren't currently useful on the host-side...
> 
> But, in the case of pseries, I wonder if it makes sense to maybe go
> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> host can at least partially reclaim the memory from a partially
> unplugged DIMM?

Urgh.. I can see the benefit, but I'm a bit uneasy about making the
DIMM semantics different in this way on Power.

I'm starting to wonder whether shoehorning the PAPR DR memory mechanism
into the qemu DIMM model was a good idea after all.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-27 13:37               ` Igor Mammedov
  2016-04-27 13:59                 ` Thomas Huth
  2016-04-27 14:24                 ` Bharata B Rao
@ 2016-04-29  3:28                 ` David Gibson
  2016-04-29  8:42                   ` Igor Mammedov
  2 siblings, 1 reply; 33+ messages in thread
From: David Gibson @ 2016-04-29  3:28 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Michael Roth, Bharata B Rao, thuth, qemu-devel, qemu-ppc, nfont

[-- Attachment #1: Type: text/plain, Size: 5099 bytes --]

On Wed, Apr 27, 2016 at 03:37:05PM +0200, Igor Mammedov wrote:
> On Tue, 26 Apr 2016 16:03:37 -0500
> Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> 
> > Quoting Igor Mammedov (2016-04-26 02:52:36)
> > > On Tue, 26 Apr 2016 10:39:23 +0530
> > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > >   
> > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:  
> > > > > On Wed, 16 Mar 2016 10:11:54 +0530
> > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > > >     
> > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:    
> > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:      
> > > > > > > > Add support to hot remove pc-dimm memory devices.
> > > > > > > > 
> > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>      
> > > > > > > 
> > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > > > 
> > > > > > > Looks correct, but again, needs to wait on the PAPR change.    
> > > > > [...]    
> > > > > > 
> > > > > > While we are here, I would also like to get some opinion on the real
> > > > > > need for memory unplug. Is there anything that memory unplug gives us
> > > > > > which memory ballooning (shrinking mem via ballooning) can't give ?    
> > > > > Sure ballooning can complement memory hotplug but turning it on would
> > > > > effectively reduce hotplug to balloning as it would enable overcommit
> > > > > capability instead of hard partitioning pc-dimms provides. So one
> > > > > could just use ballooning only and not bother with hotplug at all.
> > > > > 
> > > > > On the other hand memory hotplug/unplug (at least on x86) tries
> > > > > to model real hardware, thus removing need in paravirt ballooning
> > > > > solution in favor of native guest support.    
> > > > 
> > > > Thanks for your views.
> > > >   
> > > > > 
> > > > > PS:
> > > > > Guest wise, currently hot-unplug is not well supported in linux,
> > > > > i.e. it's not guarantied that guest will honor unplug request
> > > > > as it may pin dimm by using it as a non migratable memory. So
> > > > > there is something to work on guest side to make unplug more
> > > > > reliable/guarantied.    
> > > > 
> > > > In the above scenario where the guest doesn't allow removal of certain
> > > > parts of DIMM memory, what is the expected behaviour as far as QEMU
> > > > DIMM device is concerned ? I seem to be running into this situation
> > > > very often with PowerPC mem unplug where I am left with a DIMM device
> > > > that has only some memory blocks released. In this situation, I would like
> > > > to block further unplug requests on the same device, but QEMU seems
> > > > to allow more such unplug requests to come in via the monitor. So
> > > > qdev won't help me here ? Should I detect such condition from the
> > > > machine unplug() handler and take required action ?  
> > > I think offlining is a guests task along with recovering from
> > > inability to offline (i.e. offline all + eject or restore original state).
> > > QUEM does it's job by notifying guest what dimm it wants to remove
> > > and removes it when guest asks it (at least in x86 world).  
> > 
> > In the case of pseries, the DIMM abstraction isn't really exposed to
> > the guest, but rather the memory blocks we use to make the backing
> > memdev memory available to the guest. During unplug, the guest
> > completely releases these blocks back to QEMU, and if it can only
> > release a subset of what's requested it does not attempt to recover.
> > We can potentially change that behavior on the guest side, since
> > partially-freed DIMMs aren't currently useful on the host-side...
> > 
> > But, in the case of pseries, I wonder if it makes sense to maybe go
> > ahead and MADV_DONTNEED the ranges backing these released blocks so the
> > host can at least partially reclaim the memory from a partially
> > unplugged DIMM?
> It's a little bit confusing, one asked to remove device but it's still
> there but not completely usable/available.
> What will happen when user wants that memory plugged back?
> 
> It looks like reinventing ballooning,
> maybe it's would be better to disable unplug and use ballooning
> to release some memory, until guest is ready to unplug all or none of
> requested blocks?

I see your point, and it gives me an idea.

I think it might be possible to connect qemu's ballooning backend to
the PAPR LMB mechanism - and in fact that might be a better match than
the DIMM backend for it.  The common way of removing memory with PAPR
is for the host to just ask for an amount and the guest chooses what to
give up, which is indeed more like ballooning than physical hotplug.

How we integrate that with true memory hot (in)plug, which will need
the DIMM mechanism, I'm not quite sure.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  3:24               ` David Gibson
@ 2016-04-29  6:45                 ` Thomas Huth
  2016-04-29  6:59                   ` Bharata B Rao
  2016-04-29 10:11                   ` David Gibson
  0 siblings, 2 replies; 33+ messages in thread
From: Thomas Huth @ 2016-04-29  6:45 UTC (permalink / raw)
  To: David Gibson, Michael Roth
  Cc: Igor Mammedov, Bharata B Rao, qemu-devel, qemu-ppc, nfont

[-- Attachment #1: Type: text/plain, Size: 1308 bytes --]

On 29.04.2016 05:24, David Gibson wrote:
> On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:
...
>> In the case of pseries, the DIMM abstraction isn't really exposed to
>> the guest, but rather the memory blocks we use to make the backing
>> memdev memory available to the guest. During unplug, the guest
>> completely releases these blocks back to QEMU, and if it can only
>> release a subset of what's requested it does not attempt to recover.
>> We can potentially change that behavior on the guest side, since
>> partially-freed DIMMs aren't currently useful on the host-side...
>>
>> But, in the case of pseries, I wonder if it makes sense to maybe go
>> ahead and MADV_DONTNEED the ranges backing these released blocks so the
>> host can at least partially reclaim the memory from a partially
>> unplugged DIMM?
> 
> Urgh.. I can see the benefit, but I'm a bit uneasy about making the
> DIMM semantics different in this way on Power.
> 
> > I'm starting to wonder if shoehorning the PAPR DR memory mechanism into
> > the qemu DIMM model was a good idea after all.

Ignorant question (sorry, I really don't have much experience yet here):
Could we maybe align the size of the LMBs with the size of the DIMMs?
E.g. make the LMBs bigger or the DIMMs smaller, so that they match?

 Thomas




^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  6:45                 ` Thomas Huth
@ 2016-04-29  6:59                   ` Bharata B Rao
  2016-04-29  8:22                     ` Thomas Huth
  2016-04-29 10:11                   ` David Gibson
  1 sibling, 1 reply; 33+ messages in thread
From: Bharata B Rao @ 2016-04-29  6:59 UTC (permalink / raw)
  To: Thomas Huth
  Cc: David Gibson, Michael Roth, Igor Mammedov, qemu-devel, qemu-ppc, nfont

On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:
> On 29.04.2016 05:24, David Gibson wrote:
> > On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:
> ...
> >> In the case of pseries, the DIMM abstraction isn't really exposed to
> >> the guest, but rather the memory blocks we use to make the backing
> >> memdev memory available to the guest. During unplug, the guest
> >> completely releases these blocks back to QEMU, and if it can only
> >> release a subset of what's requested it does not attempt to recover.
> >> We can potentially change that behavior on the guest side, since
> >> partially-freed DIMMs aren't currently useful on the host-side...
> >>
> >> But, in the case of pseries, I wonder if it makes sense to maybe go
> >> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> >> host can at least partially reclaim the memory from a partially
> >> unplugged DIMM?
> > 
> > Urgh.. I can see the benefit, but I'm a bit uneasy about making the
> > DIMM semantics different in this way on Power.
> > 
> > I'm starting to wonder if shoehorning the PAPR DR memory mechanism into
> > the qemu DIMM model was a good idea after all.
> 
> Ignorant question (sorry, I really don't have much experience yet here):
> Could we maybe align the size of the LMBs with the size of the DIMMs?
> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?

Should work, but the question is what should be the right size so that
we have good granularity of hotplug but also not run out of mem slots
thereby limiting us on the maxmem. I remember you changed the memslots
to 512 in KVM, but we are yet to move up from 32 in QEMU for sPAPR though.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  6:59                   ` Bharata B Rao
@ 2016-04-29  8:22                     ` Thomas Huth
  2016-04-29  8:30                       ` Igor Mammedov
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Huth @ 2016-04-29  8:22 UTC (permalink / raw)
  To: bharata
  Cc: David Gibson, Michael Roth, Igor Mammedov, qemu-devel, qemu-ppc, nfont

On 29.04.2016 08:59, Bharata B Rao wrote:
> On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:
>> On 29.04.2016 05:24, David Gibson wrote:
>>> On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:
>> ...
>>>> In the case of pseries, the DIMM abstraction isn't really exposed to
>>>> the guest, but rather the memory blocks we use to make the backing
>>>> memdev memory available to the guest. During unplug, the guest
>>>> completely releases these blocks back to QEMU, and if it can only
>>>> release a subset of what's requested it does not attempt to recover.
>>>> We can potentially change that behavior on the guest side, since
>>>> partially-freed DIMMs aren't currently useful on the host-side...
>>>>
>>>> But, in the case of pseries, I wonder if it makes sense to maybe go
>>>> ahead and MADV_DONTNEED the ranges backing these released blocks so the
>>>> host can at least partially reclaim the memory from a partially
>>>> unplugged DIMM?
>>>
>>> Urgh.. I can see the benefit, but I'm a bit uneasy about making the
>>> DIMM semantics different in this way on Power.
>>>
>>> I'm starting to wonder if shoehorning the PAPR DR memory mechanism into
>>> the qemu DIMM model was a good idea after all.
>>
>> Ignorant question (sorry, I really don't have much experience yet here):
>> Could we maybe align the size of the LMBs with the size of the DIMMs?
>> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?
> 
> Should work, but the question is what should be the right size so that
> we have good granularity of hotplug but also not run out of mem slots
> thereby limiting us on the maxmem. I remember you changed the memslots
> to 512 in KVM, but we are yet to move up from 32 in QEMU for sPAPR though.

Half of the slots should be "reserved" for PCI and other stuff, so we
could use 256 for memory - that way we would also be on the same level as
x86, which also uses 256 memslots here, as far as I know.

Anyway, couldn't we simply calculate the SPAPR_MEMORY_BLOCK_SIZE
dynamically, according to the maxmem and slot values that the user
specified? So that SPAPR_MEMORY_BLOCK_SIZE simply would match the DIMM
size? ... or is there some constraint that I've missed so that
SPAPR_MEMORY_BLOCK_SIZE has to be a compile-time #defined value?

 Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  8:22                     ` Thomas Huth
@ 2016-04-29  8:30                       ` Igor Mammedov
  2016-04-29 11:01                         ` Thomas Huth
  0 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-29  8:30 UTC (permalink / raw)
  To: Thomas Huth
  Cc: bharata, David Gibson, Michael Roth, qemu-devel, qemu-ppc, nfont

On Fri, 29 Apr 2016 10:22:03 +0200
Thomas Huth <thuth@redhat.com> wrote:

> On 29.04.2016 08:59, Bharata B Rao wrote:
> > On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:  
> >> On 29.04.2016 05:24, David Gibson wrote:  
> >>> On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:  
> >> ...  
> >>>> In the case of pseries, the DIMM abstraction isn't really exposed to
> >>>> the guest, but rather the memory blocks we use to make the backing
> >>>> memdev memory available to the guest. During unplug, the guest
> >>>> completely releases these blocks back to QEMU, and if it can only
> >>>> release a subset of what's requested it does not attempt to recover.
> >>>> We can potentially change that behavior on the guest side, since
> >>>> partially-freed DIMMs aren't currently useful on the host-side...
> >>>>
> >>>> But, in the case of pseries, I wonder if it makes sense to maybe go
> >>>> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> >>>> host can at least partially reclaim the memory from a partially
> >>>> unplugged DIMM?  
> >>>
> >>> Urgh.. I can see the benefit, but I'm a bit uneasy about making the
> >>> DIMM semantics different in this way on Power.
> >>>
> >>> I'm starting to wonder if shoehorning the PAPR DR memory mechanism into
> >>> the qemu DIMM model was a good idea after all.
> >>
> >> Ignorant question (sorry, I really don't have much experience yet here):
> >> Could we maybe align the size of the LMBs with the size of the DIMMs?
> >> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?  
> > 
> > Should work, but the question is what should be the right size so that
> > we have good granularity of hotplug but also not run out of mem slots
> > thereby limiting us on the maxmem. I remember you changed the memslots
> > to 512 in KVM, but we are yet to move up from 32 in QEMU for sPAPR though.  
> 
> Half of the slots should be "reserved" for PCI and other stuff, so we
> could use 256 for memory - that way we would also be on the same level as
> x86, which also uses 256 memslots here, as far as I know.
> 
> Anyway, couldn't we simply calculate the SPAPR_MEMORY_BLOCK_SIZE
> dynamically, according to the maxmem and slot values that the user
> specified? So that SPAPR_MEMORY_BLOCK_SIZE simply would match the DIMM
> size? ... or is there some constraint that I've missed so that
> SPAPR_MEMORY_BLOCK_SIZE has to be a compile-time #defined value?
If you do that, then the possible DIMM size would have to be decided at startup
and fixed. If a DIMM of the wrong size is plugged in, the machine should fail
the hotplug request.
The question is: how will mgmt know the fixed DIMM size that sPAPR just calculated?

> 
>  Thomas
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  3:28                 ` David Gibson
@ 2016-04-29  8:42                   ` Igor Mammedov
  0 siblings, 0 replies; 33+ messages in thread
From: Igor Mammedov @ 2016-04-29  8:42 UTC (permalink / raw)
  To: David Gibson
  Cc: Michael Roth, Bharata B Rao, thuth, qemu-devel, qemu-ppc, nfont

On Fri, 29 Apr 2016 13:28:50 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Wed, Apr 27, 2016 at 03:37:05PM +0200, Igor Mammedov wrote:
> > On Tue, 26 Apr 2016 16:03:37 -0500
> > Michael Roth <mdroth@linux.vnet.ibm.com> wrote:
> >   
> > > Quoting Igor Mammedov (2016-04-26 02:52:36)  
> > > > On Tue, 26 Apr 2016 10:39:23 +0530
> > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > >     
> > > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:    
> > > > > > On Wed, 16 Mar 2016 10:11:54 +0530
> > > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > > > > >       
> > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:      
> > > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:        
> > > > > > > > > Add support to hot remove pc-dimm memory devices.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>        
> > > > > > > > 
> > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > > > > > 
> > > > > > > > Looks correct, but again, needs to wait on the PAPR change.      
> > > > > > [...]      
> > > > > > > 
> > > > > > > While we are here, I would also like to get some opinion on the real
> > > > > > > need for memory unplug. Is there anything that memory unplug gives us
> > > > > > > which memory ballooning (shrinking mem via ballooning) can't give ?      
> > > > > > Sure, ballooning can complement memory hotplug, but turning it on would
> > > > > > effectively reduce hotplug to ballooning, as it would enable overcommit
> > > > > > capability instead of the hard partitioning pc-dimms provide. So one
> > > > > > could just use ballooning only and not bother with hotplug at all.
> > > > > > 
> > > > > > On the other hand, memory hotplug/unplug (at least on x86) tries
> > > > > > to model real hardware, thus removing the need for a paravirt ballooning
> > > > > > solution in favor of native guest support.
> > > > > 
> > > > > Thanks for your views.
> > > > >     
> > > > > > 
> > > > > > PS:
> > > > > > Guest-wise, hot-unplug is currently not well supported in Linux,
> > > > > > i.e. it's not guaranteed that the guest will honor an unplug request,
> > > > > > as it may pin a dimm by using it as non-migratable memory. So
> > > > > > there is some work to do on the guest side to make unplug more
> > > > > > reliable/guaranteed.
> > > > > 
> > > > > In the above scenario where the guest doesn't allow removal of certain
> > > > > parts of DIMM memory, what is the expected behaviour as far as QEMU
> > > > > DIMM device is concerned ? I seem to be running into this situation
> > > > > very often with PowerPC mem unplug where I am left with a DIMM device
> > > > > that has only some memory blocks released. In this situation, I would like
> > > > > to block further unplug requests on the same device, but QEMU seems
> > > > > to allow more such unplug requests to come in via the monitor. So
> > > > > qdev won't help me here ? Should I detect such condition from the
> > > > > machine unplug() handler and take required action ?    
> > > > I think offlining is a guest's task, along with recovering from an
> > > > inability to offline (i.e. offline all + eject, or restore the original state).
> > > > QEMU does its job by notifying the guest which DIMM it wants to remove,
> > > > and removes it when the guest asks (at least in the x86 world).
> > > 
> > > In the case of pseries, the DIMM abstraction isn't really exposed to
> > > the guest, but rather the memory blocks we use to make the backing
> > > memdev memory available to the guest. During unplug, the guest
> > > completely releases these blocks back to QEMU, and if it can only
> > > release a subset of what's requested it does not attempt to recover.
> > > We can potentially change that behavior on the guest side, since
> > > partially-freed DIMMs aren't currently useful on the host-side...
> > > 
> > > But, in the case of pseries, I wonder if it makes sense to maybe go
> > > ahead and MADV_DONTNEED the ranges backing these released blocks so the
> > > host can at least partially reclaim the memory from a partially
> > > unplugged DIMM?  
> > It's a little bit confusing: one asked to remove the device, but it's still
> > there, just not completely usable/available.
> > What will happen when the user wants that memory plugged back?
> > 
> > It looks like reinventing ballooning;
> > maybe it would be better to disable unplug and use ballooning
> > to release some memory, until the guest is ready to unplug all or none of
> > the requested blocks?
> 
> I see your point, and it gives me an idea.
> 
> I think it might be possible to connect qemu's ballooning backend to
> the PAPR LMB mechanism - and in fact that might be a better match than
> the DIMM backend for it.  The common way of removing memory with PAPR
> is for the host to just ask for an amount and the guest chooses what to
> give up, which is indeed more like ballooning than physical hotplug.
Looks like the ballooning case.
One thing to consider here is what PAPR expects when it adds memory
back to the guest.
It's probably possible to fail the request gracefully in QEMU if it
can't get the relinquished memory back.

 
> How we integrate that with true memory hot (un)plug, which will need
> the DIMM mechanism, I'm not quite sure.
Me neither; so far I was thinking about replacing paravirt ballooning
with native memory hot(un)plug and dropping balloon support (which is
sort of an orphan without an active maintainer).
But native hotplug is by its nature not as fine-grained as ballooning,
so anyone who needs to give up memory in chunks smaller than the DIMM
size would still have to use the balloon driver/device.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  6:45                 ` Thomas Huth
  2016-04-29  6:59                   ` Bharata B Rao
@ 2016-04-29 10:11                   ` David Gibson
  1 sibling, 0 replies; 33+ messages in thread
From: David Gibson @ 2016-04-29 10:11 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Michael Roth, Igor Mammedov, Bharata B Rao, qemu-devel, qemu-ppc, nfont


On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:
> On 29.04.2016 05:24, David Gibson wrote:
> > On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:
> ...
> >> In the case of pseries, the DIMM abstraction isn't really exposed to
> >> the guest, but rather the memory blocks we use to make the backing
> >> memdev memory available to the guest. During unplug, the guest
> >> completely releases these blocks back to QEMU, and if it can only
> >> release a subset of what's requested it does not attempt to recover.
> >> We can potentially change that behavior on the guest side, since
> >> partially-freed DIMMs aren't currently useful on the host-side...
> >>
> >> But, in the case of pseries, I wonder if it makes sense to maybe go
> >> ahead and MADV_DONTNEED the ranges backing these released blocks so the
> >> host can at least partially reclaim the memory from a partially
> >> unplugged DIMM?
> > 
> > Urgh.. I can see the benefit, but I'm a bit uneasy about making the
> > DIMM semantics different in this way on Power.
> > 
> > I'm starting to wonder if shoehorning the PAPR DR memory mechanism into
> > the qemu DIMM model was a good idea after all.
> 
> Ignorant question (sorry, I really don't have much experience yet here):
> Could we maybe align the size of the LMBs with the size of the DIMMs?
> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?

Um... maybe.  DIMMs don't have to all be the same size, whereas LMBs
do, but maybe we can work around that.

In theory we could increase the LMB size, but I'd be pretty worried
that guests might not cope with a setup so different from what PowerVM
gives us.

Decreasing the DIMMs to LMB size should certainly work in theory, but
could be very painful from the user point of view to have to add a
memory block for every 256MiB.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  8:30                       ` Igor Mammedov
@ 2016-04-29 11:01                         ` Thomas Huth
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Huth @ 2016-04-29 11:01 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: bharata, David Gibson, Michael Roth, qemu-devel, qemu-ppc, nfont

On 29.04.2016 10:30, Igor Mammedov wrote:
> On Fri, 29 Apr 2016 10:22:03 +0200
> Thomas Huth <thuth@redhat.com> wrote:
> 
>> On 29.04.2016 08:59, Bharata B Rao wrote:
>>> On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:  
>>>> On 29.04.2016 05:24, David Gibson wrote:  
>>>>> On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:  
>>>> ...  
>>>>>> In the case of pseries, the DIMM abstraction isn't really exposed to
>>>>>> the guest, but rather the memory blocks we use to make the backing
>>>>>> memdev memory available to the guest. During unplug, the guest
>>>>>> completely releases these blocks back to QEMU, and if it can only
>>>>>> release a subset of what's requested it does not attempt to recover.
>>>>>> We can potentially change that behavior on the guest side, since
>>>>>> partially-freed DIMMs aren't currently useful on the host-side...
>>>>>>
>>>>>> But, in the case of pseries, I wonder if it makes sense to maybe go
>>>>>> ahead and MADV_DONTNEED the ranges backing these released blocks so the
>>>>>> host can at least partially reclaim the memory from a partially
>>>>>> unplugged DIMM?  
>>>>>
>>>>> Urgh.. I can see the benefit, but I'm a bit uneasy about making the
>>>>> DIMM semantics different in this way on Power.
>>>>>
>>>>> I'm starting to wonder if shoehorning the PAPR DR memory mechanism into
>>>>> the qemu DIMM model was a good idea after all.
>>>>
>>>> Ignorant question (sorry, I really don't have much experience yet here):
>>>> Could we maybe align the size of the LMBs with the size of the DIMMs?
>>>> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?  
>>>
>>> Should work, but the question is what should be the right size so that
>>> we have good granularity of hotplug but also not run out of mem slots
>>> thereby limiting us on the maxmem. I remember you changed the memslots
>>> to 512 in KVM, but we are yet to move up from 32 in QEMU for sPAPR though.  
>>
>> Half of the slots should be "reserved" for PCI and other stuff, so we
>> could use 256 for memory - that way we would also be on the same level as
>> x86, which also uses 256 memslots here, as far as I know.
>>
>> Anyway, couldn't we simply calculate the SPAPR_MEMORY_BLOCK_SIZE
>> dynamically, according to the maxmem and slot values that the user
>> specified? So that SPAPR_MEMORY_BLOCK_SIZE simply would match the DIMM
>> size? ... or is there some constraint that I've missed so that
>> SPAPR_MEMORY_BLOCK_SIZE has to be a compile-time #defined value?
> If you do that, then the possible DIMM size would have to be decided at startup
> and fixed. If a DIMM of the wrong size is plugged in, the machine should fail
> the hotplug request.
> The question is: how will mgmt know the fixed DIMM size that sPAPR just calculated?

Ok, sorry, I somehow had that bad idea in mind that all DIMMs for
hot-plugging should have the same size. That's of course not the case if
we model something similar to DIMM plugging on real hardware. So please
never mind, it was just a wrong assumption on my side.

OTOH, maybe it also does not make sense to keep the LMB size always at
such a small, fixed value. Imagine the user specifies slots=32 and
maxmem=32G ... maybe we should then disallow plugging DIMMs that are
smaller than 1G, so we could use a LMB size of 1G in this case?
(plugging DIMMs of different size > 1G would then still be allowed, too,
of course)
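
For that example, a back-of-the-envelope sketch of the rule (again only an
assumption for discussion, not something the patchset implements):

    #include <stdint.h>

    #define SPAPR_MEMORY_BLOCK_SIZE (256ULL << 20)   /* current fixed LMB size */

    /* Smallest DIMM size to accept for the given slots/maxmem: round
     * maxmem/slots up to a power of two, but never go below one LMB. */
    static uint64_t spapr_min_dimm_size(uint64_t maxmem, unsigned int slots)
    {
        uint64_t target = maxmem / slots;
        uint64_t size = SPAPR_MEMORY_BLOCK_SIZE;

        while (size < target) {
            size <<= 1;
        }
        return size;    /* slots=32, maxmem=32G  ->  1G */
    }

That would let the LMB size track the minimum DIMM size, at the cost of
rejecting smaller DIMMs at hotplug time, as Igor points out above.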

 Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support
  2016-03-15  4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao
@ 2016-05-27 15:48 ` Thomas Huth
  2016-05-27 16:32   ` Michael Roth
  2 siblings, 1 reply; 33+ messages in thread
From: Thomas Huth @ 2016-05-27 15:48 UTC (permalink / raw)
  To: Bharata B Rao, qemu-devel; +Cc: mdroth, qemu-ppc, nfont, imammedo, david

 Hi Bharata,

On 15.03.2016 05:38, Bharata B Rao wrote:
> This patchset adds memory hot removal support for PowerPC sPAPR.
> This new version switches to using the proposed "count-indexed" type of
> hotplug identifier which allows to hot remove a number of LMBs starting
> with a given DRC index.
> 
> This count-indexed hotplug identifier isn't yet part of PAPR.

Just for clarification / my understanding: That means we also need a
modified guest to support this new interface? If yes, did you post such
patches somewhere else already, too?

 Thomas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support
  2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth
@ 2016-05-27 16:32   ` Michael Roth
  0 siblings, 0 replies; 33+ messages in thread
From: Michael Roth @ 2016-05-27 16:32 UTC (permalink / raw)
  To: Thomas Huth, Bharata B Rao, qemu-devel; +Cc: qemu-ppc, nfont, imammedo, david

Quoting Thomas Huth (2016-05-27 10:48:45)
>  Hi Bharata,
> 
> On 15.03.2016 05:38, Bharata B Rao wrote:
> > This patchset adds memory hot removal support for PowerPC sPAPR.
> > This new version switches to using the proposed "count-indexed" type of
> > hotplug identifier which allows to hot remove a number of LMBs starting
> > with a given DRC index.
> > 
> > This count-indexed hotplug identifier isn't yet part of PAPR.
> 
> Just for clarification / my understanding: That means we also need a
> modified guest to support this new interface? If yes, did you post such
> patches somewhere else already, too?

No patches posted yet, but hopefully soon. These bits will likely be added
as part of an effort that moves all memory hotplug/unplug into the guest
kernel instead of relying on drmgr. Most of the bits for in-kernel
memory hotplug are already upstream, but there are a number of other
requirements in the spec update (like a new hotplug interrupt/queue
instead of re-using EPOW) that need to be addressed as part of the
switchover.

> 
>  Thomas
> 

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2016-05-27 16:32 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-15  4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao
2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
2016-03-16  1:29   ` David Gibson
2016-03-17 16:03   ` Michael Roth
2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao
2016-03-16  1:36   ` David Gibson
2016-03-16  4:41     ` Bharata B Rao
2016-03-16  5:11       ` David Gibson
2016-03-23  3:22       ` David Gibson
2016-03-24 14:15         ` Nathan Fontenot
2016-03-29  4:41           ` David Gibson
2016-04-25  9:20       ` Igor Mammedov
2016-04-26  5:09         ` Bharata B Rao
2016-04-26  7:52           ` Igor Mammedov
2016-04-26 21:03             ` Michael Roth
2016-04-27  6:54               ` Thomas Huth
2016-04-27 13:37               ` Igor Mammedov
2016-04-27 13:59                 ` Thomas Huth
2016-04-27 14:34                   ` Igor Mammedov
2016-04-27 19:07                     ` Michael Roth
2016-04-28  7:55                       ` Igor Mammedov
2016-04-27 14:24                 ` Bharata B Rao
2016-04-29  3:28                 ` David Gibson
2016-04-29  8:42                   ` Igor Mammedov
2016-04-29  3:24               ` David Gibson
2016-04-29  6:45                 ` Thomas Huth
2016-04-29  6:59                   ` Bharata B Rao
2016-04-29  8:22                     ` Thomas Huth
2016-04-29  8:30                       ` Igor Mammedov
2016-04-29 11:01                         ` Thomas Huth
2016-04-29 10:11                   ` David Gibson
2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth
2016-05-27 16:32   ` Michael Roth
