Date: Fri, 23 Jan 2015 13:41:38 +0100
From: Igor Mammedov
Message-ID: <20150123134138.34334599@nial.brq.redhat.com>
In-Reply-To: <1420697420-16053-7-git-send-email-bharata@linux.vnet.ibm.com>
References: <1420697420-16053-1-git-send-email-bharata@linux.vnet.ibm.com>
 <1420697420-16053-7-git-send-email-bharata@linux.vnet.ibm.com>
Subject: Re: [Qemu-devel] [RFC PATCH v1 06/13] spapr: CPU hotplug support
To: Bharata B Rao
Cc: agraf@suse.de, qemu-devel@nongnu.org, mdroth@linux.vnet.ibm.com

On Thu, 8 Jan 2015 11:40:13 +0530
Bharata B Rao wrote:

> Support CPU hotplug via device-add command. Use the existing EPOW event
> infrastructure to send CPU hotplug notification to the guest.
>
> Signed-off-by: Bharata B Rao
> ---
>  hw/ppc/spapr.c              | 205 +++++++++++++++++++++++++++++++++++++++++++-
>  hw/ppc/spapr_events.c       |   8 +-
>  target-ppc/translate_init.c |   6 ++
>  3 files changed, 215 insertions(+), 4 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 515d770..a293a59 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1)
>      g_string_append_len(s, s1, strlen(s1) + 1);
>  }
>
> +uint32_t cpus_per_socket;

static ???

> +
>  static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     hwaddr initrd_size,
>                                     hwaddr kernel_size,
> @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
>      QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
>      unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      char *buf;
>
> +    cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      add_str(hypertas, "hcall-pft");
>      add_str(hypertas, "hcall-term");
>      add_str(hypertas, "hcall-dabr");
> @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
>      }
>  }
>
> +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */
> +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
> +                                  int drc_index)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    int index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    int smpt = ppc_get_compat_smt_threads(cpu);
> +    uint32_t servers_prop[smpt];
> +    uint32_t gservers_prop[smpt * 2];
> +    int i;
> +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    uint32_t associativity[] = {cpu_to_be32(0x5),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(cs->numa_node),
> +                                cpu_to_be32(index)};
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> +                           env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> +                           env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> +                           env->icache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> +                           env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> +                               pcc->l1_dcache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> +                               pcc->l1_icache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> +                          segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> +                          page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> +                           cs->cpu_index / cpus_per_socket)));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +
> +    /* Build interrupt servers and gservers properties */
> +    for (i = 0; i < smpt; i++) {
> +        servers_prop[i] = cpu_to_be32(index + i);
> +        /* Hack, direct the group queues back to cpu 0 */
> +        gservers_prop[i*2] = cpu_to_be32(index + i);
> +        gservers_prop[i*2 + 1] = 0;
> +    }
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
> +                     servers_prop, sizeof(servers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
> +                     gservers_prop, sizeof(gservers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,pft-size",
> +                     pft_size_prop, sizeof(pft_size_prop)));
> +
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> +                         sizeof(associativity)));
> +    }
> +}
> +
> +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    int drc_index = drck->get_index(drc);
> +    void *fdt, *fdt_orig;
> +    int offset, i;
> +    char *nodename;
> +
> +    /* add OF node for CPU and required OF DT properties */
> +    fdt_orig = g_malloc0(FDT_MAX_SIZE);
> +    offset = fdt_create(fdt_orig, FDT_MAX_SIZE);
> +    fdt_begin_node(fdt_orig, "");
> +    fdt_end_node(fdt_orig);
> +    fdt_finish(fdt_orig);
> +
> +    fdt = g_malloc0(FDT_MAX_SIZE);
> +    fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE);
> +
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +
> +    offset = fdt_add_subnode(fdt, offset, nodename);
> +
> +    /* Set NUMA node for the added CPU */
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +            cs->numa_node = i;
> +            break;
> +        }
> +    }
> +
> +    spapr_populate_cpu_dt(cs, fdt, offset, drc_index);
> +    g_free(fdt_orig);
> +    g_free(nodename);
> +
> +    drck->attach(drc, dev, fdt, offset, false);
> +}
> +
> +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                           Error **errp)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs = CPU(dev);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id);

Just a rant: does this have any relation to hotplug_dev? The point here is to
get these data from the hotplug_dev object (or some child of it) rather than
via a direct ad-hoc call.

> +
> +    /* TODO: Check if DR is enabled ? */
> +    g_assert(drc);
> +
> +    spapr_cpu_reset(POWERPC_CPU(CPU(dev)));

The reset should probably be done at realize time; see x86_cpu_realizefn() for
an example.
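
A minimal sketch of what I mean, assuming the respin keeps a per-CPU realize
path (the body below is condensed and only illustrative, it is not the code
from this series):

    static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
    {
        CPUState *cs = CPU(dev);

        /* ... existing realize work: feature checks, qemu_init_vcpu(), ... */

        /*
         * Reset the CPU here, as x86_cpu_realizefn() does, so the machine's
         * plug handler only has to wire the device up and notify the guest.
         */
        cpu_reset(cs);
    }

Whether that ends up as a plain cpu_reset() in realize or as registering
spapr_cpu_reset() via qemu_register_reset() is a detail for the respin.
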
> +    spapr_cpu_hotplug_add(dev, cs);
> +    spapr_hotplug_req_add_event(drc);
> +    error_propagate(errp, local_err);
> +    return;
> +}
> +
> +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> +                                      DeviceState *dev, Error **errp)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        if (dev->hotplugged) {
> +            spapr_cpu_plug(hotplug_dev, dev, errp);

It would be nicer if this could do the wiring of cold-plugged CPUs too.

> +        }
> +    }
> +}
> +
> +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
> +                                                DeviceState *dev)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        return HOTPLUG_HANDLER(machine);
> +    }
> +    return NULL;
> +}
> +
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
>      sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
>      FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
>      NMIClass *nc = NMI_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>
>      mc->init = ppc_spapr_init;
>      mc->reset = ppc_spapr_reset;
> @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      mc->default_boot_order = NULL;
>      mc->kvm_type = spapr_kvm_type;
>      mc->has_dynamic_sysbus = true;
> +    mc->get_hotplug_handler = spapr_get_hotpug_handler;
> +
> +    hc->plug = spapr_machine_device_plug;
>      smc->dr_phb_enabled = false;
>      smc->dr_cpu_enabled = false;
>      smc->dr_lmb_enabled = false;
> @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = {
>      .interfaces = (InterfaceInfo[]) {
>          { TYPE_FW_PATH_PROVIDER },
>          { TYPE_NMI },
> +        { TYPE_HOTPLUG_HANDLER },
>          { }
>      },
>  };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 434a75d..035d8c9 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
>      hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
>      hp->hdr.section_version = 1; /* includes extended modifier */
>      hp->hotplug_action = hp_action;
> -
> +    hp->drc.index = cpu_to_be32(drck->get_index(drc));
> +    hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>
>      switch (drc_type) {
>      case SPAPR_DR_CONNECTOR_TYPE_PCI:
> -        hp->drc.index = cpu_to_be32(drck->get_index(drc));
> -        hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>          hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
>          break;
> +    case SPAPR_DR_CONNECTOR_TYPE_CPU:
> +        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
> +        break;
>      default:
>          /* skip notification for unknown connector types */
>          g_free(new_hp);
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 9c642a5..cf9d8d3 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -32,6 +32,7 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/ppc/spapr.h"
>  #include "hw/ppc/ppc.h"
> +#include "sysemu/sysemu.h"
>
>  //#define PPC_DUMP_CPU
>  //#define PPC_DEBUG_SPR
> @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>          return;
>      }
>
> +    if (cs->cpu_index >= max_cpus) {

Please note that cpu_index increases monotonically, so when you do an unplug
followed by a plug it will go above max_cpus; the same happens if one
device_add fails in the middle. In either case the next CPU add will fail
because cs->cpu_index goes overboard. I'd suggest not relying on cpu_index
for any purpose and using some other means to identify where the CPU is
plugged in.
On x86 we are slowly getting rid of this dependency in favor of the apic_id
(topology information); eventually it could become:

  -device cpu_foo,socket=X,core=Y[,thread=Z][,node=N]

You could probably do the same; a rough sketch of that direction follows at
the end of this mail. It doesn't have to be in this series, just be aware of
the potential issues.

> +        error_setg(errp, "Can't have more CPUs, maxcpus limit reached");
> +        return;
> +    }
> +
>      cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
>          + (cs->cpu_index % smp_threads);
>  #endif
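
To illustrate the topology-property direction mentioned above, here is a
purely hypothetical sketch: none of these fields or property names exist in
PowerPCCPU today, they are only meant to show the shape of the interface.

    /*
     * Hypothetical: describe the plug location with qdev properties so that
     * device_add says where the CPU goes, instead of deriving the slot from
     * the monotonically growing cpu_index.
     */
    static Property ppc_cpu_topology_props[] = {
        DEFINE_PROP_UINT32("socket-id", PowerPCCPU, socket_id, 0),
        DEFINE_PROP_UINT32("core-id",   PowerPCCPU, core_id,   0),
        DEFINE_PROP_UINT32("thread-id", PowerPCCPU, thread_id, 0),
        DEFINE_PROP_UINT32("node-id",   PowerPCCPU, node_id,   0),
        DEFINE_PROP_END_OF_LIST(),
    };

With something like that in place the command line from above would become

    -device cpu_foo,socket-id=X,core-id=Y,thread-id=Z,node-id=N

and the machine code could compute the DRC index and device tree id from the
topology rather than from cs->cpu_index.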