All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Graf <agraf@suse.de>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 2/3] pseries: Allow KVM Book3S-HV on PPC970 CPUS
Date: Thu, 29 Sep 2011 15:25:37 +0200	[thread overview]
Message-ID: <E4FDF83B-E29E-4242-86FE-6FFF59A27689@suse.de> (raw)
In-Reply-To: <1317278706-16105-3-git-send-email-david@gibson.dropbear.id.au>


On 29.09.2011, at 08:45, David Gibson wrote:

> At present, using the hypervisor-aware Book3S-HV KVM will only work
> with qemu on POWER7 CPUs.  PPC970 CPUs also have hypervisor
> capability, but they lack the VRMA feature which makes assigning guest
> memory easier.
> 
> In order to allow KVM Book3S-HV on PPC970, we need to specially
> allocate the first chunk of guest memory (the "Real Mode Area" or
> RMA), so that it is physically contiguous.
> 
> Sufficiently recent host kernels allow such contiguous RMAs to be
> allocated, with a kvm capability advertising whether the feature is
> available and/or necessary on this hardware.  This patch enables qemu
> to use this support, thus allowing kvm acceleration of pseries qemu
> machines on PPC970 hardware.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> hw/spapr.c           |   50 ++++++++++++++++++++++++++++++++++++++++--------
> target-ppc/kvm.c     |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++
> target-ppc/kvm_ppc.h |    6 +++++
> 3 files changed, 98 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/spapr.c b/hw/spapr.c
> index ba9ae1c..d51425a 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -89,6 +89,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num)
> }
> 
> static void *spapr_create_fdt_skel(const char *cpu_model,
> +                                   target_phys_addr_t rma_size,
>                                    target_phys_addr_t initrd_base,
>                                    target_phys_addr_t initrd_size,
>                                    const char *boot_device,
> @@ -97,7 +98,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> {
>     void *fdt;
>     CPUState *env;
> -    uint64_t mem_reg_property[] = { 0, cpu_to_be64(ram_size) };
> +    uint64_t mem_reg_property_rma[] = { 0, cpu_to_be64(rma_size) };
> +    uint64_t mem_reg_property_nonrma[] = { cpu_to_be64(rma_size),
> +                                           cpu_to_be64(ram_size - rma_size) };
>     uint32_t start_prop = cpu_to_be32(initrd_base);
>     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
>     uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
> @@ -143,15 +146,25 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> 
>     _FDT((fdt_end_node(fdt)));
> 
> -    /* memory node */
> +    /* memory node(s) */
>     _FDT((fdt_begin_node(fdt, "memory@0")));
> 
>     _FDT((fdt_property_string(fdt, "device_type", "memory")));
> -    _FDT((fdt_property(fdt, "reg",
> -                       mem_reg_property, sizeof(mem_reg_property))));
> -
> +    _FDT((fdt_property(fdt, "reg", mem_reg_property_rma,
> +                       sizeof(mem_reg_property_rma))));
>     _FDT((fdt_end_node(fdt)));
> 
> +    if (ram_size > rma_size) {
> +        char mem_name[32];
> +
> +	sprintf(mem_name, "memory@%" PRIx64, (uint64_t)rma_size);
> +	_FDT((fdt_begin_node(fdt, mem_name)));
> +	_FDT((fdt_property_string(fdt, "device_type", "memory")));
> +        _FDT((fdt_property(fdt, "reg", mem_reg_property_nonrma,
> +                           sizeof(mem_reg_property_nonrma))));
> +        _FDT((fdt_end_node(fdt)));
> +    }        
> +
>     /* cpus */
>     _FDT((fdt_begin_node(fdt, "cpus")));
> 
> @@ -341,6 +354,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> {
>     CPUState *env;
>     int i;
> +    target_phys_addr_t rma_alloc_size, rma_size;
>     ram_addr_t ram_offset;
>     uint32_t initrd_base;
>     long kernel_size, initrd_size, fw_size;
> @@ -350,10 +364,23 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>     spapr = g_malloc(sizeof(*spapr));
>     cpu_ppc_hypercall = emulate_spapr_hypercall;
> 
> +    /* Allocate RMA if necessary */
> +    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma");
> +
> +    if (rma_alloc_size == -1) {
> +        hw_error("qemu: Unable to create RMA\n");
> +        exit(1);
> +    }
> +    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
> +        rma_size = rma_alloc_size;
> +    } else {
> +        rma_size = ram_size;
> +    }
> +
>     /* We place the device tree just below either the top of RAM, or
>      * 2GB, so that it can be processed with 32-bit code if
>      * necessary */
> -    spapr->fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE;
> +    spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE;

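The change looks sane, so I'd assume the comment above (device tree placed "just below either the top of RAM, or 2GB") is now wrong, since the address is derived from rma_size rather than ram_size :)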

>     spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE;
> 
>     /* init CPUs */
> @@ -378,8 +405,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> 
>     /* allocate RAM */
>     spapr->ram_limit = ram_size;
> -    ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", spapr->ram_limit);
> -    cpu_register_physical_memory(0, ram_size, ram_offset);
> +    if (spapr->ram_limit > rma_alloc_size) {
> +        ram_addr_t nonrma_base = rma_alloc_size;
> +        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
> +
> +        ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", nonrma_size);
> +        cpu_register_physical_memory(nonrma_base, nonrma_size, ram_offset);
> +    }
> 
>     /* allocate hash page table.  For now we always make this 16mb,
>      * later we should probably make it scale to the size of guest
> @@ -503,7 +535,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>     }
> 
>     /* Prepare the device tree */
> -    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
> +    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
>                                             initrd_base, initrd_size,
>                                             boot_device, kernel_cmdline,
>                                             pteg_shift + 7);
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 2c1bc7a..37ee902 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -55,6 +55,9 @@ static int cap_interrupt_level = false;
> static int cap_segstate;
> static int cap_booke_sregs;
> static int cap_ppc_smt = 0;
> +#ifdef KVM_CAP_PPC_RMA

No need for these ifdefs anymore, thanks to QEMU's local copy of the KVM headers :)


Alex

  reply	other threads:[~2011-09-29 13:25 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-29  6:45 [Qemu-devel] [0/3] pseries: Support and improvements for KVM Book3S-HV support David Gibson
2011-09-29  6:45 ` [Qemu-devel] [PATCH 1/3] pseries: Support SMT systems for KVM Book3S-HV David Gibson
2011-09-29  7:27   ` Jan Kiszka
2011-09-29 13:17   ` Alexander Graf
2011-09-30  1:02     ` David Gibson
2011-09-29  6:45 ` [Qemu-devel] [PATCH 2/3] pseries: Allow KVM Book3S-HV on PPC970 CPUS David Gibson
2011-09-29 13:25   ` Alexander Graf [this message]
2011-09-29  6:45 ` [Qemu-devel] [PATCH 3/3] pseries: Use Book3S-HV TCE acceleration capabilities David Gibson
2011-09-29  7:27   ` Jan Kiszka
2011-09-29 13:29   ` Alexander Graf
2011-09-30  2:37     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E4FDF83B-E29E-4242-86FE-6FFF59A27689@suse.de \
    --to=agraf@suse.de \
    --cc=david@gibson.dropbear.id.au \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.