All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: "Cédric Le Goater" <clg@kaod.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	qemu-ppc@nongnu.org, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 05/13] spapr/xive: add migration support for KVM
Date: Thu, 7 Feb 2019 14:41:16 +1100	[thread overview]
Message-ID: <20190207034116.GB518@umbus.fritz.box> (raw)
In-Reply-To: <20190107183946.7230-6-clg@kaod.org>

[-- Attachment #1: Type: text/plain, Size: 14156 bytes --]

On Mon, Jan 07, 2019 at 07:39:38PM +0100, Cédric Le Goater wrote:
> When the VM is stopped, the VM state handler stabilizes the XIVE IC
> and marks the EQ pages dirty. These are then transferred to destination
> before the transfer of the device vmstates starts.
> 
> The sPAPRXive interrupt controller model captures the XIVE internal
> tables, EAT and ENDT and the XiveTCTX model does the same for the
> thread interrupt context registers.
> 
> At restart, the sPAPRXive 'post_load' method restores all the XIVE
> states. It is called by the sPAPR machine 'post_load' method, when all
> XIVE states have been transferred and loaded.
> 
> Finally, the source states are restored in the VM change state handler
> when the machine reaches the running state.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Looks fine modulo possible changes in the KVM interface.

> ---
>  include/hw/ppc/spapr_xive.h |   5 +
>  include/hw/ppc/xive.h       |   1 +
>  hw/intc/spapr_xive.c        |  34 +++++++
>  hw/intc/spapr_xive_kvm.c    | 187 +++++++++++++++++++++++++++++++++++-
>  hw/intc/xive.c              |  17 ++++
>  hw/ppc/spapr_irq.c          |   2 +-
>  6 files changed, 244 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 8815ed5aa372..52804516e909 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -46,6 +46,7 @@ bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>  bool spapr_xive_priority_is_reserved(uint8_t priority);
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id);
>  
>  void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
>                             uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
> @@ -53,6 +54,8 @@ void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
>                             uint8_t *out_end_blk, uint32_t *out_end_idx);
>  int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
>                               uint8_t *out_end_blk, uint32_t *out_end_idx);
> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio);
>  
>  typedef struct sPAPRMachineState sPAPRMachineState;
>  
> @@ -68,5 +71,7 @@ void spapr_xive_map_mmio(sPAPRXive *xive);
>   */
>  void kvmppc_xive_connect(sPAPRXive *xive, Error **errp);
>  void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
> +int kvmppc_xive_pre_save(sPAPRXive *xive);
> +int kvmppc_xive_post_load(sPAPRXive *xive, int version_id);
>  
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 2e48d75a22e0..8aa314f93ffd 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -443,5 +443,6 @@ void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>  void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
> +void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
>  
>  #endif /* PPC_XIVE_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 50dd66707968..21f3c1ef0901 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -85,6 +85,19 @@ static int spapr_xive_target_to_nvt(uint32_t target,
>   * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>   * priorities per CPU
>   */
> +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio)
> +{
> +    if (out_server) {
> +        *out_server = end_idx >> 3;
> +    }
> +
> +    if (out_prio) {
> +        *out_prio = end_idx & 0x7;
> +    }
> +    return 0;
> +}
> +
>  void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
>                             uint8_t *out_end_blk, uint32_t *out_end_idx)
>  {
> @@ -438,10 +451,31 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
>      },
>  };
>  
> +static int vmstate_spapr_xive_pre_save(void *opaque)
> +{
> +    if (kvmppc_xive_enabled()) {
> +        return kvmppc_xive_pre_save(SPAPR_XIVE(opaque));
> +    }
> +
> +    return 0;
> +}
> +
> +/* Called by the sPAPR machine 'post_load' method */
> +int spapr_xive_post_load(sPAPRXive *xive, int version_id)
> +{
> +    if (kvmppc_xive_enabled()) {
> +        return kvmppc_xive_post_load(xive, version_id);
> +    }
> +
> +    return 0;
> +}
> +
>  static const VMStateDescription vmstate_spapr_xive = {
>      .name = TYPE_SPAPR_XIVE,
>      .version_id = 1,
>      .minimum_version_id = 1,
> +    .pre_save = vmstate_spapr_xive_pre_save,
> +    .post_load = NULL, /* handled at the machine level */
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
>          VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index c7639ffe7758..fe58a9ee32d3 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -60,7 +60,30 @@ static void kvm_cpu_enable(CPUState *cs)
>  /*
>   * XIVE Thread Interrupt Management context (KVM)
>   */
> -static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> +
> +static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4];
> +    int ret;
> +
> +    /* word0 and word1 of the OS ring. */
> +    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
> +
> +    /*
> +     * OS CAM line. Used by KVM to print out the VP identifier. This
> +     * is for debug only.
> +     */
> +    state[1] = *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]);
> +
> +    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno,
> +                         "XIVE: could not restore KVM state of CPU %ld",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +    }
> +}
> +
> +void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>  {
>      uint64_t state[4] = { 0 };
>      int ret;
> @@ -228,6 +251,58 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  /*
>   * sPAPR XIVE interrupt controller (KVM)
>   */
> +static int kvmppc_xive_set_eq_state(sPAPRXive *xive, CPUState *cs, Error **errp)
> +{
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> +        Error *local_err = NULL;
> +        XiveEND *end;
> +        uint8_t end_blk;
> +        uint32_t end_idx;
> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +        uint64_t kvm_eq_idx;
> +
> +        if (spapr_xive_priority_is_reserved(i)) {
> +            continue;
> +        }
> +
> +        spapr_xive_cpu_to_end(POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> +        assert(end_idx < xive->nr_ends);
> +        end = &xive->endt[end_idx];
> +
> +        if (!xive_end_is_valid(end)) {
> +            continue;
> +        }
> +
> +        /* Build the KVM state from the local END structure */
> +        kvm_eq.flags   = KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY;
> +        kvm_eq.qsize   = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
> +        kvm_eq.qpage   = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 |
> +            be32_to_cpu(end->w3);
> +        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
> +        kvm_eq.qindex  = xive_get_field32(END_W1_PAGE_OFF, end->w1);
> +
> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> +            KVM_XIVE_EQ_SERVER_MASK;
> +
> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> +                                &kvm_eq, true, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int kvmppc_xive_get_eq_state(sPAPRXive *xive, CPUState *cs, Error **errp)
>  {
>      unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> @@ -298,6 +373,48 @@ static int kvmppc_xive_get_eq_state(sPAPRXive *xive, CPUState *cs, Error **errp)
>      return 0;
>  }
>  
> +static void kvmppc_xive_set_eas_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +        uint32_t end_idx;
> +        uint32_t end_blk;
> +        uint32_t eisn;
> +        uint8_t priority;
> +        uint32_t server;
> +        uint64_t kvm_eas;
> +        Error *local_err = NULL;
> +
> +        /* No need to set MASKED EAS, this is the default state after reset */
> +        if (!xive_eas_is_valid(eas) || xive_eas_is_masked(eas)) {
> +            continue;
> +        }
> +
> +        end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
> +        end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
> +        eisn = xive_get_field64(EAS_END_DATA, eas->w);
> +
> +        spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
> +
> +        kvm_eas = priority << KVM_XIVE_EAS_PRIORITY_SHIFT &
> +            KVM_XIVE_EAS_PRIORITY_MASK;
> +        kvm_eas |= server << KVM_XIVE_EAS_SERVER_SHIFT &
> +            KVM_XIVE_EAS_SERVER_MASK;
> +        kvm_eas |= ((uint64_t)eisn << KVM_XIVE_EAS_EISN_SHIFT) &
> +            KVM_XIVE_EAS_EISN_MASK;
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, true,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
> +
>  static void kvmppc_xive_get_eas_state(sPAPRXive *xive, Error **errp)
>  {
>      XiveSource *xsrc = &xive->source;
> @@ -448,6 +565,74 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
>      }
>  }
>  
> +int kvmppc_xive_pre_save(sPAPRXive *xive)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs;
> +
> +    /* Grab the EAT */
> +    kvmppc_xive_get_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return -1;
> +    }
> +
> +    /*
> +     * Grab the ENDT. The EQ index and the toggle bit are what we want
> +     * to capture.
> +     */
> +    CPU_FOREACH(cs) {
> +        kvmppc_xive_get_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * The sPAPRXive 'post_load' method is called by the sPAPR machine
> + * 'post_load' method, when all XIVE states have been transferred and
> + * loaded.
> + */
> +int kvmppc_xive_post_load(sPAPRXive *xive, int version_id)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs;
> +
> +    /* Restore the ENDT first. The targetting depends on it. */
> +    CPU_FOREACH(cs) {
> +        kvmppc_xive_set_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    /* Restore the EAT */
> +    kvmppc_xive_set_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return -1;
> +    }
> +
> +    /* Restore the thread interrupt contexts */
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +        kvmppc_xive_cpu_set_state(cpu->tctx, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    /* The source states will be restored when the machine starts running */
> +    return 0;
> +}
> +
>  void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp)
>  {
>      XiveSource *xsrc = &xive->source;
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 596c29d8c826..c5c2fbc3f8bc 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -521,10 +521,27 @@ static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
>      qemu_unregister_reset(xive_tctx_reset, dev);
>  }
>  
> +static int vmstate_xive_tctx_pre_save(void *opaque)
> +{
> +    Error *local_err = NULL;
> +
> +    if (kvmppc_xive_enabled()) {
> +        kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static const VMStateDescription vmstate_xive_tctx = {
>      .name = TYPE_XIVE_TCTX,
>      .version_id = 1,
>      .minimum_version_id = 1,
> +    .pre_save = vmstate_xive_tctx_pre_save,
> +    .post_load = NULL, /* handled by the sPAPRxive model */
>      .fields = (VMStateField[]) {
>          VMSTATE_BUFFER(regs, XiveTCTX),
>          VMSTATE_END_OF_LIST()
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index afbdabfa6543..233c97c5ecd9 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -363,7 +363,7 @@ static void spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
>  
>  static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
>  {
> -    return 0;
> +    return spapr_xive_post_load(spapr->xive, version_id);
>  }
>  
>  static void spapr_irq_reset_xive(sPAPRMachineState *spapr, Error **errp)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2019-02-07  4:06 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-07 18:39 [Qemu-devel] [PATCH 00/13] spapr: add KVM support to the XIVE interrupt mode Cédric Le Goater
2019-01-07 18:39 ` [Qemu-devel] [PATCH 01/13] linux-headers: update to 5.0 Cédric Le Goater
2019-01-07 18:39 ` [Qemu-devel] [PATCH 02/13] spapr/xive: add KVM support Cédric Le Goater
2019-02-06  2:39   ` David Gibson
2019-01-07 18:39 ` [Qemu-devel] [PATCH 03/13] spapr/xive: add state synchronization with KVM Cédric Le Goater
2019-02-06  2:42   ` David Gibson
2019-01-07 18:39 ` [Qemu-devel] [PATCH 04/13] spapr/xive: introduce a VM state change handler Cédric Le Goater
2019-02-06  2:49   ` David Gibson
2019-01-07 18:39 ` [Qemu-devel] [PATCH 05/13] spapr/xive: add migration support for KVM Cédric Le Goater
2019-02-07  3:41   ` David Gibson [this message]
2019-01-07 18:39 ` [Qemu-devel] [PATCH 06/13] spapr/xive: fix migration of the XiveTCTX under TCG Cédric Le Goater
2019-02-08  5:36   ` David Gibson
2019-02-08  7:12     ` Cédric Le Goater
2019-02-12  0:22       ` David Gibson
2019-02-12  6:58         ` Cédric Le Goater
2019-01-07 18:39 ` [Qemu-devel] [PATCH 07/13] ppc/xics: introduce a icp_kvm_connect() routine Cédric Le Goater
2019-01-07 18:39 ` [Qemu-devel] [PATCH 08/13] spapr/rtas: modify spapr_rtas_register() to remove RTAS handlers Cédric Le Goater
2019-01-29  5:09   ` Alexey Kardashevskiy
2019-01-29  7:20     ` Cédric Le Goater
2019-01-07 18:39 ` [Qemu-devel] [PATCH 09/13] sysbus: add a sysbus_mmio_unmap() helper Cédric Le Goater
2019-01-07 18:39 ` [Qemu-devel] [PATCH 10/13] spapr: introduce routines to delete the KVM IRQ device Cédric Le Goater
2019-02-12  0:58   ` David Gibson
2019-01-07 18:39 ` [Qemu-devel] [PATCH 11/13] spapr: check for the activation of " Cédric Le Goater
2019-02-12  1:01   ` David Gibson
2019-02-12  7:12     ` Cédric Le Goater
2019-02-13  0:17       ` David Gibson
2019-01-07 18:39 ` [Qemu-devel] [PATCH 12/13] spapr/xics: ignore the lower 4K in the IRQ number space Cédric Le Goater
2019-02-12  1:06   ` David Gibson
2019-02-12  7:05     ` Cédric Le Goater
2019-02-13  1:33       ` David Gibson
2019-02-13  8:03         ` Cédric Le Goater
2019-02-13 11:27           ` [Qemu-devel] [Qemu-ppc] " Greg Kurz
2019-02-13 12:11             ` Greg Kurz
2019-01-07 18:39 ` [Qemu-devel] [PATCH 13/13] spapr: add KVM support to the 'dual' machine Cédric Le Goater
2019-02-12  1:11   ` David Gibson
2019-02-12  7:18     ` Cédric Le Goater
2019-02-13  1:32       ` David Gibson
2019-02-13  8:22         ` Cédric Le Goater
2019-02-13 10:07           ` [Qemu-devel] [Qemu-ppc] " Greg Kurz
2019-02-14  3:35             ` David Gibson
2019-02-14  7:13               ` Cédric Le Goater
2019-02-14  3:29           ` [Qemu-devel] " David Gibson
2019-02-22 12:36         ` Cédric Le Goater

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190207034116.GB518@umbus.fritz.box \
    --to=david@gibson.dropbear.id.au \
    --cc=benh@kernel.crashing.org \
    --cc=clg@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.