linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv2] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM
@ 2015-02-03  5:44 David Gibson
  2015-02-04 15:24 ` Alexander Graf
  0 siblings, 1 reply; 3+ messages in thread
From: David Gibson @ 2015-02-03  5:44 UTC (permalink / raw)
  To: paulus, agraf; +Cc: aik, kvm-ppc, kvm, linux-kernel, David Gibson

On POWER, storage caching is usually configured via the MMU - attributes
such as cache-inhibited are stored in the TLB and the hashed page table.

This makes correctly performing cache inhibited IO accesses awkward when
the MMU is turned off (real mode).  Some CPU models provide special
registers to control the cache attributes of real mode load and stores but
this is not at all consistent.  This is a problem in particular for SLOF,
the firmware used on KVM guests, which runs entirely in real mode, but
which needs to do IO to load the kernel.

To simplify this qemu implements two special hypercalls, H_LOGICAL_CI_LOAD
and H_LOGICAL_CI_STORE which simulate a cache-inhibited load or store to
a logical address (aka guest physical address).  SLOF uses these for IO.

However, because these are implemented within qemu, not the host kernel,
these bypass any IO devices emulated within KVM itself.  The simplest way
to see this problem is to attempt to boot a KVM guest from a virtio-blk
device with iothread / dataplane enabled.  The iothread code relies on an
in kernel implementation of the virtio queue notification, which is not
triggered by the IO hcalls, and so the guest will stall in SLOF unable to
load the guest OS.

This patch addresses this by providing in-kernel implementations of the
2 hypercalls, which correctly scan the KVM IO bus.  Any access to an
address not handled by the KVM IO bus will cause a VM exit, hitting the
qemu implementation as before.

Note that a userspace change is also required, in order to enable these
new hcall implementations with KVM_CAP_PPC_ENABLE_HCALL.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 arch/powerpc/include/asm/kvm_book3s.h |  3 ++
 arch/powerpc/kvm/book3s.c             | 76 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c          | 12 ++++++
 arch/powerpc/kvm/book3s_pr_papr.c     | 28 +++++++++++++
 4 files changed, 119 insertions(+)

v2:
  - Removed some debugging printk()s that were accidentally left in
  - Fix endianness; like all PAPR hypercalls, these should always act
    big-endian, even if the guest is little-endian (in practice this
    makes no difference, since the only user is SLOF, which is always
    big-endian)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 942c7b1..578e550 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -292,6 +292,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
 	return !is_kvmppc_hv_enabled(vcpu->kvm);
 }
 
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3			0x113724FA
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 888bf46..7b51492 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -820,6 +820,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 #endif
 }
 
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+	unsigned long size = kvmppc_get_gpr(vcpu, 4);
+	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+	u64 buf;
+	int ret;
+
+	if (!is_power_of_2(size) || (size > sizeof(buf)))
+		return H_TOO_HARD;
+
+	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, size, &buf);
+	if (ret != 0)
+		return H_TOO_HARD;
+
+	switch (size) {
+	case 1:
+		kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+		break;
+
+	case 2:
+		kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(u16 *)&buf));
+		break;
+
+	case 4:
+		kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(u32 *)&buf));
+		break;
+
+	case 8:
+		kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(u64 *)&buf));
+		break;
+
+	default:
+		BUG();
+	}
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load); /* For use by the kvm-pr module */
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+	unsigned long size = kvmppc_get_gpr(vcpu, 4);
+	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+	unsigned long val = kvmppc_get_gpr(vcpu, 6);
+	u64 buf;
+	int ret;
+
+	switch (size) {
+	case 1:
+		*(u8 *)&buf = val;
+		break;
+
+	case 2:
+		*(u16 *)&buf = cpu_to_be16(val);
+		break;
+
+	case 4:
+		*(u32 *)&buf = cpu_to_be32(val);
+		break;
+
+	case 8:
+		*(u64 *)&buf = cpu_to_be64(val);
+		break;
+
+	default:
+		return H_TOO_HARD;
+	}
+
+	ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, size, &buf);
+	if (ret != 0)
+		return H_TOO_HARD;
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store); /* For use by the kvm-pr module */
+
 int kvmppc_core_check_processor_compat(void)
 {
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de4018a..f631b76 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -706,6 +706,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 		/* Send the error out to userspace via KVM_RUN */
 		return rc;
+	case H_LOGICAL_CI_LOAD:
+		ret = kvmppc_h_logical_ci_load(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_LOGICAL_CI_STORE:
+		ret = kvmppc_h_logical_ci_store(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
 	case H_SET_MODE:
 		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
 					kvmppc_get_gpr(vcpu, 5),
@@ -740,6 +750,8 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
 	case H_CONFER:
 	case H_REGISTER_VPA:
 	case H_SET_MODE:
+	case H_LOGICAL_CI_LOAD:
+	case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
 	case H_XIRR:
 	case H_CPPR:
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ce3c893..f2c75a1 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -258,6 +258,28 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+	long rc;
+
+	rc = kvmppc_h_logical_ci_load(vcpu);
+	if (rc == H_TOO_HARD)
+		return EMULATE_FAIL;
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+	long rc;
+
+	rc = kvmppc_h_logical_ci_store(vcpu);
+	if (rc == H_TOO_HARD)
+		return EMULATE_FAIL;
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
 static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 {
 	long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -290,6 +312,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_LOGICAL_CI_LOAD:
+		return kvmppc_h_pr_logical_ci_load(vcpu);
+	case H_LOGICAL_CI_STORE:
+		return kvmppc_h_pr_logical_ci_store(vcpu);
 	case H_XIRR:
 	case H_CPPR:
 	case H_EOI:
@@ -323,6 +349,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
 	case H_BULK_REMOVE:
 	case H_PUT_TCE:
 	case H_CEDE:
+	case H_LOGICAL_CI_LOAD:
+	case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
 	case H_XIRR:
 	case H_CPPR:
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCHv2] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM
  2015-02-03  5:44 [PATCHv2] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM David Gibson
@ 2015-02-04 15:24 ` Alexander Graf
  2015-02-05  0:48   ` David Gibson
  0 siblings, 1 reply; 3+ messages in thread
From: Alexander Graf @ 2015-02-04 15:24 UTC (permalink / raw)
  To: David Gibson, paulus; +Cc: aik, kvm-ppc, kvm, linux-kernel



On 03.02.15 06:44, David Gibson wrote:
> On POWER, storage caching is usually configured via the MMU - attributes
> such as cache-inhibited are stored in the TLB and the hashed page table.
> 
> This makes correctly performing cache inhibited IO accesses awkward when
> the MMU is turned off (real mode).  Some CPU models provide special
> registers to control the cache attributes of real mode load and stores but
> this is not at all consistent.  This is a problem in particular for SLOF,
> the firmware used on KVM guests, which runs entirely in real mode, but
> which needs to do IO to load the kernel.
> 
> To simplify this qemu implements two special hypercalls, H_LOGICAL_CI_LOAD
> and H_LOGICAL_CI_STORE which simulate a cache-inhibited load or store to
> a logical address (aka guest physical address).  SLOF uses these for IO.
> 
> However, because these are implemented within qemu, not the host kernel,
> these bypass any IO devices emulated within KVM itself.  The simplest way
> to see this problem is to attempt to boot a KVM guest from a virtio-blk
> device with iothread / dataplane enabled.  The iothread code relies on an
> in kernel implementation of the virtio queue notification, which is not
> triggered by the IO hcalls, and so the guest will stall in SLOF unable to
> load the guest OS.
> 
> This patch addresses this by providing in-kernel implementations of the
> 2 hypercalls, which correctly scan the KVM IO bus.  Any access to an
> address not handled by the KVM IO bus will cause a VM exit, hitting the
> qemu implementation as before.
> 
> Note that a userspace change is also required, in order to enable these
> new hcall implementations with KVM_CAP_PPC_ENABLE_HCALL.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  arch/powerpc/include/asm/kvm_book3s.h |  3 ++
>  arch/powerpc/kvm/book3s.c             | 76 +++++++++++++++++++++++++++++++++++
>  arch/powerpc/kvm/book3s_hv.c          | 12 ++++++
>  arch/powerpc/kvm/book3s_pr_papr.c     | 28 +++++++++++++
>  4 files changed, 119 insertions(+)
> 
> v2:
>   - Removed some debugging printk()s that were accidentally left in
>   - Fix endianness; like all PAPR hypercalls, these should always act
>     big-endian, even if the guest is little-endian (in practice this
>     makes no difference, since the only user is SLOF, which is always
>     big-endian)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 942c7b1..578e550 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -292,6 +292,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
>  	return !is_kvmppc_hv_enabled(vcpu->kvm);
>  }
>  
> +extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
> +extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
> +
>  /* Magic register values loaded into r3 and r4 before the 'sc' assembly
>   * instruction for the OSI hypercalls */
>  #define OSI_SC_MAGIC_R3			0x113724FA
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 888bf46..7b51492 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -820,6 +820,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
>  #endif
>  }
>  
> +int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long size = kvmppc_get_gpr(vcpu, 4);
> +	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
> +	u64 buf;
> +	int ret;
> +
> +	if (!is_power_of_2(size) || (size > sizeof(buf)))
> +		return H_TOO_HARD;
> +
> +	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, size, &buf);
> +	if (ret != 0)
> +		return H_TOO_HARD;
> +
> +	switch (size) {
> +	case 1:
> +		kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
> +		break;
> +
> +	case 2:
> +		kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(u16 *)&buf));
> +		break;
> +
> +	case 4:
> +		kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(u32 *)&buf));
> +		break;
> +
> +	case 8:
> +		kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(u64 *)&buf));

Shouldn't these casts be __be types?

> +		break;
> +
> +	default:
> +		BUG();
> +	}
> +
> +	return H_SUCCESS;
> +}
> +EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load); /* For use by the kvm-pr module */

No need for the comment.

> +
> +int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long size = kvmppc_get_gpr(vcpu, 4);
> +	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
> +	unsigned long val = kvmppc_get_gpr(vcpu, 6);
> +	u64 buf;
> +	int ret;
> +
> +	switch (size) {
> +	case 1:
> +		*(u8 *)&buf = val;
> +		break;
> +
> +	case 2:
> +		*(u16 *)&buf = cpu_to_be16(val);
> +		break;
> +
> +	case 4:
> +		*(u32 *)&buf = cpu_to_be32(val);
> +		break;
> +
> +	case 8:
> +		*(u64 *)&buf = cpu_to_be64(val);

Same thing about casts here.


Alex

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCHv2] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM
  2015-02-04 15:24 ` Alexander Graf
@ 2015-02-05  0:48   ` David Gibson
  0 siblings, 0 replies; 3+ messages in thread
From: David Gibson @ 2015-02-05  0:48 UTC (permalink / raw)
  To: Alexander Graf; +Cc: paulus, aik, kvm-ppc, kvm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 4851 bytes --]

On Wed, Feb 04, 2015 at 04:24:46PM +0100, Alexander Graf wrote:
> 
> 
> On 03.02.15 06:44, David Gibson wrote:
> > On POWER, storage caching is usually configured via the MMU - attributes
> > such as cache-inhibited are stored in the TLB and the hashed page table.
> > 
> > This makes correctly performing cache inhibited IO accesses awkward when
> > the MMU is turned off (real mode).  Some CPU models provide special
> > registers to control the cache attributes of real mode load and stores but
> > this is not at all consistent.  This is a problem in particular for SLOF,
> > the firmware used on KVM guests, which runs entirely in real mode, but
> > which needs to do IO to load the kernel.
> > 
> > To simplify this qemu implements two special hypercalls, H_LOGICAL_CI_LOAD
> > and H_LOGICAL_CI_STORE which simulate a cache-inhibited load or store to
> > a logical address (aka guest physical address).  SLOF uses these for IO.
> > 
> > However, because these are implemented within qemu, not the host kernel,
> > these bypass any IO devices emulated within KVM itself.  The simplest way
> > to see this problem is to attempt to boot a KVM guest from a virtio-blk
> > device with iothread / dataplane enabled.  The iothread code relies on an
> > in kernel implementation of the virtio queue notification, which is not
> > triggered by the IO hcalls, and so the guest will stall in SLOF unable to
> > load the guest OS.
> > 
> > This patch addresses this by providing in-kernel implementations of the
> > 2 hypercalls, which correctly scan the KVM IO bus.  Any access to an
> > address not handled by the KVM IO bus will cause a VM exit, hitting the
> > qemu implementation as before.
> > 
> > Note that a userspace change is also required, in order to enable these
> > new hcall implementations with KVM_CAP_PPC_ENABLE_HCALL.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  arch/powerpc/include/asm/kvm_book3s.h |  3 ++
> >  arch/powerpc/kvm/book3s.c             | 76 +++++++++++++++++++++++++++++++++++
> >  arch/powerpc/kvm/book3s_hv.c          | 12 ++++++
> >  arch/powerpc/kvm/book3s_pr_papr.c     | 28 +++++++++++++
> >  4 files changed, 119 insertions(+)
> > 
> > v2:
> >   - Removed some debugging printk()s that were accidentally left in
> >   - Fix endianness; like all PAPR hypercalls, these should always act
> >     big-endian, even if the guest is little-endian (in practice this
> >     makes no difference, since the only user is SLOF, which is always
> >     big-endian)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> > index 942c7b1..578e550 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s.h
> > @@ -292,6 +292,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
> >  	return !is_kvmppc_hv_enabled(vcpu->kvm);
> >  }
> >  
> > +extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
> > +extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
> > +
> >  /* Magic register values loaded into r3 and r4 before the 'sc' assembly
> >   * instruction for the OSI hypercalls */
> >  #define OSI_SC_MAGIC_R3			0x113724FA
> > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> > index 888bf46..7b51492 100644
> > --- a/arch/powerpc/kvm/book3s.c
> > +++ b/arch/powerpc/kvm/book3s.c
> > @@ -820,6 +820,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
> >  #endif
> >  }
> >  
> > +int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
> > +{
> > +	unsigned long size = kvmppc_get_gpr(vcpu, 4);
> > +	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
> > +	u64 buf;
> > +	int ret;
> > +
> > +	if (!is_power_of_2(size) || (size > sizeof(buf)))
> > +		return H_TOO_HARD;
> > +
> > +	ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, size, &buf);
> > +	if (ret != 0)
> > +		return H_TOO_HARD;
> > +
> > +	switch (size) {
> > +	case 1:
> > +		kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
> > +		break;
> > +
> > +	case 2:
> > +		kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(u16 *)&buf));
> > +		break;
> > +
> > +	case 4:
> > +		kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(u32 *)&buf));
> > +		break;
> > +
> > +	case 8:
> > +		kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(u64 *)&buf));
> 
> Shouldn't these casts be __be types?

Ah, yes they should.

> > +		break;
> > +
> > +	default:
> > +		BUG();
> > +	}
> > +
> > +	return H_SUCCESS;
> > +}
> > +EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load); /* For use by the kvm-pr module */
> 
> No need for the comment.

Ok.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-02-05  0:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-03  5:44 [PATCHv2] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM David Gibson
2015-02-04 15:24 ` Alexander Graf
2015-02-05  0:48   ` David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).