All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] kvm: powerpc: Do not ignore "E" attribute in mas2
@ 2013-07-18  6:16 ` Bharat Bhushan
  0 siblings, 0 replies; 82+ messages in thread
From: Bharat Bhushan @ 2013-07-18  6:04 UTC (permalink / raw)
  To: kvm-ppc, kvm, agraf, scottwood; +Cc: Bharat Bhushan, Bharat Bhushan

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/kvm/e500.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index c2e5e98..277cb18 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
 #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
 #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
 #define MAS2_ATTRIB_MASK \
-	  (MAS2_X0 | MAS2_X1)
+	  (MAS2_X0 | MAS2_X1 | MAS2_E)
 #define MAS3_ATTRIB_MASK \
 	  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
 	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
-- 
1.7.0.4



^ permalink raw reply related	[flat|nested] 82+ messages in thread

* [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  6:16 ` Bharat Bhushan
@ 2013-07-18  6:16   ` Bharat Bhushan
  -1 siblings, 0 replies; 82+ messages in thread
From: Bharat Bhushan @ 2013-07-18  6:04 UTC (permalink / raw)
  To: kvm-ppc, kvm, agraf, scottwood; +Cc: Bharat Bhushan, Bharat Bhushan

If there is a struct page for the requested mapping then it's
normal DDR and the mapping sets "M" bit (coherent, cacheable)
else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)

This helps setting proper TLB mapping for direct assigned device

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 1c6a9d7..089c227 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
 	return mas3;
 }
 
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
 {
+	u32 mas2_attr;
+
+	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
+
+	if (!pfn_valid(pfn)) {
+		mas2_attr |= MAS2_I | MAS2_G;
+	} else {
 #ifdef CONFIG_SMP
-	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
-	return mas2 & MAS2_ATTRIB_MASK;
+		mas2_attr |= MAS2_M;
 #endif
+	}
+	return mas2_attr;
 }
 
 /*
@@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
 	/* Force IPROT=0 for all guest mappings. */
 	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN) |
-		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
 	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
 			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
 
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 82+ messages in thread

* [PATCH 1/2] kvm: powerpc: Do not ignore "E" attribute in mas2
@ 2013-07-18  6:16 ` Bharat Bhushan
  0 siblings, 0 replies; 82+ messages in thread
From: Bharat Bhushan @ 2013-07-18  6:16 UTC (permalink / raw)
  To: kvm-ppc, kvm, agraf, scottwood; +Cc: Bharat Bhushan, Bharat Bhushan

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/kvm/e500.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index c2e5e98..277cb18 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
 #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
 #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
 #define MAS2_ATTRIB_MASK \
-	  (MAS2_X0 | MAS2_X1)
+	  (MAS2_X0 | MAS2_X1 | MAS2_E)
 #define MAS3_ATTRIB_MASK \
 	  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
 	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
-- 
1.7.0.4



^ permalink raw reply related	[flat|nested] 82+ messages in thread

* [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  6:16   ` Bharat Bhushan
  0 siblings, 0 replies; 82+ messages in thread
From: Bharat Bhushan @ 2013-07-18  6:16 UTC (permalink / raw)
  To: kvm-ppc, kvm, agraf, scottwood; +Cc: Bharat Bhushan, Bharat Bhushan

If there is a struct page for the requested mapping then it's
normal DDR and the mapping sets "M" bit (coherent, cacheable)
else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)

This helps setting proper TLB mapping for direct assigned device

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 1c6a9d7..089c227 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
 	return mas3;
 }
 
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
 {
+	u32 mas2_attr;
+
+	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
+
+	if (!pfn_valid(pfn)) {
+		mas2_attr |= MAS2_I | MAS2_G;
+	} else {
 #ifdef CONFIG_SMP
-	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
-	return mas2 & MAS2_ATTRIB_MASK;
+		mas2_attr |= MAS2_M;
 #endif
+	}
+	return mas2_attr;
 }
 
 /*
@@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
 	/* Force IPROT=0 for all guest mappings. */
 	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN) |
-		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
 	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
 			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
 
-- 
1.7.0.4



^ permalink raw reply related	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  6:16   ` Bharat Bhushan
@ 2013-07-18  6:26     ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  6:26 UTC (permalink / raw)
  To: Bharat Bhushan; +Cc: kvm-ppc, kvm, agraf, scottwood, Bharat Bhushan

On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {

Why not directly use kvm_is_mmio_pfn()?

Tiejun

> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}
> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  6:26     ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  6:26 UTC (permalink / raw)
  To: Bharat Bhushan; +Cc: kvm-ppc, kvm, agraf, scottwood, Bharat Bhushan

On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {

Why not directly use kvm_is_mmio_pfn()?

Tiejun

> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}
> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  6:26     ` "“tiejun.chen”"
@ 2013-07-18  7:12       ` Bhushan Bharat-R65777
  -1 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  7:12 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421



> -----Original Message-----
> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> Sent: Thursday, July 18, 2013 11:56 AM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
> B07421; Bhushan Bharat-R65777
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> > If there is a struct page for the requested mapping then it's normal
> > DDR and the mapping sets "M" bit (coherent, cacheable) else this is
> > treated as I/O and we set  "I + G"  (cache inhibited, guarded)
> >
> > This helps setting proper TLB mapping for direct assigned device
> >
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > ---
> >   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >   1 files changed, 12 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> > b/arch/powerpc/kvm/e500_mmu_host.c
> > index 1c6a9d7..089c227 100644
> > --- a/arch/powerpc/kvm/e500_mmu_host.c
> > +++ b/arch/powerpc/kvm/e500_mmu_host.c
> > @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int
> usermode)
> >   	return mas3;
> >   }
> >
> > -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> > +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> >   {
> > +	u32 mas2_attr;
> > +
> > +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> > +
> > +	if (!pfn_valid(pfn)) {
> 
> Why not directly use kvm_is_mmio_pfn()?

What I understand from this function (someone can correct me) is that it returns "false" when the page is managed by kernel and is not marked as RESERVED (for some reason). For us it does not matter whether the page is reserved or not, if it is kernel visible page then it is DDR.

-Bharat

> 
> Tiejun
> 
> > +		mas2_attr |= MAS2_I | MAS2_G;
> > +	} else {
> >   #ifdef CONFIG_SMP
> > -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> > -#else
> > -	return mas2 & MAS2_ATTRIB_MASK;
> > +		mas2_attr |= MAS2_M;
> >   #endif
> > +	}
> > +	return mas2_attr;
> >   }
> >
> >   /*
> > @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
> >   	/* Force IPROT=0 for all guest mappings. */
> >   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
> >   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> > -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> > +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
> >   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
> >   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
> >
> >
> 


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  7:12       ` Bhushan Bharat-R65777
  0 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  7:12 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogIuKAnHRpZWp1bi5jaGVu
4oCdIiBbbWFpbHRvOnRpZWp1bi5jaGVuQHdpbmRyaXZlci5jb21dDQo+IFNlbnQ6IFRodXJzZGF5
LCBKdWx5IDE4LCAyMDEzIDExOjU2IEFNDQo+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4g
Q2M6IGt2bS1wcGNAdmdlci5rZXJuZWwub3JnOyBrdm1Admdlci5rZXJuZWwub3JnOyBhZ3JhZkBz
dXNlLmRlOyBXb29kIFNjb3R0LQ0KPiBCMDc0MjE7IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiBT
dWJqZWN0OiBSZTogW1BBVENIIDIvMl0ga3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJlbmN5
IG9ubHkgZm9yIGtlcm5lbA0KPiBtYW5hZ2VkIHBhZ2VzDQo+IA0KPiBPbiAwNy8xOC8yMDEzIDAy
OjA0IFBNLCBCaGFyYXQgQmh1c2hhbiB3cm90ZToNCj4gPiBJZiB0aGVyZSBpcyBhIHN0cnVjdCBw
YWdlIGZvciB0aGUgcmVxdWVzdGVkIG1hcHBpbmcgdGhlbiBpdCdzIG5vcm1hbA0KPiA+IEREUiBh
bmQgdGhlIG1hcHBpbmcgc2V0cyAiTSIgYml0IChjb2hlcmVudCwgY2FjaGVhYmxlKSBlbHNlIHRo
aXMgaXMNCj4gPiB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUgaW5o
aWJpdGVkLCBndWFyZGVkKQ0KPiA+DQo+ID4gVGhpcyBoZWxwcyBzZXR0aW5nIHByb3BlciBUTEIg
bWFwcGluZyBmb3IgZGlyZWN0IGFzc2lnbmVkIGRldmljZQ0KPiA+DQo+ID4gU2lnbmVkLW9mZi1i
eTogQmhhcmF0IEJodXNoYW4gPGJoYXJhdC5iaHVzaGFuQGZyZWVzY2FsZS5jb20+DQo+ID4gLS0t
DQo+ID4gICBhcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYyB8ICAgMTcgKysrKysrKysr
KysrLS0tLS0NCj4gPiAgIDEgZmlsZXMgY2hhbmdlZCwgMTIgaW5zZXJ0aW9ucygrKSwgNSBkZWxl
dGlvbnMoLSkNCj4gPg0KPiA+IGRpZmYgLS1naXQgYS9hcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11
X2hvc3QuYw0KPiA+IGIvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPiBpbmRl
eCAxYzZhOWQ3Li4wODljMjI3IDEwMDY0NA0KPiA+IC0tLSBhL2FyY2gvcG93ZXJwYy9rdm0vZTUw
MF9tbXVfaG9zdC5jDQo+ID4gKysrIGIvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMN
Cj4gPiBAQCAtNjQsMTMgKzY0LDIwIEBAIHN0YXRpYyBpbmxpbmUgdTMyIGU1MDBfc2hhZG93X21h
czNfYXR0cmliKHUzMiBtYXMzLCBpbnQNCj4gdXNlcm1vZGUpDQo+ID4gICAJcmV0dXJuIG1hczM7
DQo+ID4gICB9DQo+ID4NCj4gPiAtc3RhdGljIGlubGluZSB1MzIgZTUwMF9zaGFkb3dfbWFzMl9h
dHRyaWIodTMyIG1hczIsIGludCB1c2VybW9kZSkNCj4gPiArc3RhdGljIGlubGluZSB1MzIgZTUw
MF9zaGFkb3dfbWFzMl9hdHRyaWIodTMyIG1hczIsIHBmbl90IHBmbikNCj4gPiAgIHsNCj4gPiAr
CXUzMiBtYXMyX2F0dHI7DQo+ID4gKw0KPiA+ICsJbWFzMl9hdHRyID0gbWFzMiAmIE1BUzJfQVRU
UklCX01BU0s7DQo+ID4gKw0KPiA+ICsJaWYgKCFwZm5fdmFsaWQocGZuKSkgew0KPiANCj4gV2h5
IG5vdCBkaXJlY3RseSB1c2Uga3ZtX2lzX21taW9fcGZuKCk/DQoNCldoYXQgSSB1bmRlcnN0YW5k
IGZyb20gdGhpcyBmdW5jdGlvbiAoc29tZW9uZSBjYW4gY29ycmVjdCBtZSkgaXMgdGhhdCBpdCBy
ZXR1cm5zICJmYWxzZSIgd2hlbiB0aGUgcGFnZSBpcyBtYW5hZ2VkIGJ5IGtlcm5lbCBhbmQgaXMg
bm90IG1hcmtlZCBhcyBSRVNFUlZFRCAoZm9yIHNvbWUgcmVhc29uKS4gRm9yIHVzIGl0IGRvZXMg
bm90IG1hdHRlciB3aGV0aGVyIHRoZSBwYWdlIGlzIHJlc2VydmVkIG9yIG5vdCwgaWYgaXQgaXMg
a2VybmVsIHZpc2libGUgcGFnZSB0aGVuIGl0IGlzIEREUi4NCg0KLUJoYXJhdA0KDQo+IA0KPiBU
aWVqdW4NCj4gDQo+ID4gKwkJbWFzMl9hdHRyIHw9IE1BUzJfSSB8IE1BUzJfRzsNCj4gPiArCX0g
ZWxzZSB7DQo+ID4gICAjaWZkZWYgQ09ORklHX1NNUA0KPiA+IC0JcmV0dXJuIChtYXMyICYgTUFT
Ml9BVFRSSUJfTUFTSykgfCBNQVMyX007DQo+ID4gLSNlbHNlDQo+ID4gLQlyZXR1cm4gbWFzMiAm
IE1BUzJfQVRUUklCX01BU0s7DQo+ID4gKwkJbWFzMl9hdHRyIHw9IE1BUzJfTTsNCj4gPiAgICNl
bmRpZg0KPiA+ICsJfQ0KPiA+ICsJcmV0dXJuIG1hczJfYXR0cjsNCj4gPiAgIH0NCj4gPg0KPiA+
ICAgLyoNCj4gPiBAQCAtMzEzLDcgKzMyMCw3IEBAIHN0YXRpYyB2b2lkIGt2bXBwY19lNTAwX3Nl
dHVwX3N0bGJlKA0KPiA+ICAgCS8qIEZvcmNlIElQUk9UPTAgZm9yIGFsbCBndWVzdCBtYXBwaW5n
cy4gKi8NCj4gPiAgIAlzdGxiZS0+bWFzMSA9IE1BUzFfVFNJWkUodHNpemUpIHwgZ2V0X3RsYl9z
dHMoZ3RsYmUpIHwgTUFTMV9WQUxJRDsNCj4gPiAgIAlzdGxiZS0+bWFzMiA9IChndmFkZHIgJiBN
QVMyX0VQTikgfA0KPiA+IC0JCSAgICAgIGU1MDBfc2hhZG93X21hczJfYXR0cmliKGd0bGJlLT5t
YXMyLCBwcik7DQo+ID4gKwkJICAgICAgZTUwMF9zaGFkb3dfbWFzMl9hdHRyaWIoZ3RsYmUtPm1h
czIsIHBmbik7DQo+ID4gICAJc3RsYmUtPm1hczdfMyA9ICgodTY0KXBmbiA8PCBQQUdFX1NISUZU
KSB8DQo+ID4gICAJCQllNTAwX3NoYWRvd19tYXMzX2F0dHJpYihndGxiZS0+bWFzN18zLCBwcik7
DQo+ID4NCj4gPg0KPiANCg0K


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  7:12       ` Bhushan Bharat-R65777
@ 2013-07-18  7:31         ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  7:31 UTC (permalink / raw)
  To: Bhushan Bharat-R65777; +Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>> Sent: Thursday, July 18, 2013 11:56 AM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421; Bhushan Bharat-R65777
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>> If there is a struct page for the requested mapping then it's normal
>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>
>>> This helps setting proper TLB mapping for direct assigned device
>>>
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>> index 1c6a9d7..089c227 100644
>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int
>> usermode)
>>>    	return mas3;
>>>    }
>>>
>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>    {
>>> +	u32 mas2_attr;
>>> +
>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>> +
>>> +	if (!pfn_valid(pfn)) {
>>
>> Why not directly use kvm_is_mmio_pfn()?
>
> What I understand from this function (someone can correct me) is that it returns "false" when the page is managed by kernel and is not marked as RESERVED (for some reason). For us it does not matter whether the page is reserved or not, if it is kernel visible page then it is DDR.
>

I think you are setting I|G by addressing all mmio pages, right? If so,

     KVM: direct mmio pfn check

     Userspace may specify memory slots that are backed by mmio pages rather than
     normal RAM.  In some cases it is not enough to identify these mmio pages
     by pfn_valid().  This patch adds checking the PageReserved as well.

Tiejun

> -Bharat
>
>>
>> Tiejun
>>
>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>> +	} else {
>>>    #ifdef CONFIG_SMP
>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>> -#else
>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>> +		mas2_attr |= MAS2_M;
>>>    #endif
>>> +	}
>>> +	return mas2_attr;
>>>    }
>>>
>>>    /*
>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>    	/* Force IPROT=0 for all guest mappings. */
>>>    	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>    	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>    	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>    			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>
>>>
>>
>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  7:31         ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  7:31 UTC (permalink / raw)
  To: Bhushan Bharat-R65777; +Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>> Sent: Thursday, July 18, 2013 11:56 AM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421; Bhushan Bharat-R65777
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>> If there is a struct page for the requested mapping then it's normal
>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>
>>> This helps setting proper TLB mapping for direct assigned device
>>>
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>> index 1c6a9d7..089c227 100644
>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int
>> usermode)
>>>    	return mas3;
>>>    }
>>>
>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>    {
>>> +	u32 mas2_attr;
>>> +
>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>> +
>>> +	if (!pfn_valid(pfn)) {
>>
>> Why not directly use kvm_is_mmio_pfn()?
>
> What I understand from this function (someone can correct me) is that it returns "false" when the page is managed by kernel and is not marked as RESERVED (for some reason). For us it does not matter whether the page is reserved or not, if it is kernel visible page then it is DDR.
>

I think you are setting I|G by addressing all mmio pages, right? If so,

     KVM: direct mmio pfn check

     Userspace may specify memory slots that are backed by mmio pages rather than
     normal RAM.  In some cases it is not enough to identify these mmio pages
     by pfn_valid().  This patch adds checking the PageReserved as well.

Tiejun

> -Bharat
>
>>
>> Tiejun
>>
>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>> +	} else {
>>>    #ifdef CONFIG_SMP
>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>> -#else
>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>> +		mas2_attr |= MAS2_M;
>>>    #endif
>>> +	}
>>> +	return mas2_attr;
>>>    }
>>>
>>>    /*
>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>    	/* Force IPROT=0 for all guest mappings. */
>>>    	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>    	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>    	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>    			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>
>>>
>>
>


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  7:31         ` "“tiejun.chen”"
@ 2013-07-18  8:08           ` Bhushan Bharat-R65777
  -1 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  8:08 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421



> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of "“tiejun.chen”"
> Sent: Thursday, July 18, 2013 1:01 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
> B07421
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> >
> >
> >> -----Original Message-----
> >> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >> Sent: Thursday, July 18, 2013 11:56 AM
> >> To: Bhushan Bharat-R65777
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
> >> Scott- B07421; Bhushan Bharat-R65777
> >> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >> kernel managed pages
> >>
> >> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> >>> If there is a struct page for the requested mapping then it's normal
> >>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
> >>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
> >>>
> >>> This helps setting proper TLB mapping for direct assigned device
> >>>
> >>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>> ---
> >>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >>>    1 files changed, 12 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> >>> b/arch/powerpc/kvm/e500_mmu_host.c
> >>> index 1c6a9d7..089c227 100644
> >>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> >>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> >>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32
> >>> mas3, int
> >> usermode)
> >>>    	return mas3;
> >>>    }
> >>>
> >>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> >>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> >>>    {
> >>> +	u32 mas2_attr;
> >>> +
> >>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> >>> +
> >>> +	if (!pfn_valid(pfn)) {
> >>
> >> Why not directly use kvm_is_mmio_pfn()?
> >
> > What I understand from this function (someone can correct me) is that it
> returns "false" when the page is managed by kernel and is not marked as RESERVED
> (for some reason). For us it does not matter whether the page is reserved or
> not, if it is kernel visible page then it is DDR.
> >
> 
> I think you are setting I|G by addressing all mmio pages, right? If so,
> 
>      KVM: direct mmio pfn check
> 
>      Userspace may specify memory slots that are backed by mmio pages rather
> than
>      normal RAM.  In some cases it is not enough to identify these mmio pages
>      by pfn_valid().  This patch adds checking the PageReserved as well.

Do you know what are those "some cases" and how checking PageReserved helps in those cases?

-Bharat

> 
> Tiejun
> 
> > -Bharat
> >
> >>
> >> Tiejun
> >>
> >>> +		mas2_attr |= MAS2_I | MAS2_G;
> >>> +	} else {
> >>>    #ifdef CONFIG_SMP
> >>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> >>> -#else
> >>> -	return mas2 & MAS2_ATTRIB_MASK;
> >>> +		mas2_attr |= MAS2_M;
> >>>    #endif
> >>> +	}
> >>> +	return mas2_attr;
> >>>    }
> >>>
> >>>    /*
> >>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
> >>>    	/* Force IPROT=0 for all guest mappings. */
> >>>    	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
> >>>    	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> >>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> >>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
> >>>    	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
> >>>    			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
> >>>
> >>>
> >>
> >
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  8:08           ` Bhushan Bharat-R65777
  0 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  8:08 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbToga3ZtLXBwYy1vd25lckB2
Z2VyLmtlcm5lbC5vcmcgW21haWx0bzprdm0tcHBjLW93bmVyQHZnZXIua2VybmVsLm9yZ10gT24N
Cj4gQmVoYWxmIE9mICLigJx0aWVqdW4uY2hlbuKAnSINCj4gU2VudDogVGh1cnNkYXksIEp1bHkg
MTgsIDIwMTMgMTowMSBQTQ0KPiBUbzogQmh1c2hhbiBCaGFyYXQtUjY1Nzc3DQo+IENjOiBrdm0t
cHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsg
V29vZCBTY290dC0NCj4gQjA3NDIxDQo+IFN1YmplY3Q6IFJlOiBbUEFUQ0ggMi8yXSBrdm06IHBv
d2VycGM6IHNldCBjYWNoZSBjb2hlcmVuY3kgb25seSBmb3Iga2VybmVsDQo+IG1hbmFnZWQgcGFn
ZXMNCj4gDQo+IE9uIDA3LzE4LzIwMTMgMDM6MTIgUE0sIEJodXNoYW4gQmhhcmF0LVI2NTc3NyB3
cm90ZToNCj4gPg0KPiA+DQo+ID4+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+ID4+IEZy
b206ICLigJx0aWVqdW4uY2hlbuKAnSIgW21haWx0bzp0aWVqdW4uY2hlbkB3aW5kcml2ZXIuY29t
XQ0KPiA+PiBTZW50OiBUaHVyc2RheSwgSnVseSAxOCwgMjAxMyAxMTo1NiBBTQ0KPiA+PiBUbzog
Qmh1c2hhbiBCaGFyYXQtUjY1Nzc3DQo+ID4+IENjOiBrdm0tcHBjQHZnZXIua2VybmVsLm9yZzsg
a3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsgV29vZA0KPiA+PiBTY290dC0gQjA3
NDIxOyBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJd
IGt2bTogcG93ZXJwYzogc2V0IGNhY2hlIGNvaGVyZW5jeSBvbmx5IGZvcg0KPiA+PiBrZXJuZWwg
bWFuYWdlZCBwYWdlcw0KPiA+Pg0KPiA+PiBPbiAwNy8xOC8yMDEzIDAyOjA0IFBNLCBCaGFyYXQg
Qmh1c2hhbiB3cm90ZToNCj4gPj4+IElmIHRoZXJlIGlzIGEgc3RydWN0IHBhZ2UgZm9yIHRoZSBy
ZXF1ZXN0ZWQgbWFwcGluZyB0aGVuIGl0J3Mgbm9ybWFsDQo+ID4+PiBERFIgYW5kIHRoZSBtYXBw
aW5nIHNldHMgIk0iIGJpdCAoY29oZXJlbnQsIGNhY2hlYWJsZSkgZWxzZSB0aGlzIGlzDQo+ID4+
PiB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUgaW5oaWJpdGVkLCBn
dWFyZGVkKQ0KPiA+Pj4NCj4gPj4+IFRoaXMgaGVscHMgc2V0dGluZyBwcm9wZXIgVExCIG1hcHBp
bmcgZm9yIGRpcmVjdCBhc3NpZ25lZCBkZXZpY2UNCj4gPj4+DQo+ID4+PiBTaWduZWQtb2ZmLWJ5
OiBCaGFyYXQgQmh1c2hhbiA8YmhhcmF0LmJodXNoYW5AZnJlZXNjYWxlLmNvbT4NCj4gPj4+IC0t
LQ0KPiA+Pj4gICAgYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMgfCAgIDE3ICsrKysr
KysrKysrKy0tLS0tDQo+ID4+PiAgICAxIGZpbGVzIGNoYW5nZWQsIDEyIGluc2VydGlvbnMoKyks
IDUgZGVsZXRpb25zKC0pDQo+ID4+Pg0KPiA+Pj4gZGlmZiAtLWdpdCBhL2FyY2gvcG93ZXJwYy9r
dm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4+PiBiL2FyY2gvcG93ZXJwYy9rdm0vZTUwMF9tbXVfaG9z
dC5jDQo+ID4+PiBpbmRleCAxYzZhOWQ3Li4wODljMjI3IDEwMDY0NA0KPiA+Pj4gLS0tIGEvYXJj
aC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPj4+ICsrKyBiL2FyY2gvcG93ZXJwYy9r
dm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4+PiBAQCAtNjQsMTMgKzY0LDIwIEBAIHN0YXRpYyBpbmxp
bmUgdTMyIGU1MDBfc2hhZG93X21hczNfYXR0cmliKHUzMg0KPiA+Pj4gbWFzMywgaW50DQo+ID4+
IHVzZXJtb2RlKQ0KPiA+Pj4gICAgCXJldHVybiBtYXMzOw0KPiA+Pj4gICAgfQ0KPiA+Pj4NCj4g
Pj4+IC1zdGF0aWMgaW5saW5lIHUzMiBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYih1MzIgbWFzMiwg
aW50IHVzZXJtb2RlKQ0KPiA+Pj4gK3N0YXRpYyBpbmxpbmUgdTMyIGU1MDBfc2hhZG93X21hczJf
YXR0cmliKHUzMiBtYXMyLCBwZm5fdCBwZm4pDQo+ID4+PiAgICB7DQo+ID4+PiArCXUzMiBtYXMy
X2F0dHI7DQo+ID4+PiArDQo+ID4+PiArCW1hczJfYXR0ciA9IG1hczIgJiBNQVMyX0FUVFJJQl9N
QVNLOw0KPiA+Pj4gKw0KPiA+Pj4gKwlpZiAoIXBmbl92YWxpZChwZm4pKSB7DQo+ID4+DQo+ID4+
IFdoeSBub3QgZGlyZWN0bHkgdXNlIGt2bV9pc19tbWlvX3BmbigpPw0KPiA+DQo+ID4gV2hhdCBJ
IHVuZGVyc3RhbmQgZnJvbSB0aGlzIGZ1bmN0aW9uIChzb21lb25lIGNhbiBjb3JyZWN0IG1lKSBp
cyB0aGF0IGl0DQo+IHJldHVybnMgImZhbHNlIiB3aGVuIHRoZSBwYWdlIGlzIG1hbmFnZWQgYnkg
a2VybmVsIGFuZCBpcyBub3QgbWFya2VkIGFzIFJFU0VSVkVEDQo+IChmb3Igc29tZSByZWFzb24p
LiBGb3IgdXMgaXQgZG9lcyBub3QgbWF0dGVyIHdoZXRoZXIgdGhlIHBhZ2UgaXMgcmVzZXJ2ZWQg
b3INCj4gbm90LCBpZiBpdCBpcyBrZXJuZWwgdmlzaWJsZSBwYWdlIHRoZW4gaXQgaXMgRERSLg0K
PiA+DQo+IA0KPiBJIHRoaW5rIHlvdSBhcmUgc2V0dGluZyBJfEcgYnkgYWRkcmVzc2luZyBhbGwg
bW1pbyBwYWdlcywgcmlnaHQ/IElmIHNvLA0KPiANCj4gICAgICBLVk06IGRpcmVjdCBtbWlvIHBm
biBjaGVjaw0KPiANCj4gICAgICBVc2Vyc3BhY2UgbWF5IHNwZWNpZnkgbWVtb3J5IHNsb3RzIHRo
YXQgYXJlIGJhY2tlZCBieSBtbWlvIHBhZ2VzIHJhdGhlcg0KPiB0aGFuDQo+ICAgICAgbm9ybWFs
IFJBTS4gIEluIHNvbWUgY2FzZXMgaXQgaXMgbm90IGVub3VnaCB0byBpZGVudGlmeSB0aGVzZSBt
bWlvIHBhZ2VzDQo+ICAgICAgYnkgcGZuX3ZhbGlkKCkuICBUaGlzIHBhdGNoIGFkZHMgY2hlY2tp
bmcgdGhlIFBhZ2VSZXNlcnZlZCBhcyB3ZWxsLg0KDQpEbyB5b3Uga25vdyB3aGF0IGFyZSB0aG9z
ZSAic29tZSBjYXNlcyIgYW5kIGhvdyBjaGVja2luZyBQYWdlUmVzZXJ2ZWQgaGVscHMgaW4gdGhv
c2UgY2FzZXM/DQoNCi1CaGFyYXQNCg0KPiANCj4gVGllanVuDQo+IA0KPiA+IC1CaGFyYXQNCj4g
Pg0KPiA+Pg0KPiA+PiBUaWVqdW4NCj4gPj4NCj4gPj4+ICsJCW1hczJfYXR0ciB8PSBNQVMyX0kg
fCBNQVMyX0c7DQo+ID4+PiArCX0gZWxzZSB7DQo+ID4+PiAgICAjaWZkZWYgQ09ORklHX1NNUA0K
PiA+Pj4gLQlyZXR1cm4gKG1hczIgJiBNQVMyX0FUVFJJQl9NQVNLKSB8IE1BUzJfTTsNCj4gPj4+
IC0jZWxzZQ0KPiA+Pj4gLQlyZXR1cm4gbWFzMiAmIE1BUzJfQVRUUklCX01BU0s7DQo+ID4+PiAr
CQltYXMyX2F0dHIgfD0gTUFTMl9NOw0KPiA+Pj4gICAgI2VuZGlmDQo+ID4+PiArCX0NCj4gPj4+
ICsJcmV0dXJuIG1hczJfYXR0cjsNCj4gPj4+ICAgIH0NCj4gPj4+DQo+ID4+PiAgICAvKg0KPiA+
Pj4gQEAgLTMxMyw3ICszMjAsNyBAQCBzdGF0aWMgdm9pZCBrdm1wcGNfZTUwMF9zZXR1cF9zdGxi
ZSgNCj4gPj4+ICAgIAkvKiBGb3JjZSBJUFJPVD0wIGZvciBhbGwgZ3Vlc3QgbWFwcGluZ3MuICov
DQo+ID4+PiAgICAJc3RsYmUtPm1hczEgPSBNQVMxX1RTSVpFKHRzaXplKSB8IGdldF90bGJfc3Rz
KGd0bGJlKSB8IE1BUzFfVkFMSUQ7DQo+ID4+PiAgICAJc3RsYmUtPm1hczIgPSAoZ3ZhZGRyICYg
TUFTMl9FUE4pIHwNCj4gPj4+IC0JCSAgICAgIGU1MDBfc2hhZG93X21hczJfYXR0cmliKGd0bGJl
LT5tYXMyLCBwcik7DQo+ID4+PiArCQkgICAgICBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYihndGxi
ZS0+bWFzMiwgcGZuKTsNCj4gPj4+ICAgIAlzdGxiZS0+bWFzN18zID0gKCh1NjQpcGZuIDw8IFBB
R0VfU0hJRlQpIHwNCj4gPj4+ICAgIAkJCWU1MDBfc2hhZG93X21hczNfYXR0cmliKGd0bGJlLT5t
YXM3XzMsIHByKTsNCj4gPj4+DQo+ID4+Pg0KPiA+Pg0KPiA+DQo+IA0KPiAtLQ0KPiBUbyB1bnN1
YnNjcmliZSBmcm9tIHRoaXMgbGlzdDogc2VuZCB0aGUgbGluZSAidW5zdWJzY3JpYmUga3ZtLXBw
YyIgaW4gdGhlIGJvZHkNCj4gb2YgYSBtZXNzYWdlIHRvIG1ham9yZG9tb0B2Z2VyLmtlcm5lbC5v
cmcgTW9yZSBtYWpvcmRvbW8gaW5mbyBhdA0KPiBodHRwOi8vdmdlci5rZXJuZWwub3JnL21ham9y
ZG9tby1pbmZvLmh0bWwNCg0K


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  8:08           ` Bhushan Bharat-R65777
@ 2013-07-18  8:21             ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  8:21 UTC (permalink / raw)
  To: Bhushan Bharat-R65777; +Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of "“tiejun.chen”"
>> Sent: Thursday, July 18, 2013 1:01 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott- B07421; Bhushan Bharat-R65777
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>> If there is a struct page for the requested mapping then it's normal
>>>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>>>
>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> index 1c6a9d7..089c227 100644
>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32
>>>>> mas3, int
>>>> usermode)
>>>>>     	return mas3;
>>>>>     }
>>>>>
>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>     {
>>>>> +	u32 mas2_attr;
>>>>> +
>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>> +
>>>>> +	if (!pfn_valid(pfn)) {
>>>>
>>>> Why not directly use kvm_is_mmio_pfn()?
>>>
>>> What I understand from this function (someone can correct me) is that it
>> returns "false" when the page is managed by kernel and is not marked as RESERVED
>> (for some reason). For us it does not matter whether the page is reserved or
>> not, if it is kernel visible page then it is DDR.
>>>
>>
>> I think you are setting I|G by addressing all mmio pages, right? If so,
>>
>>       KVM: direct mmio pfn check
>>
>>       Userspace may specify memory slots that are backed by mmio pages rather
>> than
>>       normal RAM.  In some cases it is not enough to identify these mmio pages
>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>
> Do you know what are those "some cases" and how checking PageReserved helps in those cases?

No, myself didn't see these actual cases in qemu, too. But this should be 
chronically persistent as I understand ;-)

Tiejun

>
> -Bharat
>
>>
>> Tiejun
>>
>>> -Bharat
>>>
>>>>
>>>> Tiejun
>>>>
>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>> +	} else {
>>>>>     #ifdef CONFIG_SMP
>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>> -#else
>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>> +		mas2_attr |= MAS2_M;
>>>>>     #endif
>>>>> +	}
>>>>> +	return mas2_attr;
>>>>>     }
>>>>>
>>>>>     /*
>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>     	/* Force IPROT=0 for all guest mappings. */
>>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>
>>>>>
>>>>
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
>> of a message to majordomo@vger.kernel.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  8:21             ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  8:21 UTC (permalink / raw)
  To: Bhushan Bharat-R65777; +Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of "“tiejun.chen”"
>> Sent: Thursday, July 18, 2013 1:01 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott- B07421; Bhushan Bharat-R65777
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>> If there is a struct page for the requested mapping then it's normal
>>>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>>>
>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> index 1c6a9d7..089c227 100644
>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32
>>>>> mas3, int
>>>> usermode)
>>>>>     	return mas3;
>>>>>     }
>>>>>
>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>     {
>>>>> +	u32 mas2_attr;
>>>>> +
>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>> +
>>>>> +	if (!pfn_valid(pfn)) {
>>>>
>>>> Why not directly use kvm_is_mmio_pfn()?
>>>
>>> What I understand from this function (someone can correct me) is that it
>> returns "false" when the page is managed by kernel and is not marked as RESERVED
>> (for some reason). For us it does not matter whether the page is reserved or
>> not, if it is kernel visible page then it is DDR.
>>>
>>
>> I think you are setting I|G by addressing all mmio pages, right? If so,
>>
>>       KVM: direct mmio pfn check
>>
>>       Userspace may specify memory slots that are backed by mmio pages rather
>> than
>>       normal RAM.  In some cases it is not enough to identify these mmio pages
>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>
> Do you know what are those "some cases" and how checking PageReserved helps in those cases?

No, myself didn't see these actual cases in qemu, too. But this should be 
chronically persistent as I understand ;-)

Tiejun

>
> -Bharat
>
>>
>> Tiejun
>>
>>> -Bharat
>>>
>>>>
>>>> Tiejun
>>>>
>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>> +	} else {
>>>>>     #ifdef CONFIG_SMP
>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>> -#else
>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>> +		mas2_attr |= MAS2_M;
>>>>>     #endif
>>>>> +	}
>>>>> +	return mas2_attr;
>>>>>     }
>>>>>
>>>>>     /*
>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>     	/* Force IPROT=0 for all guest mappings. */
>>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>
>>>>>
>>>>
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
>> of a message to majordomo@vger.kernel.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
>


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  8:21             ` "“tiejun.chen”"
@ 2013-07-18  8:22               ` Bhushan Bharat-R65777
  -1 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  8:22 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421



> -----Original Message-----
> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> Sent: Thursday, July 18, 2013 1:52 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
> B07421
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
> >
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
> >> Sent: Thursday, July 18, 2013 1:01 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
> >> Scott-
> >> B07421
> >> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >> kernel managed pages
> >>
> >> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>>> Sent: Thursday, July 18, 2013 11:56 AM
> >>>> To: Bhushan Bharat-R65777
> >>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>> Wood
> >>>> Scott- B07421; Bhushan Bharat-R65777
> >>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >>>> kernel managed pages
> >>>>
> >>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> >>>>> If there is a struct page for the requested mapping then it's
> >>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable) else
> >>>>> this is treated as I/O and we set  "I + G"  (cache inhibited,
> >>>>> guarded)
> >>>>>
> >>>>> This helps setting proper TLB mapping for direct assigned device
> >>>>>
> >>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>>>> ---
> >>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>> b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>> index 1c6a9d7..089c227 100644
> >>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32
> >>>>> mas3, int
> >>>> usermode)
> >>>>>     	return mas3;
> >>>>>     }
> >>>>>
> >>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> >>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> >>>>>     {
> >>>>> +	u32 mas2_attr;
> >>>>> +
> >>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> >>>>> +
> >>>>> +	if (!pfn_valid(pfn)) {
> >>>>
> >>>> Why not directly use kvm_is_mmio_pfn()?
> >>>
> >>> What I understand from this function (someone can correct me) is
> >>> that it
> >> returns "false" when the page is managed by kernel and is not marked
> >> as RESERVED (for some reason). For us it does not matter whether the
> >> page is reserved or not, if it is kernel visible page then it is DDR.
> >>>
> >>
> >> I think you are setting I|G by addressing all mmio pages, right? If
> >> so,
> >>
> >>       KVM: direct mmio pfn check
> >>
> >>       Userspace may specify memory slots that are backed by mmio
> >> pages rather than
> >>       normal RAM.  In some cases it is not enough to identify these mmio
> pages
> >>       by pfn_valid().  This patch adds checking the PageReserved as well.
> >
> > Do you know what are those "some cases" and how checking PageReserved helps in
> those cases?
> 
> No, myself didn't see these actual cases in qemu,too. But this should be
> chronically persistent as I understand ;-)

Then I will wait till someone educate me :)

-Bharat

> 
> Tiejun
> 
> >
> > -Bharat
> >
> >>
> >> Tiejun
> >>
> >>> -Bharat
> >>>
> >>>>
> >>>> Tiejun
> >>>>
> >>>>> +		mas2_attr |= MAS2_I | MAS2_G;
> >>>>> +	} else {
> >>>>>     #ifdef CONFIG_SMP
> >>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> >>>>> -#else
> >>>>> -	return mas2 & MAS2_ATTRIB_MASK;
> >>>>> +		mas2_attr |= MAS2_M;
> >>>>>     #endif
> >>>>> +	}
> >>>>> +	return mas2_attr;
> >>>>>     }
> >>>>>
> >>>>>     /*
> >>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
> >>>>>     	/* Force IPROT=0 for all guest mappings. */
> >>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
> >>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> >>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> >>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
> >>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
> >>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
> >>>>>
> >>>>>
> >>>>
> >>>
> >>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> >> the body of a message to majordomo@vger.kernel.org More majordomo
> >> info at http://vger.kernel.org/majordomo-info.html
> >
> 


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  8:22               ` Bhushan Bharat-R65777
  0 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  8:22 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogIuKAnHRpZWp1bi5jaGVu
4oCdIiBbbWFpbHRvOnRpZWp1bi5jaGVuQHdpbmRyaXZlci5jb21dDQo+IFNlbnQ6IFRodXJzZGF5
LCBKdWx5IDE4LCAyMDEzIDE6NTIgUE0NCj4gVG86IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiBD
Yzoga3ZtLXBwY0B2Z2VyLmtlcm5lbC5vcmc7IGt2bUB2Z2VyLmtlcm5lbC5vcmc7IGFncmFmQHN1
c2UuZGU7IFdvb2QgU2NvdHQtDQo+IEIwNzQyMQ0KPiBTdWJqZWN0OiBSZTogW1BBVENIIDIvMl0g
a3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJlbmN5IG9ubHkgZm9yIGtlcm5lbA0KPiBtYW5h
Z2VkIHBhZ2VzDQo+IA0KPiBPbiAwNy8xOC8yMDEzIDA0OjA4IFBNLCBCaHVzaGFuIEJoYXJhdC1S
NjU3Nzcgd3JvdGU6DQo+ID4NCj4gPg0KPiA+PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0K
PiA+PiBGcm9tOiBrdm0tcHBjLW93bmVyQHZnZXIua2VybmVsLm9yZw0KPiA+PiBbbWFpbHRvOmt2
bS1wcGMtb3duZXJAdmdlci5rZXJuZWwub3JnXSBPbiBCZWhhbGYgT2YgIuKAnHRpZWp1bi5jaGVu
4oCdIg0KPiA+PiBTZW50OiBUaHVyc2RheSwgSnVseSAxOCwgMjAxMyAxOjAxIFBNDQo+ID4+IFRv
OiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPj4gQ2M6IGt2bS1wcGNAdmdlci5rZXJuZWwub3Jn
OyBrdm1Admdlci5rZXJuZWwub3JnOyBhZ3JhZkBzdXNlLmRlOyBXb29kDQo+ID4+IFNjb3R0LQ0K
PiA+PiBCMDc0MjENCj4gPj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJdIGt2bTogcG93ZXJwYzog
c2V0IGNhY2hlIGNvaGVyZW5jeSBvbmx5IGZvcg0KPiA+PiBrZXJuZWwgbWFuYWdlZCBwYWdlcw0K
PiA+Pg0KPiA+PiBPbiAwNy8xOC8yMDEzIDAzOjEyIFBNLCBCaHVzaGFuIEJoYXJhdC1SNjU3Nzcg
d3JvdGU6DQo+ID4+Pg0KPiA+Pj4NCj4gPj4+PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0K
PiA+Pj4+IEZyb206ICLigJx0aWVqdW4uY2hlbuKAnSIgW21haWx0bzp0aWVqdW4uY2hlbkB3aW5k
cml2ZXIuY29tXQ0KPiA+Pj4+IFNlbnQ6IFRodXJzZGF5LCBKdWx5IDE4LCAyMDEzIDExOjU2IEFN
DQo+ID4+Pj4gVG86IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiA+Pj4+IENjOiBrdm0tcHBjQHZn
ZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsNCj4gPj4+
PiBXb29kDQo+ID4+Pj4gU2NvdHQtIEIwNzQyMTsgQmh1c2hhbiBCaGFyYXQtUjY1Nzc3DQo+ID4+
Pj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJdIGt2bTogcG93ZXJwYzogc2V0IGNhY2hlIGNvaGVy
ZW5jeSBvbmx5IGZvcg0KPiA+Pj4+IGtlcm5lbCBtYW5hZ2VkIHBhZ2VzDQo+ID4+Pj4NCj4gPj4+
PiBPbiAwNy8xOC8yMDEzIDAyOjA0IFBNLCBCaGFyYXQgQmh1c2hhbiB3cm90ZToNCj4gPj4+Pj4g
SWYgdGhlcmUgaXMgYSBzdHJ1Y3QgcGFnZSBmb3IgdGhlIHJlcXVlc3RlZCBtYXBwaW5nIHRoZW4g
aXQncw0KPiA+Pj4+PiBub3JtYWwgRERSIGFuZCB0aGUgbWFwcGluZyBzZXRzICJNIiBiaXQgKGNv
aGVyZW50LCBjYWNoZWFibGUpIGVsc2UNCj4gPj4+Pj4gdGhpcyBpcyB0cmVhdGVkIGFzIEkvTyBh
bmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUgaW5oaWJpdGVkLA0KPiA+Pj4+PiBndWFyZGVkKQ0K
PiA+Pj4+Pg0KPiA+Pj4+PiBUaGlzIGhlbHBzIHNldHRpbmcgcHJvcGVyIFRMQiBtYXBwaW5nIGZv
ciBkaXJlY3QgYXNzaWduZWQgZGV2aWNlDQo+ID4+Pj4+DQo+ID4+Pj4+IFNpZ25lZC1vZmYtYnk6
IEJoYXJhdCBCaHVzaGFuIDxiaGFyYXQuYmh1c2hhbkBmcmVlc2NhbGUuY29tPg0KPiA+Pj4+PiAt
LS0NCj4gPj4+Pj4gICAgIGFyY2gvcG93ZXJwYy9rdm0vZTUwMF9tbXVfaG9zdC5jIHwgICAxNyAr
KysrKysrKysrKystLS0tLQ0KPiA+Pj4+PiAgICAgMSBmaWxlcyBjaGFuZ2VkLCAxMiBpbnNlcnRp
b25zKCspLCA1IGRlbGV0aW9ucygtKQ0KPiA+Pj4+Pg0KPiA+Pj4+PiBkaWZmIC0tZ2l0IGEvYXJj
aC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPj4+Pj4gYi9hcmNoL3Bvd2VycGMva3Zt
L2U1MDBfbW11X2hvc3QuYw0KPiA+Pj4+PiBpbmRleCAxYzZhOWQ3Li4wODljMjI3IDEwMDY0NA0K
PiA+Pj4+PiAtLS0gYS9hcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYw0KPiA+Pj4+PiAr
KysgYi9hcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYw0KPiA+Pj4+PiBAQCAtNjQsMTMg
KzY0LDIwIEBAIHN0YXRpYyBpbmxpbmUgdTMyIGU1MDBfc2hhZG93X21hczNfYXR0cmliKHUzMg0K
PiA+Pj4+PiBtYXMzLCBpbnQNCj4gPj4+PiB1c2VybW9kZSkNCj4gPj4+Pj4gICAgIAlyZXR1cm4g
bWFzMzsNCj4gPj4+Pj4gICAgIH0NCj4gPj4+Pj4NCj4gPj4+Pj4gLXN0YXRpYyBpbmxpbmUgdTMy
IGU1MDBfc2hhZG93X21hczJfYXR0cmliKHUzMiBtYXMyLCBpbnQgdXNlcm1vZGUpDQo+ID4+Pj4+
ICtzdGF0aWMgaW5saW5lIHUzMiBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYih1MzIgbWFzMiwgcGZu
X3QgcGZuKQ0KPiA+Pj4+PiAgICAgew0KPiA+Pj4+PiArCXUzMiBtYXMyX2F0dHI7DQo+ID4+Pj4+
ICsNCj4gPj4+Pj4gKwltYXMyX2F0dHIgPSBtYXMyICYgTUFTMl9BVFRSSUJfTUFTSzsNCj4gPj4+
Pj4gKw0KPiA+Pj4+PiArCWlmICghcGZuX3ZhbGlkKHBmbikpIHsNCj4gPj4+Pg0KPiA+Pj4+IFdo
eSBub3QgZGlyZWN0bHkgdXNlIGt2bV9pc19tbWlvX3BmbigpPw0KPiA+Pj4NCj4gPj4+IFdoYXQg
SSB1bmRlcnN0YW5kIGZyb20gdGhpcyBmdW5jdGlvbiAoc29tZW9uZSBjYW4gY29ycmVjdCBtZSkg
aXMNCj4gPj4+IHRoYXQgaXQNCj4gPj4gcmV0dXJucyAiZmFsc2UiIHdoZW4gdGhlIHBhZ2UgaXMg
bWFuYWdlZCBieSBrZXJuZWwgYW5kIGlzIG5vdCBtYXJrZWQNCj4gPj4gYXMgUkVTRVJWRUQgKGZv
ciBzb21lIHJlYXNvbikuIEZvciB1cyBpdCBkb2VzIG5vdCBtYXR0ZXIgd2hldGhlciB0aGUNCj4g
Pj4gcGFnZSBpcyByZXNlcnZlZCBvciBub3QsIGlmIGl0IGlzIGtlcm5lbCB2aXNpYmxlIHBhZ2Ug
dGhlbiBpdCBpcyBERFIuDQo+ID4+Pg0KPiA+Pg0KPiA+PiBJIHRoaW5rIHlvdSBhcmUgc2V0dGlu
ZyBJfEcgYnkgYWRkcmVzc2luZyBhbGwgbW1pbyBwYWdlcywgcmlnaHQ/IElmDQo+ID4+IHNvLA0K
PiA+Pg0KPiA+PiAgICAgICBLVk06IGRpcmVjdCBtbWlvIHBmbiBjaGVjaw0KPiA+Pg0KPiA+PiAg
ICAgICBVc2Vyc3BhY2UgbWF5IHNwZWNpZnkgbWVtb3J5IHNsb3RzIHRoYXQgYXJlIGJhY2tlZCBi
eSBtbWlvDQo+ID4+IHBhZ2VzIHJhdGhlciB0aGFuDQo+ID4+ICAgICAgIG5vcm1hbCBSQU0uICBJ
biBzb21lIGNhc2VzIGl0IGlzIG5vdCBlbm91Z2ggdG8gaWRlbnRpZnkgdGhlc2UgbW1pbw0KPiBw
YWdlcw0KPiA+PiAgICAgICBieSBwZm5fdmFsaWQoKS4gIFRoaXMgcGF0Y2ggYWRkcyBjaGVja2lu
ZyB0aGUgUGFnZVJlc2VydmVkIGFzIHdlbGwuDQo+ID4NCj4gPiBEbyB5b3Uga25vdyB3aGF0IGFy
ZSB0aG9zZSAic29tZSBjYXNlcyIgYW5kIGhvdyBjaGVja2luZyBQYWdlUmVzZXJ2ZWQgaGVscHMg
aW4NCj4gdGhvc2UgY2FzZXM/DQo+IA0KPiBObywgbXlzZWxmIGRpZG4ndCBzZWUgdGhlc2UgYWN0
dWFsIGNhc2VzIGluIHFlbXUsdG9vLiBCdXQgdGhpcyBzaG91bGQgYmUNCj4gY2hyb25pY2FsbHkg
cGVyc2lzdGVudCBhcyBJIHVuZGVyc3RhbmQgOy0pDQoNClRoZW4gSSB3aWxsIHdhaXQgdGlsbCBz
b21lb25lIGVkdWNhdGUgbWUgOikNCg0KLUJoYXJhdA0KDQo+IA0KPiBUaWVqdW4NCj4gDQo+ID4N
Cj4gPiAtQmhhcmF0DQo+ID4NCj4gPj4NCj4gPj4gVGllanVuDQo+ID4+DQo+ID4+PiAtQmhhcmF0
DQo+ID4+Pg0KPiA+Pj4+DQo+ID4+Pj4gVGllanVuDQo+ID4+Pj4NCj4gPj4+Pj4gKwkJbWFzMl9h
dHRyIHw9IE1BUzJfSSB8IE1BUzJfRzsNCj4gPj4+Pj4gKwl9IGVsc2Ugew0KPiA+Pj4+PiAgICAg
I2lmZGVmIENPTkZJR19TTVANCj4gPj4+Pj4gLQlyZXR1cm4gKG1hczIgJiBNQVMyX0FUVFJJQl9N
QVNLKSB8IE1BUzJfTTsNCj4gPj4+Pj4gLSNlbHNlDQo+ID4+Pj4+IC0JcmV0dXJuIG1hczIgJiBN
QVMyX0FUVFJJQl9NQVNLOw0KPiA+Pj4+PiArCQltYXMyX2F0dHIgfD0gTUFTMl9NOw0KPiA+Pj4+
PiAgICAgI2VuZGlmDQo+ID4+Pj4+ICsJfQ0KPiA+Pj4+PiArCXJldHVybiBtYXMyX2F0dHI7DQo+
ID4+Pj4+ICAgICB9DQo+ID4+Pj4+DQo+ID4+Pj4+ICAgICAvKg0KPiA+Pj4+PiBAQCAtMzEzLDcg
KzMyMCw3IEBAIHN0YXRpYyB2b2lkIGt2bXBwY19lNTAwX3NldHVwX3N0bGJlKA0KPiA+Pj4+PiAg
ICAgCS8qIEZvcmNlIElQUk9UPTAgZm9yIGFsbCBndWVzdCBtYXBwaW5ncy4gKi8NCj4gPj4+Pj4g
ICAgIAlzdGxiZS0+bWFzMSA9IE1BUzFfVFNJWkUodHNpemUpIHwgZ2V0X3RsYl9zdHMoZ3RsYmUp
IHwgTUFTMV9WQUxJRDsNCj4gPj4+Pj4gICAgIAlzdGxiZS0+bWFzMiA9IChndmFkZHIgJiBNQVMy
X0VQTikgfA0KPiA+Pj4+PiAtCQkgICAgICBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYihndGxiZS0+
bWFzMiwgcHIpOw0KPiA+Pj4+PiArCQkgICAgICBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYihndGxi
ZS0+bWFzMiwgcGZuKTsNCj4gPj4+Pj4gICAgIAlzdGxiZS0+bWFzN18zID0gKCh1NjQpcGZuIDw8
IFBBR0VfU0hJRlQpIHwNCj4gPj4+Pj4gICAgIAkJCWU1MDBfc2hhZG93X21hczNfYXR0cmliKGd0
bGJlLT5tYXM3XzMsIHByKTsNCj4gPj4+Pj4NCj4gPj4+Pj4NCj4gPj4+Pg0KPiA+Pj4NCj4gPj4N
Cj4gPj4gLS0NCj4gPj4gVG8gdW5zdWJzY3JpYmUgZnJvbSB0aGlzIGxpc3Q6IHNlbmQgdGhlIGxp
bmUgInVuc3Vic2NyaWJlIGt2bS1wcGMiIGluDQo+ID4+IHRoZSBib2R5IG9mIGEgbWVzc2FnZSB0
byBtYWpvcmRvbW9Admdlci5rZXJuZWwub3JnIE1vcmUgbWFqb3Jkb21vDQo+ID4+IGluZm8gYXQg
aHR0cDovL3ZnZXIua2VybmVsLm9yZy9tYWpvcmRvbW8taW5mby5odG1sDQo+ID4NCj4gDQoNCg=


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  8:21             ` "“tiejun.chen”"
@ 2013-07-18  8:25               ` Bhushan Bharat-R65777
  -1 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  8:25 UTC (permalink / raw)
  To: Bhushan Bharat-R65777, "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421



> -----Original Message-----
> From: Bhushan Bharat-R65777
> Sent: Thursday, July 18, 2013 1:53 PM
> To: '"“tiejun.chen”"'
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
> B07421
> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> 
> 
> > -----Original Message-----
> > From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> > Sent: Thursday, July 18, 2013 1:52 PM
> > To: Bhushan Bharat-R65777
> > Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
> > Scott-
> > B07421
> > Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> > kernel managed pages
> >
> > On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
> > >
> > >
> > >> -----Original Message-----
> > >> From: kvm-ppc-owner@vger.kernel.org
> > >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
> > >> Sent: Thursday, July 18, 2013 1:01 PM
> > >> To: Bhushan Bharat-R65777
> > >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> > >> Wood
> > >> Scott-
> > >> B07421
> > >> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> > >> kernel managed pages
> > >>
> > >> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> > >>>
> > >>>
> > >>>> -----Original Message-----
> > >>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> > >>>> Sent: Thursday, July 18, 2013 11:56 AM
> > >>>> To: Bhushan Bharat-R65777
> > >>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> > >>>> Wood
> > >>>> Scott- B07421; Bhushan Bharat-R65777
> > >>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> > >>>> for kernel managed pages
> > >>>>
> > >>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> > >>>>> If there is a struct page for the requested mapping then it's
> > >>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> > >>>>> else this is treated as I/O and we set  "I + G"  (cache
> > >>>>> inhibited,
> > >>>>> guarded)
> > >>>>>
> > >>>>> This helps setting proper TLB mapping for direct assigned device
> > >>>>>
> > >>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > >>>>> ---
> > >>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> > >>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
> > >>>>>
> > >>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> > >>>>> b/arch/powerpc/kvm/e500_mmu_host.c
> > >>>>> index 1c6a9d7..089c227 100644
> > >>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> > >>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> > >>>>> @@ -64,13 +64,20 @@ static inline u32
> > >>>>> e500_shadow_mas3_attrib(u32 mas3, int
> > >>>> usermode)
> > >>>>>     	return mas3;
> > >>>>>     }
> > >>>>>
> > >>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
> > >>>>> usermode)
> > >>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> > >>>>>     {
> > >>>>> +	u32 mas2_attr;
> > >>>>> +
> > >>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> > >>>>> +
> > >>>>> +	if (!pfn_valid(pfn)) {
> > >>>>
> > >>>> Why not directly use kvm_is_mmio_pfn()?
> > >>>
> > >>> What I understand from this function (someone can correct me) is
> > >>> that it
> > >> returns "false" when the page is managed by kernel and is not
> > >> marked as RESERVED (for some reason). For us it does not matter
> > >> whether the page is reserved or not, if it is kernel visible page then it
> is DDR.
> > >>>
> > >>
> > >> I think you are setting I|G by addressing all mmio pages, right? If
> > >> so,
> > >>
> > >>       KVM: direct mmio pfn check
> > >>
> > >>       Userspace may specify memory slots that are backed by mmio
> > >> pages rather than
> > >>       normal RAM.  In some cases it is not enough to identify these
> > >> mmio
> > pages
> > >>       by pfn_valid().  This patch adds checking the PageReserved as well.
> > >
> > > Do you know what are those "some cases" and how checking
> > > PageReserved helps in
> > those cases?
> >
> > No, myself didn't see these actual cases in qemu,too. But this should
> > be chronically persistent as I understand ;-)
> 
> Then I will wait till someone educate me :)

The reason is, the kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operations unless it is necessary.

-Bharat

> > >>>>> +		mas2_attr |= MAS2_I | MAS2_G;
> > >>>>> +	} else {
> > >>>>>     #ifdef CONFIG_SMP
> > >>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> > >>>>> -#else
> > >>>>> -	return mas2 & MAS2_ATTRIB_MASK;
> > >>>>> +		mas2_attr |= MAS2_M;
> > >>>>>     #endif
> > >>>>> +	}
> > >>>>> +	return mas2_attr;
> > >>>>>     }
> > >>>>>
> > >>>>>     /*
> > >>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
> > >>>>>     	/* Force IPROT=0 for all guest mappings. */
> > >>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
> > >>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> > >>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> > >>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
> > >>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
> > >>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
> > >>>>>
> > >>>>>
> > >>>>
> > >>>
> > >>
> > >> --
> > >> To unsubscribe from this list: send the line "unsubscribe kvm-ppc"
> > >> in the body of a message to majordomo@vger.kernel.org More
> > >> majordomo info at http://vger.kernel.org/majordomo-info.html
> > >
> >


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  8:25               ` Bhushan Bharat-R65777
  0 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  8:25 UTC (permalink / raw)
  To: Bhushan Bharat-R65777, "“tiejun.chen”"
  Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogQmh1c2hhbiBCaGFyYXQt
UjY1Nzc3DQo+IFNlbnQ6IFRodXJzZGF5LCBKdWx5IDE4LCAyMDEzIDE6NTMgUE0NCj4gVG86ICci
4oCcdGllanVuLmNoZW7igJ0iJw0KPiBDYzoga3ZtLXBwY0B2Z2VyLmtlcm5lbC5vcmc7IGt2bUB2
Z2VyLmtlcm5lbC5vcmc7IGFncmFmQHN1c2UuZGU7IFdvb2QgU2NvdHQtDQo+IEIwNzQyMQ0KPiBT
dWJqZWN0OiBSRTogW1BBVENIIDIvMl0ga3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJlbmN5
IG9ubHkgZm9yIGtlcm5lbA0KPiBtYW5hZ2VkIHBhZ2VzDQo+IA0KPiANCj4gDQo+ID4gLS0tLS1P
cmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gPiBGcm9tOiAi4oCcdGllanVuLmNoZW7igJ0iIFttYWls
dG86dGllanVuLmNoZW5Ad2luZHJpdmVyLmNvbV0NCj4gPiBTZW50OiBUaHVyc2RheSwgSnVseSAx
OCwgMjAxMyAxOjUyIFBNDQo+ID4gVG86IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiA+IENjOiBr
dm0tcHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5k
ZTsgV29vZA0KPiA+IFNjb3R0LQ0KPiA+IEIwNzQyMQ0KPiA+IFN1YmplY3Q6IFJlOiBbUEFUQ0gg
Mi8yXSBrdm06IHBvd2VycGM6IHNldCBjYWNoZSBjb2hlcmVuY3kgb25seSBmb3INCj4gPiBrZXJu
ZWwgbWFuYWdlZCBwYWdlcw0KPiA+DQo+ID4gT24gMDcvMTgvMjAxMyAwNDowOCBQTSwgQmh1c2hh
biBCaGFyYXQtUjY1Nzc3IHdyb3RlOg0KPiA+ID4NCj4gPiA+DQo+ID4gPj4gLS0tLS1PcmlnaW5h
bCBNZXNzYWdlLS0tLS0NCj4gPiA+PiBGcm9tOiBrdm0tcHBjLW93bmVyQHZnZXIua2VybmVsLm9y
Zw0KPiA+ID4+IFttYWlsdG86a3ZtLXBwYy1vd25lckB2Z2VyLmtlcm5lbC5vcmddIE9uIEJlaGFs
ZiBPZiAi4oCcdGllanVuLmNoZW7igJ0iDQo+ID4gPj4gU2VudDogVGh1cnNkYXksIEp1bHkgMTgs
IDIwMTMgMTowMSBQTQ0KPiA+ID4+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPiA+PiBD
Yzoga3ZtLXBwY0B2Z2VyLmtlcm5lbC5vcmc7IGt2bUB2Z2VyLmtlcm5lbC5vcmc7IGFncmFmQHN1
c2UuZGU7DQo+ID4gPj4gV29vZA0KPiA+ID4+IFNjb3R0LQ0KPiA+ID4+IEIwNzQyMQ0KPiA+ID4+
IFN1YmplY3Q6IFJlOiBbUEFUQ0ggMi8yXSBrdm06IHBvd2VycGM6IHNldCBjYWNoZSBjb2hlcmVu
Y3kgb25seSBmb3INCj4gPiA+PiBrZXJuZWwgbWFuYWdlZCBwYWdlcw0KPiA+ID4+DQo+ID4gPj4g
T24gMDcvMTgvMjAxMyAwMzoxMiBQTSwgQmh1c2hhbiBCaGFyYXQtUjY1Nzc3IHdyb3RlOg0KPiA+
ID4+Pg0KPiA+ID4+Pg0KPiA+ID4+Pj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gPiA+
Pj4+IEZyb206ICLigJx0aWVqdW4uY2hlbuKAnSIgW21haWx0bzp0aWVqdW4uY2hlbkB3aW5kcml2
ZXIuY29tXQ0KPiA+ID4+Pj4gU2VudDogVGh1cnNkYXksIEp1bHkgMTgsIDIwMTMgMTE6NTYgQU0N
Cj4gPiA+Pj4+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPiA+Pj4+IENjOiBrdm0tcHBj
QHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsNCj4g
PiA+Pj4+IFdvb2QNCj4gPiA+Pj4+IFNjb3R0LSBCMDc0MjE7IEJodXNoYW4gQmhhcmF0LVI2NTc3
Nw0KPiA+ID4+Pj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJdIGt2bTogcG93ZXJwYzogc2V0IGNh
Y2hlIGNvaGVyZW5jeSBvbmx5DQo+ID4gPj4+PiBmb3Iga2VybmVsIG1hbmFnZWQgcGFnZXMNCj4g
PiA+Pj4+DQo+ID4gPj4+PiBPbiAwNy8xOC8yMDEzIDAyOjA0IFBNLCBCaGFyYXQgQmh1c2hhbiB3
cm90ZToNCj4gPiA+Pj4+PiBJZiB0aGVyZSBpcyBhIHN0cnVjdCBwYWdlIGZvciB0aGUgcmVxdWVz
dGVkIG1hcHBpbmcgdGhlbiBpdCdzDQo+ID4gPj4+Pj4gbm9ybWFsIEREUiBhbmQgdGhlIG1hcHBp
bmcgc2V0cyAiTSIgYml0IChjb2hlcmVudCwgY2FjaGVhYmxlKQ0KPiA+ID4+Pj4+IGVsc2UgdGhp
cyBpcyB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUNCj4gPiA+Pj4+
PiBpbmhpYml0ZWQsDQo+ID4gPj4+Pj4gZ3VhcmRlZCkNCj4gPiA+Pj4+Pg0KPiA+ID4+Pj4+IFRo
aXMgaGVscHMgc2V0dGluZyBwcm9wZXIgVExCIG1hcHBpbmcgZm9yIGRpcmVjdCBhc3NpZ25lZCBk
ZXZpY2UNCj4gPiA+Pj4+Pg0KPiA+ID4+Pj4+IFNpZ25lZC1vZmYtYnk6IEJoYXJhdCBCaHVzaGFu
IDxiaGFyYXQuYmh1c2hhbkBmcmVlc2NhbGUuY29tPg0KPiA+ID4+Pj4+IC0tLQ0KPiA+ID4+Pj4+
ICAgICBhcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYyB8ICAgMTcgKysrKysrKysrKysr
LS0tLS0NCj4gPiA+Pj4+PiAgICAgMSBmaWxlcyBjaGFuZ2VkLCAxMiBpbnNlcnRpb25zKCspLCA1
IGRlbGV0aW9ucygtKQ0KPiA+ID4+Pj4+DQo+ID4gPj4+Pj4gZGlmZiAtLWdpdCBhL2FyY2gvcG93
ZXJwYy9rdm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4gPj4+Pj4gYi9hcmNoL3Bvd2VycGMva3ZtL2U1
MDBfbW11X2hvc3QuYw0KPiA+ID4+Pj4+IGluZGV4IDFjNmE5ZDcuLjA4OWMyMjcgMTAwNjQ0DQo+
ID4gPj4+Pj4gLS0tIGEvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPiA+Pj4+
PiArKysgYi9hcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYw0KPiA+ID4+Pj4+IEBAIC02
NCwxMyArNjQsMjAgQEAgc3RhdGljIGlubGluZSB1MzINCj4gPiA+Pj4+PiBlNTAwX3NoYWRvd19t
YXMzX2F0dHJpYih1MzIgbWFzMywgaW50DQo+ID4gPj4+PiB1c2VybW9kZSkNCj4gPiA+Pj4+PiAg
ICAgCXJldHVybiBtYXMzOw0KPiA+ID4+Pj4+ICAgICB9DQo+ID4gPj4+Pj4NCj4gPiA+Pj4+PiAt
c3RhdGljIGlubGluZSB1MzIgZTUwMF9zaGFkb3dfbWFzMl9hdHRyaWIodTMyIG1hczIsIGludA0K
PiA+ID4+Pj4+IHVzZXJtb2RlKQ0KPiA+ID4+Pj4+ICtzdGF0aWMgaW5saW5lIHUzMiBlNTAwX3No
YWRvd19tYXMyX2F0dHJpYih1MzIgbWFzMiwgcGZuX3QgcGZuKQ0KPiA+ID4+Pj4+ICAgICB7DQo+
ID4gPj4+Pj4gKwl1MzIgbWFzMl9hdHRyOw0KPiA+ID4+Pj4+ICsNCj4gPiA+Pj4+PiArCW1hczJf
YXR0ciA9IG1hczIgJiBNQVMyX0FUVFJJQl9NQVNLOw0KPiA+ID4+Pj4+ICsNCj4gPiA+Pj4+PiAr
CWlmICghcGZuX3ZhbGlkKHBmbikpIHsNCj4gPiA+Pj4+DQo+ID4gPj4+PiBXaHkgbm90IGRpcmVj
dGx5IHVzZSBrdm1faXNfbW1pb19wZm4oKT8NCj4gPiA+Pj4NCj4gPiA+Pj4gV2hhdCBJIHVuZGVy
c3RhbmQgZnJvbSB0aGlzIGZ1bmN0aW9uIChzb21lb25lIGNhbiBjb3JyZWN0IG1lKSBpcw0KPiA+
ID4+PiB0aGF0IGl0DQo+ID4gPj4gcmV0dXJucyAiZmFsc2UiIHdoZW4gdGhlIHBhZ2UgaXMgbWFu
YWdlZCBieSBrZXJuZWwgYW5kIGlzIG5vdA0KPiA+ID4+IG1hcmtlZCBhcyBSRVNFUlZFRCAoZm9y
IHNvbWUgcmVhc29uKS4gRm9yIHVzIGl0IGRvZXMgbm90IG1hdHRlcg0KPiA+ID4+IHdoZXRoZXIg
dGhlIHBhZ2UgaXMgcmVzZXJ2ZWQgb3Igbm90LCBpZiBpdCBpcyBrZXJuZWwgdmlzaWJsZSBwYWdl
IHRoZW4gaXQNCj4gaXMgRERSLg0KPiA+ID4+Pg0KPiA+ID4+DQo+ID4gPj4gSSB0aGluayB5b3Ug
YXJlIHNldHRpbmcgSXxHIGJ5IGFkZHJlc3NpbmcgYWxsIG1taW8gcGFnZXMsIHJpZ2h0PyBJZg0K
PiA+ID4+IHNvLA0KPiA+ID4+DQo+ID4gPj4gICAgICAgS1ZNOiBkaXJlY3QgbW1pbyBwZm4gY2hl
Y2sNCj4gPiA+Pg0KPiA+ID4+ICAgICAgIFVzZXJzcGFjZSBtYXkgc3BlY2lmeSBtZW1vcnkgc2xv
dHMgdGhhdCBhcmUgYmFja2VkIGJ5IG1taW8NCj4gPiA+PiBwYWdlcyByYXRoZXIgdGhhbg0KPiA+
ID4+ICAgICAgIG5vcm1hbCBSQU0uICBJbiBzb21lIGNhc2VzIGl0IGlzIG5vdCBlbm91Z2ggdG8g
aWRlbnRpZnkgdGhlc2UNCj4gPiA+PiBtbWlvDQo+ID4gcGFnZXMNCj4gPiA+PiAgICAgICBieSBw
Zm5fdmFsaWQoKS4gIFRoaXMgcGF0Y2ggYWRkcyBjaGVja2luZyB0aGUgUGFnZVJlc2VydmVkIGFz
IHdlbGwuDQo+ID4gPg0KPiA+ID4gRG8geW91IGtub3cgd2hhdCBhcmUgdGhvc2UgInNvbWUgY2Fz
ZXMiIGFuZCBob3cgY2hlY2tpbmcNCj4gPiA+IFBhZ2VSZXNlcnZlZCBoZWxwcyBpbg0KPiA+IHRo
b3NlIGNhc2VzPw0KPiA+DQo+ID4gTm8sIG15c2VsZiBkaWRuJ3Qgc2VlIHRoZXNlIGFjdHVhbCBj
YXNlcyBpbiBxZW11LHRvby4gQnV0IHRoaXMgc2hvdWxkDQo+ID4gYmUgY2hyb25pY2FsbHkgcGVy
c2lzdGVudCBhcyBJIHVuZGVyc3RhbmQgOy0pDQo+IA0KPiBUaGVuIEkgd2lsbCB3YWl0IHRpbGwg
c29tZW9uZSBlZHVjYXRlIG1lIDopDQoNClRoZSByZWFzb24gaXMgLCBrdm1faXNfbW1pb19wZm4o
KSBmdW5jdGlvbiBsb29rcyBwcmV0dHkgaGVhdnkgYW5kIEkgZG8gbm90IHdhbnQgdG8gY2FsbCB0
aGlzIGZvciBhbGwgdGxid2Ugb3BlcmF0aW9uIHVubGVzcyBpdCBpcyBuZWNlc3NhcnkuDQoNCi1C
aGFyYXQNCg0KPiA+ID4+Pj4+ICsJCW1hczJfYXR0ciB8PSBNQVMyX0kgfCBNQVMyX0c7DQo+ID4g
Pj4+Pj4gKwl9IGVsc2Ugew0KPiA+ID4+Pj4+ICAgICAjaWZkZWYgQ09ORklHX1NNUA0KPiA+ID4+
Pj4+IC0JcmV0dXJuIChtYXMyICYgTUFTMl9BVFRSSUJfTUFTSykgfCBNQVMyX007DQo+ID4gPj4+
Pj4gLSNlbHNlDQo+ID4gPj4+Pj4gLQlyZXR1cm4gbWFzMiAmIE1BUzJfQVRUUklCX01BU0s7DQo+
ID4gPj4+Pj4gKwkJbWFzMl9hdHRyIHw9IE1BUzJfTTsNCj4gPiA+Pj4+PiAgICAgI2VuZGlmDQo+
ID4gPj4+Pj4gKwl9DQo+ID4gPj4+Pj4gKwlyZXR1cm4gbWFzMl9hdHRyOw0KPiA+ID4+Pj4+ICAg
ICB9DQo+ID4gPj4+Pj4NCj4gPiA+Pj4+PiAgICAgLyoNCj4gPiA+Pj4+PiBAQCAtMzEzLDcgKzMy
MCw3IEBAIHN0YXRpYyB2b2lkIGt2bXBwY19lNTAwX3NldHVwX3N0bGJlKA0KPiA+ID4+Pj4+ICAg
ICAJLyogRm9yY2UgSVBST1Q9MCBmb3IgYWxsIGd1ZXN0IG1hcHBpbmdzLiAqLw0KPiA+ID4+Pj4+
ICAgICAJc3RsYmUtPm1hczEgPSBNQVMxX1RTSVpFKHRzaXplKSB8IGdldF90bGJfc3RzKGd0bGJl
KSB8IE1BUzFfVkFMSUQ7DQo+ID4gPj4+Pj4gICAgIAlzdGxiZS0+bWFzMiA9IChndmFkZHIgJiBN
QVMyX0VQTikgfA0KPiA+ID4+Pj4+IC0JCSAgICAgIGU1MDBfc2hhZG93X21hczJfYXR0cmliKGd0
bGJlLT5tYXMyLCBwcik7DQo+ID4gPj4+Pj4gKwkJICAgICAgZTUwMF9zaGFkb3dfbWFzMl9hdHRy
aWIoZ3RsYmUtPm1hczIsIHBmbik7DQo+ID4gPj4+Pj4gICAgIAlzdGxiZS0+bWFzN18zID0gKCh1
NjQpcGZuIDw8IFBBR0VfU0hJRlQpIHwNCj4gPiA+Pj4+PiAgICAgCQkJZTUwMF9zaGFkb3dfbWFz
M19hdHRyaWIoZ3RsYmUtPm1hczdfMywgcHIpOw0KPiA+ID4+Pj4+DQo+ID4gPj4+Pj4NCj4gPiA+
Pj4+DQo+ID4gPj4+DQo+ID4gPj4NCj4gPiA+PiAtLQ0KPiA+ID4+IFRvIHVuc3Vic2NyaWJlIGZy
b20gdGhpcyBsaXN0OiBzZW5kIHRoZSBsaW5lICJ1bnN1YnNjcmliZSBrdm0tcHBjIg0KPiA+ID4+
IGluIHRoZSBib2R5IG9mIGEgbWVzc2FnZSB0byBtYWpvcmRvbW9Admdlci5rZXJuZWwub3JnIE1v
cmUNCj4gPiA+PiBtYWpvcmRvbW8gaW5mbyBhdCBodHRwOi8vdmdlci5rZXJuZWwub3JnL21ham9y
ZG9tby1pbmZvLmh0bWwNCj4gPiA+DQo+ID4NCg0K


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  6:16   ` Bharat Bhushan
@ 2013-07-18  8:27     ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  8:27 UTC (permalink / raw)
  To: Bharat Bhushan; +Cc: kvm-ppc, kvm, agraf, scottwood, Bharat Bhushan

On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {
> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}

Additionally, in the UP case this little chunk of code is equivalent to

	if (1) {
		mas2_attr |= MAS2_I | MAS2_G;
	} else {
	}

So you'd better wrap MAS2_M in advance like,

#ifdef CONFIG_SMP
#define M_IF_SMP        MAS2_M
#else
#define M_IF_SMP        0
#endif

Then	
	if (1)
		mas2_attr |= MAS2_I | MAS2_G;
	else
		mas2_attr |= M_IF_SMP;

Tiejun

> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  8:27     ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  8:27 UTC (permalink / raw)
  To: Bharat Bhushan; +Cc: kvm-ppc, kvm, agraf, scottwood, Bharat Bhushan

On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {
> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}

Additionally, in the UP case this little chunk of code is equivalent to

	if (1) {
		mas2_attr |= MAS2_I | MAS2_G;
	} else {
	}

So you'd better wrap MAS2_M in advance like,

#ifdef CONFIG_SMP
#define M_IF_SMP        MAS2_M
#else
#define M_IF_SMP        0
#endif

Then	
	if (1)
		mas2_attr |= MAS2_I | MAS2_G;
	else
		mas2_attr |= M_IF_SMP;

Tiejun

> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  8:25               ` Bhushan Bharat-R65777
@ 2013-07-18  8:55                 ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  8:55 UTC (permalink / raw)
  To: Bhushan Bharat-R65777; +Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>>
>>
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>>
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>>
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>      	return mas3;
>>>>>>>>      }
>>>>>>>>
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>      {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>>
>>>>>
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>>
>>>>>        KVM: direct mmio pfn check
>>>>>
>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>>
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>>
>> Then I will wait till someone educate me :)
>
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

Furthermore, how do we distinguish that we're creating a TLB entry for the device
assigned directly to the GS?

I think it's unnecessary to always check whether that is an mmio pfn, since we have
more non-direct-assigned devices.

So maybe we can introduce another helper to fix up that TLB entry instead of
this path.

Tiejun

>
> -Bharat
>
>>>>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>>>>> +	} else {
>>>>>>>>      #ifdef CONFIG_SMP
>>>>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>>>>> -#else
>>>>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +		mas2_attr |= MAS2_M;
>>>>>>>>      #endif
>>>>>>>> +	}
>>>>>>>> +	return mas2_attr;
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      /*
>>>>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>>>>      	/* Force IPROT=0 for all guest mappings. */
>>>>>>>>      	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>>>>      	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>>>>      	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>>>>      			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc"
>>>>> in the body of a message to majordomo@vger.kernel.org More
>>>>> majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  8:55                 ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  8:55 UTC (permalink / raw)
  To: Bhushan Bharat-R65777; +Cc: kvm-ppc, kvm, agraf, Wood Scott-B07421

On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>>
>>
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>>
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>>
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>      	return mas3;
>>>>>>>>      }
>>>>>>>>
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>      {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>>
>>>>>
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>>
>>>>>        KVM: direct mmio pfn check
>>>>>
>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>>
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>>
>> Then I will wait till someone educate me :)
>
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

Furthermore, how do we distinguish that we're creating a TLB entry for the device
assigned directly to the GS?

I think it's unnecessary to always check whether that is an mmio pfn, since we have
more non-direct-assigned devices.

So maybe we can introduce another helper to fix up that TLB entry instead of
this path.

Tiejun

>
> -Bharat
>
>>>>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>>>>> +	} else {
>>>>>>>>      #ifdef CONFIG_SMP
>>>>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>>>>> -#else
>>>>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +		mas2_attr |= MAS2_M;
>>>>>>>>      #endif
>>>>>>>> +	}
>>>>>>>> +	return mas2_attr;
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      /*
>>>>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>>>>      	/* Force IPROT=0 for all guest mappings. */
>>>>>>>>      	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>>>>      	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>>>>      	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>>>>      			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc"
>>>>> in the body of a message to majordomo@vger.kernel.org More
>>>>> majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  8:55                 ` "“tiejun.chen”"
@ 2013-07-18  9:44                   ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18  9:44 UTC (permalink / raw)
  To: “tiejun.chen”
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 10:55, “tiejun.chen” wrote:

> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>> 
>> 
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"“tiejun.chen”"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>> 
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>> 
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>> 
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>> 
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>> 
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>> 
>>>>>> 
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>> 
>>>>>>       KVM: direct mmio pfn check
>>>>>> 
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>> 
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>> 
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>> 
>>> Then I will wait till someone educate me :)
>> 
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
> 
> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?

Because other devices wouldn't be available to the guest through memory slots.

> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.

I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)

> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.

This path does fix up the shadow (host) TLB entry :).


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  9:44                   ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18  9:44 UTC (permalink / raw)
  To: “tiejun.chen”
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 10:55, “tiejun.chen” wrote:

> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>> 
>> 
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"“tiejun.chen”"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>> 
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>> 
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>> 
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>> 
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>> 
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>> 
>>>>>> 
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>> 
>>>>>>       KVM: direct mmio pfn check
>>>>>> 
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>> 
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>> 
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>> 
>>> Then I will wait till someone educate me :)
>> 
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
> 
> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?

Because other devices wouldn't be available to the guest through memory slots.

> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.

I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)

> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.

This path does fix up the shadow (host) TLB entry :).


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  8:25               ` Bhushan Bharat-R65777
@ 2013-07-18  9:48                 ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18  9:48 UTC (permalink / raw)
  To: Bhushan Bharat-R65777
  Cc: "“tiejun.chen”", kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>> 
>> 
>> 
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>> 
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>> 
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>> 
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>    	return mas3;
>>>>>>>>    }
>>>>>>>> 
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>    {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>> 
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>> 
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>> 
>>>>> 
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>> 
>>>>>      KVM: direct mmio pfn check
>>>>> 
>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>> 
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>> 
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>> 
>> Then I will wait till someone educate me :)
> 
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:

  1) Non cache coherent DMA
  2) Memory hot remove

The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:

        depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
        default n if PPC_47x
        default y

so we never hit it with any core we care about ;).

Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Which means I think it's fine to slim this whole thing down to only check for pfn_valid(), as the code does in this patch. It would however be very useful to have a comment there that explains why it's safe to do so.



Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  9:48                 ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18  9:48 UTC (permalink / raw)
  To: Bhushan Bharat-R65777
  Cc: "“tiejun.chen”", kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>> 
>> 
>> 
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>> 
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>> 
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>> 
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>    	return mas3;
>>>>>>>>    }
>>>>>>>> 
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>    {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>> 
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>> 
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>> 
>>>>> 
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>> 
>>>>>      KVM: direct mmio pfn check
>>>>> 
>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>> 
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>> 
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>> 
>> Then I will wait till someone educate me :)
> 
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:

  1) Non cache coherent DMA
  2) Memory hot remove

The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:

        depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
        default n if PPC_47x
        default y

so we never hit it with any core we care about ;).

Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Which means I think it's fine to slim this whole thing down to only check for pfn_valid(), as the code does in this patch. It would however be very useful to have a comment there that explains why it's safe to do so.



Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  9:48                 ` Alexander Graf
  (?)
@ 2013-07-18  9:51                 ` Bhushan Bharat-R65777
  -1 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-18  9:51 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”", kvm-ppc, kvm, Wood Scott-B07421



> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Thursday, July 18, 2013 3:19 PM
> To: Bhushan Bharat-R65777
> Cc: "“tiejun.chen”"; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-
> B07421
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> 
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: Bhushan Bharat-R65777
> >> Sent: Thursday, July 18, 2013 1:53 PM
> >> To: '"“tiejun.chen”"'
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
> >> Scott-
> >> B07421
> >> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >> kernel managed pages
> >>
> >>
> >>
> >>> -----Original Message-----
> >>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>> Sent: Thursday, July 18, 2013 1:52 PM
> >>> To: Bhushan Bharat-R65777
> >>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>> Wood
> >>> Scott-
> >>> B07421
> >>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >>> kernel managed pages
> >>>
> >>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
> >>>>
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
> >>>>> Sent: Thursday, July 18, 2013 1:01 PM
> >>>>> To: Bhushan Bharat-R65777
> >>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>>> Wood
> >>>>> Scott-
> >>>>> B07421
> >>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>> for kernel managed pages
> >>>>>
> >>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> >>>>>>
> >>>>>>
> >>>>>>> -----Original Message-----
> >>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
> >>>>>>> To: Bhushan Bharat-R65777
> >>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>>>>> Wood
> >>>>>>> Scott- B07421; Bhushan Bharat-R65777
> >>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>>>> for kernel managed pages
> >>>>>>>
> >>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> >>>>>>>> If there is a struct page for the requested mapping then it's
> >>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> >>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
> >>>>>>>> inhibited,
> >>>>>>>> guarded)
> >>>>>>>>
> >>>>>>>> This helps setting proper TLB mapping for direct assigned
> >>>>>>>> device
> >>>>>>>>
> >>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>>>>>>> ---
> >>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> index 1c6a9d7..089c227 100644
> >>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> @@ -64,13 +64,20 @@ static inline u32
> >>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
> >>>>>>> usermode)
> >>>>>>>>    	return mas3;
> >>>>>>>>    }
> >>>>>>>>
> >>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
> >>>>>>>> usermode)
> >>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> >>>>>>>>    {
> >>>>>>>> +	u32 mas2_attr;
> >>>>>>>> +
> >>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> >>>>>>>> +
> >>>>>>>> +	if (!pfn_valid(pfn)) {
> >>>>>>>
> >>>>>>> Why not directly use kvm_is_mmio_pfn()?
> >>>>>>
> >>>>>> What I understand from this function (someone can correct me) is
> >>>>>> that it
> >>>>> returns "false" when the page is managed by kernel and is not
> >>>>> marked as RESERVED (for some reason). For us it does not matter
> >>>>> whether the page is reserved or not, if it is kernel visible page
> >>>>> then it
> >> is DDR.
> >>>>>>
> >>>>>
> >>>>> I think you are setting I|G by addressing all mmio pages, right?
> >>>>> If so,
> >>>>>
> >>>>>      KVM: direct mmio pfn check
> >>>>>
> >>>>>      Userspace may specify memory slots that are backed by mmio
> >>>>> pages rather than
> >>>>>      normal RAM.  In some cases it is not enough to identify these
> >>>>> mmio
> >>> pages
> >>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
> >>>>
> >>>> Do you know what are those "some cases" and how checking
> >>>> PageReserved helps in
> >>> those cases?
> >>>
> >>> No, myself didn't see these actual cases in qemu,too. But this
> >>> should be chronically persistent as I understand ;-)
> >>
> >> Then I will wait till someone educate me :)
> >
> > The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not
> want to call this for all tlbwe operation unless it is necessary.
> 
> It certainly does more than we need and potentially slows down the fast path
> (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check
> for pages that are declared reserved on the host. This happens in 2 cases:
> 
>   1) Non cache coherent DMA
>   2) Memory hot remove
> 
> The non coherent DMA case would be interesting, as with the mechanism as it is
> in place in Linux today, we could potentially break normal guest operation if we
> don't take it into account. However, it's Kconfig guarded by:
> 
>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>         default n if PPC_47x
>         default y
> 
> so we never hit it with any core we care about ;).
> 
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about
> that one either.
> 
> Which means I think it's fine to slim this whole thing down to only check for
> pfn_valid(), as the code does in this patch. It would however be very useful to
> have a comment there that explains why it's safe to do so.

Big thanks for the details :-)

Will add a comment.

-Bharat

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  9:44                   ` Alexander Graf
@ 2013-07-18  9:56                     ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  9:56 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 05:44 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:55, “tiejun.chen” wrote:
>
>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"“tiejun.chen”"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>>
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>      	return mas3;
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>      {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>>
>>>>>>>
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>>
>>>>>>>        KVM: direct mmio pfn check
>>>>>>>
>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>>
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>>
>>>> Then I will wait till someone educate me :)
>>>
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>
>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>
> Because other devices wouldn't be available to the guest through memory slots.

Yes.

>
>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>
> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>

Sorry, looks I'm misleading you :-P

>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>
> This path does fix up the shadow (host) TLB entry :).
>

I just mean whether we can have a separate path dedicated to those direct 
assigned devices, not go this common path :)

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18  9:56                     ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18  9:56 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 05:44 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:55, “tiejun.chen” wrote:
>
>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"“tiejun.chen”"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>>
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>      	return mas3;
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>      {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>>
>>>>>>>
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>>
>>>>>>>        KVM: direct mmio pfn check
>>>>>>>
>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>>
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>>
>>>> Then I will wait till someone educate me :)
>>>
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>
>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>
> Because other devices wouldn't be available to the guest through memory slots.

Yes.

>
>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>
> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>

Sorry, looks I'm misleading you :-P

>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>
> This path does fix up the shadow (host) TLB entry :).
>

I just mean whether we can have a separate path dedicated to those direct 
assigned devices, not go this common path :)

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  9:56                     ` "“tiejun.chen”"
@ 2013-07-18 10:00                       ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18 10:00 UTC (permalink / raw)
  To: “tiejun.chen”
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 11:56, “tiejun.chen” wrote:

> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:55, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"“tiejun.chen”"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>> 
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>> 
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>> 
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>> 
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>> 
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>> 
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>> 
>>>>>>>> 
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>> 
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>> 
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>> 
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>> 
>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>> 
>>>>> Then I will wait till someone educate me :)
>>>> 
>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>> 
>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>> 
>> Because other devices wouldn't be available to the guest through memory slots.
> 
> Yes.
> 
>> 
>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>> 
>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>> 
> 
> Sorry, looks I'm misleading you :-P
> 
>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>> 
>> This path does fix up the shadow (host) TLB entry :).
>> 
> 
> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)

I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.

We could take that information from 2 other side though:

  1) Memory Slot
  2) Guest TLB Flags

If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.

If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Either way, not trusting anyone is definitely the safer choice.


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 10:00                       ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18 10:00 UTC (permalink / raw)
  To: “tiejun.chen”
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 11:56, “tiejun.chen” wrote:

> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:55, "tiejun.chen" wrote:
>> 
>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"tiejun.chen"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "tiejun.chen"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>> 
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>> 
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>> 
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>> 
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>> 
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>> 
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>> 
>>>>>>>> 
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>> 
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>> 
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>> 
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>> 
>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>> 
>>>>> Then I will wait till someone educate me :)
>>>> 
>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>> 
>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>> 
>> Because other devices wouldn't be available to the guest through memory slots.
> 
> Yes.
> 
>> 
>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>> 
>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>> 
> 
> Sorry, looks I'm misleading you :-P
> 
>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>> 
>> This path does fix up the shadow (host) TLB entry :).
>> 
> 
> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)

I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.

We could take that information from 2 other side though:

  1) Memory Slot
  2) Guest TLB Flags

If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.

If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Either way, not trusting anyone is definitely the safer choice.


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18  9:48                 ` Alexander Graf
@ 2013-07-18 10:08                   ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18 10:08 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 05:48 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>
>>
>>
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"tiejun.chen"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "tiejun.chen"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>>
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>>
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>>
>>>>>>
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>>
>>>>>>       KVM: direct mmio pfn check
>>>>>>
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>>
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>>
>>> Then I will wait till someone educate me :)
>>
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>
> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>
>    1) Non cache coherent DMA
>    2) Memory hot remove
>
> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>
>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>          default n if PPC_47x
>          default y
>
> so we never hit it with any core we care about ;).
>
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Thanks for this good information :)

So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() 
to make sure that check is only valid when that is really needed? This can 
decrease those unnecessary performance loss.

If I'm wrong please correct me :)

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 10:08                   ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18 10:08 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 05:48 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>
>>
>>
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"tiejun.chen"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "tiejun.chen"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>>
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>>
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>>
>>>>>>
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>>
>>>>>>       KVM: direct mmio pfn check
>>>>>>
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>>
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>>
>>> Then I will wait till someone educate me :)
>>
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>
> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>
>    1) Non cache coherent DMA
>    2) Memory hot remove
>
> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>
>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>          default n if PPC_47x
>          default y
>
> so we never hit it with any core we care about ;).
>
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Thanks for this good information :)

So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() 
to make sure that check is only valid when that is really needed? This can 
decrease those unnecessary performance loss.

If I'm wrong please correct me :)

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18 10:08                   ` "“tiejun.chen”"
@ 2013-07-18 10:12                     ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18 10:12 UTC (permalink / raw)
  To: “tiejun.chen”
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 12:08, “tiejun.chen” wrote:

> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"tiejun.chen"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>> 
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "tiejun.chen"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>> 
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>    	return mas3;
>>>>>>>>>>    }
>>>>>>>>>> 
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>    {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>> 
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>> 
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>> 
>>>>>>> 
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>> 
>>>>>>>      KVM: direct mmio pfn check
>>>>>>> 
>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>> 
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>> 
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>> 
>>>> Then I will wait till someone educate me :)
>>> 
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>> 
>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>> 
>>   1) Non cache coherent DMA
>>   2) Memory hot remove
>> 
>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>> 
>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>         default n if PPC_47x
>>         default y
>> 
>> so we never hit it with any core we care about ;).
>> 
>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
> 
> Thanks for this good information :)
> 
> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
> 
> If I'm wrong please correct me :)

You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.

I'd rather not like to break x86 :).

However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 10:12                     ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18 10:12 UTC (permalink / raw)
  To: “tiejun.chen”
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 12:08, “tiejun.chen” wrote:

> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"tiejun.chen"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>> 
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "tiejun.chen"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "tiejun.chen" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>> 
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>    	return mas3;
>>>>>>>>>>    }
>>>>>>>>>> 
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>    {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>> 
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>> 
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>> 
>>>>>>> 
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>> 
>>>>>>>      KVM: direct mmio pfn check
>>>>>>> 
>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>> 
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>> 
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>> 
>>>> Then I will wait till someone educate me :)
>>> 
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>> 
>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>> 
>>   1) Non cache coherent DMA
>>   2) Memory hot remove
>> 
>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>> 
>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>         default n if PPC_47x
>>         default y
>> 
>> so we never hit it with any core we care about ;).
>> 
>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
> 
> Thanks for this good information :)
> 
> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
> 
> If I'm wrong please correct me :)

You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.

I'd rather not like to break x86 :).

However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18 10:00                       ` Alexander Graf
@ 2013-07-18 10:14                         ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18 10:14 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 06:00 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 11:56, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:55, �tiejun.chen� wrote:
>>>
>>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"�tiejun.chen�"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>>
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>>
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>      	return mas3;
>>>>>>>>>>>>      }
>>>>>>>>>>>>
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>      {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>>
>>>>>>>>>        KVM: direct mmio pfn check
>>>>>>>>>
>>>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>>
>>>>>>> No, I myself didn't see these actual cases in qemu, too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>>
>>>>>> Then I will wait till someone educate me :)
>>>>>
>>>>> The reason is, the kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call it for every tlbwe operation unless it is necessary.
>>>>
>>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>>>
>>> Because other devices wouldn't be available to the guest through memory slots.
>>
>> Yes.
>>
>>>
>>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>>>
>>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>>>
>>
>> Sorry, looks I'm misleading you :-P
>>
>>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>>>
>>> This path does fix up the shadow (host) TLB entry :).
>>>
>>
>> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)
>
> I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.
>
> We could take that information from 2 other side though:
>
>    1) Memory Slot
>    2) Guest TLB Flags
>
> If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.
>
> If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Understood.

>
> Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Yes, we should certainly set I|G for that TLB entry mapping to device.

>
> Either way, not trusting anyone is definitely the safer choice.

Definitely :)

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 10:14                         ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18 10:14 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 06:00 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 11:56, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:55, �tiejun.chen� wrote:
>>>
>>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"�tiejun.chen�"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>>
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>>
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>      	return mas3;
>>>>>>>>>>>>      }
>>>>>>>>>>>>
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>      {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>>
>>>>>>>>>        KVM: direct mmio pfn check
>>>>>>>>>
>>>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>>
>>>>>>> No, I myself didn't see these actual cases in qemu, too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>>
>>>>>> Then I will wait till someone educate me :)
>>>>>
>>>>> The reason is, the kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call it for every tlbwe operation unless it is necessary.
>>>>
>>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>>>
>>> Because other devices wouldn't be available to the guest through memory slots.
>>
>> Yes.
>>
>>>
>>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>>>
>>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>>>
>>
>> Sorry, looks I'm misleading you :-P
>>
>>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>>>
>>> This path does fix up the shadow (host) TLB entry :).
>>>
>>
>> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)
>
> I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.
>
> We could take that information from 2 other side though:
>
>    1) Memory Slot
>    2) Guest TLB Flags
>
> If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.
>
> If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Understood.

>
> Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Yes, we should certainly set I|G for that TLB entry mapping to device.

>
> Either way, not trusting anyone is definitely the safer choice.

Definitely :)

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18 10:12                     ` Alexander Graf
@ 2013-07-18 10:19                       ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18 10:19 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 06:12 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"�tiejun.chen�"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>>
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>>
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>>
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>>
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>>
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>>
>>>>>> No, I myself didn't see these actual cases in qemu, too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>>
>>>>> Then I will wait till someone educate me :)
>>>>
>>>> The reason is, the kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call it for every tlbwe operation unless it is necessary.
>>>
>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>
>>>    1) Non cache coherent DMA
>>>    2) Memory hot remove
>>>
>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>
>>>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>          default n if PPC_47x
>>>          default y
>>>
>>> so we never hit it with any core we care about ;).
>>>
>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>
>> Thanks for this good information :)
>>
>> So why not limit that code with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure the check is only performed when it is really needed? This can avoid that unnecessary performance loss.
>>
>> If I'm wrong please correct me :)
>
> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>
> I'd rather not like to break x86 :).
>
> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>

What kind of test case should typically be adopted to validate this scenario?

Tiejun

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 10:19                       ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-18 10:19 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 06:12 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"�tiejun.chen�"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>>
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>>
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>>
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>>
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>>
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>>
>>>>>> No, I myself didn't see these actual cases in qemu, too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>>
>>>>> Then I will wait till someone educate me :)
>>>>
>>>> The reason is, the kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call it for every tlbwe operation unless it is necessary.
>>>
>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>
>>>    1) Non cache coherent DMA
>>>    2) Memory hot remove
>>>
>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>
>>>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>          default n if PPC_47x
>>>          default y
>>>
>>> so we never hit it with any core we care about ;).
>>>
>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>
>> Thanks for this good information :)
>>
>> So why not limit that code with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure the check is only performed when it is really needed? This can avoid that unnecessary performance loss.
>>
>> If I'm wrong please correct me :)
>
> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>
> I'd rather not like to break x86 :).
>
> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>

What kind of test case should typically be adopted to validate this scenario?

Tiejun


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18 10:19                       ` "“tiejun.chen”"
@ 2013-07-18 10:27                         ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18 10:27 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 12:19, “tiejun.chen” wrote:

> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"�tiejun.chen�"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>> 
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>> 
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>> 
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>    	return mas3;
>>>>>>>>>>>>    }
>>>>>>>>>>>> 
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>    {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>> 
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>> 
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>> 
>>>>>>>>>      KVM: direct mmio pfn check
>>>>>>>>> 
>>>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>> 
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>> 
>>>>>>> No, I myself didn't see these actual cases in qemu, too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>> 
>>>>>> Then I will wait till someone educate me :)
>>>>> 
>>>>> The reason is, the kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call it for every tlbwe operation unless it is necessary.
>>>> 
>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>> 
>>>>   1) Non cache coherent DMA
>>>>   2) Memory hot remove
>>>> 
>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>> 
>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>         default n if PPC_47x
>>>>         default y
>>>> 
>>>> so we never hit it with any core we care about ;).
>>>> 
>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>> 
>>> Thanks for this good information :)
>>> 
>>> So why not limit that code with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure the check is only performed when it is really needed? This can avoid that unnecessary performance loss.
>>> 
>>> If I'm wrong please correct me :)
>> 
>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>> 
>> I'd rather not like to break x86 :).
>> 
>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>> 
> 
> What kind of test case should typically be adopted to validate this scenario?

Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 10:27                         ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-18 10:27 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421


On 18.07.2013, at 12:19, “tiejun.chen” wrote:

> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"“tiejun.chen”"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>> 
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>> 
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>> 
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>    	return mas3;
>>>>>>>>>>>>    }
>>>>>>>>>>>> 
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>    {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>> 
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>> 
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>> 
>>>>>>>>>      KVM: direct mmio pfn check
>>>>>>>>> 
>>>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>> 
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>> 
>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>> 
>>>>>> Then I will wait till someone educate me :)
>>>>> 
>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>> 
>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>> 
>>>>   1) Non cache coherent DMA
>>>>   2) Memory hot remove
>>>> 
>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>> 
>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>         default n if PPC_47x
>>>>         default y
>>>> 
>>>> so we never hit it with any core we care about ;).
>>>> 
>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>> 
>>> Thanks for this good information :)
>>> 
>>> So why not limit that code with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure the check is only performed when it is really needed? This would avoid the unnecessary performance loss.
>>> 
>>> If I'm wrong please correct me :)
>> 
>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>> 
>> I'd rather not like to break x86 :).
>> 
>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>> 
> 
> What test case is typically used to validate this scenario?

Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18 10:00                       ` Alexander Graf
@ 2013-07-18 16:11                         ` Scott Wood
  -1 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-18 16:11 UTC (permalink / raw)
  To: Alexander Graf
  Cc: “tiejun.chen”,
	Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 05:00:42 AM, Alexander Graf wrote:
> Now why is setting invalid flags a problem? If I understand Scott  
> correctly, it can break the host if you access certain host devices  
> with caching enabled. But to be sure I'd say we ask him directly :).

The architecture makes it illegal to mix cacheable and cache-inhibited  
mappings to the same physical page.  Mixing W or M bits is generally  
bad as well.  I've seen it cause machine checks, error interrupts, etc.  
-- not just corrupting the page in question.

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-18 16:11                         ` Scott Wood
  0 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-18 16:11 UTC (permalink / raw)
  To: Alexander Graf
  Cc: “tiejun.chen”,
	Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 05:00:42 AM, Alexander Graf wrote:
> Now why is setting invalid flags a problem? If I understand Scott  
> correctly, it can break the host if you access certain host devices  
> with caching enabled. But to be sure I'd say we ask him directly :).

The architecture makes it illegal to mix cacheable and cache-inhibited  
mappings to the same physical page.  Mixing W or M bits is generally  
bad as well.  I've seen it cause machine checks, error interrupts, etc.  
-- not just corrupting the page in question.

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-18 10:27                         ` Alexander Graf
@ 2013-07-24  2:26                           ` "“tiejun.chen”"
  -1 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-24  2:26 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 06:27 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
>
>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>>>
>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>>>
>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: Bhushan Bharat-R65777
>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>>> To: '"“tiejun.chen”"'
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>>> B07421
>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>>> managed pages
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott-
>>>>>>>>>> B07421
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>>> kernel managed pages
>>>>>>>>>>
>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>>> Wood
>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>>
>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>>> inhibited,
>>>>>>>>>>>>> guarded)
>>>>>>>>>>>>>
>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>>
>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>>>     }
>>>>>>>>>>>>>
>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>>     {
>>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>>
>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>>
>>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>>> that it
>>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>>> is DDR.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>>> so,
>>>>>>>>>>
>>>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>>>>
>>>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>>>> pages rather than
>>>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>>>> mmio
>>>>>>>> pages
>>>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>>
>>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>>> PageReserved helps in
>>>>>>>> those cases?
>>>>>>>>
>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>>> be chronically persistent as I understand ;-)
>>>>>>>
>>>>>>> Then I will wait till someone educate me :)
>>>>>>
>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>>>
>>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>>>
>>>>>    1) Non cache coherent DMA
>>>>>    2) Memory hot remove
>>>>>
>>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>>>
>>>>>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>>          default n if PPC_47x
>>>>>          default y
>>>>>
>>>>> so we never hit it with any core we care about ;).
>>>>>
>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>>>
>>>> Thanks for this good information :)
>>>>
>>>> So why not limit that code with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure the check is only performed when it is really needed? This would avoid the unnecessary performance loss.
>>>>
>>>> If I'm wrong please correct me :)
>>>
>>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>>>
>>> I'd rather not like to break x86 :).
>>>
>>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>>>
>>
>> What test case is typically used to validate this scenario?
>
> Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)
>

I tried to run five times with this combination, "time `for ((i=0; i<5000; 
i++));  do /bin/echo; done`", to calculate the average value with this change:

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1580dd4..5e8635b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,6 +102,10 @@ static bool largepages_enabled = true;

  bool kvm_is_mmio_pfn(pfn_t pfn)
  {
+#ifdef CONFIG_MEMORY_HOTPLUG
+       /*
+        * Currently only in memory hot remove case we may still need this.
+        */
         if (pfn_valid(pfn)) {
                 int reserved;
                 struct page *tail = pfn_to_page(pfn);
@@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
                 }
                 return PageReserved(tail);
         }
+#endif

         return true;
  }

Before apply this change:

real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s

After apply this change:

real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s

So,

real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.9%
user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%

Note I only boot one VM.

Tiejun

^ permalink raw reply related	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24  2:26                           ` "“tiejun.chen”"
  0 siblings, 0 replies; 82+ messages in thread
From: "“tiejun.chen”" @ 2013-07-24  2:26 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Bhushan Bharat-R65777, kvm-ppc, kvm, Wood Scott-B07421

On 07/18/2013 06:27 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
>
>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>>>
>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>>>
>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: Bhushan Bharat-R65777
>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>>> To: '"“tiejun.chen”"'
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>>> B07421
>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>>> managed pages
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott-
>>>>>>>>>> B07421
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>>> kernel managed pages
>>>>>>>>>>
>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>>> Wood
>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>>
>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>>> inhibited,
>>>>>>>>>>>>> guarded)
>>>>>>>>>>>>>
>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>>
>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>>>     }
>>>>>>>>>>>>>
>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>>     {
>>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>>
>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>>
>>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>>> that it
>>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>>> is DDR.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>>> so,
>>>>>>>>>>
>>>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>>>>
>>>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>>>> pages rather than
>>>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>>>> mmio
>>>>>>>> pages
>>>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>>
>>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>>> PageReserved helps in
>>>>>>>> those cases?
>>>>>>>>
>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>>> be chronically persistent as I understand ;-)
>>>>>>>
>>>>>>> Then I will wait till someone educate me :)
>>>>>>
>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>>>
>>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>>>
>>>>>    1) Non cache coherent DMA
>>>>>    2) Memory hot remove
>>>>>
>>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>>>
>>>>>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>>          default n if PPC_47x
>>>>>          default y
>>>>>
>>>>> so we never hit it with any core we care about ;).
>>>>>
>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>>>
>>>> Thanks for this good information :)
>>>>
>>>> So why not limit that code with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure the check is only performed when it is really needed? This would avoid the unnecessary performance loss.
>>>>
>>>> If I'm wrong please correct me :)
>>>
>>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>>>
>>> I'd rather not like to break x86 :).
>>>
>>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>>>
>>
>> What test case is typically used to validate this scenario?
>
> Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)
>

I tried to run five times with this combination, "time `for ((i=0; i<5000; 
i++));  do /bin/echo; done`", to calculate the average value with this change:

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1580dd4..5e8635b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,6 +102,10 @@ static bool largepages_enabled = true;

  bool kvm_is_mmio_pfn(pfn_t pfn)
  {
+#ifdef CONFIG_MEMORY_HOTPLUG
+       /*
+        * Currently only in memory hot remove case we may still need this.
+        */
         if (pfn_valid(pfn)) {
                 int reserved;
                 struct page *tail = pfn_to_page(pfn);
@@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
                 }
                 return PageReserved(tail);
         }
+#endif

         return true;
  }

Before apply this change:

real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s

After apply this change:

real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s

So,

real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.9%
user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%

Note I only boot one VM.

Tiejun


^ permalink raw reply related	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  2:26                           ` "“tiejun.chen”"
@ 2013-07-24  8:25                             ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24  8:25 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Gleb Natapov, Paolo Bonzini


On 24.07.2013, at 04:26, “tiejun.chen” wrote:

> On 07/18/2013 06:27 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>>>> 
>>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>>>> 
>>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: Bhushan Bharat-R65777
>>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>>>> To: '"“tiejun.chen”"'
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>>>> B07421
>>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>>>> managed pages
>>>>>>>> 
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott-
>>>>>>>>>>> B07421
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>>>> kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>>>> Wood
>>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>>> 
>>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>>>> inhibited,
>>>>>>>>>>>>>> guarded)
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>>    	return mas3;
>>>>>>>>>>>>>>    }
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>>>    {
>>>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>>> 
>>>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>>>> that it
>>>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>>>> is DDR.
>>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>>>> so,
>>>>>>>>>>> 
>>>>>>>>>>>      KVM: direct mmio pfn check
>>>>>>>>>>> 
>>>>>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>>>>>> pages rather than
>>>>>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>>>>>> mmio
>>>>>>>>> pages
>>>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>>> 
>>>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>>>> PageReserved helps in
>>>>>>>>> those cases?
>>>>>>>>> 
>>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>>>> be chronically persistent as I understand ;-)
>>>>>>>> 
>>>>>>>> Then I will wait till someone educate me :)
>>>>>>> 
>>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>>>> 
>>>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>>>> 
>>>>>>   1) Non cache coherent DMA
>>>>>>   2) Memory hot remove
>>>>>> 
>>>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>>>> 
>>>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>>>         default n if PPC_47x
>>>>>>         default y
>>>>>> 
>>>>>> so we never hit it with any core we care about ;).
>>>>>> 
>>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>>>> 
>>>>> Thanks for this good information :)
>>>>> 
>>>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
>>>>> 
>>>>> If I'm wrong please correct me :)
>>>> 
>>>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>>>> 
>>>> I'd rather not like to break x86 :).
>>>> 
>>>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>>>> 
>>> 
>>> Often what case should be adopted to validate this scenario?
>> 
>> Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)
>> 
> 
> I tried to run five times with this combination, "time `for ((i=0; i<5000; i++));  do /bin/echo; done`", to calculate the average value with this change:
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 1580dd4..5e8635b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> 
> bool kvm_is_mmio_pfn(pfn_t pfn)
> {
> +#ifdef CONFIG_MEMORY_HOTPLUG

I'd feel safer if we narrow this down to e500.

> +       /*
> +        * Currently only in memory hot remove case we may still need this.
> +        */
>        if (pfn_valid(pfn)) {

We still have to check for pfn_valid, no? So the #ifdef should be down here.

>                int reserved;
>                struct page *tail = pfn_to_page(pfn);
> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>                }
>                return PageReserved(tail);
>        }
> +#endif
> 
>        return true;
> }
> 
> Before apply this change:
> 
> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> 
> After apply this change:
> 
> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> 
> So,
> 
> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%

Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.

Gleb, Paolo, any hard feelings?


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24  8:25                             ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24  8:25 UTC (permalink / raw)
  To: "“tiejun.chen”"
  Cc: Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Gleb Natapov, Paolo Bonzini


On 24.07.2013, at 04:26, “tiejun.chen” wrote:

> On 07/18/2013 06:27 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>>>> 
>>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>>>> 
>>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: Bhushan Bharat-R65777
>>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>>>> To: '"“tiejun.chen”"'
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>>>> B07421
>>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>>>> managed pages
>>>>>>>> 
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott-
>>>>>>>>>>> B07421
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>>>> kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>>>> Wood
>>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>>> 
>>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>>>> inhibited,
>>>>>>>>>>>>>> guarded)
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>>    	return mas3;
>>>>>>>>>>>>>>    }
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>>>    {
>>>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>>> 
>>>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>>>> that it
>>>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>>>> is DDR.
>>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>>>> so,
>>>>>>>>>>> 
>>>>>>>>>>>      KVM: direct mmio pfn check
>>>>>>>>>>> 
>>>>>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>>>>>> pages rather than
>>>>>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>>>>>> mmio
>>>>>>>>> pages
>>>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>>> 
>>>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>>>> PageReserved helps in
>>>>>>>>> those cases?
>>>>>>>>> 
>>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>>>> be chronically persistent as I understand ;-)
>>>>>>>> 
>>>>>>>> Then I will wait till someone educate me :)
>>>>>>> 
>>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>>>> 
>>>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>>>> 
>>>>>>   1) Non cache coherent DMA
>>>>>>   2) Memory hot remove
>>>>>> 
>>>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>>>> 
>>>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>>>         default n if PPC_47x
>>>>>>         default y
>>>>>> 
>>>>>> so we never hit it with any core we care about ;).
>>>>>> 
>>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>>>> 
>>>>> Thanks for this good information :)
>>>>> 
>>>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
>>>>> 
>>>>> If I'm wrong please correct me :)
>>>> 
>>>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>>>> 
>>>> I'd rather not like to break x86 :).
>>>> 
>>>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>>>> 
>>> 
>>> Often what case should be adopted to validate this scenario?
>> 
>> Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)
>> 
> 
> I tried to run five times with this combination, "time `for ((i=0; i<5000; i++));  do /bin/echo; done`", to calculate the average value with this change:
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 1580dd4..5e8635b 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> 
> bool kvm_is_mmio_pfn(pfn_t pfn)
> {
> +#ifdef CONFIG_MEMORY_HOTPLUG

I'd feel safer if we narrow this down to e500.

> +       /*
> +        * Currently only in memory hot remove case we may still need this.
> +        */
>        if (pfn_valid(pfn)) {

We still have to check for pfn_valid, no? So the #ifdef should be down here.

>                int reserved;
>                struct page *tail = pfn_to_page(pfn);
> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>                }
>                return PageReserved(tail);
>        }
> +#endif
> 
>        return true;
> }
> 
> Before apply this change:
> 
> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> 
> After apply this change:
> 
> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> 
> So,
> 
> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%

Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.

Gleb, Paolo, any hard feelings?


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  8:25                             ` Alexander Graf
@ 2013-07-24  9:11                               ` Bhushan Bharat-R65777
  -1 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-24  9:11 UTC (permalink / raw)
  To: Alexander Graf, "“tiejun.chen”"
  Cc: kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Gleb Natapov, Paolo Bonzini



> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Wednesday, July 24, 2013 1:55 PM
> To: "“tiejun.chen”"
> Cc: Bhushan Bharat-R65777; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org list;
> Wood Scott-B07421; Gleb Natapov; Paolo Bonzini
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> 
> On 24.07.2013, at 04:26, “tiejun.chen” wrote:
> 
> > On 07/18/2013 06:27 PM, Alexander Graf wrote:
> >>
> >> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
> >>
> >>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
> >>>>
> >>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
> >>>>
> >>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
> >>>>>>
> >>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
> >>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>>> -----Original Message-----
> >>>>>>>> From: Bhushan Bharat-R65777
> >>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
> >>>>>>>> To: '" tiejun.chen "'
> >>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> >>>>>>>> agraf@suse.de; Wood Scott-
> >>>>>>>> B07421
> >>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>>>>> for kernel managed pages
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>> -----Original Message-----
> >>>>>>>>> From: " tiejun.chen " [mailto:tiejun.chen@windriver.com]
> >>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
> >>>>>>>>> To: Bhushan Bharat-R65777
> >>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> >>>>>>>>> agraf@suse.de; Wood
> >>>>>>>>> Scott-
> >>>>>>>>> B07421
> >>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
> >>>>>>>>> only for kernel managed pages
> >>>>>>>>>
> >>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>> -----Original Message-----
> >>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of " tiejun.chen "
> >>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
> >>>>>>>>>>> To: Bhushan Bharat-R65777
> >>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> >>>>>>>>>>> agraf@suse.de; Wood
> >>>>>>>>>>> Scott-
> >>>>>>>>>>> B07421
> >>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
> >>>>>>>>>>> only for kernel managed pages
> >>>>>>>>>>>
> >>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>>> -----Original Message-----
> >>>>>>>>>>>>> From: " tiejun.chen " [mailto:tiejun.chen@windriver.com]
> >>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
> >>>>>>>>>>>>> To: Bhushan Bharat-R65777
> >>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> >>>>>>>>>>>>> agraf@suse.de; Wood
> >>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
> >>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
> >>>>>>>>>>>>> only for kernel managed pages
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> >>>>>>>>>>>>>> If there is a struct page for the requested mapping then
> >>>>>>>>>>>>>> it's normal DDR and the mapping sets "M" bit (coherent,
> >>>>>>>>>>>>>> cacheable) else this is treated as I/O and we set  "I +
> >>>>>>>>>>>>>> G"  (cache inhibited,
> >>>>>>>>>>>>>> guarded)
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned
> >>>>>>>>>>>>>> device
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan
> >>>>>>>>>>>>>> <bharat.bhushan@freescale.com>
> >>>>>>>>>>>>>> ---
> >>>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >>>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
> >>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
> >>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
> >>>>>>>>>>>>> usermode)
> >>>>>>>>>>>>>>    	return mas3;
> >>>>>>>>>>>>>>    }
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
> >>>>>>>>>>>>>> usermode)
> >>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2,
> >>>>>>>>>>>>>> +pfn_t pfn)
> >>>>>>>>>>>>>>    {
> >>>>>>>>>>>>>> +	u32 mas2_attr;
> >>>>>>>>>>>>>> +
> >>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> >>>>>>>>>>>>>> +
> >>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
> >>>>>>>>>>>>
> >>>>>>>>>>>> What I understand from this function (someone can correct
> >>>>>>>>>>>> me) is that it
> >>>>>>>>>>> returns "false" when the page is managed by kernel and is
> >>>>>>>>>>> not marked as RESERVED (for some reason). For us it does not
> >>>>>>>>>>> matter whether the page is reserved or not, if it is kernel
> >>>>>>>>>>> visible page then it
> >>>>>>>> is DDR.
> >>>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> I think you are setting I|G by addressing all mmio pages,
> >>>>>>>>>>> right? If so,
> >>>>>>>>>>>
> >>>>>>>>>>>      KVM: direct mmio pfn check
> >>>>>>>>>>>
> >>>>>>>>>>>      Userspace may specify memory slots that are backed by
> >>>>>>>>>>> mmio pages rather than
> >>>>>>>>>>>      normal RAM.  In some cases it is not enough to identify
> >>>>>>>>>>> these mmio
> >>>>>>>>> pages
> >>>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as
> well.
> >>>>>>>>>>
> >>>>>>>>>> Do you know what are those "some cases" and how checking
> >>>>>>>>>> PageReserved helps in
> >>>>>>>>> those cases?
> >>>>>>>>>
> >>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this
> >>>>>>>>> should be chronically persistent as I understand ;-)
> >>>>>>>>
> >>>>>>>> Then I will wait till someone educate me :)
> >>>>>>>
> >>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do
> not want to call this for all tlbwe operation unless it is necessary.
> >>>>>>
> >>>>>> It certainly does more than we need and potentially slows down the fast
> path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to
> check for pages that are declared reserved on the host. This happens in 2 cases:
> >>>>>>
> >>>>>>   1) Non cache coherent DMA
> >>>>>>   2) Memory hot remove
> >>>>>>
> >>>>>> The non coherent DMA case would be interesting, as with the mechanism as
> it is in place in Linux today, we could potentially break normal guest operation
> if we don't take it into account. However, it's Kconfig guarded by:
> >>>>>>
> >>>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
> >>>>>>         default n if PPC_47x
> >>>>>>         default y
> >>>>>>
> >>>>>> so we never hit it with any core we care about ;).
> >>>>>>
> >>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry
> about that one either.
> >>>>>
> >>>>> Thanks for this good information :)
> >>>>>
> >>>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside
> kvm_is_mmio_pfn() to make sure that check is only valid when that is really
> needed? This can decrease those unnecessary performance loss.
> >>>>>
> >>>>> If I'm wrong please correct me :)
> >>>>
> >>>> You're perfectly right, but this is generic KVM code. So it gets run across
> all architectures. What if someone has the great idea to add a new case here for
> x86, but doesn't tell us? In that case we potentially break x86.
> >>>>
> >>>> I'd rather not like to break x86 :).
> >>>>
> >>>> However, it'd be very interesting to see a benchmark with this change. Do
> you think you could just rip out the whole reserved check and run a few
> benchmarks and show us the results?
> >>>>
> >>>
> >>> Often what case should be adopted to validate this scenario?
> >>
> >> Something which hammers the TLB emulation heavily. I usually just run
> >> /bin/echo a thousand times in "time" and see how long it takes ;)
> >>
> >
> > I tried to run five times with this combination, "time `for ((i=0; i<5000;
> i++));  do /bin/echo; done`", to calculate the average value with this change:
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index
> > 1580dd4..5e8635b 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> >
> > bool kvm_is_mmio_pfn(pfn_t pfn)
> > {
> > +#ifdef CONFIG_MEMORY_HOTPLUG
> 
> I'd feel safer if we narrow this down to e500.
> 
> > +       /*
> > +        * Currently only in memory hot remove case we may still need this.
> > +        */
> >        if (pfn_valid(pfn)) {
> 
> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> 
> >                int reserved;
> >                struct page *tail = pfn_to_page(pfn); @@ -124,6 +128,7
> > @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >                }
> >                return PageReserved(tail);
> >        }
> > +#endif
> >
> >        return true;
> > }
> >
> > Before apply this change:
> >
> > real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5=
> 1m21.376s
> > user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5=
> 0m23.433s
> > sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> >
> > After apply this change:
> >
> > real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5=
> 1m20.667s
> > user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5=
> 0m22.615s
> > sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> >
> > So,
> >
> > real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> > user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> > sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> 
> Very nice, so there is a real world performance benefit to doing this. Then yes,
> I think it would make sense to change the global helper function to be fast on
> e500 and use that one from e500_shadow_mas2_attrib() instead.

Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?

-Bharat

> 
> Gleb, Paolo, any hard feelings?
> 
> 
> Alex
> 


^ permalink raw reply	[flat|nested] 82+ messages in thread

* RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24  9:11                               ` Bhushan Bharat-R65777
  0 siblings, 0 replies; 82+ messages in thread
From: Bhushan Bharat-R65777 @ 2013-07-24  9:11 UTC (permalink / raw)
  To: Alexander Graf, "“tiejun.chen”"
  Cc: kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Gleb Natapov, Paolo Bonzini

DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogQWxleGFuZGVyIEdyYWYg
W21haWx0bzphZ3JhZkBzdXNlLmRlXQ0KPiBTZW50OiBXZWRuZXNkYXksIEp1bHkgMjQsIDIwMTMg
MTo1NSBQTQ0KPiBUbzogIuKAnHRpZWp1bi5jaGVu4oCdIg0KPiBDYzogQmh1c2hhbiBCaGFyYXQt
UjY1Nzc3OyBrdm0tcHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZyBsaXN0
Ow0KPiBXb29kIFNjb3R0LUIwNzQyMTsgR2xlYiBOYXRhcG92OyBQYW9sbyBCb256aW5pDQo+IFN1
YmplY3Q6IFJlOiBbUEFUQ0ggMi8yXSBrdm06IHBvd2VycGM6IHNldCBjYWNoZSBjb2hlcmVuY3kg
b25seSBmb3Iga2VybmVsDQo+IG1hbmFnZWQgcGFnZXMNCj4gDQo+IA0KPiBPbiAyNC4wNy4yMDEz
LCBhdCAwNDoyNiwg4oCcdGllanVuLmNoZW7igJ0gd3JvdGU6DQo+IA0KPiA+IE9uIDA3LzE4LzIw
MTMgMDY6MjcgUE0sIEFsZXhhbmRlciBHcmFmIHdyb3RlOg0KPiA+Pg0KPiA+PiBPbiAxOC4wNy4y
MDEzLCBhdCAxMjoxOSwg4oCcdGllanVuLmNoZW7igJ0gd3JvdGU6DQo+ID4+DQo+ID4+PiBPbiAw
Ny8xOC8yMDEzIDA2OjEyIFBNLCBBbGV4YW5kZXIgR3JhZiB3cm90ZToNCj4gPj4+Pg0KPiA+Pj4+
IE9uIDE4LjA3LjIwMTMsIGF0IDEyOjA4LCDigJx0aWVqdW4uY2hlbuKAnSB3cm90ZToNCj4gPj4+
Pg0KPiA+Pj4+PiBPbiAwNy8xOC8yMDEzIDA1OjQ4IFBNLCBBbGV4YW5kZXIgR3JhZiB3cm90ZToN
Cj4gPj4+Pj4+DQo+ID4+Pj4+PiBPbiAxOC4wNy4yMDEzLCBhdCAxMDoyNSwgQmh1c2hhbiBCaGFy
YXQtUjY1Nzc3IHdyb3RlOg0KPiA+Pj4+Pj4NCj4gPj4+Pj4+Pg0KPiA+Pj4+Pj4+DQo+ID4+Pj4+
Pj4+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+ID4+Pj4+Pj4+IEZyb206IEJodXNoYW4g
QmhhcmF0LVI2NTc3Nw0KPiA+Pj4+Pj4+PiBTZW50OiBUaHVyc2RheSwgSnVseSAxOCwgMjAxMyAx
OjUzIFBNDQo+ID4+Pj4+Pj4+IFRvOiAnIiB0aWVqdW4uY2hlbiAiJw0KPiA+Pj4+Pj4+PiBDYzog
a3ZtLXBwY0B2Z2VyLmtlcm5lbC5vcmc7IGt2bUB2Z2VyLmtlcm5lbC5vcmc7DQo+ID4+Pj4+Pj4+
IGFncmFmQHN1c2UuZGU7IFdvb2QgU2NvdHQtDQo+ID4+Pj4+Pj4+IEIwNzQyMQ0KPiA+Pj4+Pj4+
PiBTdWJqZWN0OiBSRTogW1BBVENIIDIvMl0ga3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJl
bmN5IG9ubHkNCj4gPj4+Pj4+Pj4gZm9yIGtlcm5lbCBtYW5hZ2VkIHBhZ2VzDQo+ID4+Pj4+Pj4+
DQo+ID4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+PiAtLS0tLU9yaWdpbmFsIE1lc3Nh
Z2UtLS0tLQ0KPiA+Pj4+Pj4+Pj4gRnJvbTogIiB0aWVqdW4uY2hlbiAiIFttYWlsdG86dGllanVu
LmNoZW5Ad2luZHJpdmVyLmNvbV0NCj4gPj4+Pj4+Pj4+IFNlbnQ6IFRodXJzZGF5LCBKdWx5IDE4
LCAyMDEzIDE6NTIgUE0NCj4gPj4+Pj4+Pj4+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4g
Pj4+Pj4+Pj4+IENjOiBrdm0tcHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9y
ZzsNCj4gPj4+Pj4+Pj4+IGFncmFmQHN1c2UuZGU7IFdvb2QNCj4gPj4+Pj4+Pj4+IFNjb3R0LQ0K
PiA+Pj4+Pj4+Pj4gQjA3NDIxDQo+ID4+Pj4+Pj4+PiBTdWJqZWN0OiBSZTogW1BBVENIIDIvMl0g
a3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJlbmN5DQo+ID4+Pj4+Pj4+PiBvbmx5IGZvciBr
ZXJuZWwgbWFuYWdlZCBwYWdlcw0KPiA+Pj4+Pj4+Pj4NCj4gPj4+Pj4+Pj4+IE9uIDA3LzE4LzIw
MTMgMDQ6MDggUE0sIEJodXNoYW4gQmhhcmF0LVI2NTc3NyB3cm90ZToNCj4gPj4+Pj4+Pj4+Pg0K
PiA+Pj4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+Pj4+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+
ID4+Pj4+Pj4+Pj4+IEZyb206IGt2bS1wcGMtb3duZXJAdmdlci5rZXJuZWwub3JnDQo+ID4+Pj4+
Pj4+Pj4+IFttYWlsdG86a3ZtLXBwYy1vd25lckB2Z2VyLmtlcm5lbC5vcmddIE9uIEJlaGFsZiBP
ZiAiIHRpZWp1bi5jaGVuICINCj4gPj4+Pj4+Pj4+Pj4gU2VudDogVGh1cnNkYXksIEp1bHkgMTgs
IDIwMTMgMTowMSBQTQ0KPiA+Pj4+Pj4+Pj4+PiBUbzogQmh1c2hhbiBCaGFyYXQtUjY1Nzc3DQo+
ID4+Pj4+Pj4+Pj4+IENjOiBrdm0tcHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVs
Lm9yZzsNCj4gPj4+Pj4+Pj4+Pj4gYWdyYWZAc3VzZS5kZTsgV29vZA0KPiA+Pj4+Pj4+Pj4+PiBT
Y290dC0NCj4gPj4+Pj4+Pj4+Pj4gQjA3NDIxDQo+ID4+Pj4+Pj4+Pj4+IFN1YmplY3Q6IFJlOiBb
UEFUQ0ggMi8yXSBrdm06IHBvd2VycGM6IHNldCBjYWNoZSBjb2hlcmVuY3kNCj4gPj4+Pj4+Pj4+
Pj4gb25seSBmb3Iga2VybmVsIG1hbmFnZWQgcGFnZXMNCj4gPj4+Pj4+Pj4+Pj4NCj4gPj4+Pj4+
Pj4+Pj4gT24gMDcvMTgvMjAxMyAwMzoxMiBQTSwgQmh1c2hhbiBCaGFyYXQtUjY1Nzc3IHdyb3Rl
Og0KPiA+Pj4+Pj4+Pj4+Pj4NCj4gPj4+Pj4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+Pj4+Pj4gLS0tLS1P
cmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gPj4+Pj4+Pj4+Pj4+PiBGcm9tOiAiIHRpZWp1bi5jaGVu
ICIgW21haWx0bzp0aWVqdW4uY2hlbkB3aW5kcml2ZXIuY29tXQ0KPiA+Pj4+Pj4+Pj4+Pj4+IFNl
bnQ6IFRodXJzZGF5LCBKdWx5IDE4LCAyMDEzIDExOjU2IEFNDQo+ID4+Pj4+Pj4+Pj4+Pj4gVG86
IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiA+Pj4+Pj4+Pj4+Pj4+IENjOiBrdm0tcHBjQHZnZXIu
a2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsNCj4gPj4+Pj4+Pj4+Pj4+PiBhZ3JhZkBz
dXNlLmRlOyBXb29kDQo+ID4+Pj4+Pj4+Pj4+Pj4gU2NvdHQtIEIwNzQyMTsgQmh1c2hhbiBCaGFy
YXQtUjY1Nzc3DQo+ID4+Pj4+Pj4+Pj4+Pj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJdIGt2bTog
cG93ZXJwYzogc2V0IGNhY2hlIGNvaGVyZW5jeQ0KPiA+Pj4+Pj4+Pj4+Pj4+IG9ubHkgZm9yIGtl
cm5lbCBtYW5hZ2VkIHBhZ2VzDQo+ID4+Pj4+Pj4+Pj4+Pj4NCj4gPj4+Pj4+Pj4+Pj4+PiBPbiAw
Ny8xOC8yMDEzIDAyOjA0IFBNLCBCaGFyYXQgQmh1c2hhbiB3cm90ZToNCj4gPj4+Pj4+Pj4+Pj4+
Pj4gSWYgdGhlcmUgaXMgYSBzdHJ1Y3QgcGFnZSBmb3IgdGhlIHJlcXVlc3RlZCBtYXBwaW5nIHRo
ZW4NCj4gPj4+Pj4+Pj4+Pj4+Pj4gaXQncyBub3JtYWwgRERSIGFuZCB0aGUgbWFwcGluZyBzZXRz
ICJNIiBiaXQgKGNvaGVyZW50LA0KPiA+Pj4+Pj4+Pj4+Pj4+PiBjYWNoZWFibGUpIGVsc2UgdGhp
cyBpcyB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArDQo+ID4+Pj4+Pj4+Pj4+Pj4+IEci
ICAoY2FjaGUgaW5oaWJpdGVkLA0KPiA+Pj4+Pj4+Pj4+Pj4+PiBndWFyZGVkKQ0KPiA+Pj4+Pj4+
Pj4+Pj4+Pg0KPiA+Pj4+Pj4+Pj4+Pj4+PiBUaGlzIGhlbHBzIHNldHRpbmcgcHJvcGVyIFRMQiBt
YXBwaW5nIGZvciBkaXJlY3QgYXNzaWduZWQNCj4gPj4+Pj4+Pj4+Pj4+Pj4gZGV2aWNlDQo+ID4+
Pj4+Pj4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+Pj4+Pj4+IFNpZ25lZC1vZmYtYnk6IEJoYXJhdCBCaHVz
aGFuDQo+ID4+Pj4+Pj4+Pj4+Pj4+IDxiaGFyYXQuYmh1c2hhbkBmcmVlc2NhbGUuY29tPg0KPiA+
Pj4+Pj4+Pj4+Pj4+PiAtLS0NCj4gPj4+Pj4+Pj4+Pj4+Pj4gICAgYXJjaC9wb3dlcnBjL2t2bS9l
NTAwX21tdV9ob3N0LmMgfCAgIDE3ICsrKysrKysrKysrKy0tLS0tDQo+ID4+Pj4+Pj4+Pj4+Pj4+
ICAgIDEgZmlsZXMgY2hhbmdlZCwgMTIgaW5zZXJ0aW9ucygrKSwgNSBkZWxldGlvbnMoLSkNCj4g
Pj4+Pj4+Pj4+Pj4+Pj4NCj4gPj4+Pj4+Pj4+Pj4+Pj4gZGlmZiAtLWdpdCBhL2FyY2gvcG93ZXJw
Yy9rdm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4+Pj4+Pj4+Pj4+Pj4+IGIvYXJjaC9wb3dlcnBjL2t2
bS9lNTAwX21tdV9ob3N0LmMNCj4gPj4+Pj4+Pj4+Pj4+Pj4gaW5kZXggMWM2YTlkNy4uMDg5YzIy
NyAxMDA2NDQNCj4gPj4+Pj4+Pj4+Pj4+Pj4gLS0tIGEvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21t
dV9ob3N0LmMNCj4gPj4+Pj4+Pj4+Pj4+Pj4gKysrIGIvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21t
dV9ob3N0LmMNCj4gPj4+Pj4+Pj4+Pj4+Pj4gQEAgLTY0LDEzICs2NCwyMCBAQCBzdGF0aWMgaW5s
aW5lIHUzMg0KPiA+Pj4+Pj4+Pj4+Pj4+PiBlNTAwX3NoYWRvd19tYXMzX2F0dHJpYih1MzIgbWFz
MywgaW50DQo+ID4+Pj4+Pj4+Pj4+Pj4gdXNlcm1vZGUpDQo+ID4+Pj4+Pj4+Pj4+Pj4+ICAgIAly
ZXR1cm4gbWFzMzsNCj4gPj4+Pj4+Pj4+Pj4+Pj4gICAgfQ0KPiA+Pj4+Pj4+Pj4+Pj4+Pg0KPiA+
Pj4+Pj4+Pj4+Pj4+PiAtc3RhdGljIGlubGluZSB1MzIgZTUwMF9zaGFkb3dfbWFzMl9hdHRyaWIo
dTMyIG1hczIsIGludA0KPiA+Pj4+Pj4+Pj4+Pj4+PiB1c2VybW9kZSkNCj4gPj4+Pj4+Pj4+Pj4+
Pj4gK3N0YXRpYyBpbmxpbmUgdTMyIGU1MDBfc2hhZG93X21hczJfYXR0cmliKHUzMiBtYXMyLA0K
PiA+Pj4+Pj4+Pj4+Pj4+PiArcGZuX3QgcGZuKQ0KPiA+Pj4+Pj4+Pj4+Pj4+PiAgICB7DQo+ID4+
Pj4+Pj4+Pj4+Pj4+ICsJdTMyIG1hczJfYXR0cjsNCj4gPj4+Pj4+Pj4+Pj4+Pj4gKw0KPiA+Pj4+
Pj4+Pj4+Pj4+PiArCW1hczJfYXR0ciA9IG1hczIgJiBNQVMyX0FUVFJJQl9NQVNLOw0KPiA+Pj4+
Pj4+Pj4+Pj4+PiArDQo+ID4+Pj4+Pj4+Pj4+Pj4+ICsJaWYgKCFwZm5fdmFsaWQocGZuKSkgew0K
PiA+Pj4+Pj4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+Pj4+Pj4gV2h5IG5vdCBkaXJlY3RseSB1c2Uga3Zt
X2lzX21taW9fcGZuKCk/DQo+ID4+Pj4+Pj4+Pj4+Pg0KPiA+Pj4+Pj4+Pj4+Pj4gV2hhdCBJIHVu
ZGVyc3RhbmQgZnJvbSB0aGlzIGZ1bmN0aW9uIChzb21lb25lIGNhbiBjb3JyZWN0DQo+ID4+Pj4+
Pj4+Pj4+PiBtZSkgaXMgdGhhdCBpdA0KPiA+Pj4+Pj4+Pj4+PiByZXR1cm5zICJmYWxzZSIgd2hl
biB0aGUgcGFnZSBpcyBtYW5hZ2VkIGJ5IGtlcm5lbCBhbmQgaXMNCj4gPj4+Pj4+Pj4+Pj4gbm90
IG1hcmtlZCBhcyBSRVNFUlZFRCAoZm9yIHNvbWUgcmVhc29uKS4gRm9yIHVzIGl0IGRvZXMgbm90
DQo+ID4+Pj4+Pj4+Pj4+IG1hdHRlciB3aGV0aGVyIHRoZSBwYWdlIGlzIHJlc2VydmVkIG9yIG5v
dCwgaWYgaXQgaXMga2VybmVsDQo+ID4+Pj4+Pj4+Pj4+IHZpc2libGUgcGFnZSB0aGVuIGl0DQo+
ID4+Pj4+Pj4+IGlzIEREUi4NCj4gPj4+Pj4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+Pj4+DQo+ID4+Pj4+
Pj4+Pj4+IEkgdGhpbmsgeW91IGFyZSBzZXR0aW5nIEl8RyBieSBhZGRyZXNzaW5nIGFsbCBtbWlv
IHBhZ2VzLA0KPiA+Pj4+Pj4+Pj4+PiByaWdodD8gSWYgc28sDQo+ID4+Pj4+Pj4+Pj4+DQo+ID4+
Pj4+Pj4+Pj4+ICAgICAgS1ZNOiBkaXJlY3QgbW1pbyBwZm4gY2hlY2sNCj4gPj4+Pj4+Pj4+Pj4N
Cj4gPj4+Pj4+Pj4+Pj4gICAgICBVc2Vyc3BhY2UgbWF5IHNwZWNpZnkgbWVtb3J5IHNsb3RzIHRo
YXQgYXJlIGJhY2tlZCBieQ0KPiA+Pj4+Pj4+Pj4+PiBtbWlvIHBhZ2VzIHJhdGhlciB0aGFuDQo+
ID4+Pj4+Pj4+Pj4+ICAgICAgbm9ybWFsIFJBTS4gIEluIHNvbWUgY2FzZXMgaXQgaXMgbm90IGVu
b3VnaCB0byBpZGVudGlmeQ0KPiA+Pj4+Pj4+Pj4+PiB0aGVzZSBtbWlvDQo+ID4+Pj4+Pj4+PiBw
YWdlcw0KPiA+Pj4+Pj4+Pj4+PiAgICAgIGJ5IHBmbl92YWxpZCgpLiAgVGhpcyBwYXRjaCBhZGRz
IGNoZWNraW5nIHRoZSBQYWdlUmVzZXJ2ZWQgYXMNCj4gd2VsbC4NCj4gPj4+Pj4+Pj4+Pg0KPiA+
Pj4+Pj4+Pj4+IERvIHlvdSBrbm93IHdoYXQgYXJlIHRob3NlICJzb21lIGNhc2VzIiBhbmQgaG93
IGNoZWNraW5nDQo+ID4+Pj4+Pj4+Pj4gUGFnZVJlc2VydmVkIGhlbHBzIGluDQo+ID4+Pj4+Pj4+
PiB0aG9zZSBjYXNlcz8NCj4gPj4+Pj4+Pj4+DQo+ID4+Pj4+Pj4+PiBObywgbXlzZWxmIGRpZG4n
dCBzZWUgdGhlc2UgYWN0dWFsIGNhc2VzIGluIHFlbXUsdG9vLiBCdXQgdGhpcw0KPiA+Pj4+Pj4+
Pj4gc2hvdWxkIGJlIGNocm9uaWNhbGx5IHBlcnNpc3RlbnQgYXMgSSB1bmRlcnN0YW5kIDstKQ0K
PiA+Pj4+Pj4+Pg0KPiA+Pj4+Pj4+PiBUaGVuIEkgd2lsbCB3YWl0IHRpbGwgc29tZW9uZSBlZHVj
YXRlIG1lIDopDQo+ID4+Pj4+Pj4NCj4gPj4+Pj4+PiBUaGUgcmVhc29uIGlzICwga3ZtX2lzX21t
aW9fcGZuKCkgZnVuY3Rpb24gbG9va3MgcHJldHR5IGhlYXZ5IGFuZCBJIGRvDQo+IG5vdCB3YW50
IHRvIGNhbGwgdGhpcyBmb3IgYWxsIHRsYndlIG9wZXJhdGlvbiB1bmxlc3MgaXQgaXMgbmVjZXNz
YXJ5Lg0KPiA+Pj4+Pj4NCj4gPj4+Pj4+IEl0IGNlcnRhaW5seSBkb2VzIG1vcmUgdGhhbiB3ZSBu
ZWVkIGFuZCBwb3RlbnRpYWxseSBzbG93cyBkb3duIHRoZSBmYXN0DQo+IHBhdGggKFJBTSBtYXBw
aW5nKS4gVGhlIG9ubHkgdGhpbmcgaXQgZG9lcyBvbiB0b3Agb2YgImlmIChwZm5fdmFsaWQoKSki
IGlzIHRvDQo+IGNoZWNrIGZvciBwYWdlcyB0aGF0IGFyZSBkZWNsYXJlZCByZXNlcnZlZCBvbiB0
aGUgaG9zdC4gVGhpcyBoYXBwZW5zIGluIDIgY2FzZXM6DQo+ID4+Pj4+Pg0KPiA+Pj4+Pj4gICAx
KSBOb24gY2FjaGUgY29oZXJlbnQgRE1BDQo+ID4+Pj4+PiAgIDIpIE1lbW9yeSBob3QgcmVtb3Zl
DQo+ID4+Pj4+Pg0KPiA+Pj4+Pj4gVGhlIG5vbiBjb2hlcmVudCBETUEgY2FzZSB3b3VsZCBiZSBp
bnRlcmVzdGluZywgYXMgd2l0aCB0aGUgbWVjaGFuaXNtIGFzDQo+IGl0IGlzIGluIHBsYWNlIGlu
IExpbnV4IHRvZGF5LCB3ZSBjb3VsZCBwb3RlbnRpYWxseSBicmVhayBub3JtYWwgZ3Vlc3Qgb3Bl
cmF0aW9uDQo+IGlmIHdlIGRvbid0IHRha2UgaXQgaW50byBhY2NvdW50LiBIb3dldmVyLCBpdCdz
IEtjb25maWcgZ3VhcmRlZCBieToNCj4gPj4+Pj4+DQo+ID4+Pj4+PiAgICAgICAgIGRlcGVuZHMg
b24gNHh4IHx8IDh4eCB8fCBFMjAwIHx8IFBQQ19NUEM1MTJ4IHx8IEdBTUVDVUJFX0NPTU1PTg0K
PiA+Pj4+Pj4gICAgICAgICBkZWZhdWx0IG4gaWYgUFBDXzQ3eA0KPiA+Pj4+Pj4gICAgICAgICBk
ZWZhdWx0IHkNCj4gPj4+Pj4+DQo+ID4+Pj4+PiBzbyB3ZSBuZXZlciBoaXQgaXQgd2l0aCBhbnkg
Y29yZSB3ZSBjYXJlIGFib3V0IDspLg0KPiA+Pj4+Pj4NCj4gPj4+Pj4+IE1lbW9yeSBob3QgcmVt
b3ZlIGRvZXMgbm90IGV4aXN0IG9uIGU1MDAgRldJVywgc28gd2UgZG9uJ3QgaGF2ZSB0byB3b3Jy
eQ0KPiBhYm91dCB0aGF0IG9uZSBlaXRoZXIuDQo+ID4+Pj4+DQo+ID4+Pj4+IFRoYW5rcyBmb3Ig
dGhpcyBnb29kIGluZm9ybWF0aW9uIDopDQo+ID4+Pj4+DQo+ID4+Pj4+IFNvIHdoeSBub3QgbGlt
aXQgdGhvc2UgY29kZXMgd2l0aCBDT05GSUdfTUVNT1JZX0hPVFBMVUcgaW5zaWRlDQo+IGt2bV9p
c19tbWlvX3BmbigpIHRvIG1ha2Ugc3VyZSB0aGF0IGNoZWNrIGlzIG9ubHkgdmFsaWQgd2hlbiB0
aGF0IGlzIHJlYWxseQ0KPiBuZWVkZWQ/IFRoaXMgY2FuIGRlY3JlYXNlIHRob3NlIHVubmVjZXNz
YXJ5IHBlcmZvcm1hbmNlIGxvc3MuDQo+ID4+Pj4+DQo+ID4+Pj4+IElmIEknbSB3cm9uZyBwbGVh
c2UgY29ycmVjdCBtZSA6KQ0KPiA+Pj4+DQo+ID4+Pj4gWW91J3JlIHBlcmZlY3RseSByaWdodCwg
YnV0IHRoaXMgaXMgZ2VuZXJpYyBLVk0gY29kZS4gU28gaXQgZ2V0cyBydW4gYWNyb3NzDQo+IGFs
bCBhcmNoaXRlY3R1cmVzLiBXaGF0IGlmIHNvbWVvbmUgaGFzIHRoZSBncmVhdCBpZGVhIHRvIGFk
ZCBhIG5ldyBjYXNlIGhlcmUgZm9yDQo+IHg4NiwgYnV0IGRvZXNuJ3QgdGVsbCB1cz8gSW4gdGhh
dCBjYXNlIHdlIHBvdGVudGlhbGx5IGJyZWFrIHg4Ni4NCj4gPj4+Pg0KPiA+Pj4+IEknZCByYXRo
ZXIgbm90IGxpa2UgdG8gYnJlYWsgeDg2IDopLg0KPiA+Pj4+DQo+ID4+Pj4gSG93ZXZlciwgaXQn
ZCBiZSB2ZXJ5IGludGVyZXN0aW5nIHRvIHNlZSBhIGJlbmNobWFyayB3aXRoIHRoaXMgY2hhbmdl
LiBEbw0KPiB5b3UgdGhpbmsgeW91IGNvdWxkIGp1c3QgcmlwIG91dCB0aGUgd2hvbGUgcmVzZXJ2
ZWQgY2hlY2sgYW5kIHJ1biBhIGZldw0KPiBiZW5jaG1hcmtzIGFuZCBzaG93IHVzIHRoZSByZXN1
bHRzPw0KPiA+Pj4+DQo+ID4+Pg0KPiA+Pj4gT2Z0ZW4gd2hhdCBjYXNlIHNob3VsZCBiZSBhZG9w
dGVkIHRvIHZhbGlkYXRlIHRoaXMgc2NlbmFyaW8/DQo+ID4+DQo+ID4+IFNvbWV0aGluZyB3aGlj
aCBoYW1tZXJzIHRoZSBUTEIgZW11bGF0aW9uIGhlYXZpbHkuIEkgdXN1YWxseSBqdXN0IHJ1bg0K
PiA+PiAvYmluL2VjaG8gYSB0aG91c2FuZCB0aW1lcyBpbiAidGltZSIgYW5kIHNlZSBob3cgbG9u
ZyBpdCB0YWtlcyA7KQ0KPiA+Pg0KPiA+DQo+ID4gSSB0cmllZCB0byBydW4gZml2ZSB0aW1lcyB3
aXRoIHRoaXMgY29tYmluYXRpb24sICJ0aW1lIGBmb3IgKChpPTA7IGk8NTAwMDsNCj4gaSsrKSk7
ICBkbyAvYmluL2VjaG87IGRvbmVgIiwgdG8gY2FsY3VsYXRlIHRoZSBhdmVyYWdlIHZhbHVlIHdp
dGggdGhpcyBjaGFuZ2U6DQo+ID4NCj4gPiBkaWZmIC0tZ2l0IGEvdmlydC9rdm0va3ZtX21haW4u
YyBiL3ZpcnQva3ZtL2t2bV9tYWluLmMgaW5kZXgNCj4gPiAxNTgwZGQ0Li41ZTg2MzViIDEwMDY0
NA0KPiA+IC0tLSBhL3ZpcnQva3ZtL2t2bV9tYWluLmMNCj4gPiArKysgYi92aXJ0L2t2bS9rdm1f
bWFpbi5jDQo+ID4gQEAgLTEwMiw2ICsxMDIsMTAgQEAgc3RhdGljIGJvb2wgbGFyZ2VwYWdlc19l
bmFibGVkID0gdHJ1ZTsNCj4gPg0KPiA+IGJvb2wga3ZtX2lzX21taW9fcGZuKHBmbl90IHBmbikN
Cj4gPiB7DQo+ID4gKyNpZmRlZiBDT05GSUdfTUVNT1JZX0hPVFBMVUcNCj4gDQo+IEknZCBmZWVs
IHNhZmVyIGlmIHdlIG5hcnJvdyB0aGlzIGRvd24gdG8gZTUwMC4NCj4gDQo+ID4gKyAgICAgICAv
Kg0KPiA+ICsgICAgICAgICogQ3VycmVudGx5IG9ubHkgaW4gbWVtb3J5IGhvdCByZW1vdmUgY2Fz
ZSB3ZSBtYXkgc3RpbGwgbmVlZCB0aGlzLg0KPiA+ICsgICAgICAgICovDQo+ID4gICAgICAgIGlm
IChwZm5fdmFsaWQocGZuKSkgew0KPiANCj4gV2Ugc3RpbGwgaGF2ZSB0byBjaGVjayBmb3IgcGZu
X3ZhbGlkLCBubz8gU28gdGhlICNpZmRlZiBzaG91bGQgYmUgZG93biBoZXJlLg0KPiANCj4gPiAg
ICAgICAgICAgICAgICBpbnQgcmVzZXJ2ZWQ7DQo+ID4gICAgICAgICAgICAgICAgc3RydWN0IHBh
Z2UgKnRhaWwgPSBwZm5fdG9fcGFnZShwZm4pOyBAQCAtMTI0LDYgKzEyOCw3DQo+ID4gQEAgYm9v
bCBrdm1faXNfbW1pb19wZm4ocGZuX3QgcGZuKQ0KPiA+ICAgICAgICAgICAgICAgIH0NCj4gPiAg
ICAgICAgICAgICAgICByZXR1cm4gUGFnZVJlc2VydmVkKHRhaWwpOw0KPiA+ICAgICAgICB9DQo+
ID4gKyNlbmRpZg0KPiA+DQo+ID4gICAgICAgIHJldHVybiB0cnVlOw0KPiA+IH0NCj4gPg0KPiA+
IEJlZm9yZSBhcHBseSB0aGlzIGNoYW5nZToNCj4gPg0KPiA+IHJlYWwgICAgKDFtMTkuOTU0cyAr
IDFtMjAuOTE4cyArIDFtMjIuNzQwcyArIDFtMjEuMTQ2cyArIDFtMjIuMTIwcykvNT0NCj4gMW0y
MS4zNzZzDQo+ID4gdXNlciAgICAoMG0yMy4xODFzICsgMG0yMy41NTBzICsgMG0yMy41MDZzICsg
MG0yMy40MTBzICsgMG0yMy41MjBzKS81PQ0KPiAwbTIzLjQzM3MNCj4gPiBzeXMJKDBtNDkuMDg3
cyArIDBtNDkuNTYzcyArIDBtNTEuNzU4cyArIDBtNTAuMjkwcyArIDBtNTEuMDQ3cykvNT0gMG01
MC4zNDlzDQo+ID4NCj4gPiBBZnRlciBhcHBseSB0aGlzIGNoYW5nZToNCj4gPg0KPiA+IHJlYWwg
ICAgKDFtMTkuNTA3cyArIDFtMjAuOTE5cyArIDFtMjEuNDM2cyArIDFtMjEuMTc5cyArIDFtMjAu
MjkzcykvNT0NCj4gMW0yMC42NjdzDQo+ID4gdXNlciAgICAoMG0yMi41OTVzICsgMG0yMi43MTlz
ICsgMG0yMi40ODRzICsgMG0yMi44MTFzICsgMG0yMi40NjdzKS81PQ0KPiAwbTIyLjYxNXMNCj4g
PiBzeXMJKDBtNDguODQxcyArIDBtNDkuOTI5cyArIDBtNTAuMzEwcyArIDBtNDkuODEzcyArIDBt
NDguNTg3cykvNT0gMG00OS40OTZzDQo+ID4NCj4gPiBTbywNCj4gPg0KPiA+IHJlYWwgICAgKDFt
MjAuNjY3cyAtIDFtMjEuMzc2cykvMW0yMS4zNzZzIHggMTAwJSA9IC0wLjYlDQo+ID4gdXNlciAg
ICAoMG0yMi42MTVzIC0gMG0yMy40MzNzKS8wbTIzLjQzM3MgeCAxMDAlID0gLTMuNSUNCj4gPiBz
eXMJKDBtNDkuNDk2cyAtIDBtNTAuMzQ5cykvMG01MC4zNDlzIHggMTAwJSA9IC0xLjclDQo+IA0K
PiBWZXJ5IG5pY2UsIHNvIHRoZXJlIGlzIGEgcmVhbCB3b3JsZCBwZXJmb3JtYW5jZSBiZW5lZml0
IHRvIGRvaW5nIHRoaXMuIFRoZW4geWVzLA0KPiBJIHRoaW5rIGl0IHdvdWxkIG1ha2Ugc2Vuc2Ug
dG8gY2hhbmdlIHRoZSBnbG9iYWwgaGVscGVyIGZ1bmN0aW9uIHRvIGJlIGZhc3Qgb24NCj4gZTUw
MCBhbmQgdXNlIHRoYXQgb25lIGZyb20gZTUwMF9zaGFkb3dfbWFzMl9hdHRyaWIoKSBpbnN0ZWFk
Lg0KDQpBcmUgbm90IHdlIGdvaW5nIHRvIHVzZSBwYWdlX2lzX3JhbSgpIGZyb20gIGU1MDBfc2hh
ZG93X21hczJfYXR0cmliKCkgYXMgU2NvdHQgY29tbWVudGVkPw0KDQotQmhhcmF0DQoNCj4gDQo+
IEdsZWIsIFBhb2xvLCBhbnkgaGFyZCBmZWVsaW5ncz8NCj4gDQo+IA0KPiBBbGV4DQo+IA0KDQo

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  9:11                               ` Bhushan Bharat-R65777
@ 2013-07-24  9:21                                 ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24  9:21 UTC (permalink / raw)
  To: Bhushan Bharat-R65777
  Cc: "“tiejun.chen”",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Gleb Natapov, Paolo Bonzini


On 24.07.2013, at 11:11, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Wednesday, July 24, 2013 1:55 PM
>> To: "“tiejun.chen”"
>> Cc: Bhushan Bharat-R65777; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org list;
>> Wood Scott-B07421; Gleb Natapov; Paolo Bonzini
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>> 
>> 
>> On 24.07.2013, at 04:26, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 06:27 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
>>>> 
>>>>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>>>>>> 
>>>>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>>>>>> 
>>>>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>>>>>> 
>>>>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: Bhushan Bharat-R65777
>>>>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>>>>>> To: '" tiejun.chen "'
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>> agraf@suse.de; Wood Scott-
>>>>>>>>>> B07421
>>>>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: " tiejun.chen " [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>>> agraf@suse.de; Wood
>>>>>>>>>>> Scott-
>>>>>>>>>>> B07421
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
>>>>>>>>>>> only for kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of " tiejun.chen "
>>>>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>>>>> agraf@suse.de; Wood
>>>>>>>>>>>>> Scott-
>>>>>>>>>>>>> B07421
>>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
>>>>>>>>>>>>> only for kernel managed pages
>>>>>>>>>>>>> 
>>>>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>>>> From: " tiejun.chen " [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>>>>>>> agraf@suse.de; Wood
>>>>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
>>>>>>>>>>>>>>> only for kernel managed pages
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>>>>>> If there is a struct page for the requested mapping then
>>>>>>>>>>>>>>>> it's normal DDR and the mapping sets "M" bit (coherent,
>>>>>>>>>>>>>>>> cacheable) else this is treated as I/O and we set  "I +
>>>>>>>>>>>>>>>> G"  (cache inhibited,
>>>>>>>>>>>>>>>> guarded)
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned
>>>>>>>>>>>>>>>> device
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan
>>>>>>>>>>>>>>>> <bharat.bhushan@freescale.com>
>>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>>>>>   1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>>>>   	return mas3;
>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2,
>>>>>>>>>>>>>>>> +pfn_t pfn)
>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> What I understand from this function (someone can correct
>>>>>>>>>>>>>> me) is that it
>>>>>>>>>>>>> returns "false" when the page is managed by kernel and is
>>>>>>>>>>>>> not marked as RESERVED (for some reason). For us it does not
>>>>>>>>>>>>> matter whether the page is reserved or not, if it is kernel
>>>>>>>>>>>>> visible page then it
>>>>>>>>>> is DDR.
>>>>>>>>>>>>>> 
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I think you are setting I|G by addressing all mmio pages,
>>>>>>>>>>>>> right? If so,
>>>>>>>>>>>>> 
>>>>>>>>>>>>>     KVM: direct mmio pfn check
>>>>>>>>>>>>> 
>>>>>>>>>>>>>     Userspace may specify memory slots that are backed by
>>>>>>>>>>>>> mmio pages rather than
>>>>>>>>>>>>>     normal RAM.  In some cases it is not enough to identify
>>>>>>>>>>>>> these mmio
>>>>>>>>>>> pages
>>>>>>>>>>>>>     by pfn_valid().  This patch adds checking the PageReserved as
>> well.
>>>>>>>>>>>> 
>>>>>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>>>>>> PageReserved helps in
>>>>>>>>>>> those cases?
>>>>>>>>>>> 
>>>>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this
>>>>>>>>>>> should be chronically persistent as I understand ;-)
>>>>>>>>>> 
>>>>>>>>>> Then I will wait till someone educate me :)
>>>>>>>>> 
>>>>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do
>> not want to call this for all tlbwe operation unless it is necessary.
>>>>>>>> 
>>>>>>>> It certainly does more than we need and potentially slows down the fast
>> path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to
>> check for pages that are declared reserved on the host. This happens in 2 cases:
>>>>>>>> 
>>>>>>>>  1) Non cache coherent DMA
>>>>>>>>  2) Memory hot remove
>>>>>>>> 
>>>>>>>> The non coherent DMA case would be interesting, as with the mechanism as
>> it is in place in Linux today, we could potentially break normal guest operation
>> if we don't take it into account. However, it's Kconfig guarded by:
>>>>>>>> 
>>>>>>>>        depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>>>>>        default n if PPC_47x
>>>>>>>>        default y
>>>>>>>> 
>>>>>>>> so we never hit it with any core we care about ;).
>>>>>>>> 
>>>>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry
>> about that one either.
>>>>>>> 
>>>>>>> Thanks for this good information :)
>>>>>>> 
>>>>>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside
>> kvm_is_mmio_pfn() to make sure that check is only valid when that is really
>> needed? This can decrease those unnecessary performance loss.
>>>>>>> 
>>>>>>> If I'm wrong please correct me :)
>>>>>> 
>>>>>> You're perfectly right, but this is generic KVM code. So it gets run across
>> all architectures. What if someone has the great idea to add a new case here for
>> x86, but doesn't tell us? In that case we potentially break x86.
>>>>>> 
>>>>>> I'd rather not like to break x86 :).
>>>>>> 
>>>>>> However, it'd be very interesting to see a benchmark with this change. Do
>> you think you could just rip out the whole reserved check and run a few
>> benchmarks and show us the results?
>>>>>> 
>>>>> 
>>>>> Often what case should be adopted to validate this scenario?
>>>> 
>>>> Something which hammers the TLB emulation heavily. I usually just run
>>>> /bin/echo a thousand times in "time" and see how long it takes ;)
>>>> 
>>> 
>>> I tried to run five times with this combination, "time `for ((i=0; i<5000;
>> i++));  do /bin/echo; done`", to calculate the average value with this change:
>>> 
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index
>>> 1580dd4..5e8635b 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
>>> 
>>> bool kvm_is_mmio_pfn(pfn_t pfn)
>>> {
>>> +#ifdef CONFIG_MEMORY_HOTPLUG
>> 
>> I'd feel safer if we narrow this down to e500.
>> 
>>> +       /*
>>> +        * Currently only in memory hot remove case we may still need this.
>>> +        */
>>>       if (pfn_valid(pfn)) {
>> 
>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
>> 
>>>               int reserved;
>>>               struct page *tail = pfn_to_page(pfn); @@ -124,6 +128,7
>>> @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>>>               }
>>>               return PageReserved(tail);
>>>       }
>>> +#endif
>>> 
>>>       return true;
>>> }
>>> 
>>> Before apply this change:
>>> 
>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5=
>> 1m21.376s
>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5=
>> 0m23.433s
>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
>>> 
>>> After apply this change:
>>> 
>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5=
>> 1m20.667s
>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5=
>> 0m22.615s
>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
>>> 
>>> So,
>>> 
>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
>> 
>> Very nice, so there is a real world performance benefit to doing this. Then yes,
>> I think it would make sense to change the global helper function to be fast on
>> e500 and use that one from e500_shadow_mas2_attrib() instead.
> 
> Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?

Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24  9:21                                 ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24  9:21 UTC (permalink / raw)
  To: Bhushan Bharat-R65777
  Cc: "“tiejun.chen”",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Gleb Natapov, Paolo Bonzini


On 24.07.2013, at 11:11, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Wednesday, July 24, 2013 1:55 PM
>> To: "“tiejun.chen”"
>> Cc: Bhushan Bharat-R65777; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org list;
>> Wood Scott-B07421; Gleb Natapov; Paolo Bonzini
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>> 
>> 
>> On 24.07.2013, at 04:26, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 06:27 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 12:19, “tiejun.chen” wrote:
>>>> 
>>>>> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>>>>>> 
>>>>>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>>>>>> 
>>>>>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>>>>>> 
>>>>>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: Bhushan Bharat-R65777
>>>>>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>>>>>> To: '" tiejun.chen "'
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>> agraf@suse.de; Wood Scott-
>>>>>>>>>> B07421
>>>>>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: " tiejun.chen " [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>>> agraf@suse.de; Wood
>>>>>>>>>>> Scott-
>>>>>>>>>>> B07421
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
>>>>>>>>>>> only for kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of " tiejun.chen "
>>>>>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>>>>> agraf@suse.de; Wood
>>>>>>>>>>>>> Scott-
>>>>>>>>>>>>> B07421
>>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
>>>>>>>>>>>>> only for kernel managed pages
>>>>>>>>>>>>> 
>>>>>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>>>> From: " tiejun.chen " [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>>>>>>>>>>> agraf@suse.de; Wood
>>>>>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency
>>>>>>>>>>>>>>> only for kernel managed pages
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>>>>>> If there is a struct page for the requested mapping then
>>>>>>>>>>>>>>>> it's normal DDR and the mapping sets "M" bit (coherent,
>>>>>>>>>>>>>>>> cacheable) else this is treated as I/O and we set  "I +
>>>>>>>>>>>>>>>> G"  (cache inhibited,
>>>>>>>>>>>>>>>> guarded)
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned
>>>>>>>>>>>>>>>> device
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> Signed-off-by: Bharat Bhushan
>>>>>>>>>>>>>>>> <bharat.bhushan@freescale.com>
>>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>>>>>   1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>>>>   	return mas3;
>>>>>>>>>>>>>>>>   }
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>>>>>> usermode)
>>>>>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2,
>>>>>>>>>>>>>>>> +pfn_t pfn)
>>>>>>>>>>>>>>>>   {
>>>>>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> What I understand from this function (someone can correct
>>>>>>>>>>>>>> me) is that it
>>>>>>>>>>>>> returns "false" when the page is managed by kernel and is
>>>>>>>>>>>>> not marked as RESERVED (for some reason). For us it does not
>>>>>>>>>>>>> matter whether the page is reserved or not, if it is kernel
>>>>>>>>>>>>> visible page then it
>>>>>>>>>> is DDR.
>>>>>>>>>>>>>> 
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I think you are setting I|G by addressing all mmio pages,
>>>>>>>>>>>>> right? If so,
>>>>>>>>>>>>> 
>>>>>>>>>>>>>     KVM: direct mmio pfn check
>>>>>>>>>>>>> 
>>>>>>>>>>>>>     Userspace may specify memory slots that are backed by
>>>>>>>>>>>>> mmio pages rather than
>>>>>>>>>>>>>     normal RAM.  In some cases it is not enough to identify
>>>>>>>>>>>>> these mmio
>>>>>>>>>>> pages
>>>>>>>>>>>>>     by pfn_valid().  This patch adds checking the PageReserved as
>> well.
>>>>>>>>>>>> 
>>>>>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>>>>>> PageReserved helps in
>>>>>>>>>>> those cases?
>>>>>>>>>>> 
>>>>>>>>>>> No, myself didn't see these actual cases in qemu,too. But this
>>>>>>>>>>> should be chronically persistent as I understand ;-)
>>>>>>>>>> 
>>>>>>>>>> Then I will wait till someone educate me :)
>>>>>>>>> 
>>>>>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do
>> not want to call this for all tlbwe operation unless it is necessary.
>>>>>>>> 
>>>>>>>> It certainly does more than we need and potentially slows down the fast
>> path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to
>> check for pages that are declared reserved on the host. This happens in 2 cases:
>>>>>>>> 
>>>>>>>>  1) Non cache coherent DMA
>>>>>>>>  2) Memory hot remove
>>>>>>>> 
>>>>>>>> The non coherent DMA case would be interesting, as with the mechanism as
>> it is in place in Linux today, we could potentially break normal guest operation
>> if we don't take it into account. However, it's Kconfig guarded by:
>>>>>>>> 
>>>>>>>>        depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>>>>>        default n if PPC_47x
>>>>>>>>        default y
>>>>>>>> 
>>>>>>>> so we never hit it with any core we care about ;).
>>>>>>>> 
>>>>>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry
>> about that one either.
>>>>>>> 
>>>>>>> Thanks for this good information :)
>>>>>>> 
>>>>>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside
>> kvm_is_mmio_pfn() to make sure that check is only valid when that is really
>> needed? This can decrease those unnecessary performance loss.
>>>>>>> 
>>>>>>> If I'm wrong please correct me :)
>>>>>> 
>>>>>> You're perfectly right, but this is generic KVM code. So it gets run across
>> all architectures. What if someone has the great idea to add a new case here for
>> x86, but doesn't tell us? In that case we potentially break x86.
>>>>>> 
>>>>>> I'd rather not like to break x86 :).
>>>>>> 
>>>>>> However, it'd be very interesting to see a benchmark with this change. Do
>> you think you could just rip out the whole reserved check and run a few
>> benchmarks and show us the results?
>>>>>> 
>>>>> 
>>>>> Often what case should be adopted to validate this scenario?
>>>> 
>>>> Something which hammers the TLB emulation heavily. I usually just run
>>>> /bin/echo a thousand times in "time" and see how long it takes ;)
>>>> 
>>> 
>>> I tried to run five times with this combination, "time `for ((i=0; i<5000;
>> i++));  do /bin/echo; done`", to calculate the average value with this change:
>>> 
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index
>>> 1580dd4..5e8635b 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
>>> 
>>> bool kvm_is_mmio_pfn(pfn_t pfn)
>>> {
>>> +#ifdef CONFIG_MEMORY_HOTPLUG
>> 
>> I'd feel safer if we narrow this down to e500.
>> 
>>> +       /*
>>> +        * Currently only in memory hot remove case we may still need this.
>>> +        */
>>>       if (pfn_valid(pfn)) {
>> 
>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
>> 
>>>               int reserved;
>>>               struct page *tail = pfn_to_page(pfn); @@ -124,6 +128,7
>>> @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>>>               }
>>>               return PageReserved(tail);
>>>       }
>>> +#endif
>>> 
>>>       return true;
>>> }
>>> 
>>> Before apply this change:
>>> 
>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
>>> 
>>> After apply this change:
>>> 
>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
>>> 
>>> So,
>>> 
>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
>> 
>> Very nice, so there is a real world performance benefit to doing this. Then yes,
>> I think it would make sense to change the global helper function to be fast on
>> e500 and use that one from e500_shadow_mas2_attrib() instead.
> 
> Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?

Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  9:21                                 ` Alexander Graf
@ 2013-07-24  9:35                                   ` Gleb Natapov
  -1 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24  9:35 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Bhushan Bharat-R65777, "“tiejun.chen”",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini

On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> > Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?
> 
> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> 
> 
Because it is much slower and, IIRC, actually used to build pfn map that allow
us to check quickly for valid pfn.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24  9:35                                   ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24  9:35 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Bhushan Bharat-R65777, "“tiejun.chen”",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini

On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> > Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?
> 
> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> 
> 
Because it is much slower and, IIRC, actually used to build pfn map that allow
us to check quickly for valid pfn.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  9:35                                   ` Gleb Natapov
@ 2013-07-24  9:39                                     ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24  9:39 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Bhushan Bharat-R65777, "“tiejun.chen”",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini


On 24.07.2013, at 11:35, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
>>> Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?
>> 
>> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
>> 
>> 
> Because it is much slower and, IIRC, actually used to build pfn map that allow
> us to check quickly for valid pfn.

Then why should we use page_is_ram()? :)

I really don't want the e500 code to diverge too much from what the rest of the kvm code is doing.


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24  9:39                                     ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24  9:39 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Bhushan Bharat-R65777, "“tiejun.chen”",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini


On 24.07.2013, at 11:35, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
>>> Are not we going to use page_is_ram() from  e500_shadow_mas2_attrib() as Scott commented?
>> 
>> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
>> 
>> 
> Because it is much slower and, IIRC, actually used to build pfn map that allow
> us to check quickly for valid pfn.

Then why should we use page_is_ram()? :)

I really don't want the e500 code to diverge too much from what the rest of the kvm code is doing.


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  8:25                             ` Alexander Graf
@ 2013-07-24 10:01                               ` Gleb Natapov
  -1 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24 10:01 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli

Copying Andrea for him to verify that I am not talking nonsense :)

On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 1580dd4..5e8635b 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> > 
> > bool kvm_is_mmio_pfn(pfn_t pfn)
> > {
> > +#ifdef CONFIG_MEMORY_HOTPLUG
> 
> I'd feel safer if we narrow this down to e500.
> 
> > +       /*
> > +        * Currently only in memory hot remove case we may still need this.
> > +        */
> >        if (pfn_valid(pfn)) {
> 
> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> 
> >                int reserved;
> >                struct page *tail = pfn_to_page(pfn);
> > @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >                }
> >                return PageReserved(tail);
> >        }
> > +#endif
> > 
> >        return true;
> > }
> > 
> > Before apply this change:
> > 
> > real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> > user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> > sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> > 
> > After apply this change:
> > 
> > real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> > user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> > sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> > 
> > So,
> > 
> > real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> > user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> > sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> 
> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> 
> Gleb, Paolo, any hard feelings?
> 
I do not see how can we break the function in such a way and get
away with it. Not all valid pfns point to memory. Physical address can
be sparse (due to PCI hole, framebuffer or just because).

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 10:01                               ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24 10:01 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli

Copying Andrea for him to verify that I am not talking nonsense :)

On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 1580dd4..5e8635b 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> > 
> > bool kvm_is_mmio_pfn(pfn_t pfn)
> > {
> > +#ifdef CONFIG_MEMORY_HOTPLUG
> 
> I'd feel safer if we narrow this down to e500.
> 
> > +       /*
> > +        * Currently only in memory hot remove case we may still need this.
> > +        */
> >        if (pfn_valid(pfn)) {
> 
> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> 
> >                int reserved;
> >                struct page *tail = pfn_to_page(pfn);
> > @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >                }
> >                return PageReserved(tail);
> >        }
> > +#endif
> > 
> >        return true;
> > }
> > 
> > Before apply this change:
> > 
> > real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> > user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> > sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> > 
> > After apply this change:
> > 
> > real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> > user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> > sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> > 
> > So,
> > 
> > real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> > user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> > sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> 
> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> 
> Gleb, Paolo, any hard feelings?
> 
I do not see how can we break the function in such a way and get
away with it. Not all valid pfns point to memory. Physical address can
be sparse (due to PCI hole, framebuffer or just because).

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24 10:01                               ` Gleb Natapov
@ 2013-07-24 10:09                                 ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24 10:09 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli


On 24.07.2013, at 12:01, Gleb Natapov wrote:

> Copying Andrea for him to verify that I am not talking nonsense :)
> 
> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index 1580dd4..5e8635b 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
>>> 
>>> bool kvm_is_mmio_pfn(pfn_t pfn)
>>> {
>>> +#ifdef CONFIG_MEMORY_HOTPLUG
>> 
>> I'd feel safer if we narrow this down to e500.
>> 
>>> +       /*
>>> +        * Currently only in memory hot remove case we may still need this.
>>> +        */
>>>       if (pfn_valid(pfn)) {
>> 
>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
>> 
>>>               int reserved;
>>>               struct page *tail = pfn_to_page(pfn);
>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>>>               }
>>>               return PageReserved(tail);
>>>       }
>>> +#endif
>>> 
>>>       return true;
>>> }
>>> 
>>> Before apply this change:
>>> 
>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
>>> 
>>> After apply this change:
>>> 
>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
>>> 
>>> So,
>>> 
>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
>> 
>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
>> 
>> Gleb, Paolo, any hard feelings?
>> 
> I do not see how can we break the function in such a way and get
> away with it. Not all valid pfns point to memory. Physical address can
> be sparse (due to PCI hole, framebuffer or just because).

But we don't check for sparseness today in here either. We merely check for incomplete huge pages.


Alex

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 10:09                                 ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24 10:09 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli


On 24.07.2013, at 12:01, Gleb Natapov wrote:

> Copying Andrea for him to verify that I am not talking nonsense :)
> 
> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index 1580dd4..5e8635b 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
>>> 
>>> bool kvm_is_mmio_pfn(pfn_t pfn)
>>> {
>>> +#ifdef CONFIG_MEMORY_HOTPLUG
>> 
>> I'd feel safer if we narrow this down to e500.
>> 
>>> +       /*
>>> +        * Currently only in memory hot remove case we may still need this.
>>> +        */
>>>       if (pfn_valid(pfn)) {
>> 
>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
>> 
>>>               int reserved;
>>>               struct page *tail = pfn_to_page(pfn);
>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>>>               }
>>>               return PageReserved(tail);
>>>       }
>>> +#endif
>>> 
>>>       return true;
>>> }
>>> 
>>> Before apply this change:
>>> 
>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
>>> 
>>> After apply this change:
>>> 
>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
>>> 
>>> So,
>>> 
>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
>> 
>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
>> 
>> Gleb, Paolo, any hard feelings?
>> 
> I do not see how can we break the function in such a way and get
> away with it. Not all valid pfns point to memory. Physical address can
> be sparse (due to PCI hole, framebuffer or just because).

But we don't check for sparseness today in here either. We merely check for incomplete huge pages.


Alex


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24 10:09                                 ` Alexander Graf
@ 2013-07-24 10:19                                   ` Gleb Natapov
  -1 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24 10:19 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli

On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
> 
> On 24.07.2013, at 12:01, Gleb Natapov wrote:
> 
> > Copying Andrea for him to verify that I am not talking nonsense :)
> > 
> > On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> >>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>> index 1580dd4..5e8635b 100644
> >>> --- a/virt/kvm/kvm_main.c
> >>> +++ b/virt/kvm/kvm_main.c
> >>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> >>> 
> >>> bool kvm_is_mmio_pfn(pfn_t pfn)
> >>> {
> >>> +#ifdef CONFIG_MEMORY_HOTPLUG
> >> 
> >> I'd feel safer if we narrow this down to e500.
> >> 
> >>> +       /*
> >>> +        * Currently only in memory hot remove case we may still need this.
> >>> +        */
> >>>       if (pfn_valid(pfn)) {
> >> 
> >> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> >> 
> >>>               int reserved;
> >>>               struct page *tail = pfn_to_page(pfn);
> >>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >>>               }
> >>>               return PageReserved(tail);
> >>>       }
> >>> +#endif
> >>> 
> >>>       return true;
> >>> }
> >>> 
> >>> Before apply this change:
> >>> 
> >>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> >>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> >>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> >>> 
> >>> After apply this change:
> >>> 
> >>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> >>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> >>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> >>> 
> >>> So,
> >>> 
> >>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> >>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> >>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> >> 
> >> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> >> 
> >> Gleb, Paolo, any hard feelings?
> >> 
> > I do not see how can we break the function in such a way and get
> > away with it. Not all valid pfns point to memory. Physical address can
> > be sparse (due to PCI hole, framebuffer or just because).
> 
> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
> 
That's not how I read the code. The code checks for reserved flag set.
It should be set on pfns that point to memory holes. As far as I
understand huge page tricks they are there to guaranty that THP does not
change flags under us, Andrea?

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 10:19                                   ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24 10:19 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli

On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
> 
> On 24.07.2013, at 12:01, Gleb Natapov wrote:
> 
> > Copying Andrea for him to verify that I am not talking nonsense :)
> > 
> > On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> >>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>> index 1580dd4..5e8635b 100644
> >>> --- a/virt/kvm/kvm_main.c
> >>> +++ b/virt/kvm/kvm_main.c
> >>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> >>> 
> >>> bool kvm_is_mmio_pfn(pfn_t pfn)
> >>> {
> >>> +#ifdef CONFIG_MEMORY_HOTPLUG
> >> 
> >> I'd feel safer if we narrow this down to e500.
> >> 
> >>> +       /*
> >>> +        * Currently only in memory hot remove case we may still need this.
> >>> +        */
> >>>       if (pfn_valid(pfn)) {
> >> 
> >> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> >> 
> >>>               int reserved;
> >>>               struct page *tail = pfn_to_page(pfn);
> >>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >>>               }
> >>>               return PageReserved(tail);
> >>>       }
> >>> +#endif
> >>> 
> >>>       return true;
> >>> }
> >>> 
> >>> Before apply this change:
> >>> 
> >>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> >>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> >>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> >>> 
> >>> After apply this change:
> >>> 
> >>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> >>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> >>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> >>> 
> >>> So,
> >>> 
> >>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> >>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> >>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> >> 
> >> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> >> 
> >> Gleb, Paolo, any hard feelings?
> >> 
> > I do not see how can we break the function in such a way and get
> > away with it. Not all valid pfns point to memory. Physical address can
> > be sparse (due to PCI hole, framebuffer or just because).
> 
> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
> 
That's not how I read the code. The code checks for reserved flag set.
It should be set on pfns that point to memory holes. As far as I
understand huge page tricks they are there to guaranty that THP does not
change flags under us, Andrea?

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24 10:19                                   ` Gleb Natapov
@ 2013-07-24 10:25                                     ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24 10:25 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli


On 24.07.2013, at 12:19, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
>> 
>> On 24.07.2013, at 12:01, Gleb Natapov wrote:
>> 
>>> Copying Andrea for him to verify that I am not talking nonsense :)
>>> 
>>> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
>>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>>> index 1580dd4..5e8635b 100644
>>>>> --- a/virt/kvm/kvm_main.c
>>>>> +++ b/virt/kvm/kvm_main.c
>>>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
>>>>> 
>>>>> bool kvm_is_mmio_pfn(pfn_t pfn)
>>>>> {
>>>>> +#ifdef CONFIG_MEMORY_HOTPLUG
>>>> 
>>>> I'd feel safer if we narrow this down to e500.
>>>> 
>>>>> +       /*
>>>>> +        * Currently only in memory hot remove case we may still need this.
>>>>> +        */
>>>>>      if (pfn_valid(pfn)) {
>>>> 
>>>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
>>>> 
>>>>>              int reserved;
>>>>>              struct page *tail = pfn_to_page(pfn);
>>>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>>>>>              }
>>>>>              return PageReserved(tail);
>>>>>      }
>>>>> +#endif
>>>>> 
>>>>>      return true;
>>>>> }
>>>>> 
>>>>> Before apply this change:
>>>>> 
>>>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
>>>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
>>>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
>>>>> 
>>>>> After apply this change:
>>>>> 
>>>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
>>>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
>>>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
>>>>> 
>>>>> So,
>>>>> 
>>>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
>>>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
>>>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
>>>> 
>>>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
>>>> 
>>>> Gleb, Paolo, any hard feelings?
>>>> 
>>> I do not see how can we break the function in such a way and get
>>> away with it. Not all valid pfns point to memory. Physical address can
>>> be sparse (due to PCI hole, framebuffer or just because).
>> 
>> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
>> 
> That's not how I read the code. The code checks for reserved flag set.
> It should be set on pfns that point to memory holes. As far as I

I couldn't find any traces of code that sets the reserved bits on e500 chips though. I've only seen it getting set for memory hotplug and memory incoherent DMA code which doesn't get used on e500.

But I'd be more than happy to get proven wrong :).


Alex

> understand huge page tricks they are there to guaranty that THP does not
> change flags under us, Andrea?
> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 10:25                                     ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-24 10:25 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli


On 24.07.2013, at 12:19, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
>> 
>> On 24.07.2013, at 12:01, Gleb Natapov wrote:
>> 
>>> Copying Andrea for him to verify that I am not talking nonsense :)
>>> 
>>> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
>>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>>> index 1580dd4..5e8635b 100644
>>>>> --- a/virt/kvm/kvm_main.c
>>>>> +++ b/virt/kvm/kvm_main.c
>>>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
>>>>> 
>>>>> bool kvm_is_mmio_pfn(pfn_t pfn)
>>>>> {
>>>>> +#ifdef CONFIG_MEMORY_HOTPLUG
>>>> 
>>>> I'd feel safer if we narrow this down to e500.
>>>> 
>>>>> +       /*
>>>>> +        * Currently only in memory hot remove case we may still need this.
>>>>> +        */
>>>>>      if (pfn_valid(pfn)) {
>>>> 
>>>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
>>>> 
>>>>>              int reserved;
>>>>>              struct page *tail = pfn_to_page(pfn);
>>>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
>>>>>              }
>>>>>              return PageReserved(tail);
>>>>>      }
>>>>> +#endif
>>>>> 
>>>>>      return true;
>>>>> }
>>>>> 
>>>>> Before apply this change:
>>>>> 
>>>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
>>>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
>>>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
>>>>> 
>>>>> After apply this change:
>>>>> 
>>>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
>>>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
>>>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
>>>>> 
>>>>> So,
>>>>> 
>>>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
>>>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
>>>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
>>>> 
>>>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
>>>> 
>>>> Gleb, Paolo, any hard feelings?
>>>> 
>>> I do not see how can we break the function in such a way and get
>>> away with it. Not all valid pfns point to memory. Physical address can
>>> be sparse (due to PCI hole, framebuffer or just because).
>> 
>> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
>> 
> That's not how I read the code. The code checks for reserved flag set.
> It should be set on pfns that point to memory holes. As far as I

I couldn't find any traces of code that sets the reserved bits on e500 chips though. I've only seen it getting set for memory hotplug and memory incoherent DMA code which doesn't get used on e500.

But I'd be more than happy to get proven wrong :).


Alex

> understand huge page tricks they are there to guaranty that THP does not
> change flags under us, Andrea?
> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24 10:25                                     ` Alexander Graf
@ 2013-07-24 10:30                                       ` Gleb Natapov
  -1 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24 10:30 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli

On Wed, Jul 24, 2013 at 12:25:18PM +0200, Alexander Graf wrote:
> 
> On 24.07.2013, at 12:19, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
> >> 
> >> On 24.07.2013, at 12:01, Gleb Natapov wrote:
> >> 
> >>> Copying Andrea for him to verify that I am not talking nonsense :)
> >>> 
> >>> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> >>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>>>> index 1580dd4..5e8635b 100644
> >>>>> --- a/virt/kvm/kvm_main.c
> >>>>> +++ b/virt/kvm/kvm_main.c
> >>>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> >>>>> 
> >>>>> bool kvm_is_mmio_pfn(pfn_t pfn)
> >>>>> {
> >>>>> +#ifdef CONFIG_MEMORY_HOTPLUG
> >>>> 
> >>>> I'd feel safer if we narrow this down to e500.
> >>>> 
> >>>>> +       /*
> >>>>> +        * Currently only in memory hot remove case we may still need this.
> >>>>> +        */
> >>>>>      if (pfn_valid(pfn)) {
> >>>> 
> >>>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> >>>> 
> >>>>>              int reserved;
> >>>>>              struct page *tail = pfn_to_page(pfn);
> >>>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >>>>>              }
> >>>>>              return PageReserved(tail);
> >>>>>      }
> >>>>> +#endif
> >>>>> 
> >>>>>      return true;
> >>>>> }
> >>>>> 
> >>>>> Before apply this change:
> >>>>> 
> >>>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> >>>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> >>>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> >>>>> 
> >>>>> After apply this change:
> >>>>> 
> >>>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> >>>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> >>>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> >>>>> 
> >>>>> So,
> >>>>> 
> >>>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> >>>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> >>>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> >>>> 
> >>>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> >>>> 
> >>>> Gleb, Paolo, any hard feelings?
> >>>> 
> >>> I do not see how can we break the function in such a way and get
> >>> away with it. Not all valid pfns point to memory. Physical address can
> >>> be sparse (due to PCI hole, framebuffer or just because).
> >> 
> >> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
> >> 
> > That's not how I read the code. The code checks for reserved flag set.
> > It should be set on pfns that point to memory holes. As far as I
> 
> I couldn't find any traces of code that sets the reserved bits on e500 chips though. I've only seen it getting set for memory hotplug and memory incoherent DMA code which doesn't get used on e500.
> 
> But I'd be more than happy to get proven wrong :).
> 
Can you write a module that scans all page structures? AFAIK all pages
are marked as reserved and then those that become regular memory are
marked as unreserved. Hope Andrea will chime in here :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 10:30                                       ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-24 10:30 UTC (permalink / raw)
  To: Alexander Graf
  Cc: "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini, Andrea Arcangeli

On Wed, Jul 24, 2013 at 12:25:18PM +0200, Alexander Graf wrote:
> 
> On 24.07.2013, at 12:19, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
> >> 
> >> On 24.07.2013, at 12:01, Gleb Natapov wrote:
> >> 
> >>> Copying Andrea for him to verify that I am not talking nonsense :)
> >>> 
> >>> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> >>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>>>> index 1580dd4..5e8635b 100644
> >>>>> --- a/virt/kvm/kvm_main.c
> >>>>> +++ b/virt/kvm/kvm_main.c
> >>>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> >>>>> 
> >>>>> bool kvm_is_mmio_pfn(pfn_t pfn)
> >>>>> {
> >>>>> +#ifdef CONFIG_MEMORY_HOTPLUG
> >>>> 
> >>>> I'd feel safer if we narrow this down to e500.
> >>>> 
> >>>>> +       /*
> >>>>> +        * Currently only in memory hot remove case we may still need this.
> >>>>> +        */
> >>>>>      if (pfn_valid(pfn)) {
> >>>> 
> >>>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> >>>> 
> >>>>>              int reserved;
> >>>>>              struct page *tail = pfn_to_page(pfn);
> >>>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> >>>>>              }
> >>>>>              return PageReserved(tail);
> >>>>>      }
> >>>>> +#endif
> >>>>> 
> >>>>>      return true;
> >>>>> }
> >>>>> 
> >>>>> Before apply this change:
> >>>>> 
> >>>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> >>>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> >>>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> >>>>> 
> >>>>> After apply this change:
> >>>>> 
> >>>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> >>>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> >>>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> >>>>> 
> >>>>> So,
> >>>>> 
> >>>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> >>>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> >>>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> >>>> 
> >>>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> >>>> 
> >>>> Gleb, Paolo, any hard feelings?
> >>>> 
> >>> I do not see how can we break the function in such a way and get
> >>> away with it. Not all valid pfns point to memory. Physical address can
> >>> be sparse (due to PCI hole, framebuffer or just because).
> >> 
> >> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
> >> 
> > That's not how I read the code. The code checks for reserved flag set.
> > It should be set on pfns that point to memory holes. As far as I
> 
> I couldn't find any traces of code that sets the reserved bits on e500 chips though. I've only seen it getting set for memory hotplug and memory incoherent DMA code which doesn't get used on e500.
> 
> But I'd be more than happy to get proven wrong :).
> 
Can you write a module that scans all page structures? AFAIK all pages
are marked as reserved and then those that become regular memory are
marked as unreserved. Hope Andrea will chime in here :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24  9:39                                     ` Alexander Graf
  (?)
@ 2013-07-24 20:32                                       ` Scott Wood
  -1 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-24 20:32 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Gleb Natapov, Bhushan Bharat-R65777, “tiejun.chen”,
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev

On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> 
> On 24.07.2013, at 11:35, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>> Are not we going to use page_is_ram() from   
> e500_shadow_mas2_attrib() as Scott commented?
> >>
> >> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>
> >>
> > Because it is much slower and, IIRC, actually used to build pfn map  
> that allow
> > us to check quickly for valid pfn.
> 
> Then why should we use page_is_ram()? :)
> 
> I really don't want the e500 code to diverge too much from what the  
> rest of the kvm code is doing.

I don't understand "actually used to build pfn map...".  What code is  
this?  I don't see any calls to page_is_ram() in the KVM code, or in  
generic mm code.  Is this a statement about what x86 does?

On PPC page_is_ram() is only called (AFAICT) for determining what  
attributes to set on mmaps.  We want to be sure that KVM always makes  
the same decision.  While pfn_valid() seems like it should be  
equivalent, it's not obvious from the PPC code that it is.

If pfn_valid() is better, why is that not used for mmap?  Why are there  
two different names for the same thing?

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 20:32                                       ` Scott Wood
  0 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-24 20:32 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Wood Scott-B07421, Gleb Natapov, kvm@vger.kernel.org list,
	kvm-ppc, “tiejun.chen”,
	Bhushan Bharat-R65777, Paolo Bonzini, linuxppc-dev

On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> 
> On 24.07.2013, at 11:35, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>> Are not we going to use page_is_ram() from   
> e500_shadow_mas2_attrib() as Scott commented?
> >>
> >> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>
> >>
> > Because it is much slower and, IIRC, actually used to build pfn map  
> that allow
> > us to check quickly for valid pfn.
> 
> Then why should we use page_is_ram()? :)
> 
> I really don't want the e500 code to diverge too much from what the  
> rest of the kvm code is doing.

I don't understand "actually used to build pfn map...".  What code is  
this?  I don't see any calls to page_is_ram() in the KVM code, or in  
generic mm code.  Is this a statement about what x86 does?

On PPC page_is_ram() is only called (AFAICT) for determining what  
attributes to set on mmaps.  We want to be sure that KVM always makes  
the same decision.  While pfn_valid() seems like it should be  
equivalent, it's not obvious from the PPC code that it is.

If pfn_valid() is better, why is that not used for mmap?  Why are there  
two different names for the same thing?

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-24 20:32                                       ` Scott Wood
  0 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-24 20:32 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Gleb Natapov, Bhushan Bharat-R65777, “tiejun.chen”,
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev

On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> 
> On 24.07.2013, at 11:35, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>> Are not we going to use page_is_ram() from   
> e500_shadow_mas2_attrib() as Scott commented?
> >>
> >> Why aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>
> >>
> > Because it is much slower and, IIRC, actually used to build pfn map  
> that allow
> > us to check quickly for valid pfn.
> 
> Then why should we use page_is_ram()? :)
> 
> I really don't want the e500 code to diverge too much from what the  
> rest of the kvm code is doing.

I don't understand "actually used to build pfn map...".  What code is  
this?  I don't see any calls to page_is_ram() in the KVM code, or in  
generic mm code.  Is this a statement about what x86 does?

On PPC page_is_ram() is only called (AFAICT) for determining what  
attributes to set on mmaps.  We want to be sure that KVM always makes  
the same decision.  While pfn_valid() seems like it should be  
equivalent, it's not obvious from the PPC code that it is.

If pfn_valid() is better, why is that not used for mmap?  Why are there  
two different names for the same thing?

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24 10:30                                       ` Gleb Natapov
@ 2013-07-25  1:04                                         ` Andrea Arcangeli
  -1 siblings, 0 replies; 82+ messages in thread
From: Andrea Arcangeli @ 2013-07-25  1:04 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Alexander Graf, "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini

Hi!

On Wed, Jul 24, 2013 at 01:30:12PM +0300, Gleb Natapov wrote:
> On Wed, Jul 24, 2013 at 12:25:18PM +0200, Alexander Graf wrote:
> > 
> > On 24.07.2013, at 12:19, Gleb Natapov wrote:
> > 
> > > On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
> > >> 
> > >> On 24.07.2013, at 12:01, Gleb Natapov wrote:
> > >> 
> > >>> Copying Andrea for him to verify that I am not talking nonsense :)
> > >>> 
> > >>> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> > >>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > >>>>> index 1580dd4..5e8635b 100644
> > >>>>> --- a/virt/kvm/kvm_main.c
> > >>>>> +++ b/virt/kvm/kvm_main.c
> > >>>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> > >>>>> 
> > >>>>> bool kvm_is_mmio_pfn(pfn_t pfn)
> > >>>>> {
> > >>>>> +#ifdef CONFIG_MEMORY_HOTPLUG
> > >>>> 
> > >>>> I'd feel safer if we narrow this down to e500.
> > >>>> 
> > >>>>> +       /*
> > >>>>> +        * Currently only in memory hot remove case we may still need this.
> > >>>>> +        */
> > >>>>>      if (pfn_valid(pfn)) {
> > >>>> 
> > >>>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> > >>>> 
> > >>>>>              int reserved;
> > >>>>>              struct page *tail = pfn_to_page(pfn);
> > >>>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> > >>>>>              }
> > >>>>>              return PageReserved(tail);
> > >>>>>      }
> > >>>>> +#endif
> > >>>>> 
> > >>>>>      return true;
> > >>>>> }
> > >>>>> 
> > >>>>> Before apply this change:
> > >>>>> 
> > >>>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> > >>>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> > >>>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> > >>>>> 
> > >>>>> After apply this change:
> > >>>>> 
> > >>>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> > >>>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> > >>>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> > >>>>> 
> > >>>>> So,
> > >>>>> 
> > >>>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> > >>>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> > >>>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> > >>>> 
> > >>>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> > >>>> 
> > >>>> Gleb, Paolo, any hard feelings?
> > >>>> 
> > >>> I do not see how can we break the function in such a way and get
> > >>> away with it. Not all valid pfns point to memory. Physical address can
> > >>> be sparse (due to PCI hole, framebuffer or just because).
> > >> 
> > >> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
> > >> 
> > > That's not how I read the code. The code checks for reserved flag set.
> > > It should be set on pfns that point to memory holes. As far as I
> > 
> > I couldn't find any traces of code that sets the reserved bits on e500 chips though. I've only seen it getting set for memory hotplug and memory incoherent DMA code which doesn't get used on e500.
> > 
> > But I'd be more than happy to get proven wrong :).
> > 
> Can you write a module that scans all page structures? AFAIK all pages
> are marked as reserved and then those that become regular memory are
> marked as unreserved. Hope Andrea will chime in here :)

So the situation with regard to non-RAM and PageReserved/pfn_valid is
quite simple.

"struct page" exists for non-RAM too as "struct page" must exist up to
at least 2^MAX_ORDER pfn alignment or things breaks, like the first
pfn must be 2^MAX_ORDER aligned or again things break in the buddy. We
don't make an effort to save a few "struct page" to keep it simpler.

But those non-RAM pages (or tiny non-RAM page holes if any) are marked
PageReserved.

If "struct page" doesn't exist pfn_valid returns false.

So you cannot get away skipping pfn_valid and at least one
PageReserved.

However it gets more complex than just ram vs non-RAM, because there
are pages that are real RAM (not left marked PageReserved at boot
after checking e820 or equivalent bios data for non-x86 archs) but
that are taken over by drivers, that then could use it as mmio regions
snooping the writes and mapping them in userland too as hugepages
maybe. That is the motivation for the THP related code in
kvm_is_mmio_pfn.

Those vmas have VM_PFNMAP set so vm_normal_page is zero and the
refcounting is skipped like if it's non-RAM and they're mapped with
remap_pfn_range (different mechanism for VM_MIXEDMAP that does the
refcounting and doesn't require in turn the driver to mark the page
PageReserved).

The above explains why KVM needs to skip the refcounting on
PageReserved == true && pfn_valid() == true, and it must skip the
refcounting for pfn_valid == false without trying to call pfn_to_page
(or it'll crash).

Now the code doing the THP check with smp_rmb is very safe, possibly
too safe. Looking at it now, it looks a minor overengineering
oversight.

The slight oversight is that split_huge_page cannot transfer the
PG_reserved bit from head to tail.

So there's no real risk that the driver allocates an hugepage, marks
the head reserved (the PG_ bits of a THP page are only relevant in the
head), maps the page with some new version of remap_pfn_range_huge
(not possible right now, PFNMAP|MIXEDMAP only can handle 4k mappings
right now) and then split_huge_page runs and we miss the reserved bit
on the tail page. Because the reserved bit wouldn't be transferred to
the tail page anyway by split_huge_page so we'd miss it anyway if
anything like that would happen.

Besides split_huge_page couldn't run on a device owned page as it's
not anonymous but device-owned and there's no way to map it with a
hugepmd too.

So in short, it's probably never going to help to have such a check
there. We can probably optimize away the THP code in there.

No matter how the driver maps this hypothetical new type of reserved
hugepage in userland, it should never allow split_huge_page to run on
it, and then it should take care of marking all subpages as reserved
too. And KVM won't need to worry about a driver setting reserved only
on a head page anymore.

Untested RFC patch follows.

==
>From 76927680df7034a575bed5da754f7ebe94481fb3 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 25 Jul 2013 02:56:08 +0200
Subject: [PATCH] kvm: optimize away THP checks in kvm_is_mmio_pfn()

The checks on PG_reserved in the page structure on head and tail pages
aren't necessary because split_huge_page wouldn't transfer the
PG_reserved bit from head to tail anyway.

This was a forward-thinking check done in the case PageReserved was
set by a driver-owned page mapped in userland with something like
remap_pfn_range in a VM_PFNMAP region, but using hugepmds (not
possible right now). It was meant to be very safe, but it's overkill
as it's unlikely split_huge_page could ever run without the driver
noticing and tearing down the hugepage itself.

And if a driver in the future will really want to map a reserved
hugepage in userland using an huge pmd it should simply take care of
marking all subpages reserved too to keep KVM safe. This of course
would require such a hypothetical driver to tear down the huge pmd
itself and splitting the hugepage itself, instead of relying on
split_huge_page, but that sounds very reasonable, especially
considering split_huge_page wouldn't currently transfer the reserved
bit anyway.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 virt/kvm/kvm_main.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1580dd4..fa030fb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,28 +102,8 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (pfn_valid(pfn)) {
-		int reserved;
-		struct page *tail = pfn_to_page(pfn);
-		struct page *head = compound_trans_head(tail);
-		reserved = PageReserved(head);
-		if (head != tail) {
-			/*
-			 * "head" is not a dangling pointer
-			 * (compound_trans_head takes care of that)
-			 * but the hugepage may have been splitted
-			 * from under us (and we may not hold a
-			 * reference count on the head page so it can
-			 * be reused before we run PageReferenced), so
-			 * we've to check PageTail before returning
-			 * what we just read.
-			 */
-			smp_rmb();
-			if (PageTail(tail))
-				return reserved;
-		}
-		return PageReserved(tail);
-	}
+	if (pfn_valid(pfn))
+		return PageReserved(pfn_to_page(pfn));
 
 	return true;
 }

^ permalink raw reply related	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25  1:04                                         ` Andrea Arcangeli
  0 siblings, 0 replies; 82+ messages in thread
From: Andrea Arcangeli @ 2013-07-25  1:04 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Alexander Graf, "“tiejun.chen”",
	Bhushan Bharat-R65777, kvm-ppc, kvm@vger.kernel.org list,
	Wood Scott-B07421, Paolo Bonzini

Hi!

On Wed, Jul 24, 2013 at 01:30:12PM +0300, Gleb Natapov wrote:
> On Wed, Jul 24, 2013 at 12:25:18PM +0200, Alexander Graf wrote:
> > 
> > On 24.07.2013, at 12:19, Gleb Natapov wrote:
> > 
> > > On Wed, Jul 24, 2013 at 12:09:42PM +0200, Alexander Graf wrote:
> > >> 
> > >> On 24.07.2013, at 12:01, Gleb Natapov wrote:
> > >> 
> > >>> Copying Andrea for him to verify that I am not talking nonsense :)
> > >>> 
> > >>> On Wed, Jul 24, 2013 at 10:25:20AM +0200, Alexander Graf wrote:
> > >>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > >>>>> index 1580dd4..5e8635b 100644
> > >>>>> --- a/virt/kvm/kvm_main.c
> > >>>>> +++ b/virt/kvm/kvm_main.c
> > >>>>> @@ -102,6 +102,10 @@ static bool largepages_enabled = true;
> > >>>>> 
> > >>>>> bool kvm_is_mmio_pfn(pfn_t pfn)
> > >>>>> {
> > >>>>> +#ifdef CONFIG_MEMORY_HOTPLUG
> > >>>> 
> > >>>> I'd feel safer if we narrow this down to e500.
> > >>>> 
> > >>>>> +       /*
> > >>>>> +        * Currently only in memory hot remove case we may still need this.
> > >>>>> +        */
> > >>>>>      if (pfn_valid(pfn)) {
> > >>>> 
> > >>>> We still have to check for pfn_valid, no? So the #ifdef should be down here.
> > >>>> 
> > >>>>>              int reserved;
> > >>>>>              struct page *tail = pfn_to_page(pfn);
> > >>>>> @@ -124,6 +128,7 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
> > >>>>>              }
> > >>>>>              return PageReserved(tail);
> > >>>>>      }
> > >>>>> +#endif
> > >>>>> 
> > >>>>>      return true;
> > >>>>> }
> > >>>>> 
> > >>>>> Before apply this change:
> > >>>>> 
> > >>>>> real    (1m19.954s + 1m20.918s + 1m22.740s + 1m21.146s + 1m22.120s)/5= 1m21.376s
> > >>>>> user    (0m23.181s + 0m23.550s + 0m23.506s + 0m23.410s + 0m23.520s)/5= 0m23.433s
> > >>>>> sys	(0m49.087s + 0m49.563s + 0m51.758s + 0m50.290s + 0m51.047s)/5= 0m50.349s
> > >>>>> 
> > >>>>> After apply this change:
> > >>>>> 
> > >>>>> real    (1m19.507s + 1m20.919s + 1m21.436s + 1m21.179s + 1m20.293s)/5= 1m20.667s
> > >>>>> user    (0m22.595s + 0m22.719s + 0m22.484s + 0m22.811s + 0m22.467s)/5= 0m22.615s
> > >>>>> sys	(0m48.841s + 0m49.929s + 0m50.310s + 0m49.813s + 0m48.587s)/5= 0m49.496s
> > >>>>> 
> > >>>>> So,
> > >>>>> 
> > >>>>> real    (1m20.667s - 1m21.376s)/1m21.376s x 100% = -0.6%
> > >>>>> user    (0m22.615s - 0m23.433s)/0m23.433s x 100% = -3.5%
> > >>>>> sys	(0m49.496s - 0m50.349s)/0m50.349s x 100% = -1.7%
> > >>>> 
> > >>>> Very nice, so there is a real world performance benefit to doing this. Then yes, I think it would make sense to change the global helper function to be fast on e500 and use that one from e500_shadow_mas2_attrib() instead.
> > >>>> 
> > >>>> Gleb, Paolo, any hard feelings?
> > >>>> 
> > >>> I do not see how can we break the function in such a way and get
> > >>> away with it. Not all valid pfns point to memory. Physical address can
> > >>> be sparse (due to PCI hole, framebuffer or just because).
> > >> 
> > >> But we don't check for sparseness today in here either. We merely check for incomplete huge pages.
> > >> 
> > > That's not how I read the code. The code checks for reserved flag set.
> > > It should be set on pfns that point to memory holes. As far as I
> > 
> > I couldn't find any traces of code that sets the reserved bits on e500 chips though. I've only seen it getting set for memory hotplug and memory incoherent DMA code which doesn't get used on e500.
> > 
> > But I'd be more than happy to get proven wrong :).
> > 
> Can you write a module that scans all page structures? AFAIK all pages
> are marked as reserved and then those that become regular memory are
> marked as unreserved. Hope Andrea will chime in here :)

So the situation with regard to non-RAM and PageReserved/pfn_valid is
quite simple.

"struct page" exists for non-RAM too as "struct page" must exist up to
at least 2^MAX_ORDER pfn alignment or things breaks, like the first
pfn must be 2^MAX_ORDER aligned or again things break in the buddy. We
don't make an effort to save a few "struct page" to keep it simpler.

But those non-RAM pages (or tiny non-RAM page holes if any) are marked
PageReserved.

If "struct page" doesn't exist pfn_valid returns false.

So you cannot get away skipping pfn_valid and at least one
PageReserved.

However it gets more complex than just ram vs non-RAM, because there
are pages that are real RAM (not left marked PageReserved at boot
after checking e820 or equivalent bios data for non-x86 archs) but
that are taken over by drivers, that then could use it as mmio regions
snooping the writes and mapping them in userland too as hugepages
maybe. That is the motivation for the THP related code in
kvm_is_mmio_pfn.

Those vmas have VM_PFNMAP set so vm_normal_page is zero and the
refcounting is skipped like if it's non-RAM and they're mapped with
remap_pfn_range (different mechanism for VM_MIXEDMAP that does the
refcounting and doesn't require in turn the driver to mark the page
PageReserved).

The above explains why KVM needs to skip the refcounting on
PageReserved == true && pfn_valid() == true, and it must skip the
refcounting for pfn_valid == false without trying to call pfn_to_page
(or it'll crash).

Now the code doing the THP check with smp_rmb is very safe, possibly
too safe. Looking at it now, it looks a minor overengineering
oversight.

The slight oversight is that split_huge_page cannot transfer the
PG_reserved bit from head to tail.

So there's no real risk that the driver allocates an hugepage, marks
the head reserved (the PG_ bits of a THP page are only relevant in the
head), maps the page with some new version of remap_pfn_range_huge
(not possible right now, PFNMAP|MIXEDMAP only can handle 4k mappings
right now) and then split_huge_page runs and we miss the reserved bit
on the tail page. Because the reserved bit wouldn't be transferred to
the tail page anyway by split_huge_page so we'd miss it anyway if
anything like that would happen.

Besides split_huge_page couldn't run on a device owned page as it's
not anonymous but device-owned and there's no way to map it with a
hugepmd too.

So in short, it's probably never going to help to have such a check
there. We can probably optimize away the THP code in there.

No matter how the driver maps this hypothetical new type of reserved
hugepage in userland, it should never allow split_huge_page to run on
it, and then it should take care of marking all subpages as reserved
too. And KVM won't need to worry about a driver setting reserved only
on a head page anymore.

Untested RFC patch follows.

==
From 76927680df7034a575bed5da754f7ebe94481fb3 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 25 Jul 2013 02:56:08 +0200
Subject: [PATCH] kvm: optimize away THP checks in kvm_is_mmio_pfn()

The checks on PG_reserved in the page structure on head and tail pages
aren't necessary because split_huge_page wouldn't transfer the
PG_reserved bit from head to tail anyway.

This was a forward-thinking check done in the case PageReserved was
set by a driver-owned page mapped in userland with something like
remap_pfn_range in a VM_PFNMAP region, but using hugepmds (not
possible right now). It was meant to be very safe, but it's overkill
as it's unlikely split_huge_page could ever run without the driver
noticing and tearing down the hugepage itself.

And if a driver in the future will really want to map a reserved
hugepage in userland using an huge pmd it should simply take care of
marking all subpages reserved too to keep KVM safe. This of course
would require such a hypothetical driver to tear down the huge pmd
itself and splitting the hugepage itself, instead of relying on
split_huge_page, but that sounds very reasonable, especially
considering split_huge_page wouldn't currently transfer the reserved
bit anyway.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 virt/kvm/kvm_main.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1580dd4..fa030fb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,28 +102,8 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (pfn_valid(pfn)) {
-		int reserved;
-		struct page *tail = pfn_to_page(pfn);
-		struct page *head = compound_trans_head(tail);
-		reserved = PageReserved(head);
-		if (head != tail) {
-			/*
-			 * "head" is not a dangling pointer
-			 * (compound_trans_head takes care of that)
-			 * but the hugepage may have been splitted
-			 * from under us (and we may not hold a
-			 * reference count on the head page so it can
-			 * be reused before we run PageReferenced), so
-			 * we've to check PageTail before returning
-			 * what we just read.
-			 */
-			smp_rmb();
-			if (PageTail(tail))
-				return reserved;
-		}
-		return PageReserved(tail);
-	}
+	if (pfn_valid(pfn))
+		return PageReserved(pfn_to_page(pfn));
 
 	return true;
 }

^ permalink raw reply related	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-24 20:32                                       ` Scott Wood
  (?)
@ 2013-07-25  8:50                                         ` Gleb Natapov
  -1 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-25  8:50 UTC (permalink / raw)
  To: Scott Wood
  Cc: Alexander Graf, Bhushan Bharat-R65777, “tiejun.chen”,
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev

On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> >
> >On 24.07.2013, at 11:35, Gleb Natapov wrote:
> >
> >> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>>> Are not we going to use page_is_ram() from
> >e500_shadow_mas2_attrib() as Scott commented?
> >>>
> >>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>>
> >>>
> >> Because it is much slower and, IIRC, actually used to build pfn
> >map that allow
> >> us to check quickly for valid pfn.
> >
> >Then why should we use page_is_ram()? :)
> >
> >I really don't want the e500 code to diverge too much from what
> >the rest of the kvm code is doing.
> 
> I don't understand "actually used to build pfn map...".  What code
> is this?  I don't see any calls to page_is_ram() in the KVM code, or
> in generic mm code.  Is this a statement about what x86 does?
It may not be page_is_ram() directly, but the same information that
page_is_ram() uses. On power both page_is_ram() and do_init_bootmem()
walk some kind of memblock_region data structure. What is important is
that pfn_valid() does not mean that there is memory behind the page
structure. See Andrea's reply.

> 
> On PPC page_is_ram() is only called (AFAICT) for determining what
> attributes to set on mmaps.  We want to be sure that KVM always
> makes the same decision.  While pfn_valid() seems like it should be
> equivalent, it's not obvious from the PPC code that it is.
> 
Again pfn_valid() is not enough.

> If pfn_valid() is better, why is that not used for mmap?  Why are
> there two different names for the same thing?
> 
They are not the same thing. page_is_ram() tells you if a phys address is
ram backed. pfn_valid() tells you if there is a struct page behind the
pfn. PageReserved() tells you if a pfn is marked as reserved. All non
ram pfns should be reserved, but ram pfns can be reserved too. Again,
see Andrea's reply.

Why does ppc use page_is_ram() for mmap? How should I know? But looking at
the function it does it only as a fallback if
ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
noncached as a safe fallback makes sense. It also makes sense to allow
noncached access to reserved ram sometimes.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25  8:50                                         ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-25  8:50 UTC (permalink / raw)
  To: Scott Wood
  Cc: Wood Scott-B07421, kvm@vger.kernel.org list, Alexander Graf,
	kvm-ppc, “tiejun.chen”,
	Bhushan Bharat-R65777, Paolo Bonzini, linuxppc-dev

On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> >
> >On 24.07.2013, at 11:35, Gleb Natapov wrote:
> >
> >> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>>> Are not we going to use page_is_ram() from
> >e500_shadow_mas2_attrib() as Scott commented?
> >>>
> >>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>>
> >>>
> >> Because it is much slower and, IIRC, actually used to build pfn
> >map that allow
> >> us to check quickly for valid pfn.
> >
> >Then why should we use page_is_ram()? :)
> >
> >I really don't want the e500 code to diverge too much from what
> >the rest of the kvm code is doing.
> 
> I don't understand "actually used to build pfn map...".  What code
> is this?  I don't see any calls to page_is_ram() in the KVM code, or
> in generic mm code.  Is this a statement about what x86 does?
It may not be page_is_ram() directly, but the same information that
page_is_ram() uses. On power both page_is_ram() and do_init_bootmem()
walk some kind of memblock_region data structure. What is important is
that pfn_valid() does not mean that there is memory behind the page
structure. See Andrea's reply.

> 
> On PPC page_is_ram() is only called (AFAICT) for determining what
> attributes to set on mmaps.  We want to be sure that KVM always
> makes the same decision.  While pfn_valid() seems like it should be
> equivalent, it's not obvious from the PPC code that it is.
> 
Again pfn_valid() is not enough.

> If pfn_valid() is better, why is that not used for mmap?  Why are
> there two different names for the same thing?
> 
They are not the same thing. page_is_ram() tells you if a phys address is
ram backed. pfn_valid() tells you if there is a struct page behind the
pfn. PageReserved() tells you if a pfn is marked as reserved. All non
ram pfns should be reserved, but ram pfns can be reserved too. Again,
see Andrea's reply.

Why does ppc use page_is_ram() for mmap? How should I know? But looking at
the function it does it only as a fallback if
ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
noncached as a safe fallback makes sense. It also makes sense to allow
noncached access to reserved ram sometimes.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25  8:50                                         ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-25  8:50 UTC (permalink / raw)
  To: Scott Wood
  Cc: Alexander Graf, Bhushan Bharat-R65777, “tiejun.chen”,
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev

On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> >
> >On 24.07.2013, at 11:35, Gleb Natapov wrote:
> >
> >> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>>> Are not we going to use page_is_ram() from
> >e500_shadow_mas2_attrib() as Scott commented?
> >>>
> >>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>>
> >>>
> >> Because it is much slower and, IIRC, actually used to build pfn
> >map that allow
> >> us to check quickly for valid pfn.
> >
> >Then why should we use page_is_ram()? :)
> >
> >I really don't want the e500 code to diverge too much from what
> >the rest of the kvm code is doing.
> 
> I don't understand "actually used to build pfn map...".  What code
> is this?  I don't see any calls to page_is_ram() in the KVM code, or
> in generic mm code.  Is this a statement about what x86 does?
It may not be page_is_ram() directly, but the same information that
page_is_ram() uses. On power both page_is_ram() and do_init_bootmem()
walk some kind of memblock_region data structure. What is important is
that pfn_valid() does not mean that there is memory behind the page
structure. See Andrea's reply.

> 
> On PPC page_is_ram() is only called (AFAICT) for determining what
> attributes to set on mmaps.  We want to be sure that KVM always
> makes the same decision.  While pfn_valid() seems like it should be
> equivalent, it's not obvious from the PPC code that it is.
> 
Again pfn_valid() is not enough.

> If pfn_valid() is better, why is that not used for mmap?  Why are
> there two different names for the same thing?
> 
They are not the same thing. page_is_ram() tells you if a phys address is
ram backed. pfn_valid() tells you if there is a struct page behind the
pfn. PageReserved() tells you if a pfn is marked as reserved. All non
ram pfns should be reserved, but ram pfns can be reserved too. Again,
see Andrea's reply.

Why does ppc use page_is_ram() for mmap? How should I know? But looking at
the function it does it only as a fallback if
ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
noncached as a safe fallback makes sense. It also makes sense to allow
noncached access to reserved ram sometimes.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-25  8:50                                         ` Gleb Natapov
  (?)
@ 2013-07-25 16:07                                           ` Alexander Graf
  -1 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-25 16:07 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Scott Wood, Bhushan Bharat-R65777,
	"“tiejun.chen” Chen",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev, Benjamin Herrenschmidt


On 25.07.2013, at 10:50, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
>> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
>>> 
>>> On 24.07.2013, at 11:35, Gleb Natapov wrote:
>>> 
>>>> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
>>>>>> Are not we going to use page_is_ram() from
>>> e500_shadow_mas2_attrib() as Scott commented?
>>>>> 
>>>>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
>>>>> 
>>>>> 
>>>> Because it is much slower and, IIRC, actually used to build pfn
>>> map that allow
>>>> us to check quickly for valid pfn.
>>> 
>>> Then why should we use page_is_ram()? :)
>>> 
>>> I really don't want the e500 code to diverge too much from what
>>> the rest of the kvm code is doing.
>> 
>> I don't understand "actually used to build pfn map...".  What code
>> is this?  I don't see any calls to page_is_ram() in the KVM code, or
>> in generic mm code.  Is this a statement about what x86 does?
> It may be not page_is_ram() directly, but the same into page_is_ram() is
> using. On power both page_is_ram() and do_init_bootmem() walks some kind
> of memblock_region data structure. What important is that pfn_valid()
> does not mean that there is a memory behind page structure. See Andrea's
> reply.
> 
>> 
>> On PPC page_is_ram() is only called (AFAICT) for determining what
>> attributes to set on mmaps.  We want to be sure that KVM always
>> makes the same decision.  While pfn_valid() seems like it should be
>> equivalent, it's not obvious from the PPC code that it is.
>> 
> Again pfn_valid() is not enough.
> 
>> If pfn_valid() is better, why is that not used for mmap?  Why are
>> there two different names for the same thing?
>> 
> They are not the same thing. page_is_ram() tells you if phys address is
> ram backed. pfn_valid() tells you if there is struct page behind the
> pfn. PageReserved() tells if you a pfn is marked as reserved. All non
> ram pfns should be reserved, but ram pfns can be reserved too. Again,
> see Andrea's reply.
> 
> Why ppc uses page_is_ram() for mmap? How should I know? But looking at

That one's easy. Let's just ask Ben. Ben, is there any particular reason PPC uses page_is_ram() rather than what KVM does here to figure out whether a pfn is RAM or not? It would be really useful to be able to run the exact same logic that figures out whether we're cacheable or not in both TLB writers (KVM and linux-mm).


Alex

> the function it does it only as a fallback if
> ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
> noncached as a safe fallback makes sense. It is also make sense to allow
> noncached access to reserved ram sometimes.
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25 16:07                                           ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-25 16:07 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Wood Scott-B07421, kvm@vger.kernel.org list, kvm-ppc,
	"“tiejun.chen” Chen",
	Bhushan Bharat-R65777, Scott Wood, Paolo Bonzini, linuxppc-dev


On 25.07.2013, at 10:50, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
>> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
>>>=20
>>> On 24.07.2013, at 11:35, Gleb Natapov wrote:
>>>=20
>>>> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
>>>>>> Are not we going to use page_is_ram() from
>>> e500_shadow_mas2_attrib() as Scott commented?
>>>>>=20
>>>>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
>>>>>=20
>>>>>=20
>>>> Because it is much slower and, IIRC, actually used to build pfn
>>> map that allow
>>>> us to check quickly for valid pfn.
>>>=20
>>> Then why should we use page_is_ram()? :)
>>>=20
>>> I really don't want the e500 code to diverge too much from what
>>> the rest of the kvm code is doing.
>>=20
>> I don't understand "actually used to build pfn map...".  What code
>> is this?  I don't see any calls to page_is_ram() in the KVM code, or
>> in generic mm code.  Is this a statement about what x86 does?
> It may be not page_is_ram() directly, but the same into page_is_ram() =
is
> using. On power both page_is_ram() and do_init_bootmem() walks some =
kind
> of memblock_region data structure. What important is that pfn_valid()
> does not mean that there is a memory behind page structure. See =
Andrea's
> reply.
>=20
>>=20
>> On PPC page_is_ram() is only called (AFAICT) for determining what
>> attributes to set on mmaps.  We want to be sure that KVM always
>> makes the same decision.  While pfn_valid() seems like it should be
>> equivalent, it's not obvious from the PPC code that it is.
>>=20
> Again pfn_valid() is not enough.
>=20
>> If pfn_valid() is better, why is that not used for mmap?  Why are
>> there two different names for the same thing?
>>=20
> They are not the same thing. page_is_ram() tells you if phys address =
is
> ram backed. pfn_valid() tells you if there is struct page behind the
> pfn. PageReserved() tells if you a pfn is marked as reserved. All non
> ram pfns should be reserved, but ram pfns can be reserved too. Again,
> see Andrea's reply.
>=20
> Why ppc uses page_is_ram() for mmap? How should I know? But looking at

That one's easy. Let's just ask Ben. Ben, is there any particular reason =
PPC uses page_is_ram() rather than what KVM does here to figure out =
whether a pfn is RAM or not? It would be really useful to be able to run =
the exact same logic that figures out whether we're cacheable or not in =
both TLB writers (KVM and linux-mm).


Alex

> the function it does it only as a fallback if
> ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
> noncached as a safe fallback makes sense. It is also make sense to =
allow
> noncached access to reserved ram sometimes.
>=20
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25 16:07                                           ` Alexander Graf
  0 siblings, 0 replies; 82+ messages in thread
From: Alexander Graf @ 2013-07-25 16:07 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Scott Wood, Bhushan Bharat-R65777,
	"“tiejun.chen” Chen",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev, Benjamin Herrenschmidt


On 25.07.2013, at 10:50, Gleb Natapov wrote:

> On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
>> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
>>> 
>>> On 24.07.2013, at 11:35, Gleb Natapov wrote:
>>> 
>>>> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
>>>>>> Are not we going to use page_is_ram() from
>>> e500_shadow_mas2_attrib() as Scott commented?
>>>>> 
>>>>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
>>>>> 
>>>>> 
>>>> Because it is much slower and, IIRC, actually used to build pfn
>>> map that allow
>>>> us to check quickly for valid pfn.
>>> 
>>> Then why should we use page_is_ram()? :)
>>> 
>>> I really don't want the e500 code to diverge too much from what
>>> the rest of the kvm code is doing.
>> 
>> I don't understand "actually used to build pfn map...".  What code
>> is this?  I don't see any calls to page_is_ram() in the KVM code, or
>> in generic mm code.  Is this a statement about what x86 does?
> It may be not page_is_ram() directly, but the same into page_is_ram() is
> using. On power both page_is_ram() and do_init_bootmem() walks some kind
> of memblock_region data structure. What important is that pfn_valid()
> does not mean that there is a memory behind page structure. See Andrea's
> reply.
> 
>> 
>> On PPC page_is_ram() is only called (AFAICT) for determining what
>> attributes to set on mmaps.  We want to be sure that KVM always
>> makes the same decision.  While pfn_valid() seems like it should be
>> equivalent, it's not obvious from the PPC code that it is.
>> 
> Again pfn_valid() is not enough.
> 
>> If pfn_valid() is better, why is that not used for mmap?  Why are
>> there two different names for the same thing?
>> 
> They are not the same thing. page_is_ram() tells you if phys address is
> ram backed. pfn_valid() tells you if there is struct page behind the
> pfn. PageReserved() tells if you a pfn is marked as reserved. All non
> ram pfns should be reserved, but ram pfns can be reserved too. Again,
> see Andrea's reply.
> 
> Why ppc uses page_is_ram() for mmap? How should I know? But looking at

That one's easy. Let's just ask Ben. Ben, is there any particular reason PPC uses page_is_ram() rather than what KVM does here to figure out whether a pfn is RAM or not? It would be really useful to be able to run the exact same logic that figures out whether we're cacheable or not in both TLB writers (KVM and linux-mm).


Alex

> the function it does it only as a fallback if
> ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
> noncached as a safe fallback makes sense. It is also make sense to allow
> noncached access to reserved ram sometimes.
> 
> --
> 			Gleb.


^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-25 16:07                                           ` Alexander Graf
  (?)
@ 2013-07-25 16:14                                             ` Gleb Natapov
  -1 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-25 16:14 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Bhushan Bharat-R65777,
	"“tiejun.chen” Chen",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev, Benjamin Herrenschmidt

On Thu, Jul 25, 2013 at 06:07:55PM +0200, Alexander Graf wrote:
> 
> On 25.07.2013, at 10:50, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
> >> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> >>> 
> >>> On 24.07.2013, at 11:35, Gleb Natapov wrote:
> >>> 
> >>>> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>>>>> Are not we going to use page_is_ram() from
> >>> e500_shadow_mas2_attrib() as Scott commented?
> >>>>> 
> >>>>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>>>> 
> >>>>> 
> >>>> Because it is much slower and, IIRC, actually used to build pfn
> >>> map that allow
> >>>> us to check quickly for valid pfn.
> >>> 
> >>> Then why should we use page_is_ram()? :)
> >>> 
> >>> I really don't want the e500 code to diverge too much from what
> >>> the rest of the kvm code is doing.
> >> 
> >> I don't understand "actually used to build pfn map...".  What code
> >> is this?  I don't see any calls to page_is_ram() in the KVM code, or
> >> in generic mm code.  Is this a statement about what x86 does?
> > It may be not page_is_ram() directly, but the same into page_is_ram() is
> > using. On power both page_is_ram() and do_init_bootmem() walks some kind
> > of memblock_region data structure. What important is that pfn_valid()
> > does not mean that there is a memory behind page structure. See Andrea's
> > reply.
> > 
> >> 
> >> On PPC page_is_ram() is only called (AFAICT) for determining what
> >> attributes to set on mmaps.  We want to be sure that KVM always
> >> makes the same decision.  While pfn_valid() seems like it should be
> >> equivalent, it's not obvious from the PPC code that it is.
> >> 
> > Again pfn_valid() is not enough.
> > 
> >> If pfn_valid() is better, why is that not used for mmap?  Why are
> >> there two different names for the same thing?
> >> 
> > They are not the same thing. page_is_ram() tells you if phys address is
> > ram backed. pfn_valid() tells you if there is struct page behind the
> > pfn. PageReserved() tells if you a pfn is marked as reserved. All non
> > ram pfns should be reserved, but ram pfns can be reserved too. Again,
> > see Andrea's reply.
> > 
> > Why ppc uses page_is_ram() for mmap? How should I know? But looking at
> 
> That one's easy. Let's just ask Ben. Ben, is there any particular reason PPC uses page_is_ram() rather than what KVM does here to figure out whether a pfn is RAM or not? It would be really useful to be able to run the exact same logic that figures out whether we're cacheable or not in both TLB writers (KVM and linux-mm).
> 
KVM does not only try to figure out what is RAM or not! Look at how KVM
uses the function. KVM tries to figure out if refcounting needs to be
used on this page, among other things.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25 16:14                                             ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-25 16:14 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Wood Scott-B07421, kvm@vger.kernel.org list, kvm-ppc,
	"“tiejun.chen” Chen",
	Bhushan Bharat-R65777, Scott Wood, Paolo Bonzini, linuxppc-dev

On Thu, Jul 25, 2013 at 06:07:55PM +0200, Alexander Graf wrote:
> 
> On 25.07.2013, at 10:50, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
> >> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> >>> 
> >>> On 24.07.2013, at 11:35, Gleb Natapov wrote:
> >>> 
> >>>> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>>>>> Are not we going to use page_is_ram() from
> >>> e500_shadow_mas2_attrib() as Scott commented?
> >>>>> 
> >>>>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>>>> 
> >>>>> 
> >>>> Because it is much slower and, IIRC, actually used to build pfn
> >>> map that allow
> >>>> us to check quickly for valid pfn.
> >>> 
> >>> Then why should we use page_is_ram()? :)
> >>> 
> >>> I really don't want the e500 code to diverge too much from what
> >>> the rest of the kvm code is doing.
> >> 
> >> I don't understand "actually used to build pfn map...".  What code
> >> is this?  I don't see any calls to page_is_ram() in the KVM code, or
> >> in generic mm code.  Is this a statement about what x86 does?
> > It may be not page_is_ram() directly, but the same into page_is_ram() is
> > using. On power both page_is_ram() and do_init_bootmem() walks some kind
> > of memblock_region data structure. What important is that pfn_valid()
> > does not mean that there is a memory behind page structure. See Andrea's
> > reply.
> > 
> >> 
> >> On PPC page_is_ram() is only called (AFAICT) for determining what
> >> attributes to set on mmaps.  We want to be sure that KVM always
> >> makes the same decision.  While pfn_valid() seems like it should be
> >> equivalent, it's not obvious from the PPC code that it is.
> >> 
> > Again pfn_valid() is not enough.
> > 
> >> If pfn_valid() is better, why is that not used for mmap?  Why are
> >> there two different names for the same thing?
> >> 
> > They are not the same thing. page_is_ram() tells you if phys address is
> > ram backed. pfn_valid() tells you if there is struct page behind the
> > pfn. PageReserved() tells if you a pfn is marked as reserved. All non
> > ram pfns should be reserved, but ram pfns can be reserved too. Again,
> > see Andrea's reply.
> > 
> > Why ppc uses page_is_ram() for mmap? How should I know? But looking at
> 
> That one's easy. Let's just ask Ben. Ben, is there any particular reason PPC uses page_is_ram() rather than what KVM does here to figure out whether a pfn is RAM or not? It would be really useful to be able to run the exact same logic that figures out whether we're cacheable or not in both TLB writers (KVM and linux-mm).
> 
KVM does not only try to figure out what is RAM or not! Look at how KVM
uses the function. KVM tries to figure out if refcounting needs to be
used on this page, among other things.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-25 16:14                                             ` Gleb Natapov
  0 siblings, 0 replies; 82+ messages in thread
From: Gleb Natapov @ 2013-07-25 16:14 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Bhushan Bharat-R65777,
	"“tiejun.chen” Chen",
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev, Benjamin Herrenschmidt

On Thu, Jul 25, 2013 at 06:07:55PM +0200, Alexander Graf wrote:
> 
> On 25.07.2013, at 10:50, Gleb Natapov wrote:
> 
> > On Wed, Jul 24, 2013 at 03:32:49PM -0500, Scott Wood wrote:
> >> On 07/24/2013 04:39:59 AM, Alexander Graf wrote:
> >>> 
> >>> On 24.07.2013, at 11:35, Gleb Natapov wrote:
> >>> 
> >>>> On Wed, Jul 24, 2013 at 11:21:11AM +0200, Alexander Graf wrote:
> >>>>>> Are not we going to use page_is_ram() from
> >>> e500_shadow_mas2_attrib() as Scott commented?
> >>>>> 
> >>>>> rWhy aren't we using page_is_ram() in kvm_is_mmio_pfn()?
> >>>>> 
> >>>>> 
> >>>> Because it is much slower and, IIRC, actually used to build pfn
> >>> map that allow
> >>>> us to check quickly for valid pfn.
> >>> 
> >>> Then why should we use page_is_ram()? :)
> >>> 
> >>> I really don't want the e500 code to diverge too much from what
> >>> the rest of the kvm code is doing.
> >> 
> >> I don't understand "actually used to build pfn map...".  What code
> >> is this?  I don't see any calls to page_is_ram() in the KVM code, or
> >> in generic mm code.  Is this a statement about what x86 does?
> > It may be not page_is_ram() directly, but the same into page_is_ram() is
> > using. On power both page_is_ram() and do_init_bootmem() walks some kind
> > of memblock_region data structure. What important is that pfn_valid()
> > does not mean that there is a memory behind page structure. See Andrea's
> > reply.
> > 
> >> 
> >> On PPC page_is_ram() is only called (AFAICT) for determining what
> >> attributes to set on mmaps.  We want to be sure that KVM always
> >> makes the same decision.  While pfn_valid() seems like it should be
> >> equivalent, it's not obvious from the PPC code that it is.
> >> 
> > Again pfn_valid() is not enough.
> > 
> >> If pfn_valid() is better, why is that not used for mmap?  Why are
> >> there two different names for the same thing?
> >> 
> > They are not the same thing. page_is_ram() tells you if phys address is
> > ram backed. pfn_valid() tells you if there is struct page behind the
> > pfn. PageReserved() tells if you a pfn is marked as reserved. All non
> > ram pfns should be reserved, but ram pfns can be reserved too. Again,
> > see Andrea's reply.
> > 
> > Why ppc uses page_is_ram() for mmap? How should I know? But looking at
> 
> That one's easy. Let's just ask Ben. Ben, is there any particular reason PPC uses page_is_ram() rather than what KVM does here to figure out whether a pfn is RAM or not? It would be really useful to be able to run the exact same logic that figures out whether we're cacheable or not in both TLB writers (KVM and linux-mm).
> 
KVM does not only try to figure out what is RAM or not! Look at how KVM
uses the function. KVM tries to figure out if refcounting needs to be
used on this page, among other things.

--
			Gleb.

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
  2013-07-25  8:50                                         ` Gleb Natapov
  (?)
@ 2013-07-26 22:27                                           ` Scott Wood
  -1 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-26 22:27 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Alexander Graf, Bhushan Bharat-R65777, “tiejun.chen”,
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev

On 07/25/2013 03:50:42 AM, Gleb Natapov wrote:
> Why ppc uses page_is_ram() for mmap? How should I know? But looking at
> the function it does it only as a fallback if
> ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
> noncached as a safe fallback makes sense.

There's only one current implementation of  
ppc_md.phys_mem_access_prot(), which is pci_phys_mem_access_prot(),  
which also uses page_is_ram().  If page_is_ram() returns false then it  
checks for write-combining PCI.  But yes, we would want to call  
ppc_md.phys_mem_access_prot() if present.

Copying from the host PTE would be ideal if it doesn't come with a  
noticeable performance impact compared to other methods, but one way or  
another we want to be sure we match.

> It is also make sense to allow noncached access to reserved ram  
> sometimes.

Perhaps, but that's not KVM's decision to make.  You should get the  
same result as if you mmaped it -- because QEMU already did and we need  
to be consistent.  Not to mention the large page kernel mapping that  
will have been done on e500...

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-26 22:27                                           ` Scott Wood
  0 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-26 22:27 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Wood Scott-B07421, kvm@vger.kernel.org list, Alexander Graf,
	kvm-ppc, “tiejun.chen”,
	Bhushan Bharat-R65777, Paolo Bonzini, linuxppc-dev

On 07/25/2013 03:50:42 AM, Gleb Natapov wrote:
> Why ppc uses page_is_ram() for mmap? How should I know? But looking at
> the function it does it only as a fallback if
> ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
> noncached as a safe fallback makes sense.

There's only one current implementation of =20
ppc_md.phys_mem_access_prot(), which is pci_phys_mem_access_prot(), =20
which also uses page_is_ram().  If page_is_ram() returns false then it =20
checks for write-combining PCI.  But yes, we would want to call =20
ppc_md.phys_mem_access_prot() if present.

Copying from the host PTE would be ideal if doesn't come with a =20
noticeable performance impact compared to other methods, but one way or =20
another we want to be sure we match.

> It is also make sense to allow noncached access to reserved ram =20
> sometimes.

Perhaps, but that's not KVM's decision to make.  You should get the =20
same result as if you mmaped it -- because QEMU already did and we need =20
to be consistent.  Not to mention the large page kernel mapping that =20
will have been done on e500...

-Scott=

^ permalink raw reply	[flat|nested] 82+ messages in thread

* Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages
@ 2013-07-26 22:27                                           ` Scott Wood
  0 siblings, 0 replies; 82+ messages in thread
From: Scott Wood @ 2013-07-26 22:27 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Alexander Graf, Bhushan Bharat-R65777, “tiejun.chen”,
	kvm-ppc, kvm@vger.kernel.org list, Wood Scott-B07421,
	Paolo Bonzini, linuxppc-dev

On 07/25/2013 03:50:42 AM, Gleb Natapov wrote:
> Why ppc uses page_is_ram() for mmap? How should I know? But looking at
> the function it does it only as a fallback if
> ppc_md.phys_mem_access_prot() is not provided. Making access to MMIO
> noncached as a safe fallback makes sense.

There's only one current implementation of  
ppc_md.phys_mem_access_prot(), which is pci_phys_mem_access_prot(),  
which also uses page_is_ram().  If page_is_ram() returns false then it  
checks for write-combining PCI.  But yes, we would want to call  
ppc_md.phys_mem_access_prot() if present.

Copying from the host PTE would be ideal if doesn't come with a  
noticeable performance impact compared to other methods, but one way or  
another we want to be sure we match.

> It is also make sense to allow noncached access to reserved ram  
> sometimes.

Perhaps, but that's not KVM's decision to make.  You should get the  
same result as if you mmaped it -- because QEMU already did and we need  
to be consistent.  Not to mention the large page kernel mapping that  
will have been done on e500...

-Scott

^ permalink raw reply	[flat|nested] 82+ messages in thread

end of thread, other threads:[~2013-07-26 22:27 UTC | newest]

Thread overview: 82+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-18  6:04 [PATCH 1/2] kvm: powerpc: Do not ignore "E" attribute in mas2 Bharat Bhushan
2013-07-18  6:16 ` Bharat Bhushan
2013-07-18  6:04 ` [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel managed pages Bharat Bhushan
2013-07-18  6:16   ` Bharat Bhushan
2013-07-18  6:26   ` "“tiejun.chen”"
2013-07-18  6:26     ` "“tiejun.chen”"
2013-07-18  7:12     ` Bhushan Bharat-R65777
2013-07-18  7:12       ` Bhushan Bharat-R65777
2013-07-18  7:31       ` "“tiejun.chen”"
2013-07-18  7:31         ` "“tiejun.chen”"
2013-07-18  8:08         ` Bhushan Bharat-R65777
2013-07-18  8:08           ` Bhushan Bharat-R65777
2013-07-18  8:21           ` "“tiejun.chen”"
2013-07-18  8:21             ` "“tiejun.chen”"
2013-07-18  8:22             ` Bhushan Bharat-R65777
2013-07-18  8:22               ` Bhushan Bharat-R65777
2013-07-18  8:25             ` Bhushan Bharat-R65777
2013-07-18  8:25               ` Bhushan Bharat-R65777
2013-07-18  8:55               ` "“tiejun.chen”"
2013-07-18  8:55                 ` "“tiejun.chen”"
2013-07-18  9:44                 ` Alexander Graf
2013-07-18  9:44                   ` Alexander Graf
2013-07-18  9:56                   ` "“tiejun.chen”"
2013-07-18  9:56                     ` "“tiejun.chen”"
2013-07-18 10:00                     ` Alexander Graf
2013-07-18 10:00                       ` Alexander Graf
2013-07-18 10:14                       ` "“tiejun.chen”"
2013-07-18 10:14                         ` "“tiejun.chen”"
2013-07-18 16:11                       ` Scott Wood
2013-07-18 16:11                         ` Scott Wood
2013-07-18  9:48               ` Alexander Graf
2013-07-18  9:48                 ` Alexander Graf
2013-07-18  9:51                 ` Bhushan Bharat-R65777
2013-07-18 10:08                 ` "“tiejun.chen”"
2013-07-18 10:08                   ` "“tiejun.chen”"
2013-07-18 10:12                   ` Alexander Graf
2013-07-18 10:12                     ` Alexander Graf
2013-07-18 10:19                     ` "“tiejun.chen”"
2013-07-18 10:19                       ` "“tiejun.chen”"
2013-07-18 10:27                       ` Alexander Graf
2013-07-18 10:27                         ` Alexander Graf
2013-07-24  2:26                         ` "“tiejun.chen”"
2013-07-24  2:26                           ` "“tiejun.chen”"
2013-07-24  8:25                           ` Alexander Graf
2013-07-24  8:25                             ` Alexander Graf
2013-07-24  9:11                             ` Bhushan Bharat-R65777
2013-07-24  9:11                               ` Bhushan Bharat-R65777
2013-07-24  9:21                               ` Alexander Graf
2013-07-24  9:21                                 ` Alexander Graf
2013-07-24  9:35                                 ` Gleb Natapov
2013-07-24  9:35                                   ` Gleb Natapov
2013-07-24  9:39                                   ` Alexander Graf
2013-07-24  9:39                                     ` Alexander Graf
2013-07-24 20:32                                     ` Scott Wood
2013-07-24 20:32                                       ` Scott Wood
2013-07-24 20:32                                       ` Scott Wood
2013-07-25  8:50                                       ` Gleb Natapov
2013-07-25  8:50                                         ` Gleb Natapov
2013-07-25  8:50                                         ` Gleb Natapov
2013-07-25 16:07                                         ` Alexander Graf
2013-07-25 16:07                                           ` Alexander Graf
2013-07-25 16:07                                           ` Alexander Graf
2013-07-25 16:14                                           ` Gleb Natapov
2013-07-25 16:14                                             ` Gleb Natapov
2013-07-25 16:14                                             ` Gleb Natapov
2013-07-26 22:27                                         ` Scott Wood
2013-07-26 22:27                                           ` Scott Wood
2013-07-26 22:27                                           ` Scott Wood
2013-07-24 10:01                             ` Gleb Natapov
2013-07-24 10:01                               ` Gleb Natapov
2013-07-24 10:09                               ` Alexander Graf
2013-07-24 10:09                                 ` Alexander Graf
2013-07-24 10:19                                 ` Gleb Natapov
2013-07-24 10:19                                   ` Gleb Natapov
2013-07-24 10:25                                   ` Alexander Graf
2013-07-24 10:25                                     ` Alexander Graf
2013-07-24 10:30                                     ` Gleb Natapov
2013-07-24 10:30                                       ` Gleb Natapov
2013-07-25  1:04                                       ` Andrea Arcangeli
2013-07-25  1:04                                         ` Andrea Arcangeli
2013-07-18  8:27   ` "“tiejun.chen”"
2013-07-18  8:27     ` "“tiejun.chen”"

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.