All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] KVM: s390: vsie: use common code functions for pinning
@ 2017-09-01 15:11 David Hildenbrand
  2017-09-04  9:16 ` Cornelia Huck
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: David Hildenbrand @ 2017-09-01 15:11 UTC (permalink / raw)
  To: kvm
  Cc: Christian Borntraeger, Paolo Bonzini, Radim Krčmář,
	david, Cornelia Huck

We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE
for all errors). So we can also get rid of special handling in the
callers of pin_guest_page() and always assume that it is a g2 error.

As also kvm_s390_inject_program_int() should never fail, we can
simplify pin_scb(), too.

Signed-off-by: David Hildenbrand <david@redhat.com>
---

Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git next

v1- > v2:
- As Christian noticed, gfn_to_page() will never return -ENOMEM. So we
  can simplify pin_guest_page() even more and also the callers.

 arch/s390/kvm/vsie.c     | 50 +++++++++++++++++-------------------------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      |  4 ++--
 3 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index fbe46dd0e55d..ad17882accf9 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -443,22 +443,14 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  *
  * Returns: - 0 on success
  *          - -EINVAL if the gpa is not valid guest storage
- *          - -ENOMEM if out of memory
  */
 static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 {
 	struct page *page;
-	hva_t hva;
-	int rc;
 
-	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
-	if (kvm_is_error_hva(hva))
+	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+	if (is_error_page(page))
 		return -EINVAL;
-	rc = get_user_pages_fast(hva, 1, 1, &page);
-	if (rc < 0)
-		return rc;
-	else if (rc != 1)
-		return -ENOMEM;
 	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
 	return 0;
 }
@@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
 static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
 {
-	struct page *page;
-
-	page = virt_to_page(hpa);
-	set_page_dirty_lock(page);
-	put_page(page);
+	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
 	/* mark the page always as dirty for migration */
 	mark_page_dirty(kvm, gpa_to_gfn(gpa));
 }
@@ -557,7 +545,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			rc = set_validity_icpt(scb_s, 0x003bU);
 		if (!rc) {
 			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-			if (rc == -EINVAL)
+			if (rc)
 				rc = set_validity_icpt(scb_s, 0x0034U);
 		}
 		if (rc)
@@ -574,10 +562,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		}
 		/* 256 bytes cannot cross page boundaries */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->itdba = hpa;
 	}
 
@@ -592,10 +580,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * if this block gets bigger, we have to shadow it.
 		 */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x1310U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->gvrd = hpa;
 	}
 
@@ -607,11 +595,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		}
 		/* 64 bytes cannot cross page boundaries */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x0043U);
-		/* Validity 0x0044 will be checked by SIE */
-		if (rc)
 			goto unpin;
+		}
+		/* Validity 0x0044 will be checked by SIE */
 		scb_s->riccbd = hpa;
 	}
 	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
@@ -635,10 +623,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * cross page boundaries
 		 */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x10b0U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->sdnxo = hpa | sdnxc;
 	}
 	return 0;
@@ -663,7 +651,6 @@ static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
  *
  * Returns: - 0 if the scb was pinned.
  *          - > 0 if control has to be given to guest 2
- *          - -ENOMEM if out of memory
  */
 static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		   gpa_t gpa)
@@ -672,14 +659,13 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 	int rc;
 
 	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-	if (rc == -EINVAL) {
+	if (rc) {
 		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		if (!rc)
-			rc = 1;
+		WARN_ON_ONCE(rc);
+		return 1;
 	}
-	if (!rc)
-		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
-	return rc;
+	vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	return 0;
 }
 
 /*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 648b34cabb38..e489fc7089be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -664,6 +664,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
 			       bool *writable);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
+void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 void kvm_get_pfn(kvm_pfn_t pfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 82987d457b8b..926bfdf68d8d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -122,7 +122,6 @@ static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
 
 __visible bool kvm_rebooting;
@@ -1723,11 +1722,12 @@ void kvm_release_page_dirty(struct page *page)
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
 
-static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
+void kvm_release_pfn_dirty(kvm_pfn_t pfn)
 {
 	kvm_set_pfn_dirty(pfn);
 	kvm_release_pfn_clean(pfn);
 }
+EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
 
 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-- 
2.13.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: s390: vsie: use common code functions for pinning
  2017-09-01 15:11 [PATCH v2] KVM: s390: vsie: use common code functions for pinning David Hildenbrand
@ 2017-09-04  9:16 ` Cornelia Huck
  2017-09-12 12:28 ` Christian Borntraeger
  2017-11-02 11:38 ` Christian Borntraeger
  2 siblings, 0 replies; 7+ messages in thread
From: Cornelia Huck @ 2017-09-04  9:16 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: kvm, Christian Borntraeger, Paolo Bonzini, Radim Krčmář

On Fri,  1 Sep 2017 17:11:43 +0200
David Hildenbrand <david@redhat.com> wrote:

> We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE
> for all errors). So we can also get rid of special handling in the
> callers of pin_guest_page() and always assume that it is a g2 error.
> 
> As also kvm_s390_inject_program_int() should never fail, we can
> simplify pin_scb(), too.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
> 
> Based on:
> git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git next
> 
> v1- > v2:
> - As Christian noticed, gfn_to_page() will never return -ENOMEM. So we
>   can simplify pin_guest_page() even more and also the callers.
> 
>  arch/s390/kvm/vsie.c     | 50 +++++++++++++++++-------------------------------
>  include/linux/kvm_host.h |  1 +
>  virt/kvm/kvm_main.c      |  4 ++--
>  3 files changed, 21 insertions(+), 34 deletions(-)

Reviewed-by: Cornelia Huck <cohuck@redhat.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: s390: vsie: use common code functions for pinning
  2017-09-01 15:11 [PATCH v2] KVM: s390: vsie: use common code functions for pinning David Hildenbrand
  2017-09-04  9:16 ` Cornelia Huck
@ 2017-09-12 12:28 ` Christian Borntraeger
  2017-09-12 12:52   ` David Hildenbrand
  2017-11-02 11:38 ` Christian Borntraeger
  2 siblings, 1 reply; 7+ messages in thread
From: Christian Borntraeger @ 2017-09-12 12:28 UTC (permalink / raw)
  To: David Hildenbrand, kvm
  Cc: Paolo Bonzini, Radim Krčmář, Cornelia Huck



On 09/01/2017 05:11 PM, David Hildenbrand wrote:

> @@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
>  /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
>  static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
>  {
> -	struct page *page;
> -
> -	page = virt_to_page(hpa);
> -	set_page_dirty_lock(page);
> -	put_page(page);
> +	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
>  	/* mark the page always as dirty for migration */
>  	mark_page_dirty(kvm, gpa_to_gfn(gpa));
>  }

This is probably ok but can you maybe outline why we no longer need the 
_lock variant of set_page_dirty? (Or asked differently: why did we use
the _locked varaint)

Other than that everything looks sane, I am just not sure about this part.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: s390: vsie: use common code functions for pinning
  2017-09-12 12:28 ` Christian Borntraeger
@ 2017-09-12 12:52   ` David Hildenbrand
  2017-09-28 16:53     ` David Hildenbrand
  2017-09-29 14:51     ` Paolo Bonzini
  0 siblings, 2 replies; 7+ messages in thread
From: David Hildenbrand @ 2017-09-12 12:52 UTC (permalink / raw)
  To: Christian Borntraeger, kvm, Paolo Bonzini
  Cc: Radim Krčmář, Cornelia Huck

On 12.09.2017 14:28, Christian Borntraeger wrote:
> 
> 
> On 09/01/2017 05:11 PM, David Hildenbrand wrote:
> 
>> @@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
>>  /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
>>  static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
>>  {
>> -	struct page *page;
>> -
>> -	page = virt_to_page(hpa);
>> -	set_page_dirty_lock(page);
>> -	put_page(page);
>> +	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
>>  	/* mark the page always as dirty for migration */
>>  	mark_page_dirty(kvm, gpa_to_gfn(gpa));
>>  }
> 
> This is probably ok but can you maybe outline why we no longer need the 
> _lock variant of set_page_dirty? (Or asked differently: why did we use
> the _locked varaint)
> 
> Other than that everything looks sane, I am just not sure about this part.
> 

Short answer: As x86 uses this heavily, I was expecting it to do the
right thing. I can't sport any additional page locking in x86 code.

Long answer:

mm/page-writeback.c: It only seems to be relevant for !anonymous mappings.

"set_page_dirty() is racy [...] This is because another CPU could
truncate the page off the mapping and then free the mapping."

"For pages with a mapping this should be done under the page lock for
the benefit of asynchronous memory errors who prefer a consistent dirty
state. This rule can be broken in some special cases [...]"

Sounds to me like the worst thing that could happen is losing a dirty
flag. Which should not matter if somebody tries to do nasty stuff with
the mapping either way.


But to verify: Paolo, can you recall why we don't need the _locked
variants in kvm common code?

-- 

Thanks,

David

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: s390: vsie: use common code functions for pinning
  2017-09-12 12:52   ` David Hildenbrand
@ 2017-09-28 16:53     ` David Hildenbrand
  2017-09-29 14:51     ` Paolo Bonzini
  1 sibling, 0 replies; 7+ messages in thread
From: David Hildenbrand @ 2017-09-28 16:53 UTC (permalink / raw)
  To: Christian Borntraeger, kvm, Paolo Bonzini
  Cc: Radim Krčmář, Cornelia Huck

Ping Paolo :)

>> This is probably ok but can you maybe outline why we no longer need the 
>> _lock variant of set_page_dirty? (Or asked differently: why did we use
>> the _locked varaint)
>>
>> Other than that everything looks sane, I am just not sure about this part.
>>
> 
> Short answer: As x86 uses this heavily, I was expecting it to do the
> right thing. I can't sport any additional page locking in x86 code.
> 
> Long answer:
> 
> mm/page-writeback.c: It only seems to be relevant for !anonymous mappings.
> 
> "set_page_dirty() is racy [...] This is because another CPU could
> truncate the page off the mapping and then free the mapping."
> 
> "For pages with a mapping this should be done under the page lock for
> the benefit of asynchronous memory errors who prefer a consistent dirty
> state. This rule can be broken in some special cases [...]"
> 
> Sounds to me like the worst thing that could happen is losing a dirty
> flag. Which should not matter if somebody tries to do nasty stuff with
> the mapping either way.
> 
> 
> But to verify: Paolo, can you recall why we don't need the _locked
> variants in kvm common code?
> 


-- 

Thanks,

David

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: s390: vsie: use common code functions for pinning
  2017-09-12 12:52   ` David Hildenbrand
  2017-09-28 16:53     ` David Hildenbrand
@ 2017-09-29 14:51     ` Paolo Bonzini
  1 sibling, 0 replies; 7+ messages in thread
From: Paolo Bonzini @ 2017-09-29 14:51 UTC (permalink / raw)
  To: David Hildenbrand, Christian Borntraeger, kvm
  Cc: Radim Krčmář, Cornelia Huck

On 12/09/2017 14:52, David Hildenbrand wrote:
> mm/page-writeback.c: It only seems to be relevant for !anonymous mappings.
> 
> "set_page_dirty() is racy [...] This is because another CPU could
> truncate the page off the mapping and then free the mapping."
> 
> "For pages with a mapping this should be done under the page lock for
> the benefit of asynchronous memory errors who prefer a consistent dirty
> state. This rule can be broken in some special cases [...]"
> 
> Sounds to me like the worst thing that could happen is losing a dirty
> flag. Which should not matter if somebody tries to do nasty stuff with
> the mapping either way.
> 
> 
> But to verify: Paolo, can you recall why we don't need the _locked
> variants in kvm common code?

That predates me by several years, but if you look at set_page_dirty it is

	if (page_mapping(page))
		return ...->set_page_dirty(page);
	else
		return TestSetPageDirty(page);

so KVM basically expects that !page_mapping(page) or that the page is
ram-backed.  That includes a hugetlbfs page, for which the callback is
*also* doing just SetPageDirty, or a few non-anonymous maps for which
the callback is __set_page_dirty_no_writeback; these in turn are ramfs
and tmpfs.

If you map files to KVM, the page cache (fs/buffer.c) is not going to
know that the pages are dirty, and likewise the file system is not going
to know that the inode is dirty.  So don't do that.

Paolo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: s390: vsie: use common code functions for pinning
  2017-09-01 15:11 [PATCH v2] KVM: s390: vsie: use common code functions for pinning David Hildenbrand
  2017-09-04  9:16 ` Cornelia Huck
  2017-09-12 12:28 ` Christian Borntraeger
@ 2017-11-02 11:38 ` Christian Borntraeger
  2 siblings, 0 replies; 7+ messages in thread
From: Christian Borntraeger @ 2017-11-02 11:38 UTC (permalink / raw)
  To: David Hildenbrand, kvm
  Cc: Paolo Bonzini, Radim Krčmář, Cornelia Huck

On 09/01/2017 05:11 PM, David Hildenbrand wrote:
> We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE
> for all errors). So we can also get rid of special handling in the
> callers of pin_guest_page() and always assume that it is a g2 error.
> 
> As also kvm_s390_inject_program_int() should never fail, we can
> simplify pin_scb(), too.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>

Thanks, applied.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-11-02 11:39 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01 15:11 [PATCH v2] KVM: s390: vsie: use common code functions for pinning David Hildenbrand
2017-09-04  9:16 ` Cornelia Huck
2017-09-12 12:28 ` Christian Borntraeger
2017-09-12 12:52   ` David Hildenbrand
2017-09-28 16:53     ` David Hildenbrand
2017-09-29 14:51     ` Paolo Bonzini
2017-11-02 11:38 ` Christian Borntraeger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.