From: Cornelia Huck <cohuck@redhat.com>
To: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: david@redhat.com, Ulrich.Weigand@de.ibm.com, aarcange@redhat.com,
akpm@linux-foundation.org, frankja@linux.vnet.ibm.com,
gor@linux.ibm.com, imbrenda@linux.ibm.com, kvm@vger.kernel.org,
linux-mm@kvack.org, linux-s390@vger.kernel.org,
mimu@linux.ibm.com, thuth@redhat.com
Subject: Re: [PATCH v2 RFC] KVM: s390/interrupt: do not pin adapter interrupt pages
Date: Wed, 12 Feb 2020 13:39:08 +0100 [thread overview]
Message-ID: <20200212133908.6c6c9072.cohuck@redhat.com> (raw)
In-Reply-To: <20200211092341.3965-1-borntraeger@de.ibm.com>
On Tue, 11 Feb 2020 04:23:41 -0500
Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> From: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
>
> The adapter interrupt page containing the indicator bits is currently
> pinned. That means that a guest with many devices can pin a lot of
> memory pages in the host. This also complicates the reference tracking
> which is needed for memory management handling of protected virtual
> machines.
> We can simply try to get the userspace page, set the bits, and free the
> page. By storing the userspace address in the irq routing entry instead
> of the guest address we can actually avoid many lookups and list walks
> so that this variant is very likely not slower.
>
> Signed-off-by: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
> [borntraeger@de.ibm.com: patch simplification]
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> ---
> quick and dirty, how this could look like
>
>
> arch/s390/include/asm/kvm_host.h | 3 -
> arch/s390/kvm/interrupt.c | 146 +++++++++++--------------------
> 2 files changed, 49 insertions(+), 100 deletions(-)
>
(...)
> @@ -2488,83 +2485,26 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
>
> static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
> {
> - struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
> - struct s390_map_info *map;
> - int ret;
> -
> - if (!adapter || !addr)
> - return -EINVAL;
> -
> - map = kzalloc(sizeof(*map), GFP_KERNEL);
> - if (!map) {
> - ret = -ENOMEM;
> - goto out;
> - }
> - INIT_LIST_HEAD(&map->list);
> - map->guest_addr = addr;
> - map->addr = gmap_translate(kvm->arch.gmap, addr);
> - if (map->addr == -EFAULT) {
> - ret = -EFAULT;
> - goto out;
> - }
> - ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
> - if (ret < 0)
> - goto out;
> - BUG_ON(ret != 1);
> - down_write(&adapter->maps_lock);
> - if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
> - list_add_tail(&map->list, &adapter->maps);
> - ret = 0;
> - } else {
> - put_page(map->page);
> - ret = -EINVAL;
> + /*
> + * We resolve the gpa to hva when setting the IRQ routing. If userspace
> + * decides to mess with the memslots, it had better also update the irq
> + * routing. Otherwise we will write to the wrong userspace address.
> + */
> + return 0;
Given that this function now always returns 0, we basically get a
completely useless roundtrip into the kernel when userspace is trying
to set up the mappings.
Can we define a new IO_ADAPTER_MAPPING_NOT_NEEDED or so capability that
userspace can check?
This change in behaviour probably wants a change in the documentation
as well.
> }
> - up_write(&adapter->maps_lock);
> -out:
> - if (ret)
> - kfree(map);
> - return ret;
> -}
>
> static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
> {
> - struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
> - struct s390_map_info *map, *tmp;
> - int found = 0;
> -
> - if (!adapter || !addr)
> - return -EINVAL;
> -
> - down_write(&adapter->maps_lock);
> - list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
> - if (map->guest_addr == addr) {
> - found = 1;
> - atomic_dec(&adapter->nr_maps);
> - list_del(&map->list);
> - put_page(map->page);
> - kfree(map);
> - break;
> - }
> - }
> - up_write(&adapter->maps_lock);
> -
> - return found ? 0 : -EINVAL;
> + return 0;
Same here.
> }
>
> void kvm_s390_destroy_adapters(struct kvm *kvm)
> {
> int i;
> - struct s390_map_info *map, *tmp;
>
> for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
> if (!kvm->arch.adapters[i])
> continue;
> - list_for_each_entry_safe(map, tmp,
> - &kvm->arch.adapters[i]->maps, list) {
> - list_del(&map->list);
> - put_page(map->page);
> - kfree(map);
> - }
> kfree(kvm->arch.adapters[i]);
Call kfree() unconditionally?
> }
> }
> @@ -2831,19 +2771,25 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
> return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
> }
>
> -static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
> - u64 addr)
> +static struct page *get_map_page(struct kvm *kvm,
> + struct s390_io_adapter *adapter,
> + u64 uaddr)
> {
> - struct s390_map_info *map;
> + struct page *page;
> + int ret;
>
> if (!adapter)
> return NULL;
> -
> - list_for_each_entry(map, &adapter->maps, list) {
> - if (map->guest_addr == addr)
> - return map;
> - }
> - return NULL;
> + page = NULL;
> + if (!uaddr)
> + return NULL;
> + down_read(&kvm->mm->mmap_sem);
> + ret = get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
> + &page, NULL, NULL);
> + if (ret < 1)
> + page = NULL;
> + up_read(&kvm->mm->mmap_sem);
> + return page;
> }
>
> static int adapter_indicators_set(struct kvm *kvm,
(...)
> @@ -2951,12 +2900,15 @@ int kvm_set_routing_entry(struct kvm *kvm,
> const struct kvm_irq_routing_entry *ue)
> {
> int ret;
> + u64 uaddr;
>
> switch (ue->type) {
> case KVM_IRQ_ROUTING_S390_ADAPTER:
> e->set = set_adapter_int;
> - e->adapter.summary_addr = ue->u.adapter.summary_addr;
> - e->adapter.ind_addr = ue->u.adapter.ind_addr;
> + uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
Can gmap_translate() return -EFAULT here? The code above only seems to
check for 0... do we want to return an error here?
> + e->adapter.summary_addr = uaddr;
> + uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
> + e->adapter.ind_addr = uaddr;
> e->adapter.summary_offset = ue->u.adapter.summary_offset;
> e->adapter.ind_offset = ue->u.adapter.ind_offset;
> e->adapter.adapter_id = ue->u.adapter.adapter_id;
next prev parent reply other threads:[~2020-02-12 12:39 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-07 11:39 [PATCH 00/35] KVM: s390: Add support for protected VMs Christian Borntraeger
2020-02-07 11:39 ` [PATCH 01/35] mm:gup/writeback: add callbacks for inaccessible pages Christian Borntraeger
2020-02-10 17:27 ` Christian Borntraeger
2020-02-11 11:26 ` Will Deacon
2020-02-11 11:43 ` Christian Borntraeger
2020-02-13 14:48 ` Christian Borntraeger
2020-02-18 16:02 ` Will Deacon
2020-02-13 19:56 ` Sean Christopherson
2020-02-13 20:13 ` Christian Borntraeger
2020-02-13 20:46 ` Sean Christopherson
2020-02-17 20:55 ` Tom Lendacky
2020-02-17 21:14 ` Christian Borntraeger
2020-02-10 18:17 ` David Hildenbrand
2020-02-10 18:28 ` Christian Borntraeger
2020-02-10 18:43 ` David Hildenbrand
2020-02-10 18:51 ` Christian Borntraeger
2020-02-18 3:36 ` Tian, Kevin
2020-02-18 6:44 ` Christian Borntraeger
2020-02-07 11:39 ` [PATCH 02/35] KVM: s390/interrupt: do not pin adapter interrupt pages Christian Borntraeger
2020-02-10 12:26 ` David Hildenbrand
2020-02-10 18:38 ` Christian Borntraeger
2020-02-10 19:33 ` David Hildenbrand
2020-02-11 9:23 ` [PATCH v2 RFC] " Christian Borntraeger
2020-02-12 11:52 ` Christian Borntraeger
2020-02-12 12:16 ` David Hildenbrand
2020-02-12 12:22 ` Christian Borntraeger
2020-02-12 12:47 ` David Hildenbrand
2020-02-12 12:39 ` Cornelia Huck [this message]
2020-02-12 12:44 ` Christian Borntraeger
2020-02-12 13:07 ` Cornelia Huck
2020-02-10 18:56 ` [PATCH 02/35] KVM: s390/interrupt: do not pin adapter interrupt Ulrich Weigand
2020-02-10 12:40 ` [PATCH 02/35] KVM: s390/interrupt: do not pin adapter interrupt pages David Hildenbrand
2020-02-07 11:39 ` [PATCH 05/35] s390/mm: provide memory management functions for protected KVM guests Christian Borntraeger
2020-02-12 13:42 ` Cornelia Huck
2020-02-13 7:43 ` Christian Borntraeger
2020-02-13 8:44 ` Cornelia Huck
2020-02-14 17:59 ` David Hildenbrand
2020-02-14 21:17 ` Christian Borntraeger
2020-02-07 11:39 ` [PATCH 06/35] s390/mm: add (non)secure page access exceptions handlers Christian Borntraeger
2020-02-14 18:05 ` David Hildenbrand
2020-02-14 19:59 ` Christian Borntraeger
2020-02-07 11:39 ` [PATCH 10/35] KVM: s390: protvirt: Secure memory is not mergeable Christian Borntraeger
2020-02-07 11:39 ` [PATCH 11/35] KVM: s390/mm: Make pages accessible before destroying the guest Christian Borntraeger
2020-02-14 18:40 ` David Hildenbrand
2020-02-07 11:39 ` [PATCH 21/35] KVM: s390/mm: handle guest unpin events Christian Borntraeger
2020-02-10 14:58 ` Thomas Huth
2020-02-11 13:21 ` Cornelia Huck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200212133908.6c6c9072.cohuck@redhat.com \
--to=cohuck@redhat.com \
--cc=Ulrich.Weigand@de.ibm.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=borntraeger@de.ibm.com \
--cc=david@redhat.com \
--cc=frankja@linux.vnet.ibm.com \
--cc=gor@linux.ibm.com \
--cc=imbrenda@linux.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-s390@vger.kernel.org \
--cc=mimu@linux.ibm.com \
--cc=thuth@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).