All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: xiaoyao.li@intel.com, michael.roth@amd.com, david@redhat.com
Subject: [PATCH 25/26] kvm: handle KVM_EXIT_MEMORY_FAULT
Date: Fri, 22 Mar 2024 19:11:15 +0100	[thread overview]
Message-ID: <20240322181116.1228416-26-pbonzini@redhat.com> (raw)
In-Reply-To: <20240322181116.1228416-1-pbonzini@redhat.com>

From: Chao Peng <chao.p.peng@linux.intel.com>

A KVM_EXIT_MEMORY_FAULT exit indicates that userspace needs to do the
memory conversion on the RAMBlock to turn the memory into the desired
attribute, i.e., private/shared.

Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
KVM_EXIT_MEMORY_FAULT happens.

Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has
guest_memfd memory backend.

Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is
added.

When a page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range(). Note, shared
memory can be discarded only when it's not backed by hugetlb, because
hugetlb is supposed to be pre-allocated and does not need discarding.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>

Message-ID: <20240320083945.991426-13-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/sysemu/kvm.h   |  2 +
 accel/kvm/kvm-all.c    | 99 +++++++++++++++++++++++++++++++++++++-----
 accel/kvm/trace-events |  2 +
 3 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 2cb31925091..698f1640fe2 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -541,4 +541,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
 
 int kvm_set_memory_attributes_private(hwaddr start, hwaddr size);
 int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size);
+
+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private);
 #endif
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 56b17cbd8aa..afd7f992e39 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2893,6 +2893,70 @@ static void kvm_eat_signals(CPUState *cpu)
     } while (sigismember(&chkset, SIG_IPI));
 }
 
+/*
+ * Convert guest physical range [start, start + size) to private memory (if
+ * to_private) or to shared memory, then discard the now-unused backing pages.
+ * Returns -1 for a bad or non-guest_memfd range, else the last call's result.
+ */
+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
+{
+    MemoryRegionSection section;
+    ram_addr_t offset;
+    MemoryRegion *mr;
+    RAMBlock *rb;
+    void *addr;
+    int ret = -1;
+
+    trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
+
+    if (!size || !QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) ||
+        !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) {
+        return -1;
+    }
+
+    section = memory_region_find(get_system_memory(), start, size);
+    mr = section.mr;
+    if (!mr) {
+        return -1;
+    }
+
+    if (!memory_region_has_guest_memfd(mr)) {
+        error_report("Converting non guest_memfd backed memory region "
+                     "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+                     start, size, to_private ? "private" : "shared");
+        goto out_unref; /* ret still holds its initial value of -1 */
+    }
+
+    if (to_private) {
+        ret = kvm_set_memory_attributes_private(start, size);
+    } else {
+        ret = kvm_set_memory_attributes_shared(start, size);
+    }
+    if (ret) {
+        goto out_unref;
+    }
+
+    addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
+    rb = qemu_ram_block_from_host(addr, false, &offset);
+
+    if (to_private) {
+        if (rb->page_size != qemu_real_host_page_size()) {
+            /*
+             * shared memory is backed by hugetlb, which is supposed to be
+             * pre-allocated and doesn't need to be discarded; ret is still 0
+             */
+            goto out_unref;
+        }
+        ret = ram_block_discard_range(rb, offset, size);
+    } else {
+        ret = ram_block_discard_guest_memfd_range(rb, offset, size);
+    }
+
+out_unref:
+    memory_region_unref(section.mr);
+    return ret;
+}
+
 int kvm_cpu_exec(CPUState *cpu)
 {
     struct kvm_run *run = cpu->kvm_run;
@@ -2960,18 +3024,20 @@ int kvm_cpu_exec(CPUState *cpu)
                 ret = EXCP_INTERRUPT;
                 break;
             }
-            fprintf(stderr, "error: kvm run failed %s\n",
-                    strerror(-run_ret));
+            if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
+                fprintf(stderr, "error: kvm run failed %s\n",
+                        strerror(-run_ret));
 #ifdef TARGET_PPC
-            if (run_ret == -EBUSY) {
-                fprintf(stderr,
-                        "This is probably because your SMT is enabled.\n"
-                        "VCPU can only run on primary threads with all "
-                        "secondary threads offline.\n");
-            }
+                if (run_ret == -EBUSY) {
+                    fprintf(stderr,
+                            "This is probably because your SMT is enabled.\n"
+                            "VCPU can only run on primary threads with all "
+                            "secondary threads offline.\n");
+                }
 #endif
-            ret = -1;
-            break;
+                ret = -1;
+                break;
+            }
         }
 
         trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
@@ -3054,6 +3120,19 @@ int kvm_cpu_exec(CPUState *cpu)
                 break;
             }
             break;
+        case KVM_EXIT_MEMORY_FAULT:
+            trace_kvm_memory_fault(run->memory_fault.gpa,
+                                   run->memory_fault.size,
+                                   run->memory_fault.flags);
+            if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
+                error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
+                             (uint64_t)run->memory_fault.flags);
+                ret = -1;
+                break;
+            }
+            ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
+                                     run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
+            break;
         default:
             ret = kvm_arch_handle_exit(cpu, run);
             break;
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index e8c52cb9e7a..681ccb667d6 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -31,3 +31,5 @@ kvm_cpu_exec(void) ""
 kvm_interrupt_exit_request(void) ""
 kvm_io_window_exit(void) ""
 kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32
+kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"
+kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64
-- 
2.44.0



  parent reply	other threads:[~2024-03-22 18:15 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-22 18:10 [PATCH for-9.1 00/26] x86, kvm: common confidential computing subset Paolo Bonzini
2024-03-22 18:10 ` [PATCH 01/26] pci-host/q35: Move PAM initialization above SMRAM initialization Paolo Bonzini
2024-03-22 18:10 ` [PATCH 02/26] q35: Introduce smm_ranges property for q35-pci-host Paolo Bonzini
2024-03-22 18:10 ` [PATCH 03/26] confidential guest support: Add kvm_init() and kvm_reset() in class Paolo Bonzini
2024-03-25  8:32   ` Philippe Mathieu-Daudé
2024-03-25 10:53     ` Paolo Bonzini
2024-03-22 18:10 ` [PATCH 04/26] i386/sev: Switch to use confidential_guest_kvm_init() Paolo Bonzini
2024-03-22 18:10 ` [PATCH 05/26] ppc/pef: switch to use confidential_guest_kvm_init/reset() Paolo Bonzini
2024-03-22 18:10 ` [PATCH 06/26] s390: Switch to use confidential_guest_kvm_init() Paolo Bonzini
2024-03-22 18:10 ` [PATCH 07/26] scripts/update-linux-headers: Add setup_data.h to import list Paolo Bonzini
2024-03-22 18:10 ` [PATCH 08/26] scripts/update-linux-headers: Add bits.h to file imports Paolo Bonzini
2024-03-22 18:10 ` [PATCH 09/26] [HACK] linux-headers: Update headers for 6.8 + kvm-coco-queue + SNP Paolo Bonzini
2024-03-22 18:11 ` [PATCH 10/26] [TO SQUASH] hw/i386: Remove redeclaration of struct setup_data Paolo Bonzini
2024-03-22 18:11 ` [PATCH 11/26] runstate: skip initial CPU reset if reset is not actually possible Paolo Bonzini
2024-03-25  8:58   ` Daniel P. Berrangé
2024-03-25  9:32   ` Philippe Mathieu-Daudé
2024-03-22 18:11 ` [PATCH 12/26] KVM: track whether guest state is encrypted Paolo Bonzini
2024-03-25  9:25   ` Philippe Mathieu-Daudé
2024-03-26 15:48   ` Xiaoyao Li
2024-03-27  9:05     ` Paolo Bonzini
2024-03-22 18:11 ` [PATCH 13/26] KVM: remove kvm_arch_cpu_check_are_resettable Paolo Bonzini
2024-03-25  9:26   ` Philippe Mathieu-Daudé
2024-03-22 18:11 ` [PATCH 14/26] target/i386: introduce x86-confidential-guest Paolo Bonzini
2024-03-22 18:11 ` [PATCH 15/26] target/i386: Implement mc->kvm_type() to get VM type Paolo Bonzini
2024-03-25  9:29   ` Philippe Mathieu-Daudé
2024-03-25 10:57     ` Paolo Bonzini
2024-03-22 18:11 ` [PATCH 16/26] target/i386: SEV: use KVM_SEV_INIT2 if possible Paolo Bonzini
2024-03-22 18:11 ` [PATCH 17/26] trace/kvm: Split address space and slot id in trace_kvm_set_user_memory() Paolo Bonzini
2024-03-22 18:11 ` [PATCH 18/26] kvm: Introduce support for memory_attributes Paolo Bonzini
2024-03-25  9:31   ` Philippe Mathieu-Daudé
2024-03-22 18:11 ` [PATCH 19/26] RAMBlock: Add support of KVM private guest memfd Paolo Bonzini
2024-03-22 18:53   ` Michael Roth
2024-03-22 18:11 ` [PATCH 20/26] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot Paolo Bonzini
2024-03-22 18:11 ` [PATCH 21/26] kvm/memory: Make memory type private by default if it has guest memfd backend Paolo Bonzini
2024-03-26  8:56   ` Xiaoyao Li
2024-03-22 18:11 ` [PATCH 22/26] HostMem: Add mechanism to opt in kvm guest memfd via MachineState Paolo Bonzini
2024-03-22 18:11 ` [PATCH 23/26] RAMBlock: make guest_memfd require uncoordinated discard Paolo Bonzini
2024-03-22 18:11 ` [PATCH 24/26] physmem: Introduce ram_block_discard_guest_memfd_range() Paolo Bonzini
2024-03-22 18:11 ` Paolo Bonzini [this message]
2024-03-26  8:53   ` [PATCH 25/26] kvm: handle KVM_EXIT_MEMORY_FAULT Xiaoyao Li
2024-03-22 18:11 ` [PATCH 26/26] i386/kvm: Move architectural CPUID leaf generation to separate helper Paolo Bonzini
2024-04-01 15:03   ` Xiaoyao Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240322181116.1228416-26-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=david@redhat.com \
    --cc=michael.roth@amd.com \
    --cc=qemu-devel@nongnu.org \
    --cc=xiaoyao.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.