All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/5] hvf: stability fixes for HVF
@ 2019-11-24 20:05 Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in Cameron Esfahani via
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-24 20:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

The following patches fix stability issues with running QEMU on Apple
Hypervisor Framework (HVF):
- non-RAM, non-ROMD areas need to trap so accesses can be correctly
  emulated.
- Current TSC synchronization implementation is insufficient: when
  running with more than 1 core, TSC values can go backwards.  Until
  a correct implementation can be written, remove calls to
  hv_vm_sync_tsc().  Pass through TSC to guest OS.
- Fix REX emulation in relation to legacy prefixes.
- More correctly match SDM when setting CR0 and PDPTE registers.
- Save away exception type as well as vector in hvf_store_events() so
  they can be correctly reinjected in hvf_inject_interrupts().  Under
  heavy loads, exceptions got misrouted.

Changes in v2:
- Fix code style errors.

Cameron Esfahani (5):
  hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in
  hvf: remove TSC synchronization code because it isn't fully complete
  hvf: correctly handle REX prefix in relation to legacy prefixes
  hvf: more accurately match SDM when setting CR0 and PDPTE registers
  hvf: save away type as well as vector so we can reinject them

 target/i386/hvf/hvf.c        | 79 ++++++++++++++++++++++++++----------
 target/i386/hvf/vmx.h        | 18 ++++----
 target/i386/hvf/x86_decode.c | 64 ++++++++++++++++-------------
 target/i386/hvf/x86_decode.h | 20 ++++-----
 target/i386/hvf/x86_emu.c    |  3 --
 target/i386/hvf/x86hvf.c     | 26 +++++-------
 6 files changed, 124 insertions(+), 86 deletions(-)

-- 
2.24.0



^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in
  2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
@ 2019-11-24 20:05 ` Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 2/5] hvf: remove TSC synchronization code because it isn't fully complete Cameron Esfahani via
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-24 20:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

If an area is non-RAM and non-ROMD, then remove mappings so accesses
will trap and can be emulated.  Change hvf_find_overlap_slot() to take
a size instead of an end address: it wouldn't return a slot because
callers would pass the same address for start and end.  Don't always
map area as read/write/execute, respect area flags.

Signed-off-by: Cameron Esfahani <dirty@apple.com>
---
 target/i386/hvf/hvf.c | 50 ++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 231732aaf7..0b50cfcbc6 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -107,14 +107,14 @@ static void assert_hvf_ok(hv_return_t ret)
 }
 
 /* Memory slots */
-hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
+hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
 {
     hvf_slot *slot;
     int x;
     for (x = 0; x < hvf_state->num_slots; ++x) {
         slot = &hvf_state->slots[x];
         if (slot->size && start < (slot->start + slot->size) &&
-            end > slot->start) {
+            (start + size) > slot->start) {
             return slot;
         }
     }
@@ -129,12 +129,10 @@ struct mac_slot {
 };
 
 struct mac_slot mac_slots[32];
-#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))
 
-static int do_hvf_set_memory(hvf_slot *slot)
+static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
 {
     struct mac_slot *macslot;
-    hv_memory_flags_t flags;
     hv_return_t ret;
 
     macslot = &mac_slots[slot->slot_id];
@@ -151,8 +149,6 @@ static int do_hvf_set_memory(hvf_slot *slot)
         return 0;
     }
 
-    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
-
     macslot->present = 1;
     macslot->gpa_start = slot->start;
     macslot->size = slot->size;
@@ -165,14 +161,24 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
 {
     hvf_slot *mem;
     MemoryRegion *area = section->mr;
+    bool writeable = !area->readonly && !area->rom_device;
+    hv_memory_flags_t flags;
 
     if (!memory_region_is_ram(area)) {
-        return;
+        if (writeable) {
+            return;
+        } else if (!memory_region_is_romd(area)) {
+            /*
+             * If the memory device is not in romd_mode, then we actually want
+             * to remove the hvf memory slot so all accesses will trap.
+             */
+             add = false;
+        }
     }
 
     mem = hvf_find_overlap_slot(
             section->offset_within_address_space,
-            section->offset_within_address_space + int128_get64(section->size));
+            int128_get64(section->size));
 
     if (mem && add) {
         if (mem->size == int128_get64(section->size) &&
@@ -186,7 +192,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
     /* Region needs to be reset. set the size to 0 and remap it. */
     if (mem) {
         mem->size = 0;
-        if (do_hvf_set_memory(mem)) {
+        if (do_hvf_set_memory(mem, 0)) {
             error_report("Failed to reset overlapping slot");
             abort();
         }
@@ -196,6 +202,13 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
         return;
     }
 
+    if (area->readonly ||
+        (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
+        flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
+    } else {
+        flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
+    }
+
     /* Now make a new slot. */
     int x;
 
@@ -216,7 +229,7 @@ void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
     mem->start = section->offset_within_address_space;
     mem->region = area;
 
-    if (do_hvf_set_memory(mem)) {
+    if (do_hvf_set_memory(mem, flags)) {
         error_report("Error registering new memory slot");
         abort();
     }
@@ -345,7 +358,14 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
         return false;
     }
 
-    return !slot;
+    if (!slot) {
+        return true;
+    }
+    if (!memory_region_is_ram(slot->region) &&
+        !(read && memory_region_is_romd(slot->region))) {
+        return true;
+    }
+    return false;
 }
 
 static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
@@ -354,7 +374,7 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
 
     slot = hvf_find_overlap_slot(
             section->offset_within_address_space,
-            section->offset_within_address_space + int128_get64(section->size));
+            int128_get64(section->size));
 
     /* protect region against writes; begin tracking it */
     if (on) {
@@ -720,7 +740,7 @@ int hvf_vcpu_exec(CPUState *cpu)
             ret = EXCP_INTERRUPT;
             break;
         }
-            /* Need to check if MMIO or unmmaped fault */
+        /* Need to check if MMIO or unmapped fault */
         case EXIT_REASON_EPT_FAULT:
         {
             hvf_slot *slot;
@@ -731,7 +751,7 @@ int hvf_vcpu_exec(CPUState *cpu)
                 vmx_set_nmi_blocking(cpu);
             }
 
-            slot = hvf_find_overlap_slot(gpa, gpa);
+            slot = hvf_find_overlap_slot(gpa, 1);
             /* mmio */
             if (ept_emulation_fault(slot, gpa, exit_qual)) {
                 struct x86_decode decode;
-- 
2.24.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 2/5] hvf: remove TSC synchronization code because it isn't fully complete
  2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in Cameron Esfahani via
@ 2019-11-24 20:05 ` Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes Cameron Esfahani via
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-24 20:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

The existing code in QEMU's HVF support to attempt to synchronize TSC
across multiple cores is not sufficient.  TSC value on other cores
can go backwards.  Until implementation is fixed, remove calls to
hv_vm_sync_tsc().  Pass through TSC to guest OS.

Signed-off-by: Cameron Esfahani <dirty@apple.com>
---
 target/i386/hvf/hvf.c     | 3 +--
 target/i386/hvf/x86_emu.c | 3 ---
 target/i386/hvf/x86hvf.c  | 4 ----
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 0b50cfcbc6..90fd50acfc 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -518,7 +518,6 @@ void hvf_reset_vcpu(CPUState *cpu) {
         wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
     }
 
-    hv_vm_sync_tsc(0);
     hv_vcpu_invalidate_tlb(cpu->hvf_fd);
     hv_vcpu_flush(cpu->hvf_fd);
 }
@@ -612,7 +611,7 @@ int hvf_init_vcpu(CPUState *cpu)
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
-    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
+    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 1b04bd7e94..3df767209d 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -772,9 +772,6 @@ void simulate_wrmsr(struct CPUState *cpu)
 
     switch (msr) {
     case MSR_IA32_TSC:
-        /* if (!osx_is_sierra())
-             wvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET, data - rdtscp());
-        hv_vm_sync_tsc(data);*/
         break;
     case MSR_IA32_APICBASE:
         cpu_set_apic_base(X86_CPU(cpu)->apic_state, data);
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index e0ea02d631..1485b95776 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -152,10 +152,6 @@ void hvf_put_msrs(CPUState *cpu_state)
 
     hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base);
     hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base);
-
-    /* if (!osx_is_sierra())
-         wvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET, env->tsc - rdtscp());*/
-    hv_vm_sync_tsc(env->tsc);
 }
 
 
-- 
2.24.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes
  2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 2/5] hvf: remove TSC synchronization code because it isn't fully complete Cameron Esfahani via
@ 2019-11-24 20:05 ` Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers Cameron Esfahani via
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-24 20:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

In real x86 processors, the REX prefix must come after legacy prefixes.
REX before legacy is ignored.  Update the HVF emulation code to properly
handle this.  Fix some spelling errors in constants.  Fix some decoder
table initialization issues found by Coverity.

Signed-off-by: Cameron Esfahani <dirty@apple.com>
---
 target/i386/hvf/x86_decode.c | 64 ++++++++++++++++++++----------------
 target/i386/hvf/x86_decode.h | 20 +++++------
 2 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 822fa1866e..77c346605f 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -122,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode,
 {
     op->type = X86_VAR_REG;
     op->reg = R_EAX;
-    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0,
+    /* Since reg is always AX, REX prefix has no impact. */
+    op->ptr = get_reg_ref(env, op->reg, false, 0,
                           decode->operand_size);
 }
 
@@ -1687,40 +1688,37 @@ calc_addr:
     }
 }
 
-target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
-                         int size)
+target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
+                         int is_extended, int size)
 {
     target_ulong ptr = 0;
-    int which = 0;
 
     if (is_extended) {
         reg |= R_R8;
     }
 
-
     switch (size) {
     case 1:
-        if (is_extended || reg < 4 || rex) {
-            which = 1;
+        if (is_extended || reg < 4 || rex_present) {
             ptr = (target_ulong)&RL(env, reg);
         } else {
-            which = 2;
             ptr = (target_ulong)&RH(env, reg - 4);
         }
         break;
     default:
-        which = 3;
         ptr = (target_ulong)&RRX(env, reg);
         break;
     }
     return ptr;
 }
 
-target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
-                         int size)
+target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present,
+                         int is_extended, int size)
 {
     target_ulong val = 0;
-    memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size);
+    memcpy(&val,
+           (void *)get_reg_ref(env, reg, rex_present, is_extended, size),
+           size);
     return val;
 }
 
@@ -1853,28 +1851,38 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
 static void decode_prefix(CPUX86State *env, struct x86_decode *decode)
 {
     while (1) {
+        /*
+         * REX prefix must come after legacy prefixes.
+         * REX before legacy is ignored.
+         * Clear rex to simulate this.
+         */
         uint8_t byte = decode_byte(env, decode);
         switch (byte) {
         case PREFIX_LOCK:
             decode->lock = byte;
+            decode->rex.rex = 0;
             break;
         case PREFIX_REPN:
         case PREFIX_REP:
             decode->rep = byte;
+            decode->rex.rex = 0;
             break;
-        case PREFIX_CS_SEG_OVEERIDE:
-        case PREFIX_SS_SEG_OVEERIDE:
-        case PREFIX_DS_SEG_OVEERIDE:
-        case PREFIX_ES_SEG_OVEERIDE:
-        case PREFIX_FS_SEG_OVEERIDE:
-        case PREFIX_GS_SEG_OVEERIDE:
+        case PREFIX_CS_SEG_OVERRIDE:
+        case PREFIX_SS_SEG_OVERRIDE:
+        case PREFIX_DS_SEG_OVERRIDE:
+        case PREFIX_ES_SEG_OVERRIDE:
+        case PREFIX_FS_SEG_OVERRIDE:
+        case PREFIX_GS_SEG_OVERRIDE:
             decode->segment_override = byte;
+            decode->rex.rex = 0;
             break;
         case PREFIX_OP_SIZE_OVERRIDE:
             decode->op_size_override = byte;
+            decode->rex.rex = 0;
             break;
         case PREFIX_ADDR_SIZE_OVERRIDE:
             decode->addr_size_override = byte;
+            decode->rex.rex = 0;
             break;
         case PREFIX_REX ... (PREFIX_REX + 0xf):
             if (x86_is_long_mode(env_cpu(env))) {
@@ -2111,14 +2119,14 @@ void init_decoder()
 {
     int i;
     
-    for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) {
-        memcpy(_decode_tbl1, &invl_inst, sizeof(invl_inst));
+    for (i = 0; i < ARRAY_SIZE(_decode_tbl1); i++) {
+        memcpy(&_decode_tbl1[i], &invl_inst, sizeof(invl_inst));
     }
     for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) {
-        memcpy(_decode_tbl2, &invl_inst, sizeof(invl_inst));
+        memcpy(&_decode_tbl2[i], &invl_inst, sizeof(invl_inst));
     }
     for (i = 0; i < ARRAY_SIZE(_decode_tbl3); i++) {
-        memcpy(_decode_tbl3, &invl_inst, sizeof(invl_inst_x87));
+        memcpy(&_decode_tbl3[i], &invl_inst_x87, sizeof(invl_inst_x87));
     
     }
     for (i = 0; i < ARRAY_SIZE(_1op_inst); i++) {
@@ -2167,22 +2175,22 @@ target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode,
                                target_ulong addr, X86Seg seg)
 {
     switch (decode->segment_override) {
-    case PREFIX_CS_SEG_OVEERIDE:
+    case PREFIX_CS_SEG_OVERRIDE:
         seg = R_CS;
         break;
-    case PREFIX_SS_SEG_OVEERIDE:
+    case PREFIX_SS_SEG_OVERRIDE:
         seg = R_SS;
         break;
-    case PREFIX_DS_SEG_OVEERIDE:
+    case PREFIX_DS_SEG_OVERRIDE:
         seg = R_DS;
         break;
-    case PREFIX_ES_SEG_OVEERIDE:
+    case PREFIX_ES_SEG_OVERRIDE:
         seg = R_ES;
         break;
-    case PREFIX_FS_SEG_OVEERIDE:
+    case PREFIX_FS_SEG_OVERRIDE:
         seg = R_FS;
         break;
-    case PREFIX_GS_SEG_OVEERIDE:
+    case PREFIX_GS_SEG_OVERRIDE:
         seg = R_GS;
         break;
     default:
diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h
index bc574a7a44..ef7960113f 100644
--- a/target/i386/hvf/x86_decode.h
+++ b/target/i386/hvf/x86_decode.h
@@ -27,12 +27,12 @@ typedef enum x86_prefix {
     PREFIX_REPN =                  0xf2,
     PREFIX_REP =                   0xf3,
     /* group 2 */
-    PREFIX_CS_SEG_OVEERIDE =       0x2e,
-    PREFIX_SS_SEG_OVEERIDE =       0x36,
-    PREFIX_DS_SEG_OVEERIDE =       0x3e,
-    PREFIX_ES_SEG_OVEERIDE =       0x26,
-    PREFIX_FS_SEG_OVEERIDE =       0x64,
-    PREFIX_GS_SEG_OVEERIDE =       0x65,
+    PREFIX_CS_SEG_OVERRIDE =       0x2e,
+    PREFIX_SS_SEG_OVERRIDE =       0x36,
+    PREFIX_DS_SEG_OVERRIDE =       0x3e,
+    PREFIX_ES_SEG_OVERRIDE =       0x26,
+    PREFIX_FS_SEG_OVERRIDE =       0x64,
+    PREFIX_GS_SEG_OVERRIDE =       0x65,
     /* group 3 */
     PREFIX_OP_SIZE_OVERRIDE =      0x66,
     /* group 4 */
@@ -303,10 +303,10 @@ uint64_t sign(uint64_t val, int size);
 
 uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
 
-target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
-                         int size);
-target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
-                         int size);
+target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present,
+                         int is_extended, int size);
+target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present,
+                         int is_extended, int size);
 void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
                         struct x86_decode_op *op);
 target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode,
-- 
2.24.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers
  2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
                   ` (2 preceding siblings ...)
  2019-11-24 20:05 ` [PATCH v2 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes Cameron Esfahani via
@ 2019-11-24 20:05 ` Cameron Esfahani via
  2019-11-24 20:05 ` [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them Cameron Esfahani via
  2019-11-25 10:28 ` [PATCH v2 0/5] hvf: stability fixes for HVF Paolo Bonzini
  5 siblings, 0 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-24 20:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

More accurately match SDM when setting CR0 and PDPTE registers.

Clear PDPTE registers when resetting vcpus.

Signed-off-by: Cameron Esfahani <dirty@apple.com>
---
 target/i386/hvf/hvf.c |  8 ++++++++
 target/i386/hvf/vmx.h | 18 ++++++++++--------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 90fd50acfc..784e67d77e 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -441,12 +441,20 @@ static MemoryListener hvf_memory_listener = {
 };
 
 void hvf_reset_vcpu(CPUState *cpu) {
+    uint64_t pdpte[4] = {0, 0, 0, 0};
+    int i;
 
     /* TODO: this shouldn't be needed; there is already a call to
      * cpu_synchronize_all_post_reset in vl.c
      */
     wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
     wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
+
+    /* Initialize PDPTE */
+    for (i = 0; i < 4; i++) {
+        wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
+    }
+
     macvm_set_cr0(cpu->hvf_fd, 0x60000010);
 
     wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 5dc52ecad6..eb8894cd58 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -121,6 +121,7 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0)
     uint64_t pdpte[4] = {0, 0, 0, 0};
     uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER);
     uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0);
+    uint64_t mask = CR0_PG | CR0_CD | CR0_NW | CR0_NE | CR0_ET;
 
     if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) &&
         !(efer & MSR_EFER_LME)) {
@@ -128,18 +129,15 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0)
                          rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f,
                          MEMTXATTRS_UNSPECIFIED,
                          (uint8_t *)pdpte, 32, 0);
+        /* Only set PDPTE when appropriate. */
+        for (i = 0; i < 4; i++) {
+            wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
+        }
     }
 
-    for (i = 0; i < 4; i++) {
-        wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
-    }
-
-    wvmcs(vcpu, VMCS_CR0_MASK, CR0_CD | CR0_NE | CR0_PG);
+    wvmcs(vcpu, VMCS_CR0_MASK, mask);
     wvmcs(vcpu, VMCS_CR0_SHADOW, cr0);
 
-    cr0 &= ~CR0_CD;
-    wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET);
-
     if (efer & MSR_EFER_LME) {
         if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) {
             enter_long_mode(vcpu, cr0, efer);
@@ -149,6 +147,10 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0)
         }
     }
 
+    /* Filter new CR0 after we are finished examining it above. */
+    cr0 = (cr0 & ~(mask & ~CR0_PG));
+    wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET);
+
     hv_vcpu_invalidate_tlb(vcpu);
     hv_vcpu_flush(vcpu);
 }
-- 
2.24.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
                   ` (3 preceding siblings ...)
  2019-11-24 20:05 ` [PATCH v2 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers Cameron Esfahani via
@ 2019-11-24 20:05 ` Cameron Esfahani via
  2019-11-25 10:26   ` Paolo Bonzini
  2019-11-25 10:28 ` [PATCH v2 0/5] hvf: stability fixes for HVF Paolo Bonzini
  5 siblings, 1 reply; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-24 20:05 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

Save away type as well as vector in hvf_store_events() so we can
correctly reinject both in hvf_inject_interrupts().

Make sure to clear ins_len and has_error_code when ins_len isn't
valid and error_code isn't set.

Signed-off-by: Cameron Esfahani <dirty@apple.com>
---
 target/i386/hvf/hvf.c    | 18 ++++++++++++++----
 target/i386/hvf/x86hvf.c | 22 ++++++++++------------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 784e67d77e..8a8aee4495 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -641,14 +641,18 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
         switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
         case VMCS_IDT_VEC_HWINTR:
         case VMCS_IDT_VEC_SWINTR:
-            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
+            /* Save event type as well so we can inject the correct type. */
+            env->interrupt_injected =
+                idtvec_info & (VMCS_IDT_VEC_TYPE | VMCS_IDT_VEC_VECNUM);
             break;
         case VMCS_IDT_VEC_NMI:
             env->nmi_injected = true;
             break;
         case VMCS_IDT_VEC_HWEXCEPTION:
         case VMCS_IDT_VEC_SWEXCEPTION:
-            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
+            /* Save event type as well so we can inject the correct type. */
+            env->exception_nr =
+                idtvec_info & (VMCS_IDT_VEC_TYPE | VMCS_IDT_VEC_VECNUM);
             env->exception_injected = 1;
             break;
         case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
@@ -658,10 +662,16 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
         if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
             (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
             env->ins_len = ins_len;
+        } else {
+            /* Clear ins_len when it isn't valid. */
+            env->ins_len = 0;
         }
-        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
+        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
             env->has_error_code = true;
             env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
+        } else {
+            /* Clear has_error_code when error_code isn't valid. */
+            env->has_error_code = false;
         }
     }
     if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
@@ -942,7 +952,7 @@ int hvf_vcpu_exec(CPUState *cpu)
             macvm_set_rip(cpu, rip + ins_len);
             break;
         case VMX_REASON_VMCALL:
-            env->exception_nr = EXCP0D_GPF;
+            env->exception_nr = VMCS_INTR_T_HWEXCEPTION | EXCP0D_GPF;
             env->exception_injected = 1;
             env->has_error_code = true;
             env->error_code = 0;
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 1485b95776..d25ae4585b 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -345,8 +345,6 @@ void vmx_clear_int_window_exiting(CPUState *cpu)
              ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
 }
 
-#define NMI_VEC 2
-
 bool hvf_inject_interrupts(CPUState *cpu_state)
 {
     X86CPU *x86cpu = X86_CPU(cpu_state);
@@ -356,17 +354,15 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
     uint64_t intr_type;
     bool have_event = true;
     if (env->interrupt_injected != -1) {
-        vector = env->interrupt_injected;
-        intr_type = VMCS_INTR_T_SWINTR;
+        /* Type and vector are both saved in interrupt_injected. */
+        vector = env->interrupt_injected & VMCS_IDT_VEC_VECNUM;
+        intr_type = env->interrupt_injected & VMCS_IDT_VEC_TYPE;
     } else if (env->exception_nr != -1) {
-        vector = env->exception_nr;
-        if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
-            intr_type = VMCS_INTR_T_SWEXCEPTION;
-        } else {
-            intr_type = VMCS_INTR_T_HWEXCEPTION;
-        }
+        /* Type and vector are both saved in exception_nr. */
+        vector = env->exception_nr & VMCS_IDT_VEC_VECNUM;
+        intr_type = env->exception_nr & VMCS_IDT_VEC_TYPE;
     } else if (env->nmi_injected) {
-        vector = NMI_VEC;
+        vector = EXCP02_NMI;
         intr_type = VMCS_INTR_T_NMI;
     } else {
         have_event = false;
@@ -390,6 +386,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
             if (env->has_error_code) {
                 wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR,
                       env->error_code);
+                /* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */
+                info |= VMCS_INTR_DEL_ERRCODE;
             }
             /*printf("reinject  %lx err %d\n", info, err);*/
             wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info);
@@ -399,7 +397,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
     if (cpu_state->interrupt_request & CPU_INTERRUPT_NMI) {
         if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) {
             cpu_state->interrupt_request &= ~CPU_INTERRUPT_NMI;
-            info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | NMI_VEC;
+            info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI;
             wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info);
         } else {
             vmx_set_nmi_window_exiting(cpu_state);
-- 
2.24.0



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-24 20:05 ` [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them Cameron Esfahani via
@ 2019-11-25 10:26   ` Paolo Bonzini
  2019-11-26 20:04     ` Cameron Esfahani via
  0 siblings, 1 reply; 16+ messages in thread
From: Paolo Bonzini @ 2019-11-25 10:26 UTC (permalink / raw)
  To: Cameron Esfahani, qemu-devel

On 24/11/19 21:05, Cameron Esfahani wrote:
> Save away type as well as vector in hvf_store_events() so we can
> correctly reinject both in hvf_inject_interrupts().
> 
> Make sure to clear ins_len and has_error_code when ins_len isn't
> valid and error_code isn't set.

Do you have a testcase for this?  (I could guess it's about the INT1
instruction).

Paolo



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 0/5] hvf: stability fixes for HVF
  2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
                   ` (4 preceding siblings ...)
  2019-11-24 20:05 ` [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them Cameron Esfahani via
@ 2019-11-25 10:28 ` Paolo Bonzini
  2019-11-26 20:10   ` Cameron Esfahani via
  5 siblings, 1 reply; 16+ messages in thread
From: Paolo Bonzini @ 2019-11-25 10:28 UTC (permalink / raw)
  To: Cameron Esfahani, qemu-devel, Peter Maydell

On 24/11/19 21:05, Cameron Esfahani wrote:
> The following patches fix stability issues with running QEMU on Apple
> Hypervisor Framework (HVF):
> - non-RAM, non-ROMD areas need to trap so accesses can be correctly
>   emulated.
> - Current TSC synchronization implementation is insufficient: when
>   running with more than 1 core, TSC values can go backwards.  Until
>   a correct implementation can be written, remove calls to
>   hv_vm_sync_tsc().  Pass through TSC to guest OS.
> - Fix REX emulation in relation to legacy prefixes.
> - More correctly match SDM when setting CR0 and PDPTE registers.
> - Save away exception type as well as vector in hvf_store_events() so
>   they can be correctly reinjected in hvf_inject_interrupts().  Under
>   heavy loads, exceptions got misrouted.

Certainly no doubt about patches 1-4, while for patch 5 I'm wondering if
it's masking another bug; I'd prefer to have also some assertions that
interrupt_injected is never an exception and exception_nr is never an
interrupt.

Peter, can you apply patches 1-4 directly?  I cannot even compile-test
them right now, but they are obviously bugfixes.

Paolo



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-25 10:26   ` Paolo Bonzini
@ 2019-11-26 20:04     ` Cameron Esfahani via
  2019-11-28  5:57       ` Cameron Esfahani via
  2019-11-28 13:56       ` Paolo Bonzini
  0 siblings, 2 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-26 20:04 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

Our test case was booting many concurrent macOS VMs under heavy system load.  I don't know if I could create one to replicate that.

Cameron Esfahani
dirty@apple.com

"In the elder days of Art, Builders wrought with greatest care each minute and unseen part; For the gods see everywhere."

"The Builders", H. W. Longfellow



> On Nov 25, 2019, at 2:26 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> On 24/11/19 21:05, Cameron Esfahani wrote:
>> Save away type as well as vector in hvf_store_events() so we can
>> correctly reinject both in hvf_inject_interrupts().
>> 
>> Make sure to clear ins_len and has_error_code when ins_len isn't
>> valid and error_code isn't set.
> 
> Do you have a testcase for this?  (I could guess it's about the INT1
> instruction).
> 
> Paolo
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 0/5] hvf: stability fixes for HVF
  2019-11-25 10:28 ` [PATCH v2 0/5] hvf: stability fixes for HVF Paolo Bonzini
@ 2019-11-26 20:10   ` Cameron Esfahani via
  0 siblings, 0 replies; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-26 20:10 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, Peter Maydell

Let me see if I can add some assertions.

Cameron Esfahani
dirty@apple.com

"The cake is a lie."

Common wisdom



> On Nov 25, 2019, at 2:28 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> Certainly no doubt about patches 1-4, while for patch 5 I'm wondering if
> it's masking another bug; I'd prefer to have also some assertions that
> interrupt_injected is never an exception and exception_nr is never an
> interrupt.
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-26 20:04     ` Cameron Esfahani via
@ 2019-11-28  5:57       ` Cameron Esfahani via
  2019-11-28 13:52         ` Paolo Bonzini
  2019-11-28 13:56       ` Paolo Bonzini
  1 sibling, 1 reply; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-28  5:57 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

I added some asserts to our internal version of QEMU.  It's a few months off of master and, specifically, doesn't have fd13f23b8c95311eff74426921557eee592b0ed3.

With the previous version of hvf_inject_interrupts(), before our fix, the code looked like the following:

>     if (env->interrupt_injected != -1) {
>         vector = env->interrupt_injected;
>         intr_type = VMCS_INTR_T_SWINTR;
>     } else if (env->exception_injected != -1) {


What we were seeing was, under heavy loads, running many concurrent macOS VMs, that we would get spurious interrupts.  Tracking it down, we discovered that VMCS_INTR_T_SWINTR was getting injected when VMCS_INTR_T_HWINTR was expected.

If I take our proposed patch code, which built on top of master from a few days ago, and has fd13f23b8c95311eff74426921557eee592b0ed3,  and add an assert, like the following:

>     if (env->interrupt_injected != -1) {
>         /* Type and vector are both saved in interrupt_injected. */
>         vector = env->interrupt_injected & VMCS_IDT_VEC_VECNUM;
>         intr_type = env->interrupt_injected & VMCS_IDT_VEC_TYPE;
>         if (VMCS_INTR_T_SWINTR != intr_type) {
>             printf("VMCS_INTR_T_SWINTR (%x) != intr_type (%llx)\n", VMCS_INTR_T_SWINTR, intr_type);
>             assert(VMCS_INTR_T_SWINTR == intr_type);
>         }
>     } else if (env->exception_nr != -1) {


Then we will see the assert trigger and get the following output:

> VMCS_INTR_T_SWINTR (400) != intr_type (0)
> Assertion failed: (VMCS_INTR_T_SWINTR == intr_type), function hvf_inject_interrupts, file qemu_upstream/target/i386/hvf/x86hvf.c, line 362.


So, as far as I can see, the proposed changes are still necessary.

Cameron Esfahani
dirty@apple.com

"Americans are very skilled at creating a custom meaning from something that's mass-produced."

Ann Powers


> On Nov 26, 2019, at 12:04 PM, Cameron Esfahani via <qemu-devel@nongnu.org> wrote:
> 
> Our test case was booting many concurrent macOS VMs under heavy system load.  I don't know if I could create one to replicate that.
> 
> Cameron Esfahani
> dirty@apple.com
> 
> "In the elder days of Art, Builders wrought with greatest care each minute and unseen part; For the gods see everywhere."
> 
> "The Builders", H. W. Longfellow
> 
> 
> 
>> On Nov 25, 2019, at 2:26 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> 
>> On 24/11/19 21:05, Cameron Esfahani wrote:
>>> Save away type as well as vector in hvf_store_events() so we can
>>> correctly reinject both in hvf_inject_interrupts().
>>> 
>>> Make sure to clear ins_len and has_error_code when ins_len isn't
>>> valid and error_code isn't set.
>> 
>> Do you have a testcase for this?  (I could guess it's about the INT1
>> instruction).
>> 
>> Paolo
>> 
> 
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-28  5:57       ` Cameron Esfahani via
@ 2019-11-28 13:52         ` Paolo Bonzini
  0 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2019-11-28 13:52 UTC (permalink / raw)
  To: Cameron Esfahani; +Cc: qemu-devel

On 28/11/19 06:57, Cameron Esfahani wrote:
> 
> What we were seeing was, under heavy loads, running many concurrent
macOS VMs, that we would get spurious interrupts.  Tracking it down, we
discovered that VMCS_INTR_T_SWINTR was getting injected when
VMCS_INTR_T_HWINTR was expected.
> 
> If I take our proposed patch code, which built on top of master from a
> few days ago, and has fd13f23b8c95311eff74426921557eee592b0ed3,  and add
> an assert, like the following:
> 
>>     if (env->interrupt_injected != -1) {
>>         /* Type and vector are both saved in interrupt_injected. */
>>         vector = env->interrupt_injected & VMCS_IDT_VEC_VECNUM;
>>         intr_type = env->interrupt_injected & VMCS_IDT_VEC_TYPE;
>>         if (VMCS_INTR_T_SWINTR != intr_type) {
>>             printf("VMCS_INTR_T_SWINTR (%x) != intr_type (%llx)\n", VMCS_INTR_T_SWINTR, intr_type);
>>             assert(VMCS_INTR_T_SWINTR == intr_type);
>>         }
>>     } else if (env->exception_nr != -1) {
> 
> Then we will see the assert trigger and get the following output:
> 
>> VMCS_INTR_T_SWINTR (400) != intr_type (0)
>> Assertion failed: (VMCS_INTR_T_SWINTR == intr_type), function hvf_inject_interrupts, file qemu_upstream/target/i386/hvf/x86hvf.c, line 362.

Great, thanks.  It's good to know that it's only software vs. hardware
interrupt.  I'll compare the KVM and QEMU source code to see why KVM
does not lose software vs. hardware interrupt, since the QEMU event
injection code was modeled against KVM's.

Paolo



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-26 20:04     ` Cameron Esfahani via
  2019-11-28  5:57       ` Cameron Esfahani via
@ 2019-11-28 13:56       ` Paolo Bonzini
  2019-11-28 13:59         ` Paolo Bonzini
  2019-11-30  8:31         ` Cameron Esfahani via
  1 sibling, 2 replies; 16+ messages in thread
From: Paolo Bonzini @ 2019-11-28 13:56 UTC (permalink / raw)
  To: Cameron Esfahani; +Cc: qemu-devel

On 26/11/19 21:04, Cameron Esfahani wrote:
> Our test case was booting many concurrent macOS VMs under heavy
> system load.  I don't know if I could create one to replicate that.

Does this work?

diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 1485b95776..26c6c3a49f 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -357,7 +357,11 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
     bool have_event = true;
     if (env->interrupt_injected != -1) {
         vector = env->interrupt_injected;
-        intr_type = VMCS_INTR_T_SWINTR;
+        if (env->ins_len) {
+            intr_type = VMCS_INTR_T_SWINTR;
+        } else {
+            intr_type = VMCS_INTR_T_HWINTR;
+        }
     } else if (env->exception_nr != -1) {
         vector = env->exception_nr;
         if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {

Thanks,

Paolo



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-28 13:56       ` Paolo Bonzini
@ 2019-11-28 13:59         ` Paolo Bonzini
  2019-11-30  8:31         ` Cameron Esfahani via
  1 sibling, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2019-11-28 13:59 UTC (permalink / raw)
  To: Cameron Esfahani; +Cc: qemu-devel

On 28/11/19 14:56, Paolo Bonzini wrote:
> On 26/11/19 21:04, Cameron Esfahani wrote:
>> Our test case was booting many concurrent macOS VMs under heavy
>> system load.  I don't know if I could create one to replicate that.
> 
> Does this work?
> 
> diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
> index 1485b95776..26c6c3a49f 100644
> --- a/target/i386/hvf/x86hvf.c
> +++ b/target/i386/hvf/x86hvf.c
> @@ -357,7 +357,11 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
>      bool have_event = true;
>      if (env->interrupt_injected != -1) {
>          vector = env->interrupt_injected;
> -        intr_type = VMCS_INTR_T_SWINTR;
> +        if (env->ins_len) {
> +            intr_type = VMCS_INTR_T_SWINTR;
> +        } else {
> +            intr_type = VMCS_INTR_T_HWINTR;
> +        }
>      } else if (env->exception_nr != -1) {
>          vector = env->exception_nr;
>          if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {

Better include this too:

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 784e67d77e..5dc7515841 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -637,6 +637,7 @@ static void hvf_store_events(CPUState *cpu, uint32_t
ins_len, uint64_t idtvec_in
     env->exception_injected = 0;
     env->interrupt_injected = -1;
     env->nmi_injected = false;
+    env->ins_len = 0;
     if (idtvec_info & VMCS_IDT_VEC_VALID) {
         switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
         case VMCS_IDT_VEC_HWINTR:

Paolo



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-28 13:56       ` Paolo Bonzini
  2019-11-28 13:59         ` Paolo Bonzini
@ 2019-11-30  8:31         ` Cameron Esfahani via
  2019-11-30  8:46           ` Paolo Bonzini
  1 sibling, 1 reply; 16+ messages in thread
From: Cameron Esfahani via @ 2019-11-30  8:31 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

So far so good.  Without any workaround, I could get it to fail within a few seconds.  With your change, I've been running for a few minutes without a problem.  But, this is on my laptop, so I'll wait until I can test it on a wider-range of machines at work next week.  If it continues to work, I'll update my patch to include this fix.

Now, can you help me understand why this approach is better than what I had written?  When we're in hvf_store_events(), we have vector type and number.  All the information we need to reinject later.  So why not save vector type away, instead of attempting to reconstruct it from other information (env->ins_len) in hvf_inject_interrupts()?

Cameron Esfahani
dirty@apple.com

"There are times in the life of a nation when the only place a decent man can find himself is in prison."



> On Nov 28, 2019, at 5:56 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> On 26/11/19 21:04, Cameron Esfahani wrote:
>> Our test case was booting many concurrent macOS VMs under heavy
>> system load.  I don't know if I could create one to replicate that.
> 
> Does this work?
> 
> diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
> index 1485b95776..26c6c3a49f 100644
> --- a/target/i386/hvf/x86hvf.c
> +++ b/target/i386/hvf/x86hvf.c
> @@ -357,7 +357,11 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
>     bool have_event = true;
>     if (env->interrupt_injected != -1) {
>         vector = env->interrupt_injected;
> -        intr_type = VMCS_INTR_T_SWINTR;
> +        if (env->ins_len) {
> +            intr_type = VMCS_INTR_T_SWINTR;
> +        } else {
> +            intr_type = VMCS_INTR_T_HWINTR;
> +        }
>     } else if (env->exception_nr != -1) {
>         vector = env->exception_nr;
>         if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
> 
> Thanks,
> 
> Paolo
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them
  2019-11-30  8:31         ` Cameron Esfahani via
@ 2019-11-30  8:46           ` Paolo Bonzini
  0 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2019-11-30  8:46 UTC (permalink / raw)
  To: Cameron Esfahani; +Cc: qemu-devel

On 30/11/19 09:31, Cameron Esfahani wrote:
> So far so good.  Without any workaround, I could get it to fail
> within a few seconds.  With your change, I've been running for a few
> minutes without a problem.  But, this is on my laptop, so I'll wait
> until I can test it on a wider-range of machines at work next week.
> If it continues to work, I'll update my patch to include this fix.

Great!  Note that there was a follow-up hunk in another email.

> Now, can you help me understand why this approach is better than what
> I had written?  When we're in hvf_store_events(), we have vector type
> and number.  All the information we need to reinject later.  So why
> not save vector type away, instead of attempting to reconstruct it
> from other information (env->ins_len) in hvf_inject_interrupts()?

No huge reason, I agree.  However, the event injection code in the
Android emulator was broken for pretty much every corner case, so when
we rewrote it in QEMU for Summer of Code we took KVM as a model (commit
b7394c8394, "i386: hvf: refactor event injection code for hvf",
2017-12-22).  Keeping the code similar can help with debugging, so I
prefer having the same meaning for env->interrupt_injected and
env->exception_nr across HVF and KVM.

I didn't write the code for KVM, so I cannot really say why they chose
to not save the event type.

Thanks,

Paolo

> 
> Cameron Esfahani
> dirty@apple.com
> 
> "There are times in the life of a nation when the only place a decent man can find himself is in prison."
> 
> 
> 
>> On Nov 28, 2019, at 5:56 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 26/11/19 21:04, Cameron Esfahani wrote:
>>> Our test case was booting many concurrent macOS VMs under heavy
>>> system load.  I don't know if I could create one to replicate that.
>>
>> Does this work?
>>
>> diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
>> index 1485b95776..26c6c3a49f 100644
>> --- a/target/i386/hvf/x86hvf.c
>> +++ b/target/i386/hvf/x86hvf.c
>> @@ -357,7 +357,11 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
>>     bool have_event = true;
>>     if (env->interrupt_injected != -1) {
>>         vector = env->interrupt_injected;
>> -        intr_type = VMCS_INTR_T_SWINTR;
>> +        if (env->ins_len) {
>> +            intr_type = VMCS_INTR_T_SWINTR;
>> +        } else {
>> +            intr_type = VMCS_INTR_T_HWINTR;
>> +        }
>>     } else if (env->exception_nr != -1) {
>>         vector = env->exception_nr;
>>         if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
>>
>> Thanks,
>>
>> Paolo
>>
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-11-30  8:58 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-24 20:05 [PATCH v2 0/5] hvf: stability fixes for HVF Cameron Esfahani via
2019-11-24 20:05 ` [PATCH v2 1/5] hvf: non-RAM, non-ROMD memory ranges are now correctly mapped in Cameron Esfahani via
2019-11-24 20:05 ` [PATCH v2 2/5] hvf: remove TSC synchronization code because it isn't fully complete Cameron Esfahani via
2019-11-24 20:05 ` [PATCH v2 3/5] hvf: correctly handle REX prefix in relation to legacy prefixes Cameron Esfahani via
2019-11-24 20:05 ` [PATCH v2 4/5] hvf: more accurately match SDM when setting CR0 and PDPTE registers Cameron Esfahani via
2019-11-24 20:05 ` [PATCH v2 5/5] hvf: save away type as well as vector so we can reinject them Cameron Esfahani via
2019-11-25 10:26   ` Paolo Bonzini
2019-11-26 20:04     ` Cameron Esfahani via
2019-11-28  5:57       ` Cameron Esfahani via
2019-11-28 13:52         ` Paolo Bonzini
2019-11-28 13:56       ` Paolo Bonzini
2019-11-28 13:59         ` Paolo Bonzini
2019-11-30  8:31         ` Cameron Esfahani via
2019-11-30  8:46           ` Paolo Bonzini
2019-11-25 10:28 ` [PATCH v2 0/5] hvf: stability fixes for HVF Paolo Bonzini
2019-11-26 20:10   ` Cameron Esfahani via

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.