All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
@ 2015-12-15 21:02 Boris Ostrovsky
  2015-12-16  9:04 ` Jan Beulich
  0 siblings, 1 reply; 8+ messages in thread
From: Boris Ostrovsky @ 2015-12-15 21:02 UTC (permalink / raw)
  To: jbeulich, andrew.cooper3; +Cc: Boris Ostrovsky, xen-devel

Commit 9256f66c1606 ("x86/PCI: intercept all PV Dom0 MMCFG writes")
added intercepts for writes to RO MMCFG space from PV dom0.

Similar functionality is needed by dom0s in HVM containers (such as
PVH and, in the future, HVMlite).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 xen/arch/x86/hvm/emulate.c             |   22 +++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c                 |   15 ++++++++++++++
 xen/arch/x86/mm.c                      |   33 +++++++++++++------------------
 xen/arch/x86/x86_emulate/x86_emulate.h |    3 ++
 xen/include/asm-x86/hvm/emulate.h      |    4 +++
 xen/include/asm-x86/mm.h               |   17 ++++++++++++++++
 6 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index e1017b5..eed9324 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1778,6 +1778,28 @@ int hvm_emulate_one_no_write(
     return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
 }
 
+static const struct x86_emulate_ops hvm_emulate_ops_mmio = {
+    .read       = mmio_ro_emulated_read,
+    .insn_fetch = hvmemul_insn_fetch,
+    .write      = mmio_intercept_write,
+};
+int hvm_emulate_one_mmio(unsigned seg, unsigned bdf, unsigned long gla)
+{
+    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
+        .cr2 = gla,
+        .seg = seg,
+        .bdf = bdf
+    };
+    struct hvm_emulate_ctxt ctxt = { .ctxt.data = &mmio_ro_ctxt };
+    int rc;
+
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+    rc = _hvm_emulate_one(&ctxt, &hvm_emulate_ops_mmio);
+    hvm_emulate_writeback(&ctxt);
+
+    return rc;
+}
+
 void hvm_mem_access_emulate_one(enum emul_kind kind, unsigned int trapnr,
     unsigned int errcode)
 {
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 92d57ff..69ef6ed 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3116,6 +3116,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
         goto out_put_gfn;
     }
 
+    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) )
+    {
+        unsigned int seg, bdf;
+        const unsigned long *ro_map;
+
+        if ( pci_mmcfg_decode(mfn_x(mfn), &seg, &bdf) &&
+             ((ro_map = pci_get_ro_map(seg)) == NULL ||
+              !test_bit(bdf, ro_map)) )
+            if (hvm_emulate_one_mmio(seg, bdf, gla) == X86EMUL_OKAY)
+            {
+                rc = 1;
+                goto out_put_gfn;
+            }
+    }
+
     /* If we fell through, the vcpu will retry now that access restrictions have
      * been removed. It may fault again if the p2m entry type still requires so.
      * Otherwise, this is an error condition. */
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 92df36f..506ee41 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5243,13 +5243,7 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
  * fault handling for read-only MMIO pages
  */
 
-struct mmio_ro_emulate_ctxt {
-    struct x86_emulate_ctxt ctxt;
-    unsigned long cr2;
-    unsigned int seg, bdf;
-};
-
-static int mmio_ro_emulated_read(
+int mmio_ro_emulated_read(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
@@ -5266,8 +5260,8 @@ static int mmio_ro_emulated_write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt =
-        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = 
+        (struct mmio_ro_emulate_ctxt *)ctxt->data;
 
     /* Only allow naturally-aligned stores at the original %cr2 address. */
     if ( ((bytes | offset) & (bytes - 1)) || offset != mmio_ro_ctxt->cr2 )
@@ -5286,7 +5280,7 @@ static const struct x86_emulate_ops mmio_ro_emulate_ops = {
     .write      = mmio_ro_emulated_write,
 };
 
-static int mmio_intercept_write(
+int mmio_intercept_write(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
@@ -5294,7 +5288,7 @@ static int mmio_intercept_write(
     struct x86_emulate_ctxt *ctxt)
 {
     struct mmio_ro_emulate_ctxt *mmio_ctxt =
-        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+        (struct mmio_ro_emulate_ctxt *)ctxt->data;
 
     /*
      * Only allow naturally-aligned stores no wider than 4 bytes to the
@@ -5331,12 +5325,13 @@ int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
     l1_pgentry_t pte;
     unsigned long mfn;
     unsigned int addr_size = is_pv_32bit_vcpu(v) ? 32 : BITS_PER_LONG;
-    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
-        .ctxt.regs = regs,
-        .ctxt.addr_size = addr_size,
-        .ctxt.sp_size = addr_size,
-        .ctxt.swint_emulate = x86_swint_emulate_none,
-        .cr2 = addr
+    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = addr };
+    struct x86_emulate_ctxt ctxt = {
+        .regs = regs,
+        .addr_size = addr_size,
+        .sp_size = addr_size,
+        .swint_emulate = x86_swint_emulate_none,
+        .data = &mmio_ro_ctxt
     };
     const unsigned long *ro_map;
     int rc;
@@ -5366,9 +5361,9 @@ int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
     if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) &&
          ((ro_map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL ||
           !test_bit(mmio_ro_ctxt.bdf, ro_map)) )
-        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops);
+        rc = x86_emulate(&ctxt, &mmio_intercept_ops);
     else
-        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
+        rc = x86_emulate(&ctxt, &mmio_ro_emulate_ops);
 
     return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
 }
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index cfac09b..b85b8f5 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -430,6 +430,9 @@ struct x86_emulate_ctxt
         } flags;
         uint8_t byte;
     } retire;
+
+    /* Caller data that can be used by x86_emulate_ops */
+    void *data;
 };
 
 struct x86_emulate_stub {
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 49134b5..6e6388a 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -57,6 +57,10 @@ void hvm_emulate_writeback(
 struct segment_register *hvmemul_get_seg_reg(
     enum x86_segment seg,
     struct hvm_emulate_ctxt *hvmemul_ctxt);
+int hvm_emulate_one_mmio(
+    unsigned seg,
+    unsigned bdf,
+    unsigned long gla);
 
 int hvmemul_do_pio_buffer(uint16_t port,
                           unsigned int size,
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 67b34c6..9549590 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -7,6 +7,7 @@
 #include <xen/spinlock.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
+#include <asm/x86_emulate.h>
 
 /*
  * Per-page-frame information.
@@ -488,6 +489,22 @@ void memguard_unguard_range(void *p, unsigned long l);
 void memguard_guard_stack(void *p);
 void memguard_unguard_stack(void *p);
 
+struct mmio_ro_emulate_ctxt {
+        unsigned long cr2;
+        unsigned int seg, bdf;
+};
+
+extern int mmio_ro_emulated_read(enum x86_segment seg,
+                                 unsigned long offset,
+                                 void *p_data,
+                                 unsigned int bytes,
+                                 struct x86_emulate_ctxt *ctxt);
+extern int mmio_intercept_write(enum x86_segment seg,
+                                unsigned long offset,
+                                void *p_data,
+                                unsigned int bytes,
+                                struct x86_emulate_ctxt *ctxt);
+
 int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                         struct cpu_user_regs *);
 int  mmio_ro_do_page_fault(struct vcpu *, unsigned long,
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-15 21:02 [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers Boris Ostrovsky
@ 2015-12-16  9:04 ` Jan Beulich
  2015-12-16 14:04   ` Boris Ostrovsky
  2015-12-16 19:34   ` Boris Ostrovsky
  0 siblings, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2015-12-16  9:04 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: andrew.cooper3, xen-devel

>>> On 15.12.15 at 22:02, <boris.ostrovsky@oracle.com> wrote:
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1778,6 +1778,28 @@ int hvm_emulate_one_no_write(
>      return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
>  }
>  
> +static const struct x86_emulate_ops hvm_emulate_ops_mmio = {
> +    .read       = mmio_ro_emulated_read,
> +    .insn_fetch = hvmemul_insn_fetch,
> +    .write      = mmio_intercept_write,
> +};

Blank line missing here, but as a probably better alternative this could
move into the function below.

> +int hvm_emulate_one_mmio(unsigned seg, unsigned bdf, unsigned long gla)

Name and function parameters don't match up (also for e.g. the
changed struct mmio_ro_emulate_ctxt). DYM e.g.
hvm_emulate_one_mmcfg()? (The function naming in x86/mm.c
is actually using mmio because its serving wider purpose than
just MMCFG write interception. Which makes me wonder whether
we don't actually need that other part for PVH too, in which case
the naming would again be fine.)

> +{
> +    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
> +        .cr2 = gla,
> +        .seg = seg,
> +        .bdf = bdf
> +    };
> +    struct hvm_emulate_ctxt ctxt = { .ctxt.data = &mmio_ro_ctxt };

hvm_mem_access_emulate_one() so far is the only example pre-
initializing ctxt before calling hvm_emulate_prepare(), and I don't
think it actually needs this. I think the proper place to set .data is
after the call to hvm_emulate_prepare().

> +    int rc;
> +
> +    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
> +    rc = _hvm_emulate_one(&ctxt, &hvm_emulate_ops_mmio);
> +    hvm_emulate_writeback(&ctxt);

Is this correct to be done unconditionally (i.e. regardless of rc)?

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3116,6 +3116,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
>          goto out_put_gfn;
>      }
>  
> +    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) )

The PV condition includes a PFEC_page_present check, and I think
an equivalent should be added here too.

> +    {
> +        unsigned int seg, bdf;
> +        const unsigned long *ro_map;
> +
> +        if ( pci_mmcfg_decode(mfn_x(mfn), &seg, &bdf) &&
> +             ((ro_map = pci_get_ro_map(seg)) == NULL ||
> +              !test_bit(bdf, ro_map)) )
> +            if (hvm_emulate_one_mmio(seg, bdf, gla) == X86EMUL_OKAY)

Two immediately adjacent if()-s should be folded into one.

> @@ -5266,8 +5260,8 @@ static int mmio_ro_emulated_write(
>      unsigned int bytes,
>      struct x86_emulate_ctxt *ctxt)
>  {
> -    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt =
> -        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
> +    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = 
> +        (struct mmio_ro_emulate_ctxt *)ctxt->data;

Pointless cast.

> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -430,6 +430,9 @@ struct x86_emulate_ctxt
>          } flags;
>          uint8_t byte;
>      } retire;
> +
> +    /* Caller data that can be used by x86_emulate_ops */

There seems to be more missing in this comment than just the full
stop: x86_emulate_ops is a structure and hence can't use any
data.

> --- a/xen/include/asm-x86/hvm/emulate.h
> +++ b/xen/include/asm-x86/hvm/emulate.h
> @@ -57,6 +57,10 @@ void hvm_emulate_writeback(
>  struct segment_register *hvmemul_get_seg_reg(
>      enum x86_segment seg,
>      struct hvm_emulate_ctxt *hvmemul_ctxt);
> +int hvm_emulate_one_mmio(
> +    unsigned seg,
> +    unsigned bdf,

Please don't omit the "int" here (and elsewhere whenever the
context doesn't suggest it's common practice in the given piece of
code).

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-16  9:04 ` Jan Beulich
@ 2015-12-16 14:04   ` Boris Ostrovsky
  2015-12-16 14:47     ` Jan Beulich
  2015-12-16 19:34   ` Boris Ostrovsky
  1 sibling, 1 reply; 8+ messages in thread
From: Boris Ostrovsky @ 2015-12-16 14:04 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, xen-devel



On 12/16/2015 04:04 AM, Jan Beulich wrote:
>>>> On 15.12.15 at 22:02,<boris.ostrovsky@oracle.com>  wrote:
>> --- a/xen/arch/x86/hvm/emulate.c
>> +++ b/xen/arch/x86/hvm/emulate.c
>> @@ -1778,6 +1778,28 @@ int hvm_emulate_one_no_write(
>>       return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
>>   }
>>   
>> +static const struct x86_emulate_ops hvm_emulate_ops_mmio = {
>> +    .read       = mmio_ro_emulated_read,
>> +    .insn_fetch = hvmemul_insn_fetch,
>> +    .write      = mmio_intercept_write,
>> +};
> Blank line missing here, but as a probably better alternative this could
> move into the function below.
>
>> +int hvm_emulate_one_mmio(unsigned seg, unsigned bdf, unsigned long gla)
> Name and function parameters don't match up (also for e.g. the
> changed struct mmio_ro_emulate_ctxt). DYM e.g.
> hvm_emulate_one_mmcfg()?

Yes. I in fact first named it *_mmcfg() and then renamed it to *_mmio(). 
But then by the same logic wouldn't we want to rename 
mmio_intercept_write() as well, not necessarily because of arguments but 
because of what it actually does?

I am not sure what you mean by your statement in parentheses about 
mmio_ro_emulate_ctxt.

> (The function naming in x86/mm.c
> is actually using mmio because it's serving a wider purpose than
> just MMCFG write interception. Which makes me wonder whether
> we don't actually need that other part for PVH too, in which case
> the naming would again be fine.)

I don't think I understand what the other part is used for in PV so I 
don't know whether it is useful for PVH.

>> +{
>> +    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
>> +        .cr2 = gla,
>> +        .seg = seg,
>> +        .bdf = bdf
>> +    };
>> +    struct hvm_emulate_ctxt ctxt = { .ctxt.data = &mmio_ro_ctxt };
> hvm_mem_access_emulate_one() so far is the only example pre-
> initializing ctxt before calling hvm_emulate_prepare(), and I don't
> think it actually needs this. I think the proper place to set .data is
> after the call to hvm_emulate_prepare().

I used hvm_emulate_prepare() as a convenient way to initialize the 
context to a reasonable state, with definition of "reasonable" possibly 
changing at some point later. Other emulating routines have 
hvm_emulate_ctxt passed in so it may already have certain fields set.

(Yes, .data needs to be set afterwards).

-boris

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-16 14:04   ` Boris Ostrovsky
@ 2015-12-16 14:47     ` Jan Beulich
  0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2015-12-16 14:47 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: andrew.cooper3, xen-devel

>>> On 16.12.15 at 15:04, <boris.ostrovsky@oracle.com> wrote:
> On 12/16/2015 04:04 AM, Jan Beulich wrote:
>>>>> On 15.12.15 at 22:02,<boris.ostrovsky@oracle.com>  wrote:
>>> --- a/xen/arch/x86/hvm/emulate.c
>>> +++ b/xen/arch/x86/hvm/emulate.c
>>> @@ -1778,6 +1778,28 @@ int hvm_emulate_one_no_write(
>>>       return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
>>>   }
>>>   
>>> +static const struct x86_emulate_ops hvm_emulate_ops_mmio = {
>>> +    .read       = mmio_ro_emulated_read,
>>> +    .insn_fetch = hvmemul_insn_fetch,
>>> +    .write      = mmio_intercept_write,
>>> +};
>> Blank line missing here, but as a probably better alternative this could
>> move into the function below.
>>
>>> +int hvm_emulate_one_mmio(unsigned seg, unsigned bdf, unsigned long gla)
>> Name and function parameters don't match up (also for e.g. the
>> changed struct mmio_ro_emulate_ctxt). DYM e.g.
>> hvm_emulate_one_mmcfg()?
> 
> Yes. I in fact first named it *_mmcfg() and then renamed it to _*mmio(). 
> But then by the same logic wouldn't we want to rename 
> mmio_intercept_write() as well, not necessarily because of arguments but 
> because of what it actually does?

We might. I named it that way just to match its neighbors.

> I am not sure what you mean by your statement in parentheses about 
> mmio_ro_emulate_ctxt.

I simply hinted at this also likely needing s/mmio/mmcfg/.

>> (The function naming in x86/mm.c
>> is actually using mmio because it's serving a wider purpose than
>> just MMCFG write interception. Which makes me wonder whether
>> we don't actually need that other part for PVH too, in which case
>> the naming would again be fine.)
> 
> I don't think I understand what the other part is used for in PV so I 
> don't know whether it is useful for PVH.

It discards writes to the config space of r/o devices.

>>> +{
>>> +    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
>>> +        .cr2 = gla,
>>> +        .seg = seg,
>>> +        .bdf = bdf
>>> +    };
>>> +    struct hvm_emulate_ctxt ctxt = { .ctxt.data = &mmio_ro_ctxt };
>> hvm_mem_access_emulate_one() so far is the only example pre-
>> initializing ctxt before calling hvm_emulate_prepare(), and I don't
>> think it actually needs this. I think the proper place to set .data is
>> after the call to hvm_emulate_prepare().
> 
> I used hvm_emulate_prepare() as a convenient way to initialize the 
> context to a reasonable state, with definition of "reasonable" possibly 
> changing at some point later. Other emulating routines have 
> hvm_emulate_ctxt passed in so it may already have certain fields set.

Oh, I wasn't questioning that part - you certainly need to use that
function. I was questioning the zero-initialization of all the fields
other than .data prior to calling this function.

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-16  9:04 ` Jan Beulich
  2015-12-16 14:04   ` Boris Ostrovsky
@ 2015-12-16 19:34   ` Boris Ostrovsky
  2015-12-17  9:46     ` Jan Beulich
  1 sibling, 1 reply; 8+ messages in thread
From: Boris Ostrovsky @ 2015-12-16 19:34 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, xen-devel



On 12/16/2015 04:04 AM, Jan Beulich wrote:
>
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -3116,6 +3116,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
>>           goto out_put_gfn;
>>       }
>>   
>> +    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) )
> The PV condition includes a PFEC_page_present check, and I think
> an equivalent should be added here too.

Hmm.. So how would I determine that? I can see how this is available in 
SVM (although it is not reflected in npfec) but in VMX I don't see 
direct indication of whether the page was present.

-boris

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-16 19:34   ` Boris Ostrovsky
@ 2015-12-17  9:46     ` Jan Beulich
  2015-12-17 13:55       ` Boris Ostrovsky
  0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2015-12-17  9:46 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: andrew.cooper3, xen-devel

>>> On 16.12.15 at 20:34, <boris.ostrovsky@oracle.com> wrote:
> On 12/16/2015 04:04 AM, Jan Beulich wrote:
>>
>>> --- a/xen/arch/x86/hvm/hvm.c
>>> +++ b/xen/arch/x86/hvm/hvm.c
>>> @@ -3116,6 +3116,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
>>>           goto out_put_gfn;
>>>       }
>>>   
>>> +    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) )
>> The PV condition includes a PFEC_page_present check, and I think
>> an equivalent should be added here too.
> 
> Hmm.. So how would I determine that? I can see how this is available in 
> SVM (although it is not reflected in npfec) but in VMX I don't see 
> direct indication of whether the page was present.

That's what bits 3..5 of the exit qualification can be used for. Really
you could leverage the more fine grained reporting by EPT to make
the condition here "bit 1 must be set" (i.e. npfec.write_access) and
"bit 3 must be set" (readable, which implies present) and "bit 4 must
be clear" (and we might even demand bit 5 to be clear too).

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-17  9:46     ` Jan Beulich
@ 2015-12-17 13:55       ` Boris Ostrovsky
  2015-12-17 13:59         ` Jan Beulich
  0 siblings, 1 reply; 8+ messages in thread
From: Boris Ostrovsky @ 2015-12-17 13:55 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, xen-devel

On 12/17/2015 04:46 AM, Jan Beulich wrote:
>>>> On 16.12.15 at 20:34, <boris.ostrovsky@oracle.com> wrote:
>> On 12/16/2015 04:04 AM, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/hvm/hvm.c
>>>> +++ b/xen/arch/x86/hvm/hvm.c
>>>> @@ -3116,6 +3116,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
>>>>            goto out_put_gfn;
>>>>        }
>>>>    
>>>> +    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) )
>>> The PV condition includes a PFEC_page_present check, and I think
>>> an equivalent should be added here too.
>> Hmm.. So how would I determine that? I can see how this is available in
>> SVM (although it is not reflected in npfec) but in VMX I don't see
>> direct indication of whether the page was present.
> That's what bits 3..5 of the exit qualification can be used for. Really
> you could leverage the more fine grained reporting by EPT to make
> the condition here "bit 1 must be set" (i.e. npfec.write_access) and
> "bit 3 must be set" (readable, which implies present) and "bit 4 must
> be clear" (and we might even demand bit 5 to be clear too).

OK, but these bits are not mapped into npfec structure so we'd need to 
expand it.

(And then SVM does not provide this much information so we may need to 
get creative there).

-boris

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers
  2015-12-17 13:55       ` Boris Ostrovsky
@ 2015-12-17 13:59         ` Jan Beulich
  0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2015-12-17 13:59 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: andrew.cooper3, xen-devel

>>> On 17.12.15 at 14:55, <boris.ostrovsky@oracle.com> wrote:
> On 12/17/2015 04:46 AM, Jan Beulich wrote:
>>>>> On 16.12.15 at 20:34, <boris.ostrovsky@oracle.com> wrote:
>>> On 12/16/2015 04:04 AM, Jan Beulich wrote:
>>>>> --- a/xen/arch/x86/hvm/hvm.c
>>>>> +++ b/xen/arch/x86/hvm/hvm.c
>>>>> @@ -3116,6 +3116,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned 
> long gla,
>>>>>            goto out_put_gfn;
>>>>>        }
>>>>>    
>>>>> +    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) )
>>>> The PV condition includes a PFEC_page_present check, and I think
>>>> an equivalent should be added here too.
>>> Hmm.. So how would I determine that? I can see how this is available in
>>> SVM (although it is not reflected in npfec) but in VMX I don't see
>>> direct indication of whether the page was present.
>> That's what bits 3..5 of the exit qualification can be used for. Really
>> you could leverage the more fine grained reporting by EPT to make
>> the condition here "bit 1 must be set" (i.e. npfec.write_access) and
>> "bit 3 must be set" (readable, which implies present) and "bit 4 must
>> be clear" (and we might even demand bit 5 to be clear too).
> 
> OK, but these bits are not mapped into npfec structure so we'd need to 
> expand it.
> 
> (And then SVM does not provide this much information so we may need to 
> get creative there).

Yes and yes (sadly).

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-12-17 13:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-15 21:02 [PATCH] x86/PCI: Intercept Dom0 MMCFG from dom0s in HVM containers Boris Ostrovsky
2015-12-16  9:04 ` Jan Beulich
2015-12-16 14:04   ` Boris Ostrovsky
2015-12-16 14:47     ` Jan Beulich
2015-12-16 19:34   ` Boris Ostrovsky
2015-12-17  9:46     ` Jan Beulich
2015-12-17 13:55       ` Boris Ostrovsky
2015-12-17 13:59         ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.