* [PATCH] x86/vmap: handle superpages in vmap_to_mfn()
@ 2020-12-03 11:21 Hongyan Xia
  2020-12-03 11:27 ` Hongyan Xia
  2020-12-07 10:11 ` [PATCH v2] " Jan Beulich
  0 siblings, 2 replies; 9+ messages in thread
From: Hongyan Xia @ 2020-12-03 11:21 UTC (permalink / raw)
  To: xen-devel; +Cc: Jan Beulich, Andrew Cooper, Roger Pau Monné, Wei Liu

From: Hongyan Xia <hongyxia@amazon.com>

There is simply no guarantee that vmap won't return superpages to the
caller. It can happen if the list of MFNs is contiguous, or if we have
a large mapping granularity. Such cases are rare, but when they do
happen we hit a BUG_ON() and crash.

Introduce xen_map_to_mfn() to translate any mapped Xen address to an
MFN regardless of page size, and implement vmap_to_mfn() as a wrapper
around it.

Signed-off-by: Hongyan Xia <hongyxia@amazon.com>

---
Changed in v2:
- const pl*e
- introduce xen_map_to_mfn().
- goto to a single exit path.
- ASSERT_UNREACHABLE instead of ASSERT.
---
 xen/arch/x86/mm.c          | 54 ++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/mm.h   |  1 +
 xen/include/asm-x86/page.h |  2 +-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 5a50339284c7..53cc5c6de2e6 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5194,6 +5194,60 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
         }                                          \
     } while ( false )
 
+/* Translate mapped Xen address to MFN. */
+mfn_t xen_map_to_mfn(unsigned long va)
+{
+#define CHECK_MAPPED(cond_)     \
+    if ( !(cond_) )             \
+    {                           \
+        ASSERT_UNREACHABLE();   \
+        ret = INVALID_MFN;      \
+        goto out;               \
+    }                           \
+
+    bool locking = system_state > SYS_STATE_boot;
+    unsigned int l2_offset = l2_table_offset(va);
+    unsigned int l1_offset = l1_table_offset(va);
+    const l3_pgentry_t *pl3e = virt_to_xen_l3e(va);
+    const l2_pgentry_t *pl2e = NULL;
+    const l1_pgentry_t *pl1e = NULL;
+    struct page_info *l3page;
+    mfn_t ret;
+
+    L3T_INIT(l3page);
+    CHECK_MAPPED(pl3e)
+    l3page = virt_to_page(pl3e);
+    L3T_LOCK(l3page);
+
+    CHECK_MAPPED(l3e_get_flags(*pl3e) & _PAGE_PRESENT)
+    if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
+    {
+        ret = mfn_add(l3e_get_mfn(*pl3e),
+                      (l2_offset << PAGETABLE_ORDER) + l1_offset);
+        goto out;
+    }
+
+    pl2e = map_l2t_from_l3e(*pl3e) + l2_offset;
+    CHECK_MAPPED(l2e_get_flags(*pl2e) & _PAGE_PRESENT)
+    if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
+    {
+        ret = mfn_add(l2e_get_mfn(*pl2e), l1_offset);
+        goto out;
+    }
+
+    pl1e = map_l1t_from_l2e(*pl2e) + l1_offset;
+    CHECK_MAPPED(l1e_get_flags(*pl1e) & _PAGE_PRESENT)
+    ret = l1e_get_mfn(*pl1e);
+
+#undef CHECK_MAPPED
+ out:
+    L3T_UNLOCK(l3page);
+    unmap_domain_page(pl1e);
+    unmap_domain_page(pl2e);
+    unmap_domain_page(pl3e);
+    return ret;
+}
+
 int map_pages_to_xen(
     unsigned long virt,
     mfn_t mfn,
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index deeba75a1cbb..2fc8eeaf7aad 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -578,6 +578,7 @@ mfn_t alloc_xen_pagetable_new(void);
 void free_xen_pagetable_new(mfn_t mfn);
 
 l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
+mfn_t xen_map_to_mfn(unsigned long va);
 
 int __sync_local_execstate(void);
 
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 7a771baf7cb3..886adf17a40c 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -291,7 +291,7 @@ void copy_page_sse2(void *, const void *);
 #define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
 #define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
 #define paddr_to_pdx(pa)    pfn_to_pdx(paddr_to_pfn(pa))
-#define vmap_to_mfn(va)     l1e_get_mfn(*virt_to_xen_l1e((unsigned long)(va)))
+#define vmap_to_mfn(va)     xen_map_to_mfn((unsigned long)va)
 #define vmap_to_page(va)    mfn_to_page(vmap_to_mfn(va))
 
 #endif /* !defined(__ASSEMBLY__) */
-- 
2.17.1




* Re: [PATCH] x86/vmap: handle superpages in vmap_to_mfn()
  2020-12-03 11:21 [PATCH] x86/vmap: handle superpages in vmap_to_mfn() Hongyan Xia
@ 2020-12-03 11:27 ` Hongyan Xia
  2020-12-07 10:11 ` [PATCH v2] " Jan Beulich
  1 sibling, 0 replies; 9+ messages in thread
From: Hongyan Xia @ 2020-12-03 11:27 UTC (permalink / raw)
  To: xen-devel; +Cc: Jan Beulich, Andrew Cooper, Roger Pau Monné, Wei Liu

Apologies. The v2 tag is missing from the title.




* Re: [PATCH v2] x86/vmap: handle superpages in vmap_to_mfn()
  2020-12-03 11:21 [PATCH] x86/vmap: handle superpages in vmap_to_mfn() Hongyan Xia
  2020-12-03 11:27 ` Hongyan Xia
@ 2020-12-07 10:11 ` Jan Beulich
  2020-12-07 11:54   ` Hongyan Xia
  1 sibling, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2020-12-07 10:11 UTC (permalink / raw)
  To: Hongyan Xia; +Cc: Andrew Cooper, Roger Pau Monné, Wei Liu, xen-devel

On 03.12.2020 12:21, Hongyan Xia wrote:
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -5194,6 +5194,60 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
>          }                                          \
>      } while ( false )
>  
> +/* Translate mapped Xen address to MFN. */
> +mfn_t xen_map_to_mfn(unsigned long va)
> +{
> +#define CHECK_MAPPED(cond_)     \
> +    if ( !(cond_) )             \
> +    {                           \
> +        ASSERT_UNREACHABLE();   \
> +        ret = INVALID_MFN;      \
> +        goto out;               \
> +    }                           \

This should be coded such that use sites ...

> +    bool locking = system_state > SYS_STATE_boot;
> +    unsigned int l2_offset = l2_table_offset(va);
> +    unsigned int l1_offset = l1_table_offset(va);
> +    const l3_pgentry_t *pl3e = virt_to_xen_l3e(va);
> +    const l2_pgentry_t *pl2e = NULL;
> +    const l1_pgentry_t *pl1e = NULL;
> +    struct page_info *l3page;
> +    mfn_t ret;
> +
> +    L3T_INIT(l3page);
> +    CHECK_MAPPED(pl3e)
> +    l3page = virt_to_page(pl3e);
> +    L3T_LOCK(l3page);
> +
> +    CHECK_MAPPED(l3e_get_flags(*pl3e) & _PAGE_PRESENT)

... will properly require a statement-ending semicolon. With
additionally the trailing underscore dropped from the macro's
parameter name:
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Or wait,

> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -578,6 +578,7 @@ mfn_t alloc_xen_pagetable_new(void);
>  void free_xen_pagetable_new(mfn_t mfn);
>  
>  l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
> +mfn_t xen_map_to_mfn(unsigned long va);

This is now a pretty proper companion of map_pages_to_xen(), and
hence imo ought to be declared next to that one rather than here.
Ultimately Arm may also need to gain an implementation.
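
I.e., presumably next to the existing declaration in the common header
(assuming xen/include/xen/mm.h; exact placement is only a guess):

    mfn_t xen_map_to_mfn(unsigned long va);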

> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -291,7 +291,7 @@ void copy_page_sse2(void *, const void *);
>  #define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
>  #define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
>  #define paddr_to_pdx(pa)    pfn_to_pdx(paddr_to_pfn(pa))
> -#define vmap_to_mfn(va)     l1e_get_mfn(*virt_to_xen_l1e((unsigned long)(va)))
> +#define vmap_to_mfn(va)     xen_map_to_mfn((unsigned long)va)

You've lost parentheses around va.

Jan



* Re: [PATCH v2] x86/vmap: handle superpages in vmap_to_mfn()
  2020-12-07 10:11 ` [PATCH v2] " Jan Beulich
@ 2020-12-07 11:54   ` Hongyan Xia
  2020-12-07 11:57     ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Hongyan Xia @ 2020-12-07 11:54 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Wei Liu, xen-devel

On Mon, 2020-12-07 at 11:11 +0100, Jan Beulich wrote:
> On 03.12.2020 12:21, Hongyan Xia wrote:
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -5194,6 +5194,60 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long
> > v)
> >          }                                          \
> >      } while ( false )
> >  
> > +/* Translate mapped Xen address to MFN. */
> > +mfn_t xen_map_to_mfn(unsigned long va)
> > +{
> > +#define CHECK_MAPPED(cond_)     \
> > +    if ( !(cond_) )             \
> > +    {                           \
> > +        ASSERT_UNREACHABLE();   \
> > +        ret = INVALID_MFN;      \
> > +        goto out;               \
> > +    }                           \
> 
> This should be coded such that use sites ...
> 
> > +    bool locking = system_state > SYS_STATE_boot;
> > +    unsigned int l2_offset = l2_table_offset(va);
> > +    unsigned int l1_offset = l1_table_offset(va);
> > +    const l3_pgentry_t *pl3e = virt_to_xen_l3e(va);
> > +    const l2_pgentry_t *pl2e = NULL;
> > +    const l1_pgentry_t *pl1e = NULL;
> > +    struct page_info *l3page;
> > +    mfn_t ret;
> > +
> > +    L3T_INIT(l3page);
> > +    CHECK_MAPPED(pl3e)
> > +    l3page = virt_to_page(pl3e);
> > +    L3T_LOCK(l3page);
> > +
> > +    CHECK_MAPPED(l3e_get_flags(*pl3e) & _PAGE_PRESENT)
> 
> ... will properly require a statement-ending semicolon. With
> additionally the trailing underscore dropped from the macro's
> parameter name

The immediate solution that came to mind is a do-while construct. Would
you be happy with that?
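
Something along these lines, presumably (untested sketch, same body as
in v2 but wrapped in do/while and with the trailing underscore dropped):

#define CHECK_MAPPED(cond)        \
    do {                          \
        if ( !(cond) )            \
        {                         \
            ASSERT_UNREACHABLE(); \
            ret = INVALID_MFN;    \
            goto out;             \
        }                         \
    } while ( false )

so that each use site, e.g. CHECK_MAPPED(pl3e);, then ends with a
statement-terminating semicolon.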

> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 

Thanks.

> Or wait,
> 
> > --- a/xen/include/asm-x86/mm.h
> > +++ b/xen/include/asm-x86/mm.h
> > @@ -578,6 +578,7 @@ mfn_t alloc_xen_pagetable_new(void);
> >  void free_xen_pagetable_new(mfn_t mfn);
> >  
> >  l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
> > +mfn_t xen_map_to_mfn(unsigned long va);
> 
> This is now a pretty proper companion of map_page_to_xen(), and
> hence imo ought to be declared next to that one rather than here.
> Ultimately Arm may also need to gain an implementation.

Since map_pages_to_xen() is in the common header, are we okay with
having the declaration but not an implementation on the Arm side in
this patch? Or do we also want to introduce the Arm implementation in
this patch?

> > --- a/xen/include/asm-x86/page.h
> > +++ b/xen/include/asm-x86/page.h
> > @@ -291,7 +291,7 @@ void copy_page_sse2(void *, const void *);
> >  #define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
> >  #define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
> >  #define paddr_to_pdx(pa)    pfn_to_pdx(paddr_to_pfn(pa))
> > -#define vmap_to_mfn(va)     l1e_get_mfn(*virt_to_xen_l1e((unsigned
> > long)(va)))
> > +#define vmap_to_mfn(va)     xen_map_to_mfn((unsigned long)va)
> 
> You've lost parentheses around va.

Will fix.
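
Presumably along the lines of:

    #define vmap_to_mfn(va)     xen_map_to_mfn((unsigned long)(va))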

Hongyan




* Re: [PATCH v2] x86/vmap: handle superpages in vmap_to_mfn()
  2020-12-07 11:54   ` Hongyan Xia
@ 2020-12-07 11:57     ` Jan Beulich
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Beulich @ 2020-12-07 11:57 UTC (permalink / raw)
  To: Hongyan Xia; +Cc: Andrew Cooper, Roger Pau Monné, Wei Liu, xen-devel

On 07.12.2020 12:54, Hongyan Xia wrote:
> On Mon, 2020-12-07 at 11:11 +0100, Jan Beulich wrote:
>> On 03.12.2020 12:21, Hongyan Xia wrote:
>>> --- a/xen/arch/x86/mm.c
>>> +++ b/xen/arch/x86/mm.c
>>> @@ -5194,6 +5194,60 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long
>>> v)
>>>          }                                          \
>>>      } while ( false )
>>>  
>>> +/* Translate mapped Xen address to MFN. */
>>> +mfn_t xen_map_to_mfn(unsigned long va)
>>> +{
>>> +#define CHECK_MAPPED(cond_)     \
>>> +    if ( !(cond_) )             \
>>> +    {                           \
>>> +        ASSERT_UNREACHABLE();   \
>>> +        ret = INVALID_MFN;      \
>>> +        goto out;               \
>>> +    }                           \
>>
>> This should be coded such that use sites ...
>>
>>> +    bool locking = system_state > SYS_STATE_boot;
>>> +    unsigned int l2_offset = l2_table_offset(va);
>>> +    unsigned int l1_offset = l1_table_offset(va);
>>> +    const l3_pgentry_t *pl3e = virt_to_xen_l3e(va);
>>> +    const l2_pgentry_t *pl2e = NULL;
>>> +    const l1_pgentry_t *pl1e = NULL;
>>> +    struct page_info *l3page;
>>> +    mfn_t ret;
>>> +
>>> +    L3T_INIT(l3page);
>>> +    CHECK_MAPPED(pl3e)
>>> +    l3page = virt_to_page(pl3e);
>>> +    L3T_LOCK(l3page);
>>> +
>>> +    CHECK_MAPPED(l3e_get_flags(*pl3e) & _PAGE_PRESENT)
>>
>> ... will properly require a statement-ending semicolon. With
>> additionally the trailing underscore dropped from the macro's
>> parameter name
> 
> The immediate solution that came to mind is a do-while construct. Would
> you be happy with that?

Sure.

>>> --- a/xen/include/asm-x86/mm.h
>>> +++ b/xen/include/asm-x86/mm.h
>>> @@ -578,6 +578,7 @@ mfn_t alloc_xen_pagetable_new(void);
>>>  void free_xen_pagetable_new(mfn_t mfn);
>>>  
>>>  l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
>>> +mfn_t xen_map_to_mfn(unsigned long va);
>>
>> This is now a pretty proper companion of map_page_to_xen(), and
>> hence imo ought to be declared next to that one rather than here.
>> Ultimately Arm may also need to gain an implementation.
> 
> Since map_pages_to_xen() is in the common header, are we okay with
> having the declaration but not an implementation on the Arm side in
> this patch? Or do we also want to introduce the Arm implementation in
> this patch?

Just a declaration is fine imo. If a use in common code appears,
it'll still be noticeable at link time that Arm will need to
have an implementation added.

Jan



* Re: [PATCH] x86/vmap: handle superpages in vmap_to_mfn()
  2020-12-02 12:17   ` Hongyan Xia
@ 2020-12-02 13:05     ` Jan Beulich
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Beulich @ 2020-12-02 13:05 UTC (permalink / raw)
  To: Hongyan Xia
  Cc: jgrall, Andrew Cooper, Roger Pau Monné, Wei Liu, xen-devel

On 02.12.2020 13:17, Hongyan Xia wrote:
> On Wed, 2020-12-02 at 11:04 +0100, Jan Beulich wrote:
>> On 30.11.2020 17:50, Hongyan Xia wrote:
>>> +    l3page = virt_to_page(pl3e);
>>> +    L3T_LOCK(l3page);
>>> +
>>> +    ASSERT(l3e_get_flags(*pl3e) & _PAGE_PRESENT);
>>> +    if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
>>> +    {
>>> +        ret = mfn_add(l3e_get_mfn(*pl3e),
>>> +                      (l2_offset << PAGETABLE_ORDER) + l1_offset);
>>> +        L3T_UNLOCK(l3page);
>>> +        return ret;
>>
>> To keep the locked region as narrow as possible
>>
>>         mfn = l3e_get_mfn(*pl3e);
>>         L3T_UNLOCK(l3page);
>>         return mfn_add(mfn, (l2_offset << PAGETABLE_ORDER) +
>> l1_offset);
>>
>> ? However, in particular because of the recurring unlocks on
>> the exit paths I wonder whether it wouldn't be better to
>> restructure the whole function such that there'll be one unlock
>> and one return. Otoh I'm afraid what I'm asking for here is
>> going to yield a measurable set of goto-s ...
> 
> I can do that.
> 
> But what about the lock narrowing? Will be slightly more tricky when
> there is goto. Naturally:
> 
>     ret = full return mfn;
>     goto out;
> 
> out:
>     UNLOCK();
>     return ret;
> 
> but with narrowing, my first reaction is:
> 
>     ret = high bits of mfn;
>     l2_offset = 0;
>     l1_offset = 0;
>     goto out;
> 
> out:
>     UNLOCK();
>     return mfn + l2_offset << TABLE_ORDER + l1_offset;
> 
> To be honest, I doubt it is really worth it and I prefer the first one.

That's why I said "However ..." - I did realize both won't fit
together very well.

>>> +    }
>>> +
>>> +    pl2e = map_l2t_from_l3e(*pl3e) + l2_offset;
>>> +    ASSERT(l2e_get_flags(*pl2e) & _PAGE_PRESENT);
>>> +    if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
>>> +    {
>>> +        ret = mfn_add(l2e_get_mfn(*pl2e), l1_offset);
>>> +        L3T_UNLOCK(l3page);
>>> +        return ret;
>>> +    }
>>> +
>>> +    pl1e = map_l1t_from_l2e(*pl2e) + l1_offset;
>>> +    UNMAP_DOMAIN_PAGE(pl2e);
>>> +    ASSERT(l1e_get_flags(*pl1e) & _PAGE_PRESENT);
>>> +    ret = l1e_get_mfn(*pl1e);
>>> +    L3T_UNLOCK(l3page);
>>> +    UNMAP_DOMAIN_PAGE(pl1e);
>>> +
>>> +    return ret;
>>> +}
>>
>> Now for the name of the function: The only aspect tying it
>> somewhat to vmap() is that it assumes (asserts) it'll find a
>> valid mapping. I think it wants renaming, and vmap_to_mfn()
>> then would become a #define of it (perhaps even retaining
>> its property of getting unsigned long passed in), at which
>> point it also doesn't need moving out of page.h. As to the
>> actual name, xen_map_to_mfn() to somewhat match up with
>> map_pages_to_xen()?
> 
> I actually really like this idea. I will come up with something in the
> next rev. But if we want to make it generic, shouldn't we not ASSERT on
> pl*e and the PRESENT flag and just return INVALID_MFN? Then this
> function works on both mapped address (assumption of vmap_to_mfn()) and
> other use cases.

Depends - we can still document that this function is going to
require a valid mapping. I did consider the generalization, too,
but this to a certain degree also collides with virt_to_xen_l3e()
allocating an L3 table, which isn't what we would want for a
fully generic lookup function. IOW - I'm undecided and will take
it from wherever you move it (albeit with no promise to not ask
for further adjustment).

Jan



* Re: [PATCH] x86/vmap: handle superpages in vmap_to_mfn()
  2020-12-02 10:04 ` Jan Beulich
@ 2020-12-02 12:17   ` Hongyan Xia
  2020-12-02 13:05     ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Hongyan Xia @ 2020-12-02 12:17 UTC (permalink / raw)
  To: Jan Beulich
  Cc: jgrall, Andrew Cooper, Roger Pau Monné, Wei Liu, xen-devel

On Wed, 2020-12-02 at 11:04 +0100, Jan Beulich wrote:
> On 30.11.2020 17:50, Hongyan Xia wrote:
> > From: Hongyan Xia <hongyxia@amazon.com>
> > 
> > There is simply no guarantee that vmap won't return superpages to
> > the
> > caller. It can happen if the list of MFNs are contiguous, or we
> > simply
> > have a large granularity. Although rare, if such things do happen,
> > we
> > will simply hit BUG_ON() and crash. Properly handle such cases in a
> > new
> > implementation.
> > 
> > Note that vmap is now too large to be a macro, so implement it as a
> > normal function and move the declaration to mm.h (page.h cannot
> > handle
> > mfn_t).
> 
> I'm not happy about this movement, and it's also not really clear to
> me what problem page.h would have in principle. Yes, it can't
> include xen/mm.h, but I think it is long overdue that we disentangle
> this at least some. Let me see what I can do as a prereq for this
> change, but see also below.
> 
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -5194,6 +5194,49 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long
> > v)
> >          }                                          \
> >      } while ( false )
> >  
> > +mfn_t vmap_to_mfn(const void *v)
> > +{
> > +    bool locking = system_state > SYS_STATE_boot;
> > +    unsigned int l2_offset = l2_table_offset((unsigned long)v);
> > +    unsigned int l1_offset = l1_table_offset((unsigned long)v);
> > +    l3_pgentry_t *pl3e = virt_to_xen_l3e((unsigned long)v);
> > +    l2_pgentry_t *pl2e;
> > +    l1_pgentry_t *pl1e;
> 
> Can't all three of them be pointer-to-const?

They can (and yes they should).

> > +    struct page_info *l3page;
> > +    mfn_t ret;
> > +
> > +    ASSERT(pl3e);
> 
>     if ( !pl3e )
>     {
>         ASSERT_UNREACHABLE();
>         return INVALID_MFN;
>     }
> 
> as per the bottom of ./CODING_STYLE? (Similarly further down
> then.)

Will revise.

> > +    l3page = virt_to_page(pl3e);
> > +    L3T_LOCK(l3page);
> > +
> > +    ASSERT(l3e_get_flags(*pl3e) & _PAGE_PRESENT);
> > +    if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
> > +    {
> > +        ret = mfn_add(l3e_get_mfn(*pl3e),
> > +                      (l2_offset << PAGETABLE_ORDER) + l1_offset);
> > +        L3T_UNLOCK(l3page);
> > +        return ret;
> 
> To keep the locked region as narrow as possible
> 
>         mfn = l3e_get_mfn(*pl3e);
>         L3T_UNLOCK(l3page);
>         return mfn_add(mfn, (l2_offset << PAGETABLE_ORDER) +
> l1_offset);
> 
> ? However, in particular because of the recurring unlocks on
> the exit paths I wonder whether it wouldn't be better to
> restructure the whole function such that there'll be one unlock
> and one return. Otoh I'm afraid what I'm asking for here is
> going to yield a measurable set of goto-s ...

I can do that.

But what about the lock narrowing? Will be slightly more tricky when
there is goto. Naturally:

    ret = full return mfn;
    goto out;

out:
    UNLOCK();
    return ret;

but with narrowing, my first reaction is:

    ret = high bits of mfn;
    l2_offset = 0;
    l1_offset = 0;
    goto out;

out:
    UNLOCK();
    return mfn + l2_offset << TABLE_ORDER + l1_offset;

To be honest, I doubt it is really worth it and I prefer the first one.

> 
> > +    }
> > +
> > +    pl2e = map_l2t_from_l3e(*pl3e) + l2_offset;
> > +    ASSERT(l2e_get_flags(*pl2e) & _PAGE_PRESENT);
> > +    if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
> > +    {
> > +        ret = mfn_add(l2e_get_mfn(*pl2e), l1_offset);
> > +        L3T_UNLOCK(l3page);
> > +        return ret;
> > +    }
> > +
> > +    pl1e = map_l1t_from_l2e(*pl2e) + l1_offset;
> > +    UNMAP_DOMAIN_PAGE(pl2e);
> > +    ASSERT(l1e_get_flags(*pl1e) & _PAGE_PRESENT);
> > +    ret = l1e_get_mfn(*pl1e);
> > +    L3T_UNLOCK(l3page);
> > +    UNMAP_DOMAIN_PAGE(pl1e);
> > +
> > +    return ret;
> > +}
> 
> Now for the name of the function: The only aspect tying it
> somewhat to vmap() is that it assumes (asserts) it'll find a
> valid mapping. I think it wants renaming, and vmap_to_mfn()
> then would become a #define of it (perhaps even retaining
> its property of getting unsigned long passed in), at which
> point it also doesn't need moving out of page.h. As to the
> actual name, xen_map_to_mfn() to somewhat match up with
> map_pages_to_xen()?

I actually really like this idea. I will come up with something in the
next rev. But if we want to make it generic, shouldn't we avoid
ASSERTing on pl*e and the PRESENT flag and just return INVALID_MFN?
Then this function works both for mapped addresses (the assumption
vmap_to_mfn() makes) and for other use cases.

Hongyan




* Re: [PATCH] x86/vmap: handle superpages in vmap_to_mfn()
  2020-11-30 16:50 [PATCH] " Hongyan Xia
@ 2020-12-02 10:04 ` Jan Beulich
  2020-12-02 12:17   ` Hongyan Xia
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2020-12-02 10:04 UTC (permalink / raw)
  To: Hongyan Xia
  Cc: jgrall, Andrew Cooper, Roger Pau Monné, Wei Liu, xen-devel

On 30.11.2020 17:50, Hongyan Xia wrote:
> From: Hongyan Xia <hongyxia@amazon.com>
> 
> There is simply no guarantee that vmap won't return superpages to the
> caller. It can happen if the list of MFNs are contiguous, or we simply
> have a large granularity. Although rare, if such things do happen, we
> will simply hit BUG_ON() and crash. Properly handle such cases in a new
> implementation.
> 
> Note that vmap is now too large to be a macro, so implement it as a
> normal function and move the declaration to mm.h (page.h cannot handle
> mfn_t).

I'm not happy about this movement, and it's also not really clear to
me what problem page.h would have in principle. Yes, it can't
include xen/mm.h, but I think it is long overdue that we disentangle
this at least some. Let me see what I can do as a prereq for this
change, but see also below.

> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -5194,6 +5194,49 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
>          }                                          \
>      } while ( false )
>  
> +mfn_t vmap_to_mfn(const void *v)
> +{
> +    bool locking = system_state > SYS_STATE_boot;
> +    unsigned int l2_offset = l2_table_offset((unsigned long)v);
> +    unsigned int l1_offset = l1_table_offset((unsigned long)v);
> +    l3_pgentry_t *pl3e = virt_to_xen_l3e((unsigned long)v);
> +    l2_pgentry_t *pl2e;
> +    l1_pgentry_t *pl1e;

Can't all three of them be pointer-to-const?

> +    struct page_info *l3page;
> +    mfn_t ret;
> +
> +    ASSERT(pl3e);

    if ( !pl3e )
    {
        ASSERT_UNREACHABLE();
        return INVALID_MFN;
    }

as per the bottom of ./CODING_STYLE? (Similarly further down
then.)

> +    l3page = virt_to_page(pl3e);
> +    L3T_LOCK(l3page);
> +
> +    ASSERT(l3e_get_flags(*pl3e) & _PAGE_PRESENT);
> +    if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
> +    {
> +        ret = mfn_add(l3e_get_mfn(*pl3e),
> +                      (l2_offset << PAGETABLE_ORDER) + l1_offset);
> +        L3T_UNLOCK(l3page);
> +        return ret;

To keep the locked region as narrow as possible

        mfn = l3e_get_mfn(*pl3e);
        L3T_UNLOCK(l3page);
        return mfn_add(mfn, (l2_offset << PAGETABLE_ORDER) + l1_offset);

? However, in particular because of the recurring unlocks on
the exit paths I wonder whether it wouldn't be better to
restructure the whole function such that there'll be one unlock
and one return. Otoh I'm afraid what I'm asking for here is
going to yield a measurable set of goto-s ...

> +    }
> +
> +    pl2e = map_l2t_from_l3e(*pl3e) + l2_offset;
> +    ASSERT(l2e_get_flags(*pl2e) & _PAGE_PRESENT);
> +    if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
> +    {
> +        ret = mfn_add(l2e_get_mfn(*pl2e), l1_offset);
> +        L3T_UNLOCK(l3page);
> +        return ret;
> +    }
> +
> +    pl1e = map_l1t_from_l2e(*pl2e) + l1_offset;
> +    UNMAP_DOMAIN_PAGE(pl2e);
> +    ASSERT(l1e_get_flags(*pl1e) & _PAGE_PRESENT);
> +    ret = l1e_get_mfn(*pl1e);
> +    L3T_UNLOCK(l3page);
> +    UNMAP_DOMAIN_PAGE(pl1e);
> +
> +    return ret;
> +}

Now for the name of the function: The only aspect tying it
somewhat to vmap() is that it assumes (asserts) it'll find a
valid mapping. I think it wants renaming, and vmap_to_mfn()
then would become a #define of it (perhaps even retaining
its property of getting unsigned long passed in), at which
point it also doesn't need moving out of page.h. As to the
actual name, xen_map_to_mfn() to somewhat match up with
map_pages_to_xen()?

Jan



* [PATCH] x86/vmap: handle superpages in vmap_to_mfn()
@ 2020-11-30 16:50 Hongyan Xia
  2020-12-02 10:04 ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Hongyan Xia @ 2020-11-30 16:50 UTC (permalink / raw)
  To: xen-devel
  Cc: jgrall, Jan Beulich, Andrew Cooper, Roger Pau Monné, Wei Liu

From: Hongyan Xia <hongyxia@amazon.com>

There is simply no guarantee that vmap won't return superpages to the
caller. It can happen if the list of MFNs is contiguous, or if we have
a large mapping granularity. Such cases are rare, but when they do
happen we hit a BUG_ON() and crash. Properly handle them in a new
implementation.

Note that vmap_to_mfn() is now too large to be a macro, so implement it
as a normal function and move the declaration to mm.h (page.h cannot
handle mfn_t).

Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
---
 xen/arch/x86/domain_page.c |  2 +-
 xen/arch/x86/mm.c          | 43 ++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/mm.h   |  2 ++
 xen/include/asm-x86/page.h |  2 --
 4 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index eac5e3304fb8..4ba75d397a17 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -338,7 +338,7 @@ mfn_t domain_page_map_to_mfn(const void *ptr)
         return _mfn(virt_to_mfn(ptr));
 
     if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
-        return vmap_to_mfn(va);
+        return vmap_to_mfn(ptr);
 
     ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 5a50339284c7..c22385e90d8a 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5194,6 +5194,49 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
         }                                          \
     } while ( false )
 
+mfn_t vmap_to_mfn(const void *v)
+{
+    bool locking = system_state > SYS_STATE_boot;
+    unsigned int l2_offset = l2_table_offset((unsigned long)v);
+    unsigned int l1_offset = l1_table_offset((unsigned long)v);
+    l3_pgentry_t *pl3e = virt_to_xen_l3e((unsigned long)v);
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+    struct page_info *l3page;
+    mfn_t ret;
+
+    ASSERT(pl3e);
+    l3page = virt_to_page(pl3e);
+    L3T_LOCK(l3page);
+
+    ASSERT(l3e_get_flags(*pl3e) & _PAGE_PRESENT);
+    if ( l3e_get_flags(*pl3e) & _PAGE_PSE )
+    {
+        ret = mfn_add(l3e_get_mfn(*pl3e),
+                      (l2_offset << PAGETABLE_ORDER) + l1_offset);
+        L3T_UNLOCK(l3page);
+        return ret;
+    }
+
+    pl2e = map_l2t_from_l3e(*pl3e) + l2_offset;
+    ASSERT(l2e_get_flags(*pl2e) & _PAGE_PRESENT);
+    if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
+    {
+        ret = mfn_add(l2e_get_mfn(*pl2e), l1_offset);
+        L3T_UNLOCK(l3page);
+        return ret;
+    }
+
+    pl1e = map_l1t_from_l2e(*pl2e) + l1_offset;
+    UNMAP_DOMAIN_PAGE(pl2e);
+    ASSERT(l1e_get_flags(*pl1e) & _PAGE_PRESENT);
+    ret = l1e_get_mfn(*pl1e);
+    L3T_UNLOCK(l3page);
+    UNMAP_DOMAIN_PAGE(pl1e);
+
+    return ret;
+}
+
 int map_pages_to_xen(
     unsigned long virt,
     mfn_t mfn,
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index deeba75a1cbb..6354d165f48b 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -578,6 +578,8 @@ mfn_t alloc_xen_pagetable_new(void);
 void free_xen_pagetable_new(mfn_t mfn);
 
 l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
+mfn_t vmap_to_mfn(const void *v);
+#define vmap_to_page(va) mfn_to_page(vmap_to_mfn(va))
 
 int __sync_local_execstate(void);
 
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 7a771baf7cb3..b2bcc95fd2de 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -291,8 +291,6 @@ void copy_page_sse2(void *, const void *);
 #define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
 #define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
 #define paddr_to_pdx(pa)    pfn_to_pdx(paddr_to_pfn(pa))
-#define vmap_to_mfn(va)     l1e_get_mfn(*virt_to_xen_l1e((unsigned long)(va)))
-#define vmap_to_page(va)    mfn_to_page(vmap_to_mfn(va))
 
 #endif /* !defined(__ASSEMBLY__) */
 
-- 
2.17.1



