All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] gnttab: defer allocation of status frame tracking array
@ 2020-12-23 15:13 Jan Beulich
  2020-12-24  9:57 ` Julien Grall
  2021-01-04 15:04 ` Andrew Cooper
  0 siblings, 2 replies; 11+ messages in thread
From: Jan Beulich @ 2020-12-23 15:13 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall,
	Stefano Stabellini, Wei Liu

This array can be large when many grant frames are permitted; avoid
allocating it when it's not going to be used anyway, by doing this only
in gnttab_populate_status_frames().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Defer allocation to when a domain actually switches to the v2 grant
    API.

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1725,6 +1728,17 @@ gnttab_populate_status_frames(struct dom
     /* Make sure, prior version checks are architectural visible */
     block_speculation();
 
+    if ( gt->status == ZERO_BLOCK_PTR )
+    {
+        gt->status = xzalloc_array(grant_status_t *,
+                                   grant_to_status_frames(gt->max_grant_frames));
+        if ( !gt->status )
+        {
+            gt->status = ZERO_BLOCK_PTR;
+            return -ENOMEM;
+        }
+    }
+
     for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
     {
         if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
@@ -1745,18 +1759,23 @@ status_alloc_failed:
         free_xenheap_page(gt->status[i]);
         gt->status[i] = NULL;
     }
+    if ( !nr_status_frames(gt) )
+    {
+        xfree(gt->status);
+        gt->status = ZERO_BLOCK_PTR;
+    }
     return -ENOMEM;
 }
 
 static int
 gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
 {
-    unsigned int i;
+    unsigned int i, n = nr_status_frames(gt);
 
     /* Make sure, prior version checks are architectural visible */
     block_speculation();
 
-    for ( i = 0; i < nr_status_frames(gt); i++ )
+    for ( i = 0; i < n; i++ )
     {
         struct page_info *pg = virt_to_page(gt->status[i]);
         gfn_t gfn = gnttab_get_frame_gfn(gt, true, i);
@@ -1811,12 +1830,12 @@ gnttab_unpopulate_status_frames(struct d
         page_set_owner(pg, NULL);
     }
 
-    for ( i = 0; i < nr_status_frames(gt); i++ )
-    {
-        free_xenheap_page(gt->status[i]);
-        gt->status[i] = NULL;
-    }
     gt->nr_status_frames = 0;
+    smp_wmb(); /* Just in case - all accesses should be under lock. */
+    for ( i = 0; i < n; i++ )
+        free_xenheap_page(gt->status[i]);
+    xfree(gt->status);
+    gt->status = ZERO_BLOCK_PTR;
 
     return 0;
 }
@@ -1943,11 +1962,11 @@ int grant_table_init(struct domain *d, i
     if ( gt->shared_raw == NULL )
         goto out;
 
-    /* Status pages for grant table - for version 2 */
-    gt->status = xzalloc_array(grant_status_t *,
-                               grant_to_status_frames(gt->max_grant_frames));
-    if ( gt->status == NULL )
-        goto out;
+    /*
+     * Status page tracking array for v2 gets allocated on demand. But don't
+     * leave a NULL pointer there.
+     */
+    gt->status = ZERO_BLOCK_PTR;
 
     grant_write_lock(gt);
 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2020-12-23 15:13 [PATCH v2] gnttab: defer allocation of status frame tracking array Jan Beulich
@ 2020-12-24  9:57 ` Julien Grall
  2021-01-04 13:37   ` Jan Beulich
  2021-01-04 15:04 ` Andrew Cooper
  1 sibling, 1 reply; 11+ messages in thread
From: Julien Grall @ 2020-12-24  9:57 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Stefano Stabellini, Wei Liu

Hi Jan,

On 23/12/2020 15:13, Jan Beulich wrote:
> This array can be large when many grant frames are permitted; avoid
> allocating it when it's not going to be used anyway, by doing this only
> in gnttab_populate_status_frames().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Defer allocation to when a domain actually switches to the v2 grant
>      API.
> 
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -1725,6 +1728,17 @@ gnttab_populate_status_frames(struct dom
>       /* Make sure, prior version checks are architectural visible */
>       block_speculation();
>   
> +    if ( gt->status == ZERO_BLOCK_PTR )
> +    {
> +        gt->status = xzalloc_array(grant_status_t *,
> +                                   grant_to_status_frames(gt->max_grant_frames));
> +        if ( !gt->status )
> +        {
> +            gt->status = ZERO_BLOCK_PTR;
> +            return -ENOMEM;
> +        }
> +    }
> +
>       for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
>       {
>           if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
> @@ -1745,18 +1759,23 @@ status_alloc_failed:
>           free_xenheap_page(gt->status[i]);
>           gt->status[i] = NULL;
>       }
> +    if ( !nr_status_frames(gt) )
> +    {
> +        xfree(gt->status);
> +        gt->status = ZERO_BLOCK_PTR;
> +    }
>       return -ENOMEM;
>   }
>   
>   static int
>   gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>   {
> -    unsigned int i;
> +    unsigned int i, n = nr_status_frames(gt);
>   
>       /* Make sure, prior version checks are architectural visible */
>       block_speculation();
>   
> -    for ( i = 0; i < nr_status_frames(gt); i++ )
> +    for ( i = 0; i < n; i++ )
>       {
>           struct page_info *pg = virt_to_page(gt->status[i]);
>           gfn_t gfn = gnttab_get_frame_gfn(gt, true, i);
> @@ -1811,12 +1830,12 @@ gnttab_unpopulate_status_frames(struct d
>           page_set_owner(pg, NULL);
>       }
>   
> -    for ( i = 0; i < nr_status_frames(gt); i++ )
> -    {
> -        free_xenheap_page(gt->status[i]);
> -        gt->status[i] = NULL;
> -    }
>       gt->nr_status_frames = 0;
> +    smp_wmb(); /* Just in case - all accesses should be under lock. */

I think gt->status cannot be accessed locklessly. If a entity read 
gt->nr_status_frames != 0, then there is no promise the array will be 
accessible when trying to access it as it may have been freed.

So I would drop the barrier here.

The rest of the code looks good to me.

Cheers,

-- 
Julien Grall


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2020-12-24  9:57 ` Julien Grall
@ 2021-01-04 13:37   ` Jan Beulich
  2021-01-04 14:16     ` Julien Grall
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2021-01-04 13:37 UTC (permalink / raw)
  To: Julien Grall
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Stefano Stabellini,
	Wei Liu, xen-devel

On 24.12.2020 10:57, Julien Grall wrote:
> Hi Jan,
> 
> On 23/12/2020 15:13, Jan Beulich wrote:
>> This array can be large when many grant frames are permitted; avoid
>> allocating it when it's not going to be used anyway, by doing this only
>> in gnttab_populate_status_frames().
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>      API.
>>
>> --- a/xen/common/grant_table.c
>> +++ b/xen/common/grant_table.c
>> @@ -1725,6 +1728,17 @@ gnttab_populate_status_frames(struct dom
>>       /* Make sure, prior version checks are architectural visible */
>>       block_speculation();
>>   
>> +    if ( gt->status == ZERO_BLOCK_PTR )
>> +    {
>> +        gt->status = xzalloc_array(grant_status_t *,
>> +                                   grant_to_status_frames(gt->max_grant_frames));
>> +        if ( !gt->status )
>> +        {
>> +            gt->status = ZERO_BLOCK_PTR;
>> +            return -ENOMEM;
>> +        }
>> +    }
>> +
>>       for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
>>       {
>>           if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
>> @@ -1745,18 +1759,23 @@ status_alloc_failed:
>>           free_xenheap_page(gt->status[i]);
>>           gt->status[i] = NULL;
>>       }
>> +    if ( !nr_status_frames(gt) )
>> +    {
>> +        xfree(gt->status);
>> +        gt->status = ZERO_BLOCK_PTR;
>> +    }
>>       return -ENOMEM;
>>   }
>>   
>>   static int
>>   gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>>   {
>> -    unsigned int i;
>> +    unsigned int i, n = nr_status_frames(gt);
>>   
>>       /* Make sure, prior version checks are architectural visible */
>>       block_speculation();
>>   
>> -    for ( i = 0; i < nr_status_frames(gt); i++ )
>> +    for ( i = 0; i < n; i++ )
>>       {
>>           struct page_info *pg = virt_to_page(gt->status[i]);
>>           gfn_t gfn = gnttab_get_frame_gfn(gt, true, i);
>> @@ -1811,12 +1830,12 @@ gnttab_unpopulate_status_frames(struct d
>>           page_set_owner(pg, NULL);
>>       }
>>   
>> -    for ( i = 0; i < nr_status_frames(gt); i++ )
>> -    {
>> -        free_xenheap_page(gt->status[i]);
>> -        gt->status[i] = NULL;
>> -    }
>>       gt->nr_status_frames = 0;
>> +    smp_wmb(); /* Just in case - all accesses should be under lock. */
> 
> I think gt->status cannot be accessed locklessly. If a entity read 
> gt->nr_status_frames != 0, then there is no promise the array will be 
> accessible when trying to access it as it may have been freed.

Yet the common principle of (pointer,count) pairs to describe arrays
to be updated / accessed in sequences guaranteeing a non-zero count
implies a valid pointer could as well be considered to apply here.
I.e. when freeing, at least in principle clearing count first would
be a sensible thing to do, wouldn't it? Subsequently allocation and
consumers could be updated to follow suit ...

> So I would drop the barrier here.

I certainly can (unless double checking would tell me otherwise),
but I'm not convinced this is a good idea. I'd rather have a barrier
too many in the code than one too little, even if the "too little"
may only be the result of a future change. And this isn't a path
where performance considerations would suggest avoiding barriers
when they're not strictly needed.

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-04 13:37   ` Jan Beulich
@ 2021-01-04 14:16     ` Julien Grall
  2021-01-04 14:44       ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: Julien Grall @ 2021-01-04 14:16 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Stefano Stabellini,
	Wei Liu, xen-devel

Hi Jan,

On 04/01/2021 13:37, Jan Beulich wrote:
> On 24.12.2020 10:57, Julien Grall wrote:
>> Hi Jan,
>>
>> On 23/12/2020 15:13, Jan Beulich wrote:
>>> This array can be large when many grant frames are permitted; avoid
>>> allocating it when it's not going to be used anyway, by doing this only
>>> in gnttab_populate_status_frames().
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>>       API.
>>>
>>> --- a/xen/common/grant_table.c
>>> +++ b/xen/common/grant_table.c
>>> @@ -1725,6 +1728,17 @@ gnttab_populate_status_frames(struct dom
>>>        /* Make sure, prior version checks are architectural visible */
>>>        block_speculation();
>>>    
>>> +    if ( gt->status == ZERO_BLOCK_PTR )
>>> +    {
>>> +        gt->status = xzalloc_array(grant_status_t *,
>>> +                                   grant_to_status_frames(gt->max_grant_frames));
>>> +        if ( !gt->status )
>>> +        {
>>> +            gt->status = ZERO_BLOCK_PTR;
>>> +            return -ENOMEM;
>>> +        }
>>> +    }
>>> +
>>>        for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
>>>        {
>>>            if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
>>> @@ -1745,18 +1759,23 @@ status_alloc_failed:
>>>            free_xenheap_page(gt->status[i]);
>>>            gt->status[i] = NULL;
>>>        }
>>> +    if ( !nr_status_frames(gt) )
>>> +    {
>>> +        xfree(gt->status);
>>> +        gt->status = ZERO_BLOCK_PTR;
>>> +    }
>>>        return -ENOMEM;
>>>    }
>>>    
>>>    static int
>>>    gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>>>    {
>>> -    unsigned int i;
>>> +    unsigned int i, n = nr_status_frames(gt);
>>>    
>>>        /* Make sure, prior version checks are architectural visible */
>>>        block_speculation();
>>>    
>>> -    for ( i = 0; i < nr_status_frames(gt); i++ )
>>> +    for ( i = 0; i < n; i++ )
>>>        {
>>>            struct page_info *pg = virt_to_page(gt->status[i]);
>>>            gfn_t gfn = gnttab_get_frame_gfn(gt, true, i);
>>> @@ -1811,12 +1830,12 @@ gnttab_unpopulate_status_frames(struct d
>>>            page_set_owner(pg, NULL);
>>>        }
>>>    
>>> -    for ( i = 0; i < nr_status_frames(gt); i++ )
>>> -    {
>>> -        free_xenheap_page(gt->status[i]);
>>> -        gt->status[i] = NULL;
>>> -    }
>>>        gt->nr_status_frames = 0;
>>> +    smp_wmb(); /* Just in case - all accesses should be under lock. */
>>
>> I think gt->status cannot be accessed locklessly. If a entity read
>> gt->nr_status_frames != 0, then there is no promise the array will be
>> accessible when trying to access it as it may have been freed.
> 
> Yet the common principle of (pointer,count) pairs to describe arrays
> to be updated / accessed in sequences guaranteeing a non-zero count
> implies a valid pointer could as well be considered to apply here.
> I.e. when freeing, at least in principle clearing count first would
> be a sensible thing to do, wouldn't it?

I am not arguing on whether this is a sensible thing to do but how 
someone else can make use of it. The common lockless pattern to access 
the array would be checking the count and if it is not zero, then access 
the array. Imagine the following:

CPU0 (free the array)       | CPU1 (access the array)
                             |
                             | if ( !gt->nr_status_frames )
gt->nr_status_frames = 0;   |   return;
smp_wmb();                  |
gt->status = NULL;          |
                             | smp_rmb();
                             | access gt->status[X];

Without any lock (or refcounting), I can't see how the example above 
would be safe.

> Subsequently allocation and
> consumers could be updated to follow suit ...
The allocation and free cannot happen locklessly (at least in the 
current state). For the consumers see above.

> 
>> So I would drop the barrier here.
> 
> I certainly can (unless double checking would tell me otherwise),
> but I'm not convinced this is a good idea. I'd rather have a barrier
> too many in the code than one too little, even if the "too little"
> may only be the result of a future change.

Future-proof code is always good to have. However, we need to avoid 
adding barriers that doesn't seem to usuable either today or in the future.

Your in-code comment suggests the barrier would help when the array is 
access without any lock. However, I can't see how this hypothetical code 
would work. Can you provide an example how you can deal with memory 
freed behind your back?

> And this isn't a path
> where performance considerations would suggest avoiding barriers
> when they're not strictly needed.

I am not concerned about the performance here.

Cheers,

-- 
Julien Grall


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-04 14:16     ` Julien Grall
@ 2021-01-04 14:44       ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2021-01-04 14:44 UTC (permalink / raw)
  To: Julien Grall
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Stefano Stabellini,
	Wei Liu, xen-devel

On 04.01.2021 15:16, Julien Grall wrote:
> Hi Jan,
> 
> On 04/01/2021 13:37, Jan Beulich wrote:
>> On 24.12.2020 10:57, Julien Grall wrote:
>>> Hi Jan,
>>>
>>> On 23/12/2020 15:13, Jan Beulich wrote:
>>>> This array can be large when many grant frames are permitted; avoid
>>>> allocating it when it's not going to be used anyway, by doing this only
>>>> in gnttab_populate_status_frames().
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>>>       API.
>>>>
>>>> --- a/xen/common/grant_table.c
>>>> +++ b/xen/common/grant_table.c
>>>> @@ -1725,6 +1728,17 @@ gnttab_populate_status_frames(struct dom
>>>>        /* Make sure, prior version checks are architectural visible */
>>>>        block_speculation();
>>>>    
>>>> +    if ( gt->status == ZERO_BLOCK_PTR )
>>>> +    {
>>>> +        gt->status = xzalloc_array(grant_status_t *,
>>>> +                                   grant_to_status_frames(gt->max_grant_frames));
>>>> +        if ( !gt->status )
>>>> +        {
>>>> +            gt->status = ZERO_BLOCK_PTR;
>>>> +            return -ENOMEM;
>>>> +        }
>>>> +    }
>>>> +
>>>>        for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
>>>>        {
>>>>            if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
>>>> @@ -1745,18 +1759,23 @@ status_alloc_failed:
>>>>            free_xenheap_page(gt->status[i]);
>>>>            gt->status[i] = NULL;
>>>>        }
>>>> +    if ( !nr_status_frames(gt) )
>>>> +    {
>>>> +        xfree(gt->status);
>>>> +        gt->status = ZERO_BLOCK_PTR;
>>>> +    }
>>>>        return -ENOMEM;
>>>>    }
>>>>    
>>>>    static int
>>>>    gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
>>>>    {
>>>> -    unsigned int i;
>>>> +    unsigned int i, n = nr_status_frames(gt);
>>>>    
>>>>        /* Make sure, prior version checks are architectural visible */
>>>>        block_speculation();
>>>>    
>>>> -    for ( i = 0; i < nr_status_frames(gt); i++ )
>>>> +    for ( i = 0; i < n; i++ )
>>>>        {
>>>>            struct page_info *pg = virt_to_page(gt->status[i]);
>>>>            gfn_t gfn = gnttab_get_frame_gfn(gt, true, i);
>>>> @@ -1811,12 +1830,12 @@ gnttab_unpopulate_status_frames(struct d
>>>>            page_set_owner(pg, NULL);
>>>>        }
>>>>    
>>>> -    for ( i = 0; i < nr_status_frames(gt); i++ )
>>>> -    {
>>>> -        free_xenheap_page(gt->status[i]);
>>>> -        gt->status[i] = NULL;
>>>> -    }
>>>>        gt->nr_status_frames = 0;
>>>> +    smp_wmb(); /* Just in case - all accesses should be under lock. */
>>>
>>> I think gt->status cannot be accessed locklessly. If a entity read
>>> gt->nr_status_frames != 0, then there is no promise the array will be
>>> accessible when trying to access it as it may have been freed.
>>
>> Yet the common principle of (pointer,count) pairs to describe arrays
>> to be updated / accessed in sequences guaranteeing a non-zero count
>> implies a valid pointer could as well be considered to apply here.
>> I.e. when freeing, at least in principle clearing count first would
>> be a sensible thing to do, wouldn't it?
> 
> I am not arguing on whether this is a sensible thing to do but how 
> someone else can make use of it. The common lockless pattern to access 
> the array would be checking the count and if it is not zero, then access 
> the array. Imagine the following:
> 
> CPU0 (free the array)       | CPU1 (access the array)
>                              |
>                              | if ( !gt->nr_status_frames )
> gt->nr_status_frames = 0;   |   return;
> smp_wmb();                  |
> gt->status = NULL;          |
>                              | smp_rmb();
>                              | access gt->status[X];
> 
> Without any lock (or refcounting), I can't see how the example above 
> would be safe.

Sure, I shouldn't have over-simplified. You can't guard against
a racing free this way. You can guard against an incomplete
allocation by setting count strictly after pointer. And the
natural order of freeing then is clearing count before freeing
pointer. I'll go and check that accesses indeed do all happen
under lock, and drop the barrier if so.

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2020-12-23 15:13 [PATCH v2] gnttab: defer allocation of status frame tracking array Jan Beulich
  2020-12-24  9:57 ` Julien Grall
@ 2021-01-04 15:04 ` Andrew Cooper
  2021-01-04 15:22   ` Jan Beulich
  1 sibling, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2021-01-04 15:04 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: George Dunlap, Ian Jackson, Julien Grall, Stefano Stabellini, Wei Liu

On 23/12/2020 15:13, Jan Beulich wrote:
> This array can be large when many grant frames are permitted; avoid
> allocating it when it's not going to be used anyway, by doing this only
> in gnttab_populate_status_frames().
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Defer allocation to when a domain actually switches to the v2 grant
>     API.

I see this as a backwards step.  It turns a build-time -ENOMEM into a
runtime -ENOMEM, and if you recall from one of the XSAs, the Windows PV
drivers will BUG() if set_version fails.  (Yes - this is dumb behaviour,
but it is in the field now.)

It is the toolstack's responsibility to not over-extend the capabilities
of the host, but this is impossible to do with behaviour like this. 
Failing at domain creation time is a whole lot less bad for the system
as a whole.

Longer term, allocations like this ought to come from some per-domain
pool (possibly not shared with the shadow memory pool), which can be
sized suitably at create time.

This is one of the points behind introducing dmalloc()/etc in the
fault-injection series.  At the moment, we have absolutely no clue what
the memory overhead of a domain is in Xen.  It is impossible to
calculate how many VMs will fit on a host, because the only way you can
answer the question is to attempt to create them all.

Understanding how much memory a domain actually takes is the first
step.  Creating a plausible bound is the second step, and that will be
of substantial use to higher level management software.

~Andrew


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-04 15:04 ` Andrew Cooper
@ 2021-01-04 15:22   ` Jan Beulich
  2021-01-04 15:41     ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2021-01-04 15:22 UTC (permalink / raw)
  To: Andrew Cooper, Julien Grall
  Cc: George Dunlap, Ian Jackson, Stefano Stabellini, Wei Liu, xen-devel

On 04.01.2021 16:04, Andrew Cooper wrote:
> On 23/12/2020 15:13, Jan Beulich wrote:
>> This array can be large when many grant frames are permitted; avoid
>> allocating it when it's not going to be used anyway, by doing this only
>> in gnttab_populate_status_frames().
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>     API.
> 
> I see this as a backwards step.  It turns a build-time -ENOMEM into a
> runtime -ENOMEM, and if you recall from one of the XSAs, the Windows PV
> drivers will BUG() if set_version fails.  (Yes - this is dumb behaviour,
> but it is in the field now.)

Well, if this was the only source of -ENOMEM (i.e. none was there
previously), I'd surely understand the concern. But there have been
memory allocations before on this path. In any event, this will
need settling between you and Julien, as it was him to request the
change.

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-04 15:22   ` Jan Beulich
@ 2021-01-04 15:41     ` Andrew Cooper
  2021-01-04 16:00       ` Jan Beulich
  2021-01-08 10:17       ` Julien Grall
  0 siblings, 2 replies; 11+ messages in thread
From: Andrew Cooper @ 2021-01-04 15:41 UTC (permalink / raw)
  To: Jan Beulich, Julien Grall
  Cc: George Dunlap, Ian Jackson, Stefano Stabellini, Wei Liu, xen-devel

On 04/01/2021 15:22, Jan Beulich wrote:
> On 04.01.2021 16:04, Andrew Cooper wrote:
>> On 23/12/2020 15:13, Jan Beulich wrote:
>>> This array can be large when many grant frames are permitted; avoid
>>> allocating it when it's not going to be used anyway, by doing this only
>>> in gnttab_populate_status_frames().
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>>     API.
>> I see this as a backwards step.  It turns a build-time -ENOMEM into a
>> runtime -ENOMEM, and if you recall from one of the XSAs, the Windows PV
>> drivers will BUG() if set_version fails.  (Yes - this is dumb behaviour,
>> but it is in the field now.)
> Well, if this was the only source of -ENOMEM (i.e. none was there
> previously), I'd surely understand the concern. But there have been
> memory allocations before on this path.

... you're literally writing a patch saying "avoid large allocation at
domain create time, and make it at runtime instead" and then trying to
argue that it isn't a concern because there are other memory allocations.

It is very definitely a backwards step irrespective of the size of the
allocation, even if the current behaviour isn't necessarily perfect.

>  In any event, this will
> need settling between you and Julien, as it was him to request the
> change.

Well - that's because gnttab v2 is disabled in general on ARM.

Conditionally avoiding the allocation because of opt_gnttab_max_version
being 1 would be ok, because it doesn't introduce new runtime failures
for guests. 

The correctness of this change does depend on opt_gnttab_max_version
being invariant for the lifetime of a domain.  If it were to become a
runtime parameter, it would need caching per domain, (which is frankly
how it should have been implemented all along, along with a parameter in
XEN_DOMCTL_createdomain).

~Andrew


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-04 15:41     ` Andrew Cooper
@ 2021-01-04 16:00       ` Jan Beulich
  2021-01-08 10:17       ` Julien Grall
  1 sibling, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2021-01-04 16:00 UTC (permalink / raw)
  To: Andrew Cooper, Julien Grall
  Cc: George Dunlap, Ian Jackson, Stefano Stabellini, Wei Liu, xen-devel

On 04.01.2021 16:41, Andrew Cooper wrote:
> On 04/01/2021 15:22, Jan Beulich wrote:
>> On 04.01.2021 16:04, Andrew Cooper wrote:
>>> On 23/12/2020 15:13, Jan Beulich wrote:
>>>> This array can be large when many grant frames are permitted; avoid
>>>> allocating it when it's not going to be used anyway, by doing this only
>>>> in gnttab_populate_status_frames().
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>>>     API.
>>> I see this as a backwards step.  It turns a build-time -ENOMEM into a
>>> runtime -ENOMEM, and if you recall from one of the XSAs, the Windows PV
>>> drivers will BUG() if set_version fails.  (Yes - this is dumb behaviour,
>>> but it is in the field now.)
>> Well, if this was the only source of -ENOMEM (i.e. none was there
>> previously), I'd surely understand the concern. But there have been
>> memory allocations before on this path.
> 
> ... you're literally writing a patch saying "avoid large allocation at
> domain create time, and make it at runtime instead" and then trying to
> argue that it isn't a concern because there are other memory allocations.
> 
> It is very definitely a backwards step irrespective of the size of the
> allocation, even if the current behaviour isn't necessarily perfect.

I agree when taking this one possible perspective. There's the other
one as well: For domains never switching to the v2 API, allocating
the table at build time is purely a waste of memory. Note also how
the description says "This array can be large" - it doesn't have to
be, and hence the other involved allocations may be at higher risk
of yielding -ENOMEM.

>>  In any event, this will
>> need settling between you and Julien, as it was him to request the
>> change.
> 
> Well - that's because gnttab v2 is disabled in general on ARM.

I'm afraid I don't understand the "because" here: For Arm it's
irrelevant whether v1 or v2 of this patch gets used - the table
would never be allocated anymore in either case.

> Conditionally avoiding the allocation because of opt_gnttab_max_version
> being 1 would be ok, because it doesn't introduce new runtime failures
> for guests. 
> 
> The correctness of this change does depend on opt_gnttab_max_version
> being invariant for the lifetime of a domain.  If it were to become a
> runtime parameter, it would need caching per domain, (which is frankly
> how it should have been implemented all along, along with a parameter in
> XEN_DOMCTL_createdomain).

I think it is well understood that conversion of boot time
(command line) options to runtime ones needs careful inspection
of the consumers of the controlled variable(s). The option isn't
a runtime one right now, which is all that counts. (I agree this
would better be a per-domain property set when creating one.)

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-04 15:41     ` Andrew Cooper
  2021-01-04 16:00       ` Jan Beulich
@ 2021-01-08 10:17       ` Julien Grall
  2021-01-08 10:59         ` Jan Beulich
  1 sibling, 1 reply; 11+ messages in thread
From: Julien Grall @ 2021-01-08 10:17 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich
  Cc: George Dunlap, Ian Jackson, Stefano Stabellini, Wei Liu, xen-devel

Hi Andrew and Jan,

On 04/01/2021 15:41, Andrew Cooper wrote:
> On 04/01/2021 15:22, Jan Beulich wrote:
>> On 04.01.2021 16:04, Andrew Cooper wrote:
>>> On 23/12/2020 15:13, Jan Beulich wrote:
>>>> This array can be large when many grant frames are permitted; avoid
>>>> allocating it when it's not going to be used anyway, by doing this only
>>>> in gnttab_populate_status_frames().
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>>>      API.
>>> I see this as a backwards step.  It turns a build-time -ENOMEM into a
>>> runtime -ENOMEM, and if you recall from one of the XSAs, the Windows PV
>>> drivers will BUG() if set_version fails.  (Yes - this is dumb behaviour,
>>> but it is in the field now.)

During yesterday call, Paul pointed that this is a behavior from old 
Windows driver. New Windows PV driver will not use Grant Table v2.

However, AFAICT, there is already runtime -ENOMEM failure in set_version 
when trying to populate the status frame (see the call to 
gnttab_populate_status_frames()).

>> Well, if this was the only source of -ENOMEM (i.e. none was there
>> previously), I'd surely understand the concern. But there have been
>> memory allocations before on this path.
> 
> ... you're literally writing a patch saying "avoid large allocation at
> domain create time, and make it at runtime instead" and then trying to
> argue that it isn't a concern because there are other memory allocations.
> 
> It is very definitely a backwards step irrespective of the size of the
> allocation, even if the current behaviour isn't necessarily perfect.
> 
>>   In any event, this will
>> need settling between you and Julien, as it was him to request the
>> change.
> 
> Well - that's because gnttab v2 is disabled in general on ARM.

I didn't have Arm in mind when I originally requested Jan the change.

Instead, the request was based on the fact that most of the guests don't 
use of grant-table v2. To me this feels a waste of memory and if it can 
be avoid then it is best.

> 
> Conditionally avoiding the allocation because of opt_gnttab_max_version
> being 1 would be ok, because it doesn't introduce new runtime failures
> for guests.

Based on my answer above, I would not consider it as a new runtime 
failure -ENOMEM is already possible when switching from v1 to v2.

In fact, the allocation is going to be much smaller than the other 
allocations (we may be allocating multiple pages). So if we are 
concerned about this, then we should already be concerned about the 
existings one.

Anyway, the array itself is likely going to be small (I haven't computed 
the size) so I would be OK to not defer the allocation.

However, I would like to seek clarification on whether your end goal is 
to remove any -ENOMEM failure from set_version.

Cheers,

-- 
Julien Grall


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] gnttab: defer allocation of status frame tracking array
  2021-01-08 10:17       ` Julien Grall
@ 2021-01-08 10:59         ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2021-01-08 10:59 UTC (permalink / raw)
  To: Julien Grall
  Cc: George Dunlap, Ian Jackson, Stefano Stabellini, Wei Liu,
	xen-devel, Andrew Cooper

On 08.01.2021 11:17, Julien Grall wrote:
> Hi Andrew and Jan,
> 
> On 04/01/2021 15:41, Andrew Cooper wrote:
>> On 04/01/2021 15:22, Jan Beulich wrote:
>>> On 04.01.2021 16:04, Andrew Cooper wrote:
>>>> On 23/12/2020 15:13, Jan Beulich wrote:
>>>>> This array can be large when many grant frames are permitted; avoid
>>>>> allocating it when it's not going to be used anyway, by doing this only
>>>>> in gnttab_populate_status_frames().
>>>>>
>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>> ---
>>>>> v2: Defer allocation to when a domain actually switches to the v2 grant
>>>>>      API.
>>>> I see this as a backwards step.  It turns a build-time -ENOMEM into a
>>>> runtime -ENOMEM, and if you recall from one of the XSAs, the Windows PV
>>>> drivers will BUG() if set_version fails.  (Yes - this is dumb behaviour,
>>>> but it is in the field now.)
> 
> During yesterday call, Paul pointed that this is a behavior from old 
> Windows driver. New Windows PV driver will not use Grant Table v2.
> 
> However, AFAICT, there is already runtime -ENOMEM failure in set_version 
> when trying to populate the status frame (see the call to 
> gnttab_populate_status_frames()).
> 
>>> Well, if this was the only source of -ENOMEM (i.e. none was there
>>> previously), I'd surely understand the concern. But there have been
>>> memory allocations before on this path.
>>
>> ... you're literally writing a patch saying "avoid large allocation at
>> domain create time, and make it at runtime instead" and then trying to
>> argue that it isn't a concern because there are other memory allocations.
>>
>> It is very definitely a backwards step irrespective of the size of the
>> allocation, even if the current behaviour isn't necessarily perfect.
>>
>>>   In any event, this will
>>> need settling between you and Julien, as it was him to request the
>>> change.
>>
>> Well - that's because gnttab v2 is disabled in general on ARM.
> 
> I didn't have Arm in mind when I originally requested Jan the change.
> 
> Instead, the request was based on the fact that most of the guests don't 
> use of grant-table v2. To me this feels a waste of memory and if it can 
> be avoid then it is best.
> 
>>
>> Conditionally avoiding the allocation because of opt_gnttab_max_version
>> being 1 would be ok, because it doesn't introduce new runtime failures
>> for guests.
> 
> Based on my answer above, I would not consider it as a new runtime 
> failure -ENOMEM is already possible when switching from v1 to v2.
> 
> In fact, the allocation is going to be much smaller than the other 
> allocations (we may be allocating multiple pages). So if we are 
> concerned about this, then we should already be concerned about the 
> existings one.

I agree with all arguments further up, but I'd like to correct this
aspect as well as ...

> Anyway, the array itself is likely going to be small (I haven't computed 
> the size) so I would be OK to not defer the allocation.

... this implication: Strictly speaking there's no correlation
between the allocation sizes. The one getting moved here has its
size depend on the maximum number of grants a domain is allowed
to use. The pre-existing allocation is dependent on the number of
grants the domain has or had already in use. Therefore if the
maximum is much larger than the in-use count, the new allocation
may turn out to be larger than the existing one. Yet I agree this
may not be very likely.

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-01-08 11:00 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-23 15:13 [PATCH v2] gnttab: defer allocation of status frame tracking array Jan Beulich
2020-12-24  9:57 ` Julien Grall
2021-01-04 13:37   ` Jan Beulich
2021-01-04 14:16     ` Julien Grall
2021-01-04 14:44       ` Jan Beulich
2021-01-04 15:04 ` Andrew Cooper
2021-01-04 15:22   ` Jan Beulich
2021-01-04 15:41     ` Andrew Cooper
2021-01-04 16:00       ` Jan Beulich
2021-01-08 10:17       ` Julien Grall
2021-01-08 10:59         ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.