[5/6] vhost: mark dirty pages during map uninit

Message ID: 20190723075718.6275-6-jasowang@redhat.com
State: New
Series: Fixes for meta data acceleration

Commit Message

Jason Wang July 23, 2019, 7:57 a.m. UTC
We don't mark dirty pages if the map was torn down outside the MMU
notifier. This will lead to untracked dirty pages. Fix this by marking
dirty pages during map uninit.

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

Comments

Michael S. Tsirkin July 23, 2019, 9:17 a.m. UTC | #1
On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote:
> We don't mark dirty pages if the map was torn down outside the MMU
> notifier. This will lead to untracked dirty pages. Fix this by marking
> dirty pages during map uninit.
> 
> Reported-by: Michael S. Tsirkin <mst@redhat.com>
> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/vhost/vhost.c | 22 ++++++++++++++++------
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 89c9f08b5146..5b8821d00fe4 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
>  	kfree(map);
>  }
>  
> +static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
> +				struct vhost_map *map, int index)
> +{
> +	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
> +	int i;
> +
> +	if (uaddr->write) {
> +		for (i = 0; i < map->npages; i++)
> +			set_page_dirty(map->pages[i]);
> +	}
> +}
> +
>  static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>  {
>  	struct vhost_map *map[VHOST_NUM_ADDRS];
> @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>  	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
>  		map[i] = rcu_dereference_protected(vq->maps[i],
>  				  lockdep_is_held(&vq->mmu_lock));
> -		if (map[i])
> +		if (map[i]) {
> +			vhost_set_map_dirty(vq, map[i], i);
>  			rcu_assign_pointer(vq->maps[i], NULL);
> +		}
>  	}
>  	spin_unlock(&vq->mmu_lock);
>  
> @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
>  {
>  	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
>  	struct vhost_map *map;
> -	int i;
>  
>  	if (!vhost_map_range_overlap(uaddr, start, end))
>  		return;
> @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
>  	map = rcu_dereference_protected(vq->maps[index],
>  					lockdep_is_held(&vq->mmu_lock));
>  	if (map) {
> -		if (uaddr->write) {
> -			for (i = 0; i < map->npages; i++)
> -				set_page_dirty(map->pages[i]);
> -		}
> +		vhost_set_map_dirty(vq, map, index);
>  		rcu_assign_pointer(vq->maps[index], NULL);
>  	}
>  	spin_unlock(&vq->mmu_lock);

OK, and the reason it's safe is that the invalidate counter
got incremented, so we know the page will not get mapped again.

But we *do* need to wait for the page not to be mapped.
And if that means waiting for VQ processing to finish,
then I worry that is a very long time.
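
For reference, the counter logic being discussed looks roughly like
this (a simplified sketch with the details elided, not the exact vhost
code):

static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, int index,
				      unsigned long start, unsigned long end)
{
	spin_lock(&vq->mmu_lock);
	++vq->invalidate_count;		/* block new prefetches */
	/* ... mark pages dirty, drop vq->maps[index] ... */
	spin_unlock(&vq->mmu_lock);
}

static int vhost_map_prefetch(struct vhost_virtqueue *vq, int index)
{
	int err = -EFAULT;

	spin_lock(&vq->mmu_lock);
	/* refuse to publish a map while an invalidation is in flight */
	if (vq->invalidate_count)
		goto out;
	/* ... pin user pages, publish vq->maps[index] ... */
	err = 0;
out:
	spin_unlock(&vq->mmu_lock);
	return err;
}

The counter is only decremented again in the invalidate_range_end
callback, so no new map can appear between start and end.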


> -- 
> 2.18.1
Jason Wang July 23, 2019, 1:19 p.m. UTC | #2
On 2019/7/23 5:17 PM, Michael S. Tsirkin wrote:
> On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote:
>> We don't mark dirty pages if the map was torn down outside the MMU
>> notifier. This will lead to untracked dirty pages. Fix this by marking
>> dirty pages during map uninit.
>>
>> Reported-by: Michael S. Tsirkin <mst@redhat.com>
>> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/vhost/vhost.c | 22 ++++++++++++++++------
>>   1 file changed, 16 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 89c9f08b5146..5b8821d00fe4 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
>>   	kfree(map);
>>   }
>>   
>> +static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
>> +				struct vhost_map *map, int index)
>> +{
>> +	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
>> +	int i;
>> +
>> +	if (uaddr->write) {
>> +		for (i = 0; i < map->npages; i++)
>> +			set_page_dirty(map->pages[i]);
>> +	}
>> +}
>> +
>>   static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>>   {
>>   	struct vhost_map *map[VHOST_NUM_ADDRS];
>> @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>>   	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
>>   		map[i] = rcu_dereference_protected(vq->maps[i],
>>   				  lockdep_is_held(&vq->mmu_lock));
>> -		if (map[i])
>> +		if (map[i]) {
>> +			vhost_set_map_dirty(vq, map[i], i);
>>   			rcu_assign_pointer(vq->maps[i], NULL);
>> +		}
>>   	}
>>   	spin_unlock(&vq->mmu_lock);
>>   
>> @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
>>   {
>>   	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
>>   	struct vhost_map *map;
>> -	int i;
>>   
>>   	if (!vhost_map_range_overlap(uaddr, start, end))
>>   		return;
>> @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
>>   	map = rcu_dereference_protected(vq->maps[index],
>>   					lockdep_is_held(&vq->mmu_lock));
>>   	if (map) {
>> -		if (uaddr->write) {
>> -			for (i = 0; i < map->npages; i++)
>> -				set_page_dirty(map->pages[i]);
>> -		}
>> +		vhost_set_map_dirty(vq, map, index);
>>   		rcu_assign_pointer(vq->maps[index], NULL);
>>   	}
>>   	spin_unlock(&vq->mmu_lock);
> OK, and the reason it's safe is that the invalidate counter
> got incremented, so we know the page will not get mapped again.
>
> But we *do* need to wait for the page not to be mapped.
> And if that means waiting for VQ processing to finish,
> then I worry that is a very long time.
>

I'm not sure I get you here. If we don't have such a map, we fall
back to the normal uaccess helpers. And in the memory accessors, the
RCU critical section is pretty small.
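
For reference, the accessors have roughly this shape (a simplified
sketch of the avail-index read from the series, minus the iotlb and
static-branch checks):

static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
				      __virtio16 *idx)
{
	struct vhost_map *map;
	struct vring_avail *avail;

	rcu_read_lock();
	map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
	if (likely(map)) {
		/* fast path: direct access through the kernel VA */
		avail = map->addr;
		*idx = avail->idx;
		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

	/* map was torn down (or never set up): plain uaccess path */
	return vhost_get_avail(vq, *idx, &vq->avail->idx);
}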

Thanks
Michael S. Tsirkin July 25, 2019, 5:21 a.m. UTC | #3
On Tue, Jul 23, 2019 at 09:19:33PM +0800, Jason Wang wrote:
> 
> On 2019/7/23 5:17 PM, Michael S. Tsirkin wrote:
> > On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote:
> > > We don't mark dirty pages if the map was torn down outside the MMU
> > > notifier. This will lead to untracked dirty pages. Fix this by marking
> > > dirty pages during map uninit.
> > > 
> > > Reported-by: Michael S. Tsirkin <mst@redhat.com>
> > > Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > >   drivers/vhost/vhost.c | 22 ++++++++++++++++------
> > >   1 file changed, 16 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > index 89c9f08b5146..5b8821d00fe4 100644
> > > --- a/drivers/vhost/vhost.c
> > > +++ b/drivers/vhost/vhost.c
> > > @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
> > >   	kfree(map);
> > >   }
> > > +static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
> > > +				struct vhost_map *map, int index)
> > > +{
> > > +	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
> > > +	int i;
> > > +
> > > +	if (uaddr->write) {
> > > +		for (i = 0; i < map->npages; i++)
> > > +			set_page_dirty(map->pages[i]);
> > > +	}
> > > +}
> > > +
> > >   static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
> > >   {
> > >   	struct vhost_map *map[VHOST_NUM_ADDRS];
> > > @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
> > >   	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
> > >   		map[i] = rcu_dereference_protected(vq->maps[i],
> > >   				  lockdep_is_held(&vq->mmu_lock));
> > > -		if (map[i])
> > > +		if (map[i]) {
> > > +			vhost_set_map_dirty(vq, map[i], i);
> > >   			rcu_assign_pointer(vq->maps[i], NULL);
> > > +		}
> > >   	}
> > >   	spin_unlock(&vq->mmu_lock);
> > > @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
> > >   {
> > >   	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
> > >   	struct vhost_map *map;
> > > -	int i;
> > >   	if (!vhost_map_range_overlap(uaddr, start, end))
> > >   		return;
> > > @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
> > >   	map = rcu_dereference_protected(vq->maps[index],
> > >   					lockdep_is_held(&vq->mmu_lock));
> > >   	if (map) {
> > > -		if (uaddr->write) {
> > > -			for (i = 0; i < map->npages; i++)
> > > -				set_page_dirty(map->pages[i]);
> > > -		}
> > > +		vhost_set_map_dirty(vq, map, index);
> > >   		rcu_assign_pointer(vq->maps[index], NULL);
> > >   	}
> > >   	spin_unlock(&vq->mmu_lock);
> > OK, and the reason it's safe is that the invalidate counter
> > got incremented, so we know the page will not get mapped again.
> >
> > But we *do* need to wait for the page not to be mapped.
> > And if that means waiting for VQ processing to finish,
> > then I worry that is a very long time.
> > 
> 
> I'm not sure I get you here. If we don't have such a map, we fall back to
> the normal uaccess helpers. And in the memory accessors, the RCU critical
> section is pretty small.
> 
> Thanks
> 

OK. So the trick is that page_mkclean() invokes the MMU notifiers.
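
That is, before a page can be write-protected for writeback, the
notifier runs and vhost flushes the dirty state first; schematically
(a simplified call chain):

	page_mkclean()
	  -> mmu_notifier_invalidate_range_start()
	       -> vhost_invalidate_vq_start()	/* takes vq->mmu_lock */
	            -> vhost_set_map_dirty()	/* set_page_dirty() */
	            -> rcu_assign_pointer(vq->maps[index], NULL)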

Patch

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 89c9f08b5146..5b8821d00fe4 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
 	kfree(map);
 }
 
+static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
+				struct vhost_map *map, int index)
+{
+	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+	int i;
+
+	if (uaddr->write) {
+		for (i = 0; i < map->npages; i++)
+			set_page_dirty(map->pages[i]);
+	}
+}
+
 static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
 {
 	struct vhost_map *map[VHOST_NUM_ADDRS];
@@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
 	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
 		map[i] = rcu_dereference_protected(vq->maps[i],
 				  lockdep_is_held(&vq->mmu_lock));
-		if (map[i])
+		if (map[i]) {
+			vhost_set_map_dirty(vq, map[i], i);
 			rcu_assign_pointer(vq->maps[i], NULL);
+		}
 	}
 	spin_unlock(&vq->mmu_lock);
 
@@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
 {
 	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
 	struct vhost_map *map;
-	int i;
 
 	if (!vhost_map_range_overlap(uaddr, start, end))
 		return;
@@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
 	map = rcu_dereference_protected(vq->maps[index],
 					lockdep_is_held(&vq->mmu_lock));
 	if (map) {
-		if (uaddr->write) {
-			for (i = 0; i < map->npages; i++)
-				set_page_dirty(map->pages[i]);
-		}
+		vhost_set_map_dirty(vq, map, index);
 		rcu_assign_pointer(vq->maps[index], NULL);
 	}
 	spin_unlock(&vq->mmu_lock);