All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] vhost: don't use kmap() to log dirty pages
@ 2019-05-13  5:27 Jason Wang
  2019-05-13 16:42 ` David Miller
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Jason Wang @ 2019-05-13  5:27 UTC (permalink / raw)
  To: mst, jasowang, kvm, virtualization, netdev, linux-kernel
  Cc: Christoph Hellwig, James Bottomley, Andrea Arcangeli,
	Thomas Gleixner, Ingo Molnar, Peter Zijlstra, Darren Hart

Vhost log dirty pages directly to a userspace bitmap through GUP and
kmap_atomic() since kernel doesn't have a set_bit_to_user()
helper. This will cause issues for the arch that has virtually tagged
caches. The way to fix is to keep using userspace virtual
address. Fortunately, futex has arch_futex_atomic_op_inuser() which
could be used for setting a bit to user.

Note there're several cases that futex helper can fail e.g a page
fault or the arch that doesn't have the support. For those cases, a
simplified get_user()/put_user() pair protected by a global mutex is
provided as a fallback. The fallback may lead false positive that
userspace may see more dirty pages.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <dvhart@infradead.org>
Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from RFC V2:
- drop GUP and provide get_user()/put_user() fallbacks
- round down log_base
Changes from RFC V1:
- switch to use arch_futex_atomic_op_inuser()
---
 drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 351af88..7fa05ba 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -31,6 +31,7 @@
 #include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
 #include <linux/nospec.h>
+#include <asm/futex.h>
 
 #include "vhost.h"
 
@@ -43,6 +44,8 @@
 MODULE_PARM_DESC(max_iotlb_entries,
 	"Maximum number of iotlb entries. (default: 2048)");
 
+static DEFINE_MUTEX(vhost_log_lock);
+
 enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
@@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
 
-/* TODO: This is really inefficient.  We need something like get_user()
- * (instruction directly accesses the data, with an exception table entry
- * returning -EFAULT). See Documentation/x86/exception-tables.txt.
- */
-static int set_bit_to_user(int nr, void __user *addr)
+static int set_bit_to_user(int nr, u32 __user *addr)
 {
-	unsigned long log = (unsigned long)addr;
-	struct page *page;
-	void *base;
-	int bit = nr + (log % PAGE_SIZE) * 8;
+	u32 old;
 	int r;
 
-	r = get_user_pages_fast(log, 1, 1, &page);
-	if (r < 0)
-		return r;
-	BUG_ON(r != 1);
-	base = kmap_atomic(page);
-	set_bit(bit, base);
-	kunmap_atomic(base);
-	set_page_dirty_lock(page);
-	put_page(page);
+	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
+	if (r) {
+		/* Fallback through get_user()/put_user(), this may
+		 * lead false positive that userspace may see more
+		 * dirty pages. A mutex is used to synchronize log
+		 * access between vhost threads.
+		 */
+		mutex_lock(&vhost_log_lock);
+		r = get_user(old, addr);
+		if (r)
+			goto err;
+		r = put_user(old | 1 << nr, addr);
+		if (r)
+			goto err;
+		mutex_unlock(&vhost_log_lock);
+	}
 	return 0;
+err:
+	mutex_unlock(&vhost_log_lock);
+	return r;
 }
 
 static int log_write(void __user *log_base,
@@ -1725,13 +1731,13 @@ static int log_write(void __user *log_base,
 	if (!write_length)
 		return 0;
 	write_length += write_address % VHOST_PAGE_SIZE;
+	log_base = (void __user *)((u64)log_base & ~0x3ULL);
+	write_page += ((u64)log_base & 0x3ULL) * 8;
 	for (;;) {
-		u64 base = (u64)(unsigned long)log_base;
-		u64 log = base + write_page / 8;
-		int bit = write_page % 8;
-		if ((u64)(unsigned long)log != log)
-			return -EFAULT;
-		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
+		u32 __user *log = (u32 __user *)log_base + write_page / 32;
+		int bit = write_page % 32;
+
+		r = set_bit_to_user(bit, log);
 		if (r < 0)
 			return r;
 		if (write_length <= VHOST_PAGE_SIZE)
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13  5:27 [PATCH net] vhost: don't use kmap() to log dirty pages Jason Wang
  2019-05-13 16:42 ` David Miller
@ 2019-05-13 16:42 ` David Miller
  2019-05-14  3:42   ` Jason Wang
  2019-05-14  3:42   ` Jason Wang
  2019-05-13 21:55 ` kbuild test robot
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 11+ messages in thread
From: David Miller @ 2019-05-13 16:42 UTC (permalink / raw)
  To: jasowang
  Cc: mst, kvm, virtualization, netdev, linux-kernel, hch,
	James.Bottomley, aarcange, tglx, mingo, peterz, dvhart

From: Jason Wang <jasowang@redhat.com>
Date: Mon, 13 May 2019 01:27:45 -0400

> Vhost log dirty pages directly to a userspace bitmap through GUP and
> kmap_atomic() since kernel doesn't have a set_bit_to_user()
> helper. This will cause issues for the arch that has virtually tagged
> caches. The way to fix is to keep using userspace virtual
> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
> could be used for setting a bit to user.
> 
> Note there're several cases that futex helper can fail e.g a page
> fault or the arch that doesn't have the support. For those cases, a
> simplified get_user()/put_user() pair protected by a global mutex is
> provided as a fallback. The fallback may lead false positive that
> userspace may see more dirty pages.
> 
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Darren Hart <dvhart@infradead.org>
> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
> Signed-off-by: Jason Wang <jasowang@redhat.com>

I want to see a review from Michael for this change before applying.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13  5:27 [PATCH net] vhost: don't use kmap() to log dirty pages Jason Wang
@ 2019-05-13 16:42 ` David Miller
  2019-05-13 16:42 ` David Miller
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: David Miller @ 2019-05-13 16:42 UTC (permalink / raw)
  To: jasowang
  Cc: aarcange, James.Bottomley, kvm, mst, peterz, netdev,
	linux-kernel, virtualization, hch, mingo, dvhart, tglx

From: Jason Wang <jasowang@redhat.com>
Date: Mon, 13 May 2019 01:27:45 -0400

> Vhost log dirty pages directly to a userspace bitmap through GUP and
> kmap_atomic() since kernel doesn't have a set_bit_to_user()
> helper. This will cause issues for the arch that has virtually tagged
> caches. The way to fix is to keep using userspace virtual
> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
> could be used for setting a bit to user.
> 
> Note there're several cases that futex helper can fail e.g a page
> fault or the arch that doesn't have the support. For those cases, a
> simplified get_user()/put_user() pair protected by a global mutex is
> provided as a fallback. The fallback may lead false positive that
> userspace may see more dirty pages.
> 
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Darren Hart <dvhart@infradead.org>
> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
> Signed-off-by: Jason Wang <jasowang@redhat.com>

I want to see a review from Michael for this change before applying.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13  5:27 [PATCH net] vhost: don't use kmap() to log dirty pages Jason Wang
  2019-05-13 16:42 ` David Miller
  2019-05-13 16:42 ` David Miller
@ 2019-05-13 21:55 ` kbuild test robot
  2019-05-14 22:16 ` Michael S. Tsirkin
  2019-05-14 22:16 ` Michael S. Tsirkin
  4 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2019-05-13 21:55 UTC (permalink / raw)
  To: Jason Wang
  Cc: Andrea Arcangeli, James Bottomley, kvm, mst, Peter Zijlstra,
	netdev, linux-kernel, virtualization, Christoph Hellwig,
	Ingo Molnar, kbuild-all, Darren Hart, Thomas Gleixner

[-- Attachment #1: Type: text/plain, Size: 2179 bytes --]

Hi Jason,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net/master]

url:    https://github.com/0day-ci/linux/commits/Jason-Wang/vhost-don-t-use-kmap-to-log-dirty-pages/20190514-052319
config: i386-randconfig-x005-201919 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers//vhost/vhost.c: In function 'log_write':
>> drivers//vhost/vhost.c:1734:29: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     log_base = (void __user *)((u64)log_base & ~0x3ULL);
                                ^
>> drivers//vhost/vhost.c:1734:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     log_base = (void __user *)((u64)log_base & ~0x3ULL);
                ^
   drivers//vhost/vhost.c:1735:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     write_page += ((u64)log_base & 0x3ULL) * 8;
                    ^

vim +1734 drivers//vhost/vhost.c

  1724	
  1725	static int log_write(void __user *log_base,
  1726			     u64 write_address, u64 write_length)
  1727	{
  1728		u64 write_page = write_address / VHOST_PAGE_SIZE;
  1729		int r;
  1730	
  1731		if (!write_length)
  1732			return 0;
  1733		write_length += write_address % VHOST_PAGE_SIZE;
> 1734		log_base = (void __user *)((u64)log_base & ~0x3ULL);
  1735		write_page += ((u64)log_base & 0x3ULL) * 8;
  1736		for (;;) {
  1737			u32 __user *log = (u32 __user *)log_base + write_page / 32;
  1738			int bit = write_page % 32;
  1739	
  1740			r = set_bit_to_user(bit, log);
  1741			if (r < 0)
  1742				return r;
  1743			if (write_length <= VHOST_PAGE_SIZE)
  1744				break;
  1745			write_length -= VHOST_PAGE_SIZE;
  1746			write_page += 1;
  1747		}
  1748		return r;
  1749	}
  1750	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28595 bytes --]

[-- Attachment #3: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13 16:42 ` David Miller
  2019-05-14  3:42   ` Jason Wang
@ 2019-05-14  3:42   ` Jason Wang
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2019-05-14  3:42 UTC (permalink / raw)
  To: David Miller
  Cc: mst, kvm, virtualization, netdev, linux-kernel, hch,
	James.Bottomley, aarcange, tglx, mingo, peterz, dvhart


On 2019/5/14 上午12:42, David Miller wrote:
> From: Jason Wang <jasowang@redhat.com>
> Date: Mon, 13 May 2019 01:27:45 -0400
>
>> Vhost log dirty pages directly to a userspace bitmap through GUP and
>> kmap_atomic() since kernel doesn't have a set_bit_to_user()
>> helper. This will cause issues for the arch that has virtually tagged
>> caches. The way to fix is to keep using userspace virtual
>> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
>> could be used for setting a bit to user.
>>
>> Note there're several cases that futex helper can fail e.g a page
>> fault or the arch that doesn't have the support. For those cases, a
>> simplified get_user()/put_user() pair protected by a global mutex is
>> provided as a fallback. The fallback may lead false positive that
>> userspace may see more dirty pages.
>>
>> Cc: Christoph Hellwig <hch@infradead.org>
>> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
>> Cc: Andrea Arcangeli <aarcange@redhat.com>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Darren Hart <dvhart@infradead.org>
>> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> I want to see a review from Michael for this change before applying.


No problem, since kbuild spotted an issue. Let me post V2.

Thanks


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13 16:42 ` David Miller
@ 2019-05-14  3:42   ` Jason Wang
  2019-05-14  3:42   ` Jason Wang
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2019-05-14  3:42 UTC (permalink / raw)
  To: David Miller
  Cc: aarcange, James.Bottomley, kvm, mst, peterz, netdev,
	linux-kernel, virtualization, hch, mingo, dvhart, tglx


On 2019/5/14 上午12:42, David Miller wrote:
> From: Jason Wang <jasowang@redhat.com>
> Date: Mon, 13 May 2019 01:27:45 -0400
>
>> Vhost log dirty pages directly to a userspace bitmap through GUP and
>> kmap_atomic() since kernel doesn't have a set_bit_to_user()
>> helper. This will cause issues for the arch that has virtually tagged
>> caches. The way to fix is to keep using userspace virtual
>> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
>> could be used for setting a bit to user.
>>
>> Note there're several cases that futex helper can fail e.g a page
>> fault or the arch that doesn't have the support. For those cases, a
>> simplified get_user()/put_user() pair protected by a global mutex is
>> provided as a fallback. The fallback may lead false positive that
>> userspace may see more dirty pages.
>>
>> Cc: Christoph Hellwig <hch@infradead.org>
>> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
>> Cc: Andrea Arcangeli <aarcange@redhat.com>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Darren Hart <dvhart@infradead.org>
>> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> I want to see a review from Michael for this change before applying.


No problem, since kbuild spotted an issue. Let me post V2.

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13  5:27 [PATCH net] vhost: don't use kmap() to log dirty pages Jason Wang
                   ` (3 preceding siblings ...)
  2019-05-14 22:16 ` Michael S. Tsirkin
@ 2019-05-14 22:16 ` Michael S. Tsirkin
  2019-05-15  2:55   ` Jason Wang
  2019-05-15  2:55   ` Jason Wang
  4 siblings, 2 replies; 11+ messages in thread
From: Michael S. Tsirkin @ 2019-05-14 22:16 UTC (permalink / raw)
  To: Jason Wang
  Cc: kvm, virtualization, netdev, linux-kernel, Christoph Hellwig,
	James Bottomley, Andrea Arcangeli, Thomas Gleixner, Ingo Molnar,
	Peter Zijlstra, Darren Hart

On Mon, May 13, 2019 at 01:27:45AM -0400, Jason Wang wrote:
> Vhost log dirty pages directly to a userspace bitmap through GUP and
> kmap_atomic() since kernel doesn't have a set_bit_to_user()
> helper. This will cause issues for the arch that has virtually tagged
> caches. The way to fix is to keep using userspace virtual
> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
> could be used for setting a bit to user.
> 
> Note there're several cases that futex helper can fail e.g a page
> fault or the arch that doesn't have the support. For those cases, a
> simplified get_user()/put_user() pair protected by a global mutex is
> provided as a fallback. The fallback may lead false positive that
> userspace may see more dirty pages.
> 
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Darren Hart <dvhart@infradead.org>
> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes from RFC V2:
> - drop GUP and provide get_user()/put_user() fallbacks
> - round down log_base
> Changes from RFC V1:
> - switch to use arch_futex_atomic_op_inuser()
> ---
>  drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
>  1 file changed, 30 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 351af88..7fa05ba 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -31,6 +31,7 @@
>  #include <linux/sched/signal.h>
>  #include <linux/interval_tree_generic.h>
>  #include <linux/nospec.h>
> +#include <asm/futex.h>
>  
>  #include "vhost.h"
>  
> @@ -43,6 +44,8 @@
>  MODULE_PARM_DESC(max_iotlb_entries,
>  	"Maximum number of iotlb entries. (default: 2048)");
>  
> +static DEFINE_MUTEX(vhost_log_lock);
> +
>  enum {
>  	VHOST_MEMORY_F_LOG = 0x1,
>  };
> @@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
>  
> -/* TODO: This is really inefficient.  We need something like get_user()
> - * (instruction directly accesses the data, with an exception table entry
> - * returning -EFAULT). See Documentation/x86/exception-tables.txt.
> - */
> -static int set_bit_to_user(int nr, void __user *addr)
> +static int set_bit_to_user(int nr, u32 __user *addr)
>  {
> -	unsigned long log = (unsigned long)addr;
> -	struct page *page;
> -	void *base;
> -	int bit = nr + (log % PAGE_SIZE) * 8;
> +	u32 old;
>  	int r;
>  
> -	r = get_user_pages_fast(log, 1, 1, &page);
> -	if (r < 0)
> -		return r;
> -	BUG_ON(r != 1);
> -	base = kmap_atomic(page);
> -	set_bit(bit, base);
> -	kunmap_atomic(base);
> -	set_page_dirty_lock(page);
> -	put_page(page);
> +	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
> +	if (r) {
> +		/* Fallback through get_user()/put_user(), this may
> +		 * lead false positive that userspace may see more
> +		 * dirty pages. A mutex is used to synchronize log
> +		 * access between vhost threads.
> +		 */
> +		mutex_lock(&vhost_log_lock);
> +		r = get_user(old, addr);
> +		if (r)
> +			goto err;
> +		r = put_user(old | 1 << nr, addr);
> +		if (r)
> +			goto err;
> +		mutex_unlock(&vhost_log_lock);
> +	}

Problem is, we always said it's atomic.

This trick will work if userspace only clears bits
in the log, but won't if it sets bits in the log.
E.g. reusing the log structure for vhost-user
will have exactly this effect.




>  	return 0;
> +err:
> +	mutex_unlock(&vhost_log_lock);
> +	return r;
>  }
>  
>  static int log_write(void __user *log_base,
> @@ -1725,13 +1731,13 @@ static int log_write(void __user *log_base,
>  	if (!write_length)
>  		return 0;
>  	write_length += write_address % VHOST_PAGE_SIZE;
> +	log_base = (void __user *)((u64)log_base & ~0x3ULL);
> +	write_page += ((u64)log_base & 0x3ULL) * 8;
>  	for (;;) {
> -		u64 base = (u64)(unsigned long)log_base;
> -		u64 log = base + write_page / 8;
> -		int bit = write_page % 8;
> -		if ((u64)(unsigned long)log != log)
> -			return -EFAULT;
> -		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
> +		u32 __user *log = (u32 __user *)log_base + write_page / 32;
> +		int bit = write_page % 32;
> +
> +		r = set_bit_to_user(bit, log);
>  		if (r < 0)
>  			return r;
>  		if (write_length <= VHOST_PAGE_SIZE)
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-13  5:27 [PATCH net] vhost: don't use kmap() to log dirty pages Jason Wang
                   ` (2 preceding siblings ...)
  2019-05-13 21:55 ` kbuild test robot
@ 2019-05-14 22:16 ` Michael S. Tsirkin
  2019-05-14 22:16 ` Michael S. Tsirkin
  4 siblings, 0 replies; 11+ messages in thread
From: Michael S. Tsirkin @ 2019-05-14 22:16 UTC (permalink / raw)
  To: Jason Wang
  Cc: Andrea Arcangeli, James Bottomley, kvm, Peter Zijlstra, netdev,
	linux-kernel, virtualization, Christoph Hellwig, Ingo Molnar,
	Darren Hart, Thomas Gleixner

On Mon, May 13, 2019 at 01:27:45AM -0400, Jason Wang wrote:
> Vhost log dirty pages directly to a userspace bitmap through GUP and
> kmap_atomic() since kernel doesn't have a set_bit_to_user()
> helper. This will cause issues for the arch that has virtually tagged
> caches. The way to fix is to keep using userspace virtual
> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
> could be used for setting a bit to user.
> 
> Note there're several cases that futex helper can fail e.g a page
> fault or the arch that doesn't have the support. For those cases, a
> simplified get_user()/put_user() pair protected by a global mutex is
> provided as a fallback. The fallback may lead false positive that
> userspace may see more dirty pages.
> 
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Darren Hart <dvhart@infradead.org>
> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes from RFC V2:
> - drop GUP and provide get_user()/put_user() fallbacks
> - round down log_base
> Changes from RFC V1:
> - switch to use arch_futex_atomic_op_inuser()
> ---
>  drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
>  1 file changed, 30 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 351af88..7fa05ba 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -31,6 +31,7 @@
>  #include <linux/sched/signal.h>
>  #include <linux/interval_tree_generic.h>
>  #include <linux/nospec.h>
> +#include <asm/futex.h>
>  
>  #include "vhost.h"
>  
> @@ -43,6 +44,8 @@
>  MODULE_PARM_DESC(max_iotlb_entries,
>  	"Maximum number of iotlb entries. (default: 2048)");
>  
> +static DEFINE_MUTEX(vhost_log_lock);
> +
>  enum {
>  	VHOST_MEMORY_F_LOG = 0x1,
>  };
> @@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
>  
> -/* TODO: This is really inefficient.  We need something like get_user()
> - * (instruction directly accesses the data, with an exception table entry
> - * returning -EFAULT). See Documentation/x86/exception-tables.txt.
> - */
> -static int set_bit_to_user(int nr, void __user *addr)
> +static int set_bit_to_user(int nr, u32 __user *addr)
>  {
> -	unsigned long log = (unsigned long)addr;
> -	struct page *page;
> -	void *base;
> -	int bit = nr + (log % PAGE_SIZE) * 8;
> +	u32 old;
>  	int r;
>  
> -	r = get_user_pages_fast(log, 1, 1, &page);
> -	if (r < 0)
> -		return r;
> -	BUG_ON(r != 1);
> -	base = kmap_atomic(page);
> -	set_bit(bit, base);
> -	kunmap_atomic(base);
> -	set_page_dirty_lock(page);
> -	put_page(page);
> +	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
> +	if (r) {
> +		/* Fallback through get_user()/put_user(), this may
> +		 * lead false positive that userspace may see more
> +		 * dirty pages. A mutex is used to synchronize log
> +		 * access between vhost threads.
> +		 */
> +		mutex_lock(&vhost_log_lock);
> +		r = get_user(old, addr);
> +		if (r)
> +			goto err;
> +		r = put_user(old | 1 << nr, addr);
> +		if (r)
> +			goto err;
> +		mutex_unlock(&vhost_log_lock);
> +	}

Problem is, we always said it's atomic.

This trick will work if userspace only clears bits
in the log, but won't if it sets bits in the log.
E.g. reusing the log structure for vhost-user
will have exactly this effect.




>  	return 0;
> +err:
> +	mutex_unlock(&vhost_log_lock);
> +	return r;
>  }
>  
>  static int log_write(void __user *log_base,
> @@ -1725,13 +1731,13 @@ static int log_write(void __user *log_base,
>  	if (!write_length)
>  		return 0;
>  	write_length += write_address % VHOST_PAGE_SIZE;
> +	log_base = (void __user *)((u64)log_base & ~0x3ULL);
> +	write_page += ((u64)log_base & 0x3ULL) * 8;
>  	for (;;) {
> -		u64 base = (u64)(unsigned long)log_base;
> -		u64 log = base + write_page / 8;
> -		int bit = write_page % 8;
> -		if ((u64)(unsigned long)log != log)
> -			return -EFAULT;
> -		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
> +		u32 __user *log = (u32 __user *)log_base + write_page / 32;
> +		int bit = write_page % 32;
> +
> +		r = set_bit_to_user(bit, log);
>  		if (r < 0)
>  			return r;
>  		if (write_length <= VHOST_PAGE_SIZE)
> -- 
> 1.8.3.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-14 22:16 ` Michael S. Tsirkin
@ 2019-05-15  2:55   ` Jason Wang
  2019-05-15  2:55   ` Jason Wang
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2019-05-15  2:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, virtualization, netdev, linux-kernel, Christoph Hellwig,
	James Bottomley, Andrea Arcangeli, Thomas Gleixner, Ingo Molnar,
	Peter Zijlstra, Darren Hart


On 2019/5/15 上午6:16, Michael S. Tsirkin wrote:
> On Mon, May 13, 2019 at 01:27:45AM -0400, Jason Wang wrote:
>> Vhost log dirty pages directly to a userspace bitmap through GUP and
>> kmap_atomic() since kernel doesn't have a set_bit_to_user()
>> helper. This will cause issues for the arch that has virtually tagged
>> caches. The way to fix is to keep using userspace virtual
>> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
>> could be used for setting a bit to user.
>>
>> Note there're several cases that futex helper can fail e.g a page
>> fault or the arch that doesn't have the support. For those cases, a
>> simplified get_user()/put_user() pair protected by a global mutex is
>> provided as a fallback. The fallback may lead false positive that
>> userspace may see more dirty pages.
>>
>> Cc: Christoph Hellwig<hch@infradead.org>
>> Cc: James Bottomley<James.Bottomley@HansenPartnership.com>
>> Cc: Andrea Arcangeli<aarcange@redhat.com>
>> Cc: Thomas Gleixner<tglx@linutronix.de>
>> Cc: Ingo Molnar<mingo@redhat.com>
>> Cc: Peter Zijlstra<peterz@infradead.org>
>> Cc: Darren Hart<dvhart@infradead.org>
>> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>> ---
>> Changes from RFC V2:
>> - drop GUP and provide get_user()/put_user() fallbacks
>> - round down log_base
>> Changes from RFC V1:
>> - switch to use arch_futex_atomic_op_inuser()
>> ---
>>   drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
>>   1 file changed, 30 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 351af88..7fa05ba 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -31,6 +31,7 @@
>>   #include <linux/sched/signal.h>
>>   #include <linux/interval_tree_generic.h>
>>   #include <linux/nospec.h>
>> +#include <asm/futex.h>
>>   
>>   #include "vhost.h"
>>   
>> @@ -43,6 +44,8 @@
>>   MODULE_PARM_DESC(max_iotlb_entries,
>>   	"Maximum number of iotlb entries. (default: 2048)");
>>   
>> +static DEFINE_MUTEX(vhost_log_lock);
>> +
>>   enum {
>>   	VHOST_MEMORY_F_LOG = 0x1,
>>   };
>> @@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
>>   }
>>   EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
>>   
>> -/* TODO: This is really inefficient.  We need something like get_user()
>> - * (instruction directly accesses the data, with an exception table entry
>> - * returning -EFAULT). See Documentation/x86/exception-tables.txt.
>> - */
>> -static int set_bit_to_user(int nr, void __user *addr)
>> +static int set_bit_to_user(int nr, u32 __user *addr)
>>   {
>> -	unsigned long log = (unsigned long)addr;
>> -	struct page *page;
>> -	void *base;
>> -	int bit = nr + (log % PAGE_SIZE) * 8;
>> +	u32 old;
>>   	int r;
>>   
>> -	r = get_user_pages_fast(log, 1, 1, &page);
>> -	if (r < 0)
>> -		return r;
>> -	BUG_ON(r != 1);
>> -	base = kmap_atomic(page);
>> -	set_bit(bit, base);
>> -	kunmap_atomic(base);
>> -	set_page_dirty_lock(page);
>> -	put_page(page);
>> +	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
>> +	if (r) {
>> +		/* Fallback through get_user()/put_user(), this may
>> +		 * lead false positive that userspace may see more
>> +		 * dirty pages. A mutex is used to synchronize log
>> +		 * access between vhost threads.
>> +		 */
>> +		mutex_lock(&vhost_log_lock);
>> +		r = get_user(old, addr);
>> +		if (r)
>> +			goto err;
>> +		r = put_user(old | 1 << nr, addr);
>> +		if (r)
>> +			goto err;
>> +		mutex_unlock(&vhost_log_lock);
>> +	}
> Problem is, we always said it's atomic.
>
> This trick will work if userspace only clears bits
> in the log, but won't if it sets bits in the log.
> E.g. reusing the log structure for vhost-user
> will have exactly this effect.
>

Ok, I admit this is an issue.

Then I think maybe we can simply fallback to a u8 put_user() which is 
guaranteed to be atomic like:

put_user(0xF, addr);

Then we may at most have 7 more dirty pages to be seen by guest. 
Consider futex helper is implemented is most architectures and it's 
likely to succeed. It should be acceptable.

Does this make sense?

Thanks


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net] vhost: don't use kmap() to log dirty pages
  2019-05-14 22:16 ` Michael S. Tsirkin
  2019-05-15  2:55   ` Jason Wang
@ 2019-05-15  2:55   ` Jason Wang
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2019-05-15  2:55 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Andrea Arcangeli, James Bottomley, kvm, Peter Zijlstra, netdev,
	linux-kernel, virtualization, Christoph Hellwig, Ingo Molnar,
	Darren Hart, Thomas Gleixner


On 2019/5/15 上午6:16, Michael S. Tsirkin wrote:
> On Mon, May 13, 2019 at 01:27:45AM -0400, Jason Wang wrote:
>> Vhost log dirty pages directly to a userspace bitmap through GUP and
>> kmap_atomic() since kernel doesn't have a set_bit_to_user()
>> helper. This will cause issues for the arch that has virtually tagged
>> caches. The way to fix is to keep using userspace virtual
>> address. Fortunately, futex has arch_futex_atomic_op_inuser() which
>> could be used for setting a bit to user.
>>
>> Note there're several cases that futex helper can fail e.g a page
>> fault or the arch that doesn't have the support. For those cases, a
>> simplified get_user()/put_user() pair protected by a global mutex is
>> provided as a fallback. The fallback may lead false positive that
>> userspace may see more dirty pages.
>>
>> Cc: Christoph Hellwig<hch@infradead.org>
>> Cc: James Bottomley<James.Bottomley@HansenPartnership.com>
>> Cc: Andrea Arcangeli<aarcange@redhat.com>
>> Cc: Thomas Gleixner<tglx@linutronix.de>
>> Cc: Ingo Molnar<mingo@redhat.com>
>> Cc: Peter Zijlstra<peterz@infradead.org>
>> Cc: Darren Hart<dvhart@infradead.org>
>> Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>> ---
>> Changes from RFC V2:
>> - drop GUP and provide get_user()/put_user() fallbacks
>> - round down log_base
>> Changes from RFC V1:
>> - switch to use arch_futex_atomic_op_inuser()
>> ---
>>   drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
>>   1 file changed, 30 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 351af88..7fa05ba 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -31,6 +31,7 @@
>>   #include <linux/sched/signal.h>
>>   #include <linux/interval_tree_generic.h>
>>   #include <linux/nospec.h>
>> +#include <asm/futex.h>
>>   
>>   #include "vhost.h"
>>   
>> @@ -43,6 +44,8 @@
>>   MODULE_PARM_DESC(max_iotlb_entries,
>>   	"Maximum number of iotlb entries. (default: 2048)");
>>   
>> +static DEFINE_MUTEX(vhost_log_lock);
>> +
>>   enum {
>>   	VHOST_MEMORY_F_LOG = 0x1,
>>   };
>> @@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
>>   }
>>   EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
>>   
>> -/* TODO: This is really inefficient.  We need something like get_user()
>> - * (instruction directly accesses the data, with an exception table entry
>> - * returning -EFAULT). See Documentation/x86/exception-tables.txt.
>> - */
>> -static int set_bit_to_user(int nr, void __user *addr)
>> +static int set_bit_to_user(int nr, u32 __user *addr)
>>   {
>> -	unsigned long log = (unsigned long)addr;
>> -	struct page *page;
>> -	void *base;
>> -	int bit = nr + (log % PAGE_SIZE) * 8;
>> +	u32 old;
>>   	int r;
>>   
>> -	r = get_user_pages_fast(log, 1, 1, &page);
>> -	if (r < 0)
>> -		return r;
>> -	BUG_ON(r != 1);
>> -	base = kmap_atomic(page);
>> -	set_bit(bit, base);
>> -	kunmap_atomic(base);
>> -	set_page_dirty_lock(page);
>> -	put_page(page);
>> +	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
>> +	if (r) {
>> +		/* Fallback through get_user()/put_user(), this may
>> +		 * lead false positive that userspace may see more
>> +		 * dirty pages. A mutex is used to synchronize log
>> +		 * access between vhost threads.
>> +		 */
>> +		mutex_lock(&vhost_log_lock);
>> +		r = get_user(old, addr);
>> +		if (r)
>> +			goto err;
>> +		r = put_user(old | 1 << nr, addr);
>> +		if (r)
>> +			goto err;
>> +		mutex_unlock(&vhost_log_lock);
>> +	}
> Problem is, we always said it's atomic.
>
> This trick will work if userspace only clears bits
> in the log, but won't if it sets bits in the log.
> E.g. reusing the log structure for vhost-user
> will have exactly this effect.
>

Ok, I admit this is an issue.

Then I think maybe we can simply fallback to a u8 put_user() which is 
guaranteed to be atomic like:

put_user(0xF, addr);

Then we may at most have 7 more dirty pages to be seen by guest. 
Consider futex helper is implemented is most architectures and it's 
likely to succeed. It should be acceptable.

Does this make sense?

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH net] vhost: don't use kmap() to log dirty pages
@ 2019-05-13  5:27 Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2019-05-13  5:27 UTC (permalink / raw)
  To: mst, jasowang, kvm, virtualization, netdev, linux-kernel
  Cc: Andrea Arcangeli, James Bottomley, Peter Zijlstra,
	Christoph Hellwig, Ingo Molnar, Darren Hart, Thomas Gleixner

Vhost log dirty pages directly to a userspace bitmap through GUP and
kmap_atomic() since kernel doesn't have a set_bit_to_user()
helper. This will cause issues for the arch that has virtually tagged
caches. The way to fix is to keep using userspace virtual
address. Fortunately, futex has arch_futex_atomic_op_inuser() which
could be used for setting a bit to user.

Note there're several cases that futex helper can fail e.g a page
fault or the arch that doesn't have the support. For those cases, a
simplified get_user()/put_user() pair protected by a global mutex is
provided as a fallback. The fallback may lead false positive that
userspace may see more dirty pages.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <dvhart@infradead.org>
Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from RFC V2:
- drop GUP and provide get_user()/put_user() fallbacks
- round down log_base
Changes from RFC V1:
- switch to use arch_futex_atomic_op_inuser()
---
 drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 351af88..7fa05ba 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -31,6 +31,7 @@
 #include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
 #include <linux/nospec.h>
+#include <asm/futex.h>
 
 #include "vhost.h"
 
@@ -43,6 +44,8 @@
 MODULE_PARM_DESC(max_iotlb_entries,
 	"Maximum number of iotlb entries. (default: 2048)");
 
+static DEFINE_MUTEX(vhost_log_lock);
+
 enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
@@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
 
-/* TODO: This is really inefficient.  We need something like get_user()
- * (instruction directly accesses the data, with an exception table entry
- * returning -EFAULT). See Documentation/x86/exception-tables.txt.
- */
-static int set_bit_to_user(int nr, void __user *addr)
+static int set_bit_to_user(int nr, u32 __user *addr)
 {
-	unsigned long log = (unsigned long)addr;
-	struct page *page;
-	void *base;
-	int bit = nr + (log % PAGE_SIZE) * 8;
+	u32 old;
 	int r;
 
-	r = get_user_pages_fast(log, 1, 1, &page);
-	if (r < 0)
-		return r;
-	BUG_ON(r != 1);
-	base = kmap_atomic(page);
-	set_bit(bit, base);
-	kunmap_atomic(base);
-	set_page_dirty_lock(page);
-	put_page(page);
+	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
+	if (r) {
+		/* Fallback through get_user()/put_user(), this may
+		 * lead false positive that userspace may see more
+		 * dirty pages. A mutex is used to synchronize log
+		 * access between vhost threads.
+		 */
+		mutex_lock(&vhost_log_lock);
+		r = get_user(old, addr);
+		if (r)
+			goto err;
+		r = put_user(old | 1 << nr, addr);
+		if (r)
+			goto err;
+		mutex_unlock(&vhost_log_lock);
+	}
 	return 0;
+err:
+	mutex_unlock(&vhost_log_lock);
+	return r;
 }
 
 static int log_write(void __user *log_base,
@@ -1725,13 +1731,13 @@ static int log_write(void __user *log_base,
 	if (!write_length)
 		return 0;
 	write_length += write_address % VHOST_PAGE_SIZE;
+	log_base = (void __user *)((u64)log_base & ~0x3ULL);
+	write_page += ((u64)log_base & 0x3ULL) * 8;
 	for (;;) {
-		u64 base = (u64)(unsigned long)log_base;
-		u64 log = base + write_page / 8;
-		int bit = write_page % 8;
-		if ((u64)(unsigned long)log != log)
-			return -EFAULT;
-		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
+		u32 __user *log = (u32 __user *)log_base + write_page / 32;
+		int bit = write_page % 32;
+
+		r = set_bit_to_user(bit, log);
 		if (r < 0)
 			return r;
 		if (write_length <= VHOST_PAGE_SIZE)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-05-15  2:55 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-13  5:27 [PATCH net] vhost: don't use kmap() to log dirty pages Jason Wang
2019-05-13 16:42 ` David Miller
2019-05-13 16:42 ` David Miller
2019-05-14  3:42   ` Jason Wang
2019-05-14  3:42   ` Jason Wang
2019-05-13 21:55 ` kbuild test robot
2019-05-14 22:16 ` Michael S. Tsirkin
2019-05-14 22:16 ` Michael S. Tsirkin
2019-05-15  2:55   ` Jason Wang
2019-05-15  2:55   ` Jason Wang
2019-05-13  5:27 Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.