All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: mst@redhat.com, jasowang@redhat.com, kvm@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
	James Bottomley <James.Bottomley@HansenPartnership.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Darren Hart <dvhart@infradead.org>
Subject: [PATCH net] vhost: don't use kmap() to log dirty pages
Date: Mon, 13 May 2019 01:27:45 -0400	[thread overview]
Message-ID: <1557725265-63525-1-git-send-email-jasowang@redhat.com> (raw)

Vhost log dirty pages directly to a userspace bitmap through GUP and
kmap_atomic() since kernel doesn't have a set_bit_to_user()
helper. This will cause issues for the arch that has virtually tagged
caches. The way to fix is to keep using userspace virtual
address. Fortunately, futex has arch_futex_atomic_op_inuser() which
could be used for setting a bit to user.

Note there're several cases that futex helper can fail e.g a page
fault or the arch that doesn't have the support. For those cases, a
simplified get_user()/put_user() pair protected by a global mutex is
provided as a fallback. The fallback may lead false positive that
userspace may see more dirty pages.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <dvhart@infradead.org>
Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from RFC V2:
- drop GUP and provide get_user()/put_user() fallbacks
- round down log_base
Changes from RFC V1:
- switch to use arch_futex_atomic_op_inuser()
---
 drivers/vhost/vhost.c | 54 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 351af88..7fa05ba 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -31,6 +31,7 @@
 #include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
 #include <linux/nospec.h>
+#include <asm/futex.h>
 
 #include "vhost.h"
 
@@ -43,6 +44,8 @@
 MODULE_PARM_DESC(max_iotlb_entries,
 	"Maximum number of iotlb entries. (default: 2048)");
 
+static DEFINE_MUTEX(vhost_log_lock);
+
 enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
@@ -1692,28 +1695,31 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
 
-/* TODO: This is really inefficient.  We need something like get_user()
- * (instruction directly accesses the data, with an exception table entry
- * returning -EFAULT). See Documentation/x86/exception-tables.txt.
- */
-static int set_bit_to_user(int nr, void __user *addr)
+static int set_bit_to_user(int nr, u32 __user *addr)
 {
-	unsigned long log = (unsigned long)addr;
-	struct page *page;
-	void *base;
-	int bit = nr + (log % PAGE_SIZE) * 8;
+	u32 old;
 	int r;
 
-	r = get_user_pages_fast(log, 1, 1, &page);
-	if (r < 0)
-		return r;
-	BUG_ON(r != 1);
-	base = kmap_atomic(page);
-	set_bit(bit, base);
-	kunmap_atomic(base);
-	set_page_dirty_lock(page);
-	put_page(page);
+	r = arch_futex_atomic_op_inuser(FUTEX_OP_OR, 1 << nr, &old, addr);
+	if (r) {
+		/* Fallback through get_user()/put_user(), this may
+		 * lead false positive that userspace may see more
+		 * dirty pages. A mutex is used to synchronize log
+		 * access between vhost threads.
+		 */
+		mutex_lock(&vhost_log_lock);
+		r = get_user(old, addr);
+		if (r)
+			goto err;
+		r = put_user(old | 1 << nr, addr);
+		if (r)
+			goto err;
+		mutex_unlock(&vhost_log_lock);
+	}
 	return 0;
+err:
+	mutex_unlock(&vhost_log_lock);
+	return r;
 }
 
 static int log_write(void __user *log_base,
@@ -1725,13 +1731,13 @@ static int log_write(void __user *log_base,
 	if (!write_length)
 		return 0;
 	write_length += write_address % VHOST_PAGE_SIZE;
+	log_base = (void __user *)((u64)log_base & ~0x3ULL);
+	write_page += ((u64)log_base & 0x3ULL) * 8;
 	for (;;) {
-		u64 base = (u64)(unsigned long)log_base;
-		u64 log = base + write_page / 8;
-		int bit = write_page % 8;
-		if ((u64)(unsigned long)log != log)
-			return -EFAULT;
-		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
+		u32 __user *log = (u32 __user *)log_base + write_page / 32;
+		int bit = write_page % 32;
+
+		r = set_bit_to_user(bit, log);
 		if (r < 0)
 			return r;
 		if (write_length <= VHOST_PAGE_SIZE)
-- 
1.8.3.1


             reply	other threads:[~2019-05-13  5:27 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-13  5:27 Jason Wang [this message]
2019-05-13 16:42 ` [PATCH net] vhost: don't use kmap() to log dirty pages David Miller
2019-05-13 16:42 ` David Miller
2019-05-14  3:42   ` Jason Wang
2019-05-14  3:42   ` Jason Wang
2019-05-13 21:55 ` kbuild test robot
2019-05-14 22:16 ` Michael S. Tsirkin
2019-05-14 22:16 ` Michael S. Tsirkin
2019-05-15  2:55   ` Jason Wang
2019-05-15  2:55   ` Jason Wang
2019-05-13  5:27 Jason Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1557725265-63525-1-git-send-email-jasowang@redhat.com \
    --to=jasowang@redhat.com \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=aarcange@redhat.com \
    --cc=dvhart@infradead.org \
    --cc=hch@infradead.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.