LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Jintack Lim <jintack@cs.columbia.edu>,
	Jason Wang <jasowang@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.14 07/68] vhost: log dirty page correctly
Date: Tue, 29 Jan 2019 12:35:29 +0100
Message-ID: <20190129113132.264836754@linuxfoundation.org> (raw)
In-Reply-To: <20190129113131.751891514@linuxfoundation.org>

4.14-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Jason Wang <jasowang@redhat.com>

[ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ]

Vhost dirty page logging API is designed to sync through GPA. But we
try to log GIOVA when device IOTLB is enabled. This is wrong and may
lead to missing data after migration.

To solve this issue, when logging with device IOTLB enabled, we will:

1) reuse the device IOTLB translation result of GIOVA->HVA mapping to
   get HVA, for writable descriptor, get HVA through iovec. For used
   ring update, translate its GIOVA to HVA
2) traverse the GPA->HVA mapping to get the possible GPA and log
   through GPA. Pay attention this reverse mapping is not guaranteed
   to be unique, so we should log each possible GPA in this case.

This fix the failure of scp to guest during migration. In -next, we
will probably support passing GIOVA->GPA instead of GIOVA->HVA.

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Cc: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vhost/net.c   |    3 +
 drivers/vhost/vhost.c |   97 ++++++++++++++++++++++++++++++++++++++++++--------
 drivers/vhost/vhost.h |    3 +
 3 files changed, 87 insertions(+), 16 deletions(-)

--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -851,7 +851,8 @@ static void handle_rx(struct vhost_net *
 		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
 					    headcount);
 		if (unlikely(vq_log))
-			vhost_log_write(vq, vq_log, log, vhost_len);
+			vhost_log_write(vq, vq_log, log, vhost_len,
+					vq->iov, in);
 		total_len += vhost_len;
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
 			vhost_poll_queue(&vq->poll);
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1726,13 +1726,87 @@ static int log_write(void __user *log_ba
 	return r;
 }
 
+static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
+{
+	struct vhost_umem *umem = vq->umem;
+	struct vhost_umem_node *u;
+	u64 start, end, l, min;
+	int r;
+	bool hit = false;
+
+	while (len) {
+		min = len;
+		/* More than one GPAs can be mapped into a single HVA. So
+		 * iterate all possible umems here to be safe.
+		 */
+		list_for_each_entry(u, &umem->umem_list, link) {
+			if (u->userspace_addr > hva - 1 + len ||
+			    u->userspace_addr - 1 + u->size < hva)
+				continue;
+			start = max(u->userspace_addr, hva);
+			end = min(u->userspace_addr - 1 + u->size,
+				  hva - 1 + len);
+			l = end - start + 1;
+			r = log_write(vq->log_base,
+				      u->start + start - u->userspace_addr,
+				      l);
+			if (r < 0)
+				return r;
+			hit = true;
+			min = min(l, min);
+		}
+
+		if (!hit)
+			return -EFAULT;
+
+		len -= min;
+		hva += min;
+	}
+
+	return 0;
+}
+
+static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
+{
+	struct iovec iov[64];
+	int i, ret;
+
+	if (!vq->iotlb)
+		return log_write(vq->log_base, vq->log_addr + used_offset, len);
+
+	ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
+			     len, iov, 64, VHOST_ACCESS_WO);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ret; i++) {
+		ret = log_write_hva(vq,	(uintptr_t)iov[i].iov_base,
+				    iov[i].iov_len);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-		    unsigned int log_num, u64 len)
+		    unsigned int log_num, u64 len, struct iovec *iov, int count)
 {
 	int i, r;
 
 	/* Make sure data written is seen before log. */
 	smp_wmb();
+
+	if (vq->iotlb) {
+		for (i = 0; i < count; i++) {
+			r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+					  iov[i].iov_len);
+			if (r < 0)
+				return r;
+		}
+		return 0;
+	}
+
 	for (i = 0; i < log_num; ++i) {
 		u64 l = min(log[i].len, len);
 		r = log_write(vq->log_base, log[i].addr, l);
@@ -1762,9 +1836,8 @@ static int vhost_update_used_flags(struc
 		smp_wmb();
 		/* Log used flag write. */
 		used = &vq->used->flags;
-		log_write(vq->log_base, vq->log_addr +
-			  (used - (void __user *)vq->used),
-			  sizeof vq->used->flags);
+		log_used(vq, (used - (void __user *)vq->used),
+			 sizeof vq->used->flags);
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
@@ -1782,9 +1855,8 @@ static int vhost_update_avail_event(stru
 		smp_wmb();
 		/* Log avail event write */
 		used = vhost_avail_event(vq);
-		log_write(vq->log_base, vq->log_addr +
-			  (used - (void __user *)vq->used),
-			  sizeof *vhost_avail_event(vq));
+		log_used(vq, (used - (void __user *)vq->used),
+			 sizeof *vhost_avail_event(vq));
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
@@ -2189,10 +2261,8 @@ static int __vhost_add_used_n(struct vho
 		/* Make sure data is seen before log. */
 		smp_wmb();
 		/* Log used ring entry write. */
-		log_write(vq->log_base,
-			  vq->log_addr +
-			   ((void __user *)used - (void __user *)vq->used),
-			  count * sizeof *used);
+		log_used(vq, ((void __user *)used - (void __user *)vq->used),
+			 count * sizeof *used);
 	}
 	old = vq->last_used_idx;
 	new = (vq->last_used_idx += count);
@@ -2234,9 +2304,8 @@ int vhost_add_used_n(struct vhost_virtqu
 		/* Make sure used idx is seen before log. */
 		smp_wmb();
 		/* Log used index update. */
-		log_write(vq->log_base,
-			  vq->log_addr + offsetof(struct vring_used, idx),
-			  sizeof vq->used->idx);
+		log_used(vq, offsetof(struct vring_used, idx),
+			 sizeof vq->used->idx);
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -208,7 +208,8 @@ bool vhost_vq_avail_empty(struct vhost_d
 bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-		    unsigned int log_num, u64 len);
+		    unsigned int log_num, u64 len,
+		    struct iovec *iov, int count);
 int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
 
 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);



  parent reply index

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-29 11:35 [PATCH 4.14 00/68] 4.14.97-stable review Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 01/68] amd-xgbe: Fix mdio access for non-zero ports and clause 45 PHYs Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 02/68] net: bridge: Fix ethernet header pointer before check skb forwardable Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 03/68] net: Fix usage of pskb_trim_rcsum Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 04/68] net: phy: mdio_bus: add missing device_del() in mdiobus_register() error handling Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 05/68] net_sched: refetch skb protocol for each filter Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 06/68] openvswitch: Avoid OOB read when parsing flow nlattrs Greg Kroah-Hartman
2019-01-29 11:35 ` Greg Kroah-Hartman [this message]
2019-01-29 11:35 ` [PATCH 4.14 08/68] net: ipv4: Fix memory leak in network namespace dismantle Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 09/68] tcp: allow MSG_ZEROCOPY transmission also in CLOSE_WAIT state Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 10/68] ipfrag: really prevent allocation on netns exit Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 11/68] mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 12/68] mei: me: add denverton innovation engine device IDs Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 13/68] USB: serial: simple: add Motorola Tetra TPG2200 device id Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 14/68] USB: serial: pl2303: add new PID to support PL2303TB Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 15/68] ASoC: atom: fix a missing check of snd_pcm_lib_malloc_pages Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 16/68] ASoC: rt5514-spi: Fix potential NULL pointer dereference Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 17/68] ALSA: hda - Add mute LED support for HP ProBook 470 G5 Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 18/68] ARCv2: lib: memeset: fix doing prefetchw outside of buffer Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 19/68] ARC: adjust memblock_reserve of kernel memory Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 20/68] ARC: perf: map generic branches to correct hardware condition Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 21/68] s390/early: improve machine detection Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 22/68] s390/smp: fix CPU hotplug deadlock with CPU rescan Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 23/68] char/mwave: fix potential Spectre v1 vulnerability Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 24/68] staging: rtl8188eu: Add device code for D-Link DWA-121 rev B1 Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 25/68] tty: Handle problem if line discipline does not have receive_buf Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 26/68] uart: Fix crash in uart_write and uart_put_char Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 27/68] tty/n_hdlc: fix __might_sleep warning Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 28/68] hv_balloon: avoid touching uninitialized struct page during tail onlining Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 29/68] Drivers: hv: vmbus: Check for ring when getting debug info Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 30/68] CIFS: Fix possible hang during async MTU reads and writes Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 31/68] CIFS: Fix credits calculations for reads with errors Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 32/68] CIFS: Fix credit calculation for encrypted " Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 33/68] CIFS: Do not reconnect TCP session in add_credits() Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 34/68] Input: xpad - add support for SteelSeries Stratus Duo Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 35/68] compiler.h: enable builtin overflow checkers and add fallback code Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 36/68] Input: uinput - fix undefined behavior in uinput_validate_absinfo() Greg Kroah-Hartman
2019-01-29 11:35 ` [PATCH 4.14 37/68] acpi/nfit: Block function zero DSMs Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 38/68] acpi/nfit: Fix command-supported detection Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 39/68] dm thin: fix passdown_double_checking_shared_status() Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 40/68] dm crypt: fix parsing of extended IV arguments Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 41/68] KVM: x86: Fix single-step debugging Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 42/68] x86/pkeys: Properly copy pkey state at fork() Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 43/68] x86/selftests/pkeys: Fork() to check for state being preserved Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 44/68] x86/kaslr: Fix incorrect i8254 outb() parameters Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 45/68] posix-cpu-timers: Unbreak timer rearming Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 46/68] irqchip/gic-v3-its: Align PCI Multi-MSI allocation on their size Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 47/68] can: dev: __can_get_echo_skb(): fix bogous check for non-existing skb by removing it Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 48/68] can: bcm: check timer values before ktime conversion Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 49/68] vt: invoke notifier on screen size change Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 50/68] perf unwind: Unwind with libdw doesnt take symfs into account Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 51/68] perf unwind: Take pgoff into account when reporting elf to libdwfl Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 52/68] Revert "seccomp: add a selftest for get_metadata" Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 53/68] net: stmmac: Use correct values in TQS/RQS fields Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 54/68] KVM: x86: Fix a 4.14 backport regression related to userspace/guest FPU Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 55/68] s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 56/68] nvmet-rdma: Add unlikely for response allocated check Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 57/68] nvmet-rdma: fix null dereference under heavy load Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 58/68] usb: dwc3: gadget: Clear req->needs_extra_trb flag on cleanup Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 59/68] xhci: Fix leaking USB3 shared_hcd at xhci removal Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 60/68] ptp_kvm: probe for kvm guest availability Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 61/68] x86/pvclock: add setter for pvclock_pvti_cpu0_va Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 62/68] x86/xen/time: set pvclock flags on xen_time_init() Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 63/68] x86/xen/time: setup vcpu 0 time info page Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 64/68] x86/xen/time: Output xen sched_clock time from 0 Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 65/68] xen: Fix x86 sched_clock() interface for xen Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 66/68] f2fs: read page index before freeing Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 67/68] btrfs: fix error handling in btrfs_dev_replace_start Greg Kroah-Hartman
2019-01-29 11:36 ` [PATCH 4.14 68/68] btrfs: dev-replace: go back to suspended state if target device is missing Greg Kroah-Hartman
2019-01-30  2:06 ` [PATCH 4.14 00/68] 4.14.97-stable review shuah
2019-01-30 12:51 ` Jon Hunter
2019-01-31  7:51   ` Greg Kroah-Hartman
2019-01-30 12:55 ` Naresh Kamboju
2019-01-30 18:49   ` Amir Goldstein
2019-01-30 19:32     ` Greg Kroah-Hartman
2019-02-04 10:12       ` Amir Goldstein
2019-02-04 10:35         ` Greg Kroah-Hartman
2019-01-30 22:13 ` Guenter Roeck

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190129113132.264836754@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=jasowang@redhat.com \
    --cc=jintack@cs.columbia.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox