linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Sunil Muthuswamy <sunilmut@microsoft.com>,
	Dexuan Cui <decui@microsoft.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.19 01/32] hv_sock: Add support for delayed close
Date: Fri,  2 Aug 2019 11:39:35 +0200	[thread overview]
Message-ID: <20190802092102.084353778@linuxfoundation.org> (raw)
In-Reply-To: <20190802092101.913646560@linuxfoundation.org>

From: Sunil Muthuswamy <sunilmut@microsoft.com>

commit a9eeb998c28d5506616426bd3a216bd5735a18b8 upstream.

Currently, hvsock does not implement any delayed or background close
logic. Whenever the hvsock socket is closed, a FIN is sent to the peer, and
the last reference to the socket is dropped, which leads to a call to
.destruct where the socket can hang indefinitely waiting for the peer to
close its side. This can cause the user application to hang in the close()
call.

This change implements a proper STREAM (TCP) closing handshake mechanism by
sending the FIN to the peer and then waiting for the peer's FIN to arrive
for a given timeout. On timeout, it will try to terminate the connection
(i.e. a RST). This is in line with other socket providers such as virtio.

This change does not address the hang in the vmbus_hvsock_device_unregister
where it waits indefinitely for the host to rescind the channel. That
should be taken up as a separate fix.

Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 net/vmw_vsock/hyperv_transport.c |  110 +++++++++++++++++++++++++++------------
 1 file changed, 78 insertions(+), 32 deletions(-)

--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -35,6 +35,9 @@
 /* The MTU is 16KB per the host side's design */
 #define HVS_MTU_SIZE		(1024 * 16)
 
+/* How long to wait for graceful shutdown of a connection */
+#define HVS_CLOSE_TIMEOUT (8 * HZ)
+
 struct vmpipe_proto_header {
 	u32 pkt_type;
 	u32 data_size;
@@ -290,19 +293,32 @@ static void hvs_channel_cb(void *ctx)
 		sk->sk_write_space(sk);
 }
 
-static void hvs_close_connection(struct vmbus_channel *chan)
+static void hvs_do_close_lock_held(struct vsock_sock *vsk,
+				   bool cancel_timeout)
 {
-	struct sock *sk = get_per_channel_state(chan);
-	struct vsock_sock *vsk = vsock_sk(sk);
-
-	lock_sock(sk);
+	struct sock *sk = sk_vsock(vsk);
 
-	sk->sk_state = TCP_CLOSE;
 	sock_set_flag(sk, SOCK_DONE);
-	vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
-
+	vsk->peer_shutdown = SHUTDOWN_MASK;
+	if (vsock_stream_has_data(vsk) <= 0)
+		sk->sk_state = TCP_CLOSING;
 	sk->sk_state_change(sk);
+	if (vsk->close_work_scheduled &&
+	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
+		vsk->close_work_scheduled = false;
+		vsock_remove_sock(vsk);
+
+		/* Release the reference taken while scheduling the timeout */
+		sock_put(sk);
+	}
+}
+
+static void hvs_close_connection(struct vmbus_channel *chan)
+{
+	struct sock *sk = get_per_channel_state(chan);
 
+	lock_sock(sk);
+	hvs_do_close_lock_held(vsock_sk(sk), true);
 	release_sock(sk);
 }
 
@@ -445,50 +461,80 @@ static int hvs_connect(struct vsock_sock
 	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
 }
 
+static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
+{
+	struct vmpipe_proto_header hdr;
+
+	if (hvs->fin_sent || !hvs->chan)
+		return;
+
+	/* It can't fail: see hvs_channel_writable_bytes(). */
+	(void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
+	hvs->fin_sent = true;
+}
+
 static int hvs_shutdown(struct vsock_sock *vsk, int mode)
 {
 	struct sock *sk = sk_vsock(vsk);
-	struct vmpipe_proto_header hdr;
-	struct hvs_send_buf *send_buf;
-	struct hvsock *hvs;
 
 	if (!(mode & SEND_SHUTDOWN))
 		return 0;
 
 	lock_sock(sk);
-
-	hvs = vsk->trans;
-	if (hvs->fin_sent)
-		goto out;
-
-	send_buf = (struct hvs_send_buf *)&hdr;
-
-	/* It can't fail: see hvs_channel_writable_bytes(). */
-	(void)hvs_send_data(hvs->chan, send_buf, 0);
-
-	hvs->fin_sent = true;
-out:
+	hvs_shutdown_lock_held(vsk->trans, mode);
 	release_sock(sk);
 	return 0;
 }
 
-static void hvs_release(struct vsock_sock *vsk)
+static void hvs_close_timeout(struct work_struct *work)
 {
+	struct vsock_sock *vsk =
+		container_of(work, struct vsock_sock, close_work.work);
 	struct sock *sk = sk_vsock(vsk);
-	struct hvsock *hvs = vsk->trans;
-	struct vmbus_channel *chan;
 
+	sock_hold(sk);
 	lock_sock(sk);
+	if (!sock_flag(sk, SOCK_DONE))
+		hvs_do_close_lock_held(vsk, false);
 
-	sk->sk_state = TCP_CLOSING;
-	vsock_remove_sock(vsk);
-
+	vsk->close_work_scheduled = false;
 	release_sock(sk);
+	sock_put(sk);
+}
 
-	chan = hvs->chan;
-	if (chan)
-		hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+/* Returns true, if it is safe to remove socket; false otherwise */
+static bool hvs_close_lock_held(struct vsock_sock *vsk)
+{
+	struct sock *sk = sk_vsock(vsk);
+
+	if (!(sk->sk_state == TCP_ESTABLISHED ||
+	      sk->sk_state == TCP_CLOSING))
+		return true;
+
+	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
+		hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
+
+	if (sock_flag(sk, SOCK_DONE))
+		return true;
+
+	/* This reference will be dropped by the delayed close routine */
+	sock_hold(sk);
+	INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
+	vsk->close_work_scheduled = true;
+	schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
+	return false;
+}
 
+static void hvs_release(struct vsock_sock *vsk)
+{
+	struct sock *sk = sk_vsock(vsk);
+	bool remove_sock;
+
+	lock_sock(sk);
+	remove_sock = hvs_close_lock_held(vsk);
+	release_sock(sk);
+	if (remove_sock)
+		vsock_remove_sock(vsk);
 }
 
 static void hvs_destruct(struct vsock_sock *vsk)



  reply	other threads:[~2019-08-02  9:59 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-02  9:39 [PATCH 4.19 00/32] 4.19.64-stable review Greg Kroah-Hartman
2019-08-02  9:39 ` Greg Kroah-Hartman [this message]
2019-08-02  9:39 ` [PATCH 4.19 02/32] vsock: correct removal of socket from the list Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 03/32] NFS: Fix dentry revalidation on NFSv4 lookup Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 04/32] NFS: Refactor nfs_lookup_revalidate() Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 05/32] NFSv4: Fix lookup revalidate of regular files Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 06/32] usb: dwc2: Disable all EPs on disconnect Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 07/32] usb: dwc2: Fix disable " Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 08/32] arm64: compat: Provide definition for COMPAT_SIGMINSTKSZ Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 09/32] binder: fix possible UAF when freeing buffer Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 10/32] ISDN: hfcsusb: checking idx of ep configuration Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 11/32] media: au0828: fix null dereference in error path Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 12/32] ath10k: Change the warning message string Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 13/32] media: cpia2_usb: first wake up, then free in disconnect Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 14/32] media: pvrusb2: use a different format for warnings Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 15/32] NFS: Cleanup if nfs_match_client is interrupted Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 16/32] media: radio-raremono: change devm_k*alloc to k*alloc Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 17/32] iommu/vt-d: Dont queue_iova() if there is no flush queue Greg Kroah-Hartman
2019-08-03 21:34   ` Pavel Machek
2019-08-06 22:47     ` Dmitry Safonov
2019-08-06 23:16       ` Dmitry Safonov
2019-08-02  9:39 ` [PATCH 4.19 18/32] iommu/iova: Fix compilation error with !CONFIG_IOMMU_IOVA Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 19/32] Bluetooth: hci_uart: check for missing tty operations Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 20/32] vhost: introduce vhost_exceeds_weight() Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 21/32] vhost_net: fix possible infinite loop Greg Kroah-Hartman
2019-08-03 21:49   ` Pavel Machek
2019-08-05  4:17     ` Jason Wang
2019-08-02  9:39 ` [PATCH 4.19 22/32] vhost: vsock: add weight support Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 23/32] vhost: scsi: " Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 24/32] sched/fair: Dont free p->numa_faults with concurrent readers Greg Kroah-Hartman
2019-08-02  9:39 ` [PATCH 4.19 25/32] sched/fair: Use RCU accessors consistently for ->numa_group Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 26/32] /proc/<pid>/cmdline: remove all the special cases Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 27/32] /proc/<pid>/cmdline: add back the setproctitle() special case Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 28/32] drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 29/32] Fix allyesconfig output Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 30/32] ceph: hold i_ceph_lock when removing caps for freeing inode Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 31/32] block, scsi: Change the preempt-only flag into a counter Greg Kroah-Hartman
2019-08-02  9:40 ` [PATCH 4.19 32/32] scsi: core: Avoid that a kernel warning appears during system resume Greg Kroah-Hartman
2019-08-02 23:22 ` [PATCH 4.19 00/32] 4.19.64-stable review shuah
2019-08-03  5:46 ` Naresh Kamboju
2019-08-03  9:58 ` Pavel Machek
2019-08-03 10:34   ` Greg Kroah-Hartman
2019-08-03 15:59 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190802092102.084353778@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=decui@microsoft.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=sunilmut@microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).