Linux-HyperV Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH net]: hv_sock: Remove the accept port restriction
@ 2020-01-14  0:52 Sunil Muthuswamy
  2020-01-14 19:51 ` David Miller
  0 siblings, 1 reply; 2+ messages in thread
From: Sunil Muthuswamy @ 2020-01-14  0:52 UTC (permalink / raw)
  To: netdev, David S. Miller, Dexuan Cui
  Cc: Stephen Hemminger, Sasha Levin, linux-hyperv, linux-kernel

Currently, hv_sock restricts the port the guest socket can accept
connections on. hv_sock divides the socket port namespace into two parts
for server side (listening socket), 0-0x7FFFFFFF & 0x80000000-0xFFFFFFFF
(there are no restrictions on client port namespace). The first part
(0-0x7FFFFFFF) is reserved for sockets where connections can be accepted.
The second part (0x80000000-0xFFFFFFFF) is reserved for allocating ports
for the peer (host) socket, once a connection is accepted.
This reservation of the port namespace is specific to hv_sock and not
known by the generic vsock library (ex: af_vsock). This is problematic
because auto-binds/ephemeral ports are handled by the generic vsock
library and it has no knowledge of this port reservation and could
allocate a port that is not compatible with hv_sock (and legitimately so).
The issue hasn't surfaced so far because the auto-bind code of vsock
(__vsock_bind_stream) prior to the change 'VSOCK: bind to random port for
VMADDR_PORT_ANY' would start walking up from LAST_RESERVED_PORT (1023) and
start assigning ports. That will take a large number of iterations to hit
0x7FFFFFFF. But, after the above change to randomize port selection, the
issue has started coming up more frequently.
There has really been no good reason to have this port reservation logic
in hv_sock from the get go. Reserving a local port for peer ports is not
how things are handled generally. Peer ports should reflect the peer port.
This fixes the issue by lifting the port reservation, and also returns the
right peer port. Since the code converts the GUID to the peer port (by
using the first 4 bytes), there is a possibility of conflicts, but that
seems like a reasonable risk to take, given this is limited to vsock and
that only applies to all local sockets.

Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
---
 net/vmw_vsock/hyperv_transport.c | 65 +++-----------------------------
 1 file changed, 6 insertions(+), 59 deletions(-)

diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index b3bdae74c243..3492c021925f 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -138,28 +138,15 @@ struct hvsock {
  ****************************************************************************
  * The only valid Service GUIDs, from the perspectives of both the host and *
  * Linux VM, that can be connected by the other end, must conform to this   *
- * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in    *
- * this range [0, 0x7FFFFFFF].                                              *
+ * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
  ****************************************************************************
  *
  * When we write apps on the host to connect(), the GUID ServiceID is used.
  * When we write apps in Linux VM to connect(), we only need to specify the
  * port and the driver will form the GUID and use that to request the host.
  *
- * From the perspective of Linux VM:
- * 1. the local ephemeral port (i.e. the local auto-bound port when we call
- * connect() without explicit bind()) is generated by __vsock_bind_stream(),
- * and the range is [1024, 0xFFFFFFFF).
- * 2. the remote ephemeral port (i.e. the auto-generated remote port for
- * a connect request initiated by the host's connect()) is generated by
- * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
  */
 
-#define MAX_LISTEN_PORT			((u32)0x7FFFFFFF)
-#define MAX_VM_LISTEN_PORT		MAX_LISTEN_PORT
-#define MAX_HOST_LISTEN_PORT		MAX_LISTEN_PORT
-#define MIN_HOST_EPHEMERAL_PORT		(MAX_HOST_LISTEN_PORT + 1)
-
 /* 00000000-facb-11e6-bd58-64006a7986d3 */
 static const guid_t srv_id_template =
 	GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
@@ -184,34 +171,6 @@ static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id)
 	vsock_addr_init(addr, VMADDR_CID_ANY, port);
 }
 
-static void hvs_remote_addr_init(struct sockaddr_vm *remote,
-				 struct sockaddr_vm *local)
-{
-	static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
-	struct sock *sk;
-
-	/* Remote peer is always the host */
-	vsock_addr_init(remote, VMADDR_CID_HOST, VMADDR_PORT_ANY);
-
-	while (1) {
-		/* Wrap around ? */
-		if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
-		    host_ephemeral_port == VMADDR_PORT_ANY)
-			host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
-
-		remote->svm_port = host_ephemeral_port++;
-
-		sk = vsock_find_connected_socket(remote, local);
-		if (!sk) {
-			/* Found an available ephemeral port */
-			return;
-		}
-
-		/* Release refcnt got in vsock_find_connected_socket */
-		sock_put(sk);
-	}
-}
-
 static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
 {
 	set_channel_pending_send_size(chan,
@@ -341,12 +300,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 	if_type = &chan->offermsg.offer.if_type;
 	if_instance = &chan->offermsg.offer.if_instance;
 	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
-
-	/* The host or the VM should only listen on a port in
-	 * [0, MAX_LISTEN_PORT]
-	 */
-	if (!is_valid_srv_id(if_type) ||
-	    get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
+	if (!is_valid_srv_id(if_type))
 		return;
 
 	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
@@ -371,8 +325,11 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 		vnew = vsock_sk(new);
 
 		hvs_addr_init(&vnew->local_addr, if_type);
-		hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
 
+		/* Remote peer is always the host */
+		vsock_addr_init(&vnew->remote_addr,
+				VMADDR_CID_HOST, VMADDR_PORT_ANY);
+		vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance);
 		ret = vsock_assign_transport(vnew, vsock_sk(sk));
 		/* Transport assigned (looking at remote_addr) must be the
 		 * same where we received the request.
@@ -766,16 +723,6 @@ static bool hvs_stream_is_active(struct vsock_sock *vsk)
 
 static bool hvs_stream_allow(u32 cid, u32 port)
 {
-	/* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
-	 * reserved as ephemeral ports, which are used as the host's ports
-	 * when the host initiates connections.
-	 *
-	 * Perform this check in the guest so an immediate error is produced
-	 * instead of a timeout.
-	 */
-	if (port > MAX_HOST_LISTEN_PORT)
-		return false;
-
 	if (cid == VMADDR_CID_HOST)
 		return true;
 
-- 
2.17.1

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH net]: hv_sock: Remove the accept port restriction
  2020-01-14  0:52 [PATCH net]: hv_sock: Remove the accept port restriction Sunil Muthuswamy
@ 2020-01-14 19:51 ` David Miller
  0 siblings, 0 replies; 2+ messages in thread
From: David Miller @ 2020-01-14 19:51 UTC (permalink / raw)
  To: sunilmut; +Cc: netdev, decui, sthemmin, sashal, linux-hyperv, linux-kernel

From: Sunil Muthuswamy <sunilmut@microsoft.com>
Date: Tue, 14 Jan 2020 00:52:14 +0000

> Currently, hv_sock restricts the port the guest socket can accept
> connections on. hv_sock divides the socket port namespace into two parts
> for server side (listening socket), 0-0x7FFFFFFF & 0x80000000-0xFFFFFFFF
> (there are no restrictions on client port namespace). The first part
> (0-0x7FFFFFFF) is reserved for sockets where connections can be accepted.
> The second part (0x80000000-0xFFFFFFFF) is reserved for allocating ports
> for the peer (host) socket, once a connection is accepted.
> This reservation of the port namespace is specific to hv_sock and not
> known by the generic vsock library (ex: af_vsock). This is problematic
> because auto-binds/ephemeral ports are handled by the generic vsock
> library and it has no knowledge of this port reservation and could
> allocate a port that is not compatible with hv_sock (and legitimately so).
> The issue hasn't surfaced so far because the auto-bind code of vsock
> (__vsock_bind_stream) prior to the change 'VSOCK: bind to random port for
> VMADDR_PORT_ANY' would start walking up from LAST_RESERVED_PORT (1023) and
> start assigning ports. That will take a large number of iterations to hit
> 0x7FFFFFFF. But, after the above change to randomize port selection, the
> issue has started coming up more frequently.
> There has really been no good reason to have this port reservation logic
> in hv_sock from the get go. Reserving a local port for peer ports is not
> how things are handled generally. Peer ports should reflect the peer port.
> This fixes the issue by lifting the port reservation, and also returns the
> right peer port. Since the code converts the GUID to the peer port (by
> using the first 4 bytes), there is a possibility of conflicts, but that
> seems like a reasonable risk to take, given this is limited to vsock and
> that only applies to all local sockets.
> 
> Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>

Applied.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-14  0:52 [PATCH net]: hv_sock: Remove the accept port restriction Sunil Muthuswamy
2020-01-14 19:51 ` David Miller

Linux-HyperV Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-hyperv/0 linux-hyperv/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-hyperv linux-hyperv/ https://lore.kernel.org/linux-hyperv \
		linux-hyperv@vger.kernel.org
	public-inbox-index linux-hyperv

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-hyperv


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git