All of lore.kernel.org
 help / color / mirror / Atom feed
From: Akhmat Karakotov <hmukos@yandex-team.ru>
To: hmukos@yandex-team.ru
Cc: eric.dumazet@gmail.com, mitradir@yandex-team.ru,
	netdev@vger.kernel.org, tom@herbertland.com, zeil@yandex-team.ru
Subject: [RFC PATCH v2 net-next 2/4] txhash: Add socket option to control TX hash rethink behavior
Date: Fri, 12 Nov 2021 21:19:37 +0300	[thread overview]
Message-ID: <20211112181939.11329-3-hmukos@yandex-team.ru> (raw)
In-Reply-To: <20211112181939.11329-1-hmukos@yandex-team.ru>

Add the SO_TXREHASH socket option to control hash rethink behavior per
socket. A socket starts in the default mode (SOCK_TXREHASH_DEFAULT), in
which rehashing is disabled after initialization and the sysctl setting is
applied when the socket enters the listen state. An explicit setsockopt()
call overrides this default behavior.

Signed-off-by: Akhmat Karakotov <hmukos@yandex-team.ru>
---
 arch/alpha/include/uapi/asm/socket.h  |  2 ++
 arch/mips/include/uapi/asm/socket.h   |  2 ++
 arch/parisc/include/uapi/asm/socket.h |  2 ++
 arch/sparc/include/uapi/asm/socket.h  |  2 ++
 include/net/sock.h                    | 12 +++---------
 include/uapi/asm-generic/socket.h     |  2 ++
 include/uapi/linux/socket.h           |  1 +
 net/core/sock.c                       | 13 +++++++++++++
 net/ipv4/inet_connection_sock.c       |  3 +++
 9 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 1dd9baf4a6c2..e6b3f38f8c0e 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -131,6 +131,8 @@
 
 #define SO_BUF_LOCK		72
 
+#define SO_TXREHASH		73
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 1eaf6a1ca561..2c8085ecde0a 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -142,6 +142,8 @@
 
 #define SO_BUF_LOCK		72
 
+#define SO_TXREHASH		73
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 8baaad52d799..8bb78ed36e97 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -123,6 +123,8 @@
 
 #define SO_BUF_LOCK		0x4046
 
+#define SO_TXREHASH     	0x4047
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index e80ee8641ac3..cd43a690fbac 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -124,6 +124,8 @@
 
 #define SO_BUF_LOCK              0x0051
 
+#define SO_TXREHASH              0x0052
+
 #if !defined(__KERNEL__)
 
 
diff --git a/include/net/sock.h b/include/net/sock.h
index cc83140d6502..26c0efd7aa4b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -313,6 +313,7 @@ struct bpf_local_storage;
   *	@sk_rcvtimeo: %SO_RCVTIMEO setting
   *	@sk_sndtimeo: %SO_SNDTIMEO setting
   *	@sk_txhash: computed flow hash for use on transmit
+  *	@sk_txrehash: enable TX hash rethink
   *	@sk_filter: socket filtering instructions
   *	@sk_timer: sock cleanup timer
   *	@sk_stamp: time stamp of last packet received
@@ -462,6 +463,7 @@ struct sock {
 	unsigned int		sk_gso_max_size;
 	gfp_t			sk_allocation;
 	__u32			sk_txhash;
+	u8			sk_txrehash;
 
 	/*
 	 * Because of non atomicity rules, all
@@ -1954,18 +1956,10 @@ static inline void sk_set_txhash(struct sock *sk)
 
 static inline bool sk_rethink_txhash(struct sock *sk)
 {
-	u8 rehash;
-
-	if (!sk->sk_txhash)
-		return false;
-
-	rehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
-
-	if (rehash) {
+	if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
 		sk_set_txhash(sk);
 		return true;
 	}
-
 	return false;
 }
 
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 1f0a2b4864e4..6c17e477ec9f 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -126,6 +126,8 @@
 
 #define SO_BUF_LOCK		72
 
+#define SO_TXREHASH		73
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 0accd6102ece..75fab2ada8cf 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage {
 
 #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
 
+#define SOCK_TXREHASH_DEFAULT	-1
 #define SOCK_TXREHASH_DISABLED	0
 #define SOCK_TXREHASH_ENABLED	1
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..ca349ca4c31d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1367,6 +1367,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 					  ~SOCK_BUF_LOCK_MASK);
 		break;
 
+	case SO_TXREHASH:
+		if (val < -1 || val > 1) {
+			ret = -EINVAL;
+			break;
+		}
+		sk->sk_txrehash = val;
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1733,6 +1741,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
 		break;
 
+	case SO_TXREHASH:
+		v.val = sk->sk_txrehash;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -3165,6 +3177,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_pacing_rate = ~0UL;
 	WRITE_ONCE(sk->sk_pacing_shift, 10);
 	sk->sk_incoming_cpu = -1;
+	sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
 
 	sk_rx_queue_clear(sk);
 	/*
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f25d02ad4a8a..0d477c816309 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1046,6 +1046,9 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
 
+	if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
+		sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
+
 	/* There is race window here: we announce ourselves listening,
 	 * but this transition is still not validated by get_port().
 	 * It is OK, because this socket enters to hash table only
-- 
2.17.1


  parent reply	other threads:[~2021-11-12 18:20 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-25 20:35 [RFC PATCH net-next 0/4] txhash: Make hash rethink configurable Akhmat Karakotov
2021-10-25 20:35 ` [RFC PATCH net-next 1/4] txhash: Make rethinking txhash behavior configurable via sysctl Akhmat Karakotov
2021-10-25 20:54   ` Eric Dumazet
2021-11-05  6:13   ` [txhash] d7fa06e1ae: WARNING:at_net/sysctl_net.c:#register_net_sysctl kernel test robot
2021-11-05  6:13     ` kernel test robot
2021-11-05  6:13     ` [LTP] " kernel test robot
2021-10-25 20:35 ` [RFC PATCH net-next 2/4] txhash: Add socket option to control TX hash rethink behavior Akhmat Karakotov
2021-10-25 21:05   ` Eric Dumazet
2021-10-29 10:01     ` Akhmat Karakotov
     [not found]       ` <D7FFC160-1DC3-42A5-BE0E-15FD81BEB1F3@yandex-team.ru>
2021-11-08 12:48         ` Akhmat Karakotov
2021-11-08 19:39           ` Eric Dumazet
2021-10-25 20:35 ` [RFC PATCH net-next 3/4] bpf: Add SO_TXREHASH setsockopt Akhmat Karakotov
2021-10-25 20:35 ` [RFC PATCH net-next 4/4] tcp: change SYN ACK retransmit behaviour to account for rehash Akhmat Karakotov
2021-11-12 18:19 ` [RFC PATCH v2 net-next 0/4] txhash: Make hash rethink configurable Akhmat Karakotov
2021-11-12 18:19   ` [RFC PATCH v2 net-next 1/4] txhash: Make rethinking txhash behavior configurable via sysctl Akhmat Karakotov
2021-11-12 18:19   ` Akhmat Karakotov [this message]
2021-11-12 18:19   ` [RFC PATCH v2 net-next 3/4] bpf: Add SO_TXREHASH setsockopt Akhmat Karakotov
2021-11-12 18:19   ` [RFC PATCH v2 net-next 4/4] tcp: change SYN ACK retransmit behaviour to account for rehash Akhmat Karakotov
2021-11-23 13:20   ` [RFC PATCH v2 net-next 0/4] txhash: Make hash rethink configurable Akhmat Karakotov
2021-11-30  9:58     ` Akhmat Karakotov
2021-12-01 16:49       ` Eric Dumazet
2021-12-02 16:40         ` Akhmat Karakotov
2021-12-02 16:40           ` [RFC PATCH v2 net-next 1/4] txhash: Make rethinking txhash behavior configurable via sysctl Akhmat Karakotov
2021-12-02 16:40           ` [RFC PATCH v2 net-next 2/4] txhash: Add socket option to control TX hash rethink behavior Akhmat Karakotov
2021-12-02 17:18             ` Eric Dumazet
2021-12-02 21:59             ` kernel test robot
2021-12-02 21:59               ` kernel test robot
2021-12-02 16:40           ` [RFC PATCH v2 net-next 3/4] bpf: Add SO_TXREHASH setsockopt Akhmat Karakotov
2021-12-02 16:40           ` [RFC PATCH v2 net-next 4/4] tcp: change SYN ACK retransmit behaviour to account for rehash Akhmat Karakotov
2021-12-02 17:24             ` Eric Dumazet
2021-12-06 19:11 ` [RFC PATCH v3 net-next 0/4] txhash: Make hash rethink configurable Akhmat Karakotov
2021-12-06 19:11   ` [RFC PATCH v3 net-next 1/4] txhash: Make rethinking txhash behavior configurable via sysctl Akhmat Karakotov
2021-12-06 19:47     ` Eric Dumazet
2021-12-06 19:11   ` [RFC PATCH v3 net-next 2/4] txhash: Add socket option to control TX hash rethink behavior Akhmat Karakotov
2021-12-06 19:48     ` Eric Dumazet
2021-12-06 19:11   ` [RFC PATCH v3 net-next 3/4] bpf: Add SO_TXREHASH setsockopt Akhmat Karakotov
2021-12-06 19:48     ` Eric Dumazet
2021-12-06 19:11   ` [RFC PATCH v3 net-next 4/4] tcp: change SYN ACK retransmit behaviour to account for rehash Akhmat Karakotov
2021-12-06 19:49     ` Eric Dumazet
2022-01-17 15:31     ` Akhmat Karakotov
2022-01-18 16:03       ` Jakub Kicinski
2022-01-18 16:52         ` Akhmat Karakotov
2021-12-04 10:51 [RFC PATCH v2 net-next 2/4] txhash: Add socket option to control TX hash rethink behavior kernel test robot
2021-12-06 13:33 ` Dan Carpenter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211112181939.11329-3-hmukos@yandex-team.ru \
    --to=hmukos@yandex-team.ru \
    --cc=eric.dumazet@gmail.com \
    --cc=mitradir@yandex-team.ru \
    --cc=netdev@vger.kernel.org \
    --cc=tom@herbertland.com \
    --cc=zeil@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.