* [Ocfs2-devel] [PATCH 01/28] net: add sock_set_reuseaddr
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 02/28] net: add sock_no_linger Christoph Hellwig
` (27 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: linux-cifs-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA, Sagi Grimberg,
Hideaki YOSHIFUJI, netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Jon Maloy,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, Eric Dumazet,
target-devel-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
linux-nfs-u79uwXL29TY76Z2rM5mHXA, Ying Xue, Alexey Kuznetsov,
ceph-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ
Add a helper to directly set the SO_REUSEADDR sockopt from kernel space
without going through a fake uaccess.
For this the iscsi target now has to formally depend on inet to avoid
a mostly theoretical compile failure. For actual operation it already
did depend on having ipv4 or ipv6 support.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/infiniband/sw/siw/siw_cm.c | 18 +++++-------------
drivers/nvme/target/tcp.c | 8 +-------
drivers/target/iscsi/Kconfig | 2 +-
drivers/target/iscsi/iscsi_target_login.c | 9 +--------
fs/dlm/lowcomms.c | 6 +-----
include/net/sock.h | 2 ++
net/core/sock.c | 8 ++++++++
7 files changed, 19 insertions(+), 34 deletions(-)
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 559e5fd3bad8b..d1860f3e87401 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1312,17 +1312,14 @@ static void siw_cm_llp_state_change(struct sock *sk)
static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
struct sockaddr *raddr)
{
- int rv, flags = 0, s_val = 1;
+ int rv, flags = 0;
size_t size = laddr->sa_family == AF_INET ?
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
/*
* Make address available again asap.
*/
- rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
- sizeof(s_val));
- if (rv < 0)
- return rv;
+ sock_set_reuseaddr(s->sk);
rv = s->ops->bind(s, laddr, size);
if (rv < 0)
@@ -1781,7 +1778,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
struct siw_cep *cep = NULL;
struct siw_device *sdev = to_siw_dev(id->device);
int addr_family = id->local_addr.ss_family;
- int rv = 0, s_val;
+ int rv = 0;
if (addr_family != AF_INET && addr_family != AF_INET6)
return -EAFNOSUPPORT;
@@ -1793,13 +1790,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
/*
* Allow binding local port when still in TIME_WAIT from last close.
*/
- s_val = 1;
- rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
- sizeof(s_val));
- if (rv) {
- siw_dbg(id->device, "setsockopt error: %d\n", rv);
- goto error;
- }
+ sock_set_reuseaddr(s->sk);
+
if (addr_family == AF_INET) {
struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f0da04e960f40..40757a63f4553 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1632,6 +1632,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
port->sock->sk->sk_user_data = port;
port->data_ready = port->sock->sk->sk_data_ready;
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
+ sock_set_reuseaddr(port->sock->sk);
opt = 1;
ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
@@ -1641,13 +1642,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
- ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&opt, sizeof(opt));
- if (ret) {
- pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
- goto err_sock;
- }
-
if (so_priority > 0) {
ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
(char *)&so_priority, sizeof(so_priority));
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 1f93ea3813536..922484ea4e304 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config ISCSI_TARGET
tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
- depends on NET
+ depends on INET
select CRYPTO
select CRYPTO_CRC32C
select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 731ee67fe914b..91acb3f07b4cc 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -909,14 +909,7 @@ int iscsit_setup_np(
}
}
- /* FIXME: Someone please explain why this is endian-safe */
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- pr_err("kernel_setsockopt() for SO_REUSEADDR"
- " failed\n");
- goto fail;
- }
+ sock_set_reuseaddr(sock->sk);
ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
(char *)&opt, sizeof(opt));
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cdfaf4f0e11a0..b801e77e3e596 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1244,12 +1244,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
sizeof(one));
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&one, sizeof(one));
+ sock_set_reuseaddr(sock->sk);
- if (result < 0) {
- log_print("Failed to set SO_REUSEADDR on socket: %d", result);
- }
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = con;
save_listen_callbacks(sock);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3e8c6d4b4b59f..2ec085044790c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,4 +2688,6 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
+void sock_set_reuseaddr(struct sock *sk);
+
#endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index fd85e651ce284..18eb84fdf5fbe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -712,6 +712,14 @@ bool sk_mc_loop(struct sock *sk)
}
EXPORT_SYMBOL(sk_mc_loop);
+void sock_set_reuseaddr(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_reuse = SK_CAN_REUSE;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 02/28] net: add sock_no_linger
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 01/28] net: add sock_set_reuseaddr Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 03/28] net: add sock_set_priority Christoph Hellwig
` (26 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: linux-cifs-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA, Sagi Grimberg,
Hideaki YOSHIFUJI, netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Jon Maloy,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, Eric Dumazet,
target-devel-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
linux-nfs-u79uwXL29TY76Z2rM5mHXA, Ying Xue, Alexey Kuznetsov,
ceph-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ
Add a helper to directly set the SO_LINGER sockopt from kernel space
with onoff set to true and a linger time of 0 without going through a
fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 9 +--------
drivers/nvme/target/tcp.c | 6 +-----
include/net/sock.h | 1 +
net/core/sock.c | 9 +++++++++
net/rds/tcp.h | 1 -
net/rds/tcp_connect.c | 2 +-
net/rds/tcp_listen.c | 13 +------------
net/sunrpc/svcsock.c | 12 ++----------
8 files changed, 16 insertions(+), 37 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f7..e72d87482eb78 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,7 +1313,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
- struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret, opt, rcv_pdu_size;
queue->ctrl = ctrl;
@@ -1361,13 +1360,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
* close. This is done to prevent stale data from being sent should
* the network connection be restored before TCP times out.
*/
- ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
- (char *)&sol, sizeof(sol));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set SO_LINGER sock opt %d\n", ret);
- goto err_sock;
- }
+ sock_no_linger(queue->sock->sk);
if (so_priority > 0) {
ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 40757a63f4553..e0801494b097f 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1429,7 +1429,6 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
{
struct socket *sock = queue->sock;
struct inet_sock *inet = inet_sk(sock->sk);
- struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret;
ret = kernel_getsockname(sock,
@@ -1447,10 +1446,7 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
* close. This is done to prevent stale data from being sent should
* the network connection be restored before TCP times out.
*/
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&sol, sizeof(sol));
- if (ret)
- return ret;
+ sock_no_linger(sock->sk);
if (so_priority > 0) {
ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
diff --git a/include/net/sock.h b/include/net/sock.h
index 2ec085044790c..6ed00bf009bbe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,6 +2688,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
+void sock_no_linger(struct sock *sk);
void sock_set_reuseaddr(struct sock *sk);
#endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 18eb84fdf5fbe..f0f09524911c8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -720,6 +720,15 @@ void sock_set_reuseaddr(struct sock *sk)
}
EXPORT_SYMBOL(sock_set_reuseaddr);
+void sock_no_linger(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_lingertime = 0;
+ sock_set_flag(sk, SOCK_LINGER);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 3c69361d21c73..d640e210b97b6 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -73,7 +73,6 @@ void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);
void *rds_tcp_listen_sock_def_readable(struct net *net);
-void rds_tcp_set_linger(struct socket *sock);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 008f50fb25dd2..4e64598176b05 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -207,7 +207,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
if (sock) {
if (rds_destroy_pending(cp->cp_conn))
- rds_tcp_set_linger(sock);
+ sock_no_linger(sock->sk);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 810a3a49e9474..bbb31b9c0b391 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -111,17 +111,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
return NULL;
}
-void rds_tcp_set_linger(struct socket *sock)
-{
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
-
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
-}
-
int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
@@ -241,7 +230,7 @@ int rds_tcp_accept_one(struct socket *sock)
* be pending on it. By setting linger, we achieve the side-effect
* of avoiding TIME_WAIT state on new_sock.
*/
- rds_tcp_set_linger(new_sock);
+ sock_no_linger(new_sock->sk);
kernel_sock_shutdown(new_sock, SHUT_RDWR);
ret = 0;
out:
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b31..6773dacc64d8e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -323,17 +323,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
{
- struct svc_sock *svsk;
- struct socket *sock;
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- svsk = container_of(xprt, struct svc_sock, sk_xprt);
- sock = svsk->sk_sock;
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
+ sock_no_linger(svsk->sk_sock->sk);
}
/*
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 03/28] net: add sock_set_priority
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 01/28] net: add sock_set_reuseaddr Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 02/28] net: add sock_no_linger Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 04/28] net: add sock_set_sndtimeo Christoph Hellwig
` (25 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion,
Sagi Grimberg
Add a helper to directly set the SO_PRIORITY sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 12 ++----------
drivers/nvme/target/tcp.c | 18 ++++--------------
include/net/sock.h | 1 +
net/core/sock.c | 8 ++++++++
4 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e72d87482eb78..a307972d33a02 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1362,16 +1362,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
*/
sock_no_linger(queue->sock->sk);
- if (so_priority > 0) {
- ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
- (char *)&so_priority, sizeof(so_priority));
- if (ret) {
- dev_err(ctrl->ctrl.device,
- "failed to set SO_PRIORITY sock opt, ret %d\n",
- ret);
- goto err_sock;
- }
- }
+ if (so_priority > 0)
+ sock_set_priority(queue->sock->sk, so_priority);
/* Set socket type of service */
if (nctrl->opts->tos >= 0) {
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index e0801494b097f..f3088156d01da 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1448,12 +1448,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
*/
sock_no_linger(sock->sk);
- if (so_priority > 0) {
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
- (char *)&so_priority, sizeof(so_priority));
- if (ret)
- return ret;
- }
+ if (so_priority > 0)
+ sock_set_priority(sock->sk, so_priority);
/* Set socket type of service */
if (inet->rcv_tos > 0) {
@@ -1638,14 +1634,8 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
- if (so_priority > 0) {
- ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
- (char *)&so_priority, sizeof(so_priority));
- if (ret) {
- pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
- goto err_sock;
- }
- }
+ if (so_priority > 0)
+ sock_set_priority(port->sock->sk, so_priority);
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr));
diff --git a/include/net/sock.h b/include/net/sock.h
index 6ed00bf009bbe..a3a43141a4be2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2689,6 +2689,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
void sock_no_linger(struct sock *sk);
+void sock_set_priority(struct sock *sk, u32 priority);
void sock_set_reuseaddr(struct sock *sk);
#endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index f0f09524911c8..ceda1a9248b3e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -729,6 +729,14 @@ void sock_no_linger(struct sock *sk)
}
EXPORT_SYMBOL(sock_no_linger);
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+ lock_sock(sk);
+ sk->sk_priority = priority;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 04/28] net: add sock_set_sndtimeo
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (2 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 03/28] net: add sock_set_priority Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 05/28] net: add sock_bindtoindex Christoph Hellwig
` (24 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the SO_SNDTIMEO_NEW sockopt from kernel
space without going through a fake uaccess. The interface is
simplified to only pass the seconds value, as that is the only
thing needed at the moment.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dlm/lowcomms.c | 8 ++------
include/net/sock.h | 1 +
net/core/sock.c | 11 +++++++++++
3 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index b801e77e3e596..b4d491122814b 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1035,7 +1035,6 @@ static void sctp_connect_to_sock(struct connection *con)
int result;
int addr_len;
struct socket *sock;
- struct __kernel_sock_timeval tv = { .tv_sec = 5, .tv_usec = 0 };
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -1087,13 +1086,10 @@ static void sctp_connect_to_sock(struct connection *con)
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
- sizeof(tv));
+ sock_set_sndtimeo(sock->sk, 5);
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
0);
- memset(&tv, 0, sizeof(tv));
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
- sizeof(tv));
+ sock_set_sndtimeo(sock->sk, 0);
if (result == -EINPROGRESS)
result = 0;
diff --git a/include/net/sock.h b/include/net/sock.h
index a3a43141a4be2..9a7b9e98685ac 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2691,5 +2691,6 @@ void sock_def_readable(struct sock *sk);
void sock_no_linger(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
void sock_set_reuseaddr(struct sock *sk);
+void sock_set_sndtimeo(struct sock *sk, s64 secs);
#endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index ceda1a9248b3e..d3b1d61e4f768 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -737,6 +737,17 @@ void sock_set_priority(struct sock *sk, u32 priority)
}
EXPORT_SYMBOL(sock_set_priority);
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+ lock_sock(sk);
+ if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+ sk->sk_sndtimeo = secs * HZ;
+ else
+ sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 05/28] net: add sock_bindtoindex
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (3 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 04/28] net: add sock_set_sndtimeo Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 06/28] net: add sock_enable_timestamps Christoph Hellwig
` (23 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the SO_BINDTOIFINDEX sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/sock.h | 1 +
net/core/sock.c | 21 +++++++++++++++------
net/ipv4/udp_tunnel.c | 4 +---
net/ipv6/ip6_udp_tunnel.c | 4 +---
4 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 9a7b9e98685ac..cdec7bc055d5b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,6 +2688,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
+int sock_bindtoindex(struct sock *sk, int ifindex);
void sock_no_linger(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
void sock_set_reuseaddr(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index d3b1d61e4f768..23f80880fbb2c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
-static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -594,6 +594,18 @@ static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
return ret;
}
+int sock_bindtoindex(struct sock *sk, int ifindex)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = sock_bindtoindex_locked(sk, ifindex);
+ release_sock(sk);
+
+ return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
static int sock_setbindtodevice(struct sock *sk, char __user *optval,
int optlen)
{
@@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
}
- lock_sock(sk);
- ret = sock_setbindtodevice_locked(sk, index);
- release_sock(sk);
-
+ return sock_bindtoindex(sk, index);
out:
#endif
@@ -1216,7 +1225,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTOIFINDEX:
- ret = sock_setbindtodevice_locked(sk, val);
+ ret = sock_bindtoindex_locked(sk, val);
break;
default:
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 150e6f0fdbf59..2158e8bddf41c 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->bind_ifindex) {
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
- (void *)&cfg->bind_ifindex,
- sizeof(cfg->bind_ifindex));
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 58956a6b66a21..6523609516d25 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -33,9 +33,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
}
if (cfg->bind_ifindex) {
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
- (void *)&cfg->bind_ifindex,
- sizeof(cfg->bind_ifindex));
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 06/28] net: add sock_enable_timestamps
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (4 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 05/28] net: add sock_bindtoindex Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 07/28] net: add sock_set_keepalive Christoph Hellwig
` (22 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Add a helper to directly enable timestamps instead of setting the
SO_TIMESTAMP* sockopts from kernel space and going through a fake
uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/sock.h | 1 +
net/core/sock.c | 47 +++++++++++++++++++++++++---------------
net/rxrpc/local_object.c | 8 +------
3 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index cdec7bc055d5b..99ef43508d2b5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2689,6 +2689,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
int sock_bindtoindex(struct sock *sk, int ifindex);
+void sock_enable_timestamps(struct sock *sk);
void sock_no_linger(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
void sock_set_reuseaddr(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 23f80880fbb2c..e4a4dd2b3d8b3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -757,6 +757,28 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
}
EXPORT_SYMBOL(sock_set_sndtimeo);
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+ if (val) {
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+ sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+ sock_set_flag(sk, SOCK_RCVTSTAMP);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+ } else {
+ sock_reset_flag(sk, SOCK_RCVTSTAMP);
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+ sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+ }
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+ lock_sock(sk);
+ __sock_set_timestamps(sk, true, false, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -948,28 +970,17 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_TIMESTAMP_OLD:
+ __sock_set_timestamps(sk, valbool, false, false);
+ break;
case SO_TIMESTAMP_NEW:
+ __sock_set_timestamps(sk, valbool, true, false);
+ break;
case SO_TIMESTAMPNS_OLD:
+ __sock_set_timestamps(sk, valbool, false, true);
+ break;
case SO_TIMESTAMPNS_NEW:
- if (valbool) {
- if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
- sock_set_flag(sk, SOCK_TSTAMP_NEW);
- else
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
- if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- else
- sock_set_flag(sk, SOCK_RCVTSTAMPNS);
- sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- } else {
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
- }
+ __sock_set_timestamps(sk, valbool, true, true);
break;
-
case SO_TIMESTAMPING_NEW:
sock_set_flag(sk, SOCK_TSTAMP_NEW);
/* fall through */
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 01135e54d95d2..5ea2bd01fdd59 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -189,13 +189,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
}
/* We want receive timestamps. */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ sock_enable_timestamps(local->socket->sk);
break;
default:
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 07/28] net: add sock_set_keepalive
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (5 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 06/28] net: add sock_enable_timestamps Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 08/28] net: add sock_set_rcvbuf Christoph Hellwig
` (21 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the SO_KEEPALIVE sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dlm/lowcomms.c | 6 +-----
include/net/sock.h | 1 +
net/core/sock.c | 10 ++++++++++
net/rds/tcp_listen.c | 6 +-----
net/sunrpc/xprtsock.c | 4 +---
5 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index b4d491122814b..138009c6a2ee1 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1259,11 +1259,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
con->sock = NULL;
goto create_out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&one, sizeof(one));
- if (result < 0) {
- log_print("Set keepalive failed: %d", result);
- }
+ sock_set_keepalive(sock->sk);
result = sock->ops->listen(sock, 5);
if (result < 0) {
diff --git a/include/net/sock.h b/include/net/sock.h
index 99ef43508d2b5..dc08c176238fd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2691,6 +2691,7 @@ void sock_def_readable(struct sock *sk);
int sock_bindtoindex(struct sock *sk, int ifindex);
void sock_enable_timestamps(struct sock *sk);
void sock_no_linger(struct sock *sk);
+void sock_set_keepalive(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
void sock_set_reuseaddr(struct sock *sk);
void sock_set_sndtimeo(struct sock *sk, s64 secs);
diff --git a/net/core/sock.c b/net/core/sock.c
index e4a4dd2b3d8b3..728f5fb156a0c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -779,6 +779,16 @@ void sock_enable_timestamps(struct sock *sk)
}
EXPORT_SYMBOL(sock_enable_timestamps);
+void sock_set_keepalive(struct sock *sk)
+{
+ lock_sock(sk);
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, true);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index bbb31b9c0b391..d8bd132769594 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -43,13 +43,9 @@ int rds_tcp_keepalive(struct socket *sock)
/* values below based on xs_udp_default_timeout */
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
int keepcnt = 5; /* number of unack'ed probes before declaring dead */
- int keepalive = 1;
int ret = 0;
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&keepalive, sizeof(keepalive));
- if (ret < 0)
- goto bail;
+ sock_set_keepalive(sock->sk);
ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ece..30082cd039960 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2110,7 +2110,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
unsigned int keepidle;
unsigned int keepcnt;
- unsigned int opt_on = 1;
unsigned int timeo;
spin_lock(&xprt->transport_lock);
@@ -2122,8 +2121,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
spin_unlock(&xprt->transport_lock);
/* TCP Keepalive options */
- kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&opt_on, sizeof(opt_on));
+ sock_set_keepalive(sock->sk);
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
(char *)&keepidle, sizeof(keepidle));
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 08/28] net: add sock_set_rcvbuf
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (6 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 07/28] net: add sock_set_keepalive Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 09/28] net: add sock_set_reuseport Christoph Hellwig
` (20 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: linux-cifs-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA, Hideaki YOSHIFUJI,
netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Jon Maloy,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, Eric Dumazet,
target-devel-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
linux-nfs-u79uwXL29TY76Z2rM5mHXA, Ying Xue, Alexey Kuznetsov,
ceph-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ
Add a helper to directly set the SO_RCVBUFFORCE sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/dlm/lowcomms.c | 7 +-----
include/net/sock.h | 1 +
net/core/sock.c | 59 +++++++++++++++++++++++++---------------------
3 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 138009c6a2ee1..45c37f572c9d2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1297,7 +1297,6 @@ static int sctp_listen_for_all(void)
struct socket *sock = NULL;
int result = -EINVAL;
struct connection *con = nodeid2con(0, GFP_NOFS);
- int bufsize = NEEDED_RMEM;
int one = 1;
if (!con)
@@ -1312,11 +1311,7 @@ static int sctp_listen_for_all(void)
goto out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
- (char *)&bufsize, sizeof(bufsize));
- if (result)
- log_print("Error increasing buffer space on socket %d", result);
-
+ sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
sizeof(one));
if (result < 0)
diff --git a/include/net/sock.h b/include/net/sock.h
index dc08c176238fd..c997289aabbf9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2693,6 +2693,7 @@ void sock_enable_timestamps(struct sock *sk);
void sock_no_linger(struct sock *sk);
void sock_set_keepalive(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
+void sock_set_rcvbuf(struct sock *sk, int val);
void sock_set_reuseaddr(struct sock *sk);
void sock_set_sndtimeo(struct sock *sk, s64 secs);
diff --git a/net/core/sock.c b/net/core/sock.c
index 728f5fb156a0c..3c6ebf952e9ad 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -789,6 +789,35 @@ void sock_set_keepalive(struct sock *sk)
}
EXPORT_SYMBOL(sock_set_keepalive);
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+ /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+ * as a negative value.
+ */
+ val = min_t(int, val, INT_MAX / 2);
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+ /* We double it on the way in to account for "struct sk_buff" etc.
+ * overhead. Applications assume that the SO_RCVBUF setting they make
+ * will allow that much actual data to be received on that socket.
+ *
+ * Applications are unaware that "struct sk_buff" and other overheads
+ * allocate from the receive buffer during socket buffer allocation.
+ *
+ * And after considering the possible alternatives, returning the value
+ * we actually used in getsockopt is the most desirable behavior.
+ */
+ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __sock_set_rcvbuf(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -885,30 +914,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
- /* Ensure val * 2 fits into an int, to prevent max_t()
- * from treating it as a negative value.
- */
- val = min_t(int, val, INT_MAX / 2);
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- * We double it on the way in to account for
- * "struct sk_buff" etc. overhead. Applications
- * assume that the SO_RCVBUF setting they make will
- * allow that much actual data to be received on that
- * socket.
- *
- * Applications are unaware that "struct sk_buff" and
- * other overheads allocate from the receive buffer
- * during socket buffer allocation.
- *
- * And after considering the possible alternatives,
- * returning the value we actually used in getsockopt
- * is the most desirable behavior.
- */
- WRITE_ONCE(sk->sk_rcvbuf,
- max_t(int, val * 2, SOCK_MIN_RCVBUF));
+ __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
break;
case SO_RCVBUFFORCE:
@@ -920,9 +926,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
/* No negative values (to prevent underflow, as val will be
* multiplied by 2).
*/
- if (val < 0)
- val = 0;
- goto set_rcvbuf;
+ __sock_set_rcvbuf(sk, max(val, 0));
+ break;
case SO_KEEPALIVE:
if (sk->sk_prot->keepalive)
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 09/28] net: add sock_set_reuseport
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (7 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 08/28] net: add sock_set_rcvbuf Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 10/28] tcp: add tcp_sock_set_cork Christoph Hellwig
` (19 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the SO_REUSEPORT sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/sock.h | 1 +
net/core/sock.c | 8 ++++++++
net/sunrpc/xprtsock.c | 17 +----------------
3 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index c997289aabbf9..d994daa418ec2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2695,6 +2695,7 @@ void sock_set_keepalive(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
void sock_set_rcvbuf(struct sock *sk, int val);
void sock_set_reuseaddr(struct sock *sk);
+void sock_set_reuseport(struct sock *sk);
void sock_set_sndtimeo(struct sock *sk, s64 secs);
#endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 3c6ebf952e9ad..2ca3425b519c0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -729,6 +729,14 @@ void sock_set_reuseaddr(struct sock *sk)
}
EXPORT_SYMBOL(sock_set_reuseaddr);
+void sock_set_reuseport(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_reuseport = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
void sock_no_linger(struct sock *sk)
{
lock_sock(sk);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 30082cd039960..399848c2bcb29 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1594,21 +1594,6 @@ static int xs_get_random_port(void)
return rand + min;
}
-/**
- * xs_set_reuseaddr_port - set the socket's port and address reuse options
- * @sock: socket
- *
- * Note that this function has to be called on all sockets that share the
- * same port, and it must be called before binding.
- */
-static void xs_sock_set_reuseport(struct socket *sock)
-{
- int opt = 1;
-
- kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
- (char *)&opt, sizeof(opt));
-}
-
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
@@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
xs_reclassify_socket(family, sock);
if (reuseport)
- xs_sock_set_reuseport(sock);
+ sock_set_reuseport(sock->sk);
err = xs_bind(transport, sock);
if (err) {
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 10/28] tcp: add tcp_sock_set_cork
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (8 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 09/28] net: add sock_set_reuseport Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 11/28] tcp: add tcp_sock_set_nodelay Christoph Hellwig
` (18 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the TCP_CORK sockopt from kernel space
without going through a fake uaccess. Cleanup the callers to avoid
pointless wrappers now that this is a simple function call.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/drbd/drbd_int.h | 14 --------
drivers/block/drbd/drbd_receiver.c | 4 +--
drivers/block/drbd/drbd_worker.c | 6 ++--
fs/cifs/transport.c | 8 ++---
include/linux/tcp.h | 2 ++
net/ipv4/tcp.c | 51 +++++++++++++++++++-----------
net/rds/tcp_send.c | 9 ++----
7 files changed, 43 insertions(+), 51 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index aae99a2d7bd40..3550adc93c68b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,20 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
extern int drbd_connected(struct drbd_peer_device *);
-static inline void drbd_tcp_cork(struct socket *sock)
-{
- int val = 1;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
- (char*)&val, sizeof(val));
-}
-
-static inline void drbd_tcp_uncork(struct socket *sock)
-{
- int val = 0;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
- (char*)&val, sizeof(val));
-}
-
static inline void drbd_tcp_nodelay(struct socket *sock)
{
int val = 1;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c15e7083b13a6..55ea907ad33cb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -6162,7 +6162,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
rcu_read_unlock();
if (tcp_cork)
- drbd_tcp_cork(connection->meta.socket);
+ tcp_sock_set_cork(connection->meta.socket->sk, true);
err = drbd_finish_peer_reqs(device);
kref_put(&device->kref, drbd_destroy_device);
@@ -6175,7 +6175,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
}
if (tcp_cork)
- drbd_tcp_uncork(connection->meta.socket);
+ tcp_sock_set_cork(connection->meta.socket->sk, false);
return;
}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0dc019da1f8d0..2b89c9f2ca707 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -2098,7 +2098,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
if (uncork) {
mutex_lock(&connection->data.mutex);
if (connection->data.socket)
- drbd_tcp_uncork(connection->data.socket);
+ tcp_sock_set_cork(connection->data.socket->sk, false);
mutex_unlock(&connection->data.mutex);
}
@@ -2153,9 +2153,9 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
mutex_lock(&connection->data.mutex);
if (connection->data.socket) {
if (cork)
- drbd_tcp_cork(connection->data.socket);
+ tcp_sock_set_cork(connection->data.socket->sk, true);
else if (!uncork)
- drbd_tcp_uncork(connection->data.socket);
+ tcp_sock_set_cork(connection->data.socket->sk, false);
}
mutex_unlock(&connection->data.mutex);
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c97570eb2c180..99760063e0006 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -325,7 +325,6 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
struct msghdr smb_msg;
- int val = 1;
__be32 rfc1002_marker;
if (cifs_rdma_enabled(server)) {
@@ -345,8 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
}
/* cork the socket */
- kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
- (char *)&val, sizeof(val));
+ tcp_sock_set_cork(ssocket->sk, true);
for (j = 0; j < num_rqst; j++)
send_length += smb_rqst_len(server, &rqst[j]);
@@ -435,9 +433,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
}
/* uncork it */
- val = 0;
- kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
- (char *)&val, sizeof(val));
+ tcp_sock_set_cork(ssocket->sk, false);
if ((total_len > 0) && (total_len != send_length)) {
cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bf44e85d709dc..889eeb2256c2d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -497,4 +497,6 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
int shiftlen);
+void tcp_sock_set_cork(struct sock *sk, bool on);
+
#endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9700649963773..e6cf702e16d66 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2801,6 +2801,37 @@ static void tcp_enable_tx_delay(void)
}
}
+/* When set indicates to always queue non-full frames. Later the user clears
+ * this option and we transmit any pending partial frames in the queue. This is
+ * meant to be used alongside sendfile() to get properly filled frames when the
+ * user (for example) must write out headers with a write() call first and then
+ * use sendfile to send out the data parts.
+ *
+ * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
+ * TCP_NODELAY.
+ */
+static void __tcp_sock_set_cork(struct sock *sk, bool on)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (on) {
+ tp->nonagle |= TCP_NAGLE_CORK;
+ } else {
+ tp->nonagle &= ~TCP_NAGLE_CORK;
+ if (tp->nonagle & TCP_NAGLE_OFF)
+ tp->nonagle |= TCP_NAGLE_PUSH;
+ tcp_push_pending_frames(sk);
+ }
+}
+
+void tcp_sock_set_cork(struct sock *sk, bool on)
+{
+ lock_sock(sk);
+ __tcp_sock_set_cork(sk, on);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_cork);
+
/*
* Socket option code for TCP.
*/
@@ -2979,25 +3010,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_CORK:
- /* When set indicates to always queue non-full frames.
- * Later the user clears this option and we transmit
- * any pending partial frames in the queue. This is
- * meant to be used alongside sendfile() to get properly
- * filled frames when the user (for example) must write
- * out headers with a write() call first and then use
- * sendfile to send out the data parts.
- *
- * TCP_CORK can be set together with TCP_NODELAY and it is
- * stronger than TCP_NODELAY.
- */
- if (val) {
- tp->nonagle |= TCP_NAGLE_CORK;
- } else {
- tp->nonagle &= ~TCP_NAGLE_CORK;
- if (tp->nonagle&TCP_NAGLE_OFF)
- tp->nonagle |= TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk);
- }
+ __tcp_sock_set_cork(sk, val);
break;
case TCP_KEEPIDLE:
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 78a2554a44979..8c4d1d6e9249d 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -38,23 +38,18 @@
#include "rds.h"
#include "tcp.h"
-static void rds_tcp_cork(struct socket *sock, int val)
-{
- kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val));
-}
-
void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
{
struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 1);
+ tcp_sock_set_cork(tc->t_sock->sk, true);
}
void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
{
struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 0);
+ tcp_sock_set_cork(tc->t_sock->sk, false);
}
/* the core send_sem serializes this with other xmit and shutdown */
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 11/28] tcp: add tcp_sock_set_nodelay
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (9 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 10/28] tcp: add tcp_sock_set_cork Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 12/28] tcp: add tcp_sock_set_quickack Christoph Hellwig
` (17 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Sagi Grimberg,
Jason Gunthorpe
Add a helper to directly set the TCP_NODELAY sockopt from kernel space
without going through a fake uaccess. Cleanup the callers to avoid
pointless wrappers now that this is a simple function call.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
---
drivers/block/drbd/drbd_int.h | 7 ----
drivers/block/drbd/drbd_main.c | 2 +-
drivers/block/drbd/drbd_receiver.c | 4 +--
drivers/infiniband/sw/siw/siw_cm.c | 24 +++-----------
drivers/nvme/host/tcp.c | 9 +-----
drivers/nvme/target/tcp.c | 12 ++-----
drivers/target/iscsi/iscsi_target_login.c | 15 ++-------
fs/cifs/connect.c | 10 ++----
fs/dlm/lowcomms.c | 8 ++---
fs/ocfs2/cluster/tcp.c | 20 ++----------
include/linux/tcp.h | 1 +
net/ceph/messenger.c | 11 ++-----
net/ipv4/tcp.c | 39 +++++++++++++++--------
net/rds/tcp.c | 11 +------
net/rds/tcp.h | 1 -
net/rds/tcp_listen.c | 2 +-
16 files changed, 49 insertions(+), 127 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 3550adc93c68b..e24bba87c8e02 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
extern int drbd_connected(struct drbd_peer_device *);
-static inline void drbd_tcp_nodelay(struct socket *sock)
-{
- int val = 1;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char*)&val, sizeof(val));
-}
-
static inline void drbd_tcp_quickack(struct socket *sock)
{
int val = 2;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c094c3c2c5d4d..45fbd526c453b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection *connection, int vnr,
/* DRBD protocol "pings" are latency critical.
* This is supposed to trigger tcp_push_pending_frames() */
if (!err && (cmd == P_PING || cmd == P_PING_ACK))
- drbd_tcp_nodelay(sock->socket);
+ tcp_sock_set_nodelay(sock->socket->sk);
return err;
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 55ea907ad33cb..20a5e94494acd 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1051,8 +1051,8 @@ static int conn_connect(struct drbd_connection *connection)
/* we don't want delays.
* we use TCP_CORK where appropriate, though */
- drbd_tcp_nodelay(sock.socket);
- drbd_tcp_nodelay(msock.socket);
+ tcp_sock_set_nodelay(sock.socket->sk);
+ tcp_sock_set_nodelay(msock.socket->sk);
connection->data.socket = sock.socket;
connection->meta.socket = msock.socket;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index d1860f3e87401..1662216be66df 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_get(new_cep);
new_s->sk->sk_user_data = new_cep;
- if (siw_tcp_nagle == false) {
- int val = 1;
-
- rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- if (rv) {
- siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
- goto error;
- }
- }
+ if (siw_tcp_nagle == false)
+ tcp_sock_set_nodelay(new_s->sk);
new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
@@ -1386,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
goto error;
}
- if (siw_tcp_nagle == false) {
- int val = 1;
-
- rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
- sizeof(val));
- if (rv) {
- siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
- goto error;
- }
- }
+ if (siw_tcp_nagle == false)
+ tcp_sock_set_nodelay(s->sk);
cep = siw_cep_alloc(sdev);
if (!cep) {
rv = -ENOMEM;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a307972d33a02..4e4a750ecdb97 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1346,14 +1346,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
/* Set TCP no delay */
- opt = 1;
- ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
- TCP_NODELAY, (char *)&opt, sizeof(opt));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set TCP_NODELAY sock opt %d\n", ret);
- goto err_sock;
- }
+ tcp_sock_set_nodelay(queue->sock->sk);
/*
* Cleanup whatever is sitting in the TCP transmit queue on socket
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f3088156d01da..55bc4c3c0a74a 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1580,7 +1580,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
{
struct nvmet_tcp_port *port;
__kernel_sa_family_t af;
- int opt, ret;
+ int ret;
port = kzalloc(sizeof(*port), GFP_KERNEL);
if (!port)
@@ -1625,15 +1625,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
port->data_ready = port->sock->sk->sk_data_ready;
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
sock_set_reuseaddr(port->sock->sk);
-
- opt = 1;
- ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
- TCP_NODELAY, (char *)&opt, sizeof(opt));
- if (ret) {
- pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
- goto err_sock;
- }
-
+ tcp_sock_set_nodelay(port->sock->sk);
if (so_priority > 0)
sock_set_priority(port->sock->sk, so_priority);
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 91acb3f07b4cc..b561b07a869a0 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -897,20 +897,11 @@ int iscsit_setup_np(
/*
* Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY.
*/
- /* FIXME: Someone please explain why this is endian-safe */
- opt = 1;
- if (np->np_network_transport == ISCSI_TCP) {
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- pr_err("kernel_setsockopt() for TCP_NODELAY"
- " failed: %d\n", ret);
- goto fail;
- }
- }
-
+ if (np->np_network_transport == ISCSI_TCP)
+ tcp_sock_set_nodelay(sock->sk);
sock_set_reuseaddr(sock->sk);
+ opt = 1;
ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
(char *)&opt, sizeof(opt));
if (ret < 0) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 28268ed461b82..ad8fb53b36827 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3929,14 +3929,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket->sk->sk_rcvbuf = 140 * 1024;
}
- if (server->tcp_nodelay) {
- int val = 1;
- rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- if (rc)
- cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n",
- rc);
- }
+ if (server->tcp_nodelay)
+ tcp_sock_set_nodelay(socket->sk);
cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n",
socket->sk->sk_sndbuf,
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 45c37f572c9d2..749f9bc548d2a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1128,7 +1128,6 @@ static void tcp_connect_to_sock(struct connection *con)
struct sockaddr_storage saddr, src_addr;
int addr_len;
struct socket *sock = NULL;
- int one = 1;
int result;
if (con->nodeid == 0) {
@@ -1177,8 +1176,7 @@ static void tcp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
- sizeof(one));
+ tcp_sock_set_nodelay(sock->sk);
result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
O_NONBLOCK);
@@ -1220,7 +1218,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
{
struct socket *sock = NULL;
int result = 0;
- int one = 1;
int addr_len;
if (dlm_local_addr[0]->ss_family == AF_INET)
@@ -1237,8 +1234,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
}
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
- sizeof(one));
+ tcp_sock_set_nodelay(sock->sk);
sock_set_reuseaddr(sock->sk);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2c512b40a940e..4c70fe9d19ab2 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
sc_put(sc);
}
-static int o2net_set_nodelay(struct socket *sock)
-{
- int val = 1;
-
- return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (void *)&val, sizeof(val));
-}
-
static int o2net_set_usertimeout(struct socket *sock)
{
int user_timeout = O2NET_TCP_USER_TIMEOUT;
@@ -1636,11 +1628,7 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
- ret = o2net_set_nodelay(sc->sc_sock);
- if (ret) {
- mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_nodelay(sc->sc_sock->sk);
ret = o2net_set_usertimeout(sock);
if (ret) {
@@ -1832,11 +1820,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
*more = 1;
new_sock->sk->sk_allocation = GFP_ATOMIC;
- ret = o2net_set_nodelay(new_sock);
- if (ret) {
- mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_nodelay(new_sock->sk);
ret = o2net_set_usertimeout(new_sock);
if (ret) {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 889eeb2256c2d..9e42c7fe50a8b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,5 +498,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
int shiftlen);
void tcp_sock_set_cork(struct sock *sk, bool on);
+void tcp_sock_set_nodelay(struct sock *sk);
#endif /* _LINUX_TCP_H */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f8ca5edc5f2c9..27d6ab11f9ee8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
- if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
- int optval = 1;
-
- ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)&optval, sizeof(optval));
- if (ret)
- pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
- ret);
- }
+ if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
+ tcp_sock_set_nodelay(sock->sk);
con->sock = sock;
return 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e6cf702e16d66..a65f293a19fac 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2832,6 +2832,30 @@ void tcp_sock_set_cork(struct sock *sk, bool on)
}
EXPORT_SYMBOL(tcp_sock_set_cork);
+/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
+ * remembered, but it is not activated until cork is cleared.
+ *
+ * However, when TCP_NODELAY is set we make an explicit push, which overrides
+ * even TCP_CORK for currently queued segments.
+ */
+static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
+{
+ if (on) {
+ tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+ tcp_push_pending_frames(sk);
+ } else {
+ tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
+ }
+}
+
+void tcp_sock_set_nodelay(struct sock *sk)
+{
+ lock_sock(sk);
+ __tcp_sock_set_nodelay(sk, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_nodelay);
+
/*
* Socket option code for TCP.
*/
@@ -2929,20 +2953,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_NODELAY:
- if (val) {
- /* TCP_NODELAY is weaker than TCP_CORK, so that
- * this option on corked socket is remembered, but
- * it is not activated until cork is cleared.
- *
- * However, when TCP_NODELAY is set we make
- * an explicit push, which overrides even TCP_CORK
- * for currently queued segments.
- */
- tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk);
- } else {
- tp->nonagle &= ~TCP_NAGLE_OFF;
- }
+ __tcp_sock_set_nodelay(sk, val);
break;
case TCP_THIN_LINEAR_TIMEOUTS:
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 46782fac4c162..43db0eca911fa 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
{ }
};
-/* doing it this way avoids calling tcp_sk() */
-void rds_tcp_nonagle(struct socket *sock)
-{
- int val = 1;
-
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val,
- sizeof(val));
-}
-
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
/* seq# of the last byte of data in tcp send buffer */
@@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock)
struct net *net = sock_net(sk);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
- rds_tcp_nonagle(sock);
+ tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index d640e210b97b6..f6d75d8cb167a 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -50,7 +50,6 @@ struct rds_tcp_statistics {
/* tcp.c */
void rds_tcp_tune(struct socket *sock);
-void rds_tcp_nonagle(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index d8bd132769594..6f90ea077adcd 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -288,7 +288,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
}
sock->sk->sk_reuse = SK_CAN_REUSE;
- rds_tcp_nonagle(sock);
+ tcp_sock_set_nodelay(sock->sk);
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = sock->sk->sk_data_ready;
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 12/28] tcp: add tcp_sock_set_quickack
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (10 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 11/28] tcp: add tcp_sock_set_nodelay Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 13/28] tcp: add tcp_sock_set_syncnt Christoph Hellwig
` (16 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the TCP_QUICKACK sockopt from kernel space
without going through a fake uaccess. Cleanup the callers to avoid
pointless wrappers now that this is a simple function call.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/drbd/drbd_int.h | 7 ------
drivers/block/drbd/drbd_receiver.c | 5 ++--
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 39 ++++++++++++++++++++----------
4 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e24bba87c8e02..14345a87c7cc5 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
extern int drbd_connected(struct drbd_peer_device *);
-static inline void drbd_tcp_quickack(struct socket *sock)
-{
- int val = 2;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
- (char*)&val, sizeof(val));
-}
-
/* sets the number of 512 byte sectors of our virtual device */
void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 20a5e94494acd..3a3f2b6a821f3 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1223,7 +1223,7 @@ static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, str
* quickly as possible, and let remote TCP know what we have
* received so far. */
if (err == -EAGAIN) {
- drbd_tcp_quickack(connection->data.socket);
+ tcp_sock_set_quickack(connection->data.socket->sk, 2);
drbd_unplug_all_devices(connection);
}
if (err > 0) {
@@ -4959,8 +4959,7 @@ static int receive_UnplugRemote(struct drbd_connection *connection, struct packe
{
/* Make sure we've acked all the TCP data associated
* with the data requests being unplugged */
- drbd_tcp_quickack(connection->data.socket);
-
+ tcp_sock_set_quickack(connection->data.socket->sk, 2);
return 0;
}
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9e42c7fe50a8b..2eaf8320b9db0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -499,5 +499,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
void tcp_sock_set_cork(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
+void tcp_sock_set_quickack(struct sock *sk, int val);
#endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a65f293a19fac..27b5e7a4e2ef9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2856,6 +2856,31 @@ void tcp_sock_set_nodelay(struct sock *sk)
}
EXPORT_SYMBOL(tcp_sock_set_nodelay);
+static void __tcp_sock_set_quickack(struct sock *sk, int val)
+{
+ if (!val) {
+ inet_csk_enter_pingpong_mode(sk);
+ return;
+ }
+
+ inet_csk_exit_pingpong_mode(sk);
+ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+ inet_csk_ack_scheduled(sk)) {
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
+ tcp_cleanup_rbuf(sk, 1);
+ if (!(val & 1))
+ inet_csk_enter_pingpong_mode(sk);
+ }
+}
+
+void tcp_sock_set_quickack(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __tcp_sock_set_quickack(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_quickack);
+
/*
* Socket option code for TCP.
*/
@@ -3096,19 +3121,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_QUICKACK:
- if (!val) {
- inet_csk_enter_pingpong_mode(sk);
- } else {
- inet_csk_exit_pingpong_mode(sk);
- if ((1 << sk->sk_state) &
- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
- inet_csk_ack_scheduled(sk)) {
- icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
- tcp_cleanup_rbuf(sk, 1);
- if (!(val & 1))
- inet_csk_enter_pingpong_mode(sk);
- }
- }
+ __tcp_sock_set_quickack(sk, val);
break;
#ifdef CONFIG_TCP_MD5SIG
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 13/28] tcp: add tcp_sock_set_syncnt
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (11 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 12/28] tcp: add tcp_sock_set_quickack Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 14/28] tcp: add tcp_sock_set_user_timeout Christoph Hellwig
` (15 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion,
Sagi Grimberg
Add a helper to directly set the TCP_SYNCNT sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 9 +--------
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 12 ++++++++++++
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4e4a750ecdb97..2872584f52f63 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1336,14 +1336,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
/* Single syn retry */
- opt = 1;
- ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
- (char *)&opt, sizeof(opt));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set TCP_SYNCNT sock opt %d\n", ret);
- goto err_sock;
- }
+ tcp_sock_set_syncnt(queue->sock->sk, 1);
/* Set TCP no delay */
tcp_sock_set_nodelay(queue->sock->sk);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2eaf8320b9db0..6aa4ae5ebf3d5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -500,5 +500,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
void tcp_sock_set_cork(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
+int tcp_sock_set_syncnt(struct sock *sk, int val);
#endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27b5e7a4e2ef9..d2c67ae1da07a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2881,6 +2881,18 @@ void tcp_sock_set_quickack(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_sock_set_quickack);
+int tcp_sock_set_syncnt(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ inet_csk(sk)->icsk_syn_retries = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_syncnt);
+
/*
* Socket option code for TCP.
*/
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 14/28] tcp: add tcp_sock_set_user_timeout
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (12 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 13/28] tcp: add tcp_sock_set_syncnt Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 15/28] tcp: add tcp_sock_set_keepidle Christoph Hellwig
` (14 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Add a helper to directly set the TCP_USER_TIMEOUT sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/ocfs2/cluster/tcp.c | 22 ++--------------------
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 8 ++++++++
net/sunrpc/xprtsock.c | 3 +--
4 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4c70fe9d19ab2..79a2317194600 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
sc_put(sc);
}
-static int o2net_set_usertimeout(struct socket *sock)
-{
- int user_timeout = O2NET_TCP_USER_TIMEOUT;
-
- return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (void *)&user_timeout, sizeof(user_timeout));
-}
-
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1629,12 +1621,7 @@ static void o2net_start_connect(struct work_struct *work)
}
tcp_sock_set_nodelay(sc->sc_sock->sk);
-
- ret = o2net_set_usertimeout(sock);
- if (ret) {
- mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT);
o2net_register_callbacks(sc->sc_sock->sk, sc);
@@ -1821,12 +1808,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->sk->sk_allocation = GFP_ATOMIC;
tcp_sock_set_nodelay(new_sock->sk);
-
- ret = o2net_set_usertimeout(new_sock);
- if (ret) {
- mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6aa4ae5ebf3d5..de682143efe4d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -501,5 +501,6 @@ void tcp_sock_set_cork(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
#endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d2c67ae1da07a..0004bd9ae7b0a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2893,6 +2893,14 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_sock_set_syncnt);
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+{
+ lock_sock(sk);
+ inet_csk(sk)->icsk_user_timeout = val;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+
/*
* Socket option code for TCP.
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 399848c2bcb29..231fd6162f68d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2115,8 +2115,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
(char *)&keepcnt, sizeof(keepcnt));
/* TCP user timeout (see RFC5482) */
- kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (char *)&timeo, sizeof(timeo));
+ tcp_sock_set_user_timeout(sock->sk, timeo);
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 15/28] tcp: add tcp_sock_set_keepidle
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (13 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 14/28] tcp: add tcp_sock_set_user_timeout Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 16/28] tcp: add tcp_sock_set_keepintvl Christoph Hellwig
` (13 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Add a helper to directly set the TCP_KEEP_IDLE sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 49 ++++++++++++++++++++++++++++++-------------
net/rds/tcp_listen.c | 5 +----
net/sunrpc/xprtsock.c | 3 +--
4 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index de682143efe4d..5724dd84a85ed 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
int shiftlen);
void tcp_sock_set_cork(struct sock *sk, bool on);
+int tcp_sock_set_keepidle(struct sock *sk, int val);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0004bd9ae7b0a..bdf0ff9333514 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2901,6 +2901,39 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ return -EINVAL;
+
+ tp->keepalive_time = val * HZ;
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ u32 elapsed = keepalive_time_elapsed(tp);
+
+ if (tp->keepalive_time > elapsed)
+ elapsed = tp->keepalive_time - elapsed;
+ else
+ elapsed = 0;
+ inet_csk_reset_keepalive_timer(sk, elapsed);
+ }
+
+ return 0;
+}
+
+int tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+ int err;
+
+ lock_sock(sk);
+ err = __tcp_sock_set_keepidle(sk, val);
+ release_sock(sk);
+ return err;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepidle);
+
/*
* Socket option code for TCP.
*/
@@ -3070,21 +3103,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_KEEPIDLE:
- if (val < 1 || val > MAX_TCP_KEEPIDLE)
- err = -EINVAL;
- else {
- tp->keepalive_time = val * HZ;
- if (sock_flag(sk, SOCK_KEEPOPEN) &&
- !((1 << sk->sk_state) &
- (TCPF_CLOSE | TCPF_LISTEN))) {
- u32 elapsed = keepalive_time_elapsed(tp);
- if (tp->keepalive_time > elapsed)
- elapsed = tp->keepalive_time - elapsed;
- else
- elapsed = 0;
- inet_csk_reset_keepalive_timer(sk, elapsed);
- }
- }
+ err = __tcp_sock_set_keepidle(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 6f90ea077adcd..79f9adc008114 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -52,10 +52,7 @@ int rds_tcp_keepalive(struct socket *sock)
if (ret < 0)
goto bail;
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
- if (ret < 0)
- goto bail;
+ tcp_sock_set_keepidle(sock->sk, keepidle);
/* KEEPINTVL is the interval between successive probes. We follow
* the model in xs_tcp_finish_connecting() and re-use keepidle.
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 231fd6162f68d..473290f7c5c0a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2107,8 +2107,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
/* TCP Keepalive options */
sock_set_keepalive(sock->sk);
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
+ tcp_sock_set_keepidle(sock->sk, keepidle);
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
(char *)&keepidle, sizeof(keepidle));
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 16/28] tcp: add tcp_sock_set_keepintvl
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (14 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 15/28] tcp: add tcp_sock_set_keepidle Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 17/28] tcp: add tcp_sock_set_keepcnt Christoph Hellwig
` (12 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the TCP_KEEPINTVL sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 12 ++++++++++++
net/rds/tcp_listen.c | 4 +---
net/sunrpc/xprtsock.c | 3 +--
4 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 5724dd84a85ed..1f9bada00faab 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -499,6 +499,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
void tcp_sock_set_cork(struct sock *sk, bool on);
int tcp_sock_set_keepidle(struct sock *sk, int val);
+int tcp_sock_set_keepintvl(struct sock *sk, int val);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bdf0ff9333514..7eb083e09786a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2934,6 +2934,18 @@ int tcp_sock_set_keepidle(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_sock_set_keepidle);
+int tcp_sock_set_keepintvl(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ return -EINVAL;
+
+ lock_sock(sk);
+ tcp_sk(sk)->keepalive_intvl = val * HZ;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepintvl);
+
/*
* Socket option code for TCP.
*/
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 79f9adc008114..9ad555c48d15d 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -53,12 +53,10 @@ int rds_tcp_keepalive(struct socket *sock)
goto bail;
tcp_sock_set_keepidle(sock->sk, keepidle);
-
/* KEEPINTVL is the interval between successive probes. We follow
* the model in xs_tcp_finish_connecting() and re-use keepidle.
*/
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
+ tcp_sock_set_keepintvl(sock->sk, keepidle);
bail:
return ret;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 473290f7c5c0a..5ca64e12af0c5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2108,8 +2108,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
/* TCP Keepalive options */
sock_set_keepalive(sock->sk);
tcp_sock_set_keepidle(sock->sk, keepidle);
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
+ tcp_sock_set_keepintvl(sock->sk, keepidle);
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 17/28] tcp: add tcp_sock_set_keepcnt
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (15 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 16/28] tcp: add tcp_sock_set_keepintvl Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 18/28] ipv4: add ip_sock_set_tos Christoph Hellwig
` (11 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Add a helper to directly set the TCP_KEEPCNT sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 12 ++++++++++++
net/rds/tcp.h | 2 +-
net/rds/tcp_listen.c | 17 +++--------------
net/sunrpc/xprtsock.c | 3 +--
5 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1f9bada00faab..9aac824c523cf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
int shiftlen);
void tcp_sock_set_cork(struct sock *sk, bool on);
+int tcp_sock_set_keepcnt(struct sock *sk, int val);
int tcp_sock_set_keepidle(struct sock *sk, int val);
int tcp_sock_set_keepintvl(struct sock *sk, int val);
void tcp_sock_set_nodelay(struct sock *sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7eb083e09786a..15d47d5e79510 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2946,6 +2946,18 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_sock_set_keepintvl);
+int tcp_sock_set_keepcnt(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ tcp_sk(sk)->keepalive_probes = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepcnt);
+
/*
* Socket option code for TCP.
*/
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f6d75d8cb167a..bad9cf49d5657 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -70,7 +70,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
-int rds_tcp_keepalive(struct socket *sock);
+void rds_tcp_keepalive(struct socket *sock);
void *rds_tcp_listen_sock_def_readable(struct net *net);
/* tcp_recv.c */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 9ad555c48d15d..101cf14215a0b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -38,27 +38,19 @@
#include "rds.h"
#include "tcp.h"
-int rds_tcp_keepalive(struct socket *sock)
+void rds_tcp_keepalive(struct socket *sock)
{
/* values below based on xs_udp_default_timeout */
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
int keepcnt = 5; /* number of unack'ed probes before declaring dead */
- int ret = 0;
sock_set_keepalive(sock->sk);
-
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
- if (ret < 0)
- goto bail;
-
+ tcp_sock_set_keepcnt(sock->sk, keepcnt);
tcp_sock_set_keepidle(sock->sk, keepidle);
/* KEEPINTVL is the interval between successive probes. We follow
* the model in xs_tcp_finish_connecting() and re-use keepidle.
*/
tcp_sock_set_keepintvl(sock->sk, keepidle);
-bail:
- return ret;
}
/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
@@ -140,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock)
new_sock->ops = sock->ops;
__module_get(new_sock->ops->owner);
- ret = rds_tcp_keepalive(new_sock);
- if (ret < 0)
- goto out;
-
+ rds_tcp_keepalive(new_sock);
rds_tcp_tune(new_sock);
inet = inet_sk(new_sock->sk);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 5ca64e12af0c5..0d3ec055bc12f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2109,8 +2109,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
sock_set_keepalive(sock->sk);
tcp_sock_set_keepidle(sock->sk, keepidle);
tcp_sock_set_keepintvl(sock->sk, keepidle);
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
+ tcp_sock_set_keepcnt(sock->sk, keepcnt);
/* TCP user timeout (see RFC5482) */
tcp_sock_set_user_timeout(sock->sk, timeo);
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 18/28] ipv4: add ip_sock_set_tos
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (16 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 17/28] tcp: add tcp_sock_set_keepcnt Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 19/28] ipv4: add ip_sock_set_freebind Christoph Hellwig
` (10 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Sagi Grimberg
Add a helper to directly set the IP_TOS sockopt from kernel space without
going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 14 +++-----------
drivers/nvme/target/tcp.c | 10 ++--------
include/net/ip.h | 2 ++
net/ipv4/ip_sockglue.c | 30 +++++++++++++++++++++---------
4 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2872584f52f63..4c972d8abf317 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,7 +1313,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
- int ret, opt, rcv_pdu_size;
+ int ret, rcv_pdu_size;
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
@@ -1352,16 +1352,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
sock_set_priority(queue->sock->sk, so_priority);
/* Set socket type of service */
- if (nctrl->opts->tos >= 0) {
- opt = nctrl->opts->tos;
- ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS,
- (char *)&opt, sizeof(opt));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set IP_TOS sock opt %d\n", ret);
- goto err_sock;
- }
- }
+ if (nctrl->opts->tos >= 0)
+ ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
queue->sock->sk->sk_allocation = GFP_ATOMIC;
nvme_tcp_set_queue_io_cpu(queue);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 55bc4c3c0a74a..4546049a96b37 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1452,14 +1452,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
sock_set_priority(sock->sk, so_priority);
/* Set socket type of service */
- if (inet->rcv_tos > 0) {
- int tos = inet->rcv_tos;
-
- ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
- (char *)&tos, sizeof(tos));
- if (ret)
- return ret;
- }
+ if (inet->rcv_tos > 0)
+ ip_sock_set_tos(sock->sk, inet->rcv_tos);
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = queue;
diff --git a/include/net/ip.h b/include/net/ip.h
index 5b317c9f4470a..2fc52e26fa88b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -765,4 +765,6 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
return likely(mtu >= IPV4_MIN_MTU);
}
+void ip_sock_set_tos(struct sock *sk, int val);
+
#endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f43d5f12aa86a..b43a29e11f4a5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -560,6 +560,26 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return err;
}
+static void __ip_sock_set_tos(struct sock *sk, int val)
+{
+ if (sk->sk_type == SOCK_STREAM) {
+ val &= ~INET_ECN_MASK;
+ val |= inet_sk(sk)->tos & INET_ECN_MASK;
+ }
+ if (inet_sk(sk)->tos != val) {
+ inet_sk(sk)->tos = val;
+ sk->sk_priority = rt_tos2priority(val);
+ sk_dst_reset(sk);
+ }
+}
+
+void ip_sock_set_tos(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __ip_sock_set_tos(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_tos);
/*
* Socket option code for IP. This is the end of the line after any
@@ -823,15 +843,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
break;
case IP_TOS: /* This sets both TOS and Precedence */
- if (sk->sk_type == SOCK_STREAM) {
- val &= ~INET_ECN_MASK;
- val |= inet->tos & INET_ECN_MASK;
- }
- if (inet->tos != val) {
- inet->tos = val;
- sk->sk_priority = rt_tos2priority(val);
- sk_dst_reset(sk);
- }
+ __ip_sock_set_tos(sk, val);
break;
case IP_TTL:
if (optlen < 1)
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 19/28] ipv4: add ip_sock_set_freebind
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (17 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 18/28] ipv4: add ip_sock_set_tos Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 20/28] ipv4: add ip_sock_set_recverr Christoph Hellwig
` (9 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Add a helper to directly set the IP_FREEBIND sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/target/iscsi/iscsi_target_login.c | 13 +++----------
include/net/ip.h | 1 +
net/ipv4/ip_sockglue.c | 8 ++++++++
3 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index b561b07a869a0..85748e3388582 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <linux/idr.h>
#include <linux/tcp.h> /* TCP_NODELAY */
+#include <net/ip.h>
#include <net/ipv6.h> /* ipv6_addr_v4mapped() */
#include <scsi/iscsi_proto.h>
#include <target/target_core_base.h>
@@ -855,7 +856,7 @@ int iscsit_setup_np(
struct sockaddr_storage *sockaddr)
{
struct socket *sock = NULL;
- int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len;
+ int backlog = ISCSIT_TCP_BACKLOG, ret, len;
switch (np->np_network_transport) {
case ISCSI_TCP:
@@ -900,15 +901,7 @@ int iscsit_setup_np(
if (np->np_network_transport == ISCSI_TCP)
tcp_sock_set_nodelay(sock->sk);
sock_set_reuseaddr(sock->sk);
-
- opt = 1;
- ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- pr_err("kernel_setsockopt() for IP_FREEBIND"
- " failed\n");
- goto fail;
- }
+ ip_sock_set_freebind(sock->sk);
ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
if (ret < 0) {
diff --git a/include/net/ip.h b/include/net/ip.h
index 2fc52e26fa88b..5f5d8226b6abc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -765,6 +765,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
return likely(mtu >= IPV4_MIN_MTU);
}
+void ip_sock_set_freebind(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
#endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b43a29e11f4a5..767838d2030d8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -581,6 +581,14 @@ void ip_sock_set_tos(struct sock *sk, int val)
}
EXPORT_SYMBOL(ip_sock_set_tos);
+void ip_sock_set_freebind(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->freebind = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_freebind);
+
/*
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 20/28] ipv4: add ip_sock_set_recverr
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (18 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 19/28] ipv4: add ip_sock_set_freebind Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 21/28] ipv4: add ip_sock_set_mtu_discover Christoph Hellwig
` (8 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion,
David Howells
Add a helper to directly set the IP_RECVERR sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
---
include/net/ip.h | 1 +
net/ipv4/ip_sockglue.c | 8 ++++++++
net/rxrpc/local_object.c | 8 +-------
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index 5f5d8226b6abc..f063a491b9063 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -766,6 +766,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
}
void ip_sock_set_freebind(struct sock *sk);
+void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
#endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 767838d2030d8..aca6b81da9bae 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -589,6 +589,14 @@ void ip_sock_set_freebind(struct sock *sk)
}
EXPORT_SYMBOL(ip_sock_set_freebind);
+void ip_sock_set_recverr(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->recverr = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_recverr);
+
/*
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 5ea2bd01fdd59..4c0e8fe5ec1fb 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -171,13 +171,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
/* Fall through */
case AF_INET:
/* we want to receive ICMP errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip_sock_set_recverr(local->socket->sk);
/* we want to set the don't fragment bit */
opt = IP_PMTUDISC_DO;
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 21/28] ipv4: add ip_sock_set_mtu_discover
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (19 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 20/28] ipv4: add ip_sock_set_recverr Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 22/28] ipv4: add ip_sock_set_pktinfo Christoph Hellwig
` (7 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: linux-cifs, rds-devel, cluster-devel, Hideaki YOSHIFUJI, netdev,
linux-kernel, linux-nvme, Jon Maloy, Ying Xue, Eric Dumazet,
target-devel, tipc-discussion, linux-nfs, David Howells,
drbd-dev, Alexey Kuznetsov, ceph-devel, linux-afs, ocfs2-devel,
linux-rdma
Add a helper to directly set the IP_MTU_DISCOVER sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com> [rxrpc bits]
---
include/net/ip.h | 1 +
net/ipv4/ip_sockglue.c | 11 +++++++++++
net/rxrpc/local_object.c | 8 +-------
net/rxrpc/output.c | 14 +++++---------
4 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index f063a491b9063..d3649c49dd333 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -766,6 +766,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
}
void ip_sock_set_freebind(struct sock *sk);
+int ip_sock_set_mtu_discover(struct sock *sk, int val);
void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index aca6b81da9bae..aa115be11dcfb 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -597,6 +597,17 @@ void ip_sock_set_recverr(struct sock *sk)
}
EXPORT_SYMBOL(ip_sock_set_recverr);
+int ip_sock_set_mtu_discover(struct sock *sk, int val)
+{
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
+ return -EINVAL;
+ lock_sock(sk);
+ inet_sk(sk)->pmtudisc = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(ip_sock_set_mtu_discover);
+
/*
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 4c0e8fe5ec1fb..6f4e6b4817cf2 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -174,13 +174,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
ip_sock_set_recverr(local->socket->sk);
/* we want to set the don't fragment bit */
- opt = IP_PMTUDISC_DO;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
/* We want receive timestamps. */
sock_enable_timestamps(local->socket->sk);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f8b632a5c6197..1ba43c3df4adb 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -321,7 +321,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
struct kvec iov[2];
rxrpc_serial_t serial;
size_t len;
- int ret, opt;
+ int ret;
_enter(",{%d}", skb->len);
@@ -473,18 +473,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
switch (conn->params.local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- opt = IP_PMTUDISC_DONT;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
+ ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ IP_PMTUDISC_DONT);
ret = kernel_sendmsg(conn->params.local->socket, &msg,
iov, 2, len);
conn->params.peer->last_tx_at = ktime_get_seconds();
- opt = IP_PMTUDISC_DO;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
+ ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ IP_PMTUDISC_DO);
break;
default:
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 22/28] ipv4: add ip_sock_set_pktinfo
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (20 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 21/28] ipv4: add ip_sock_set_mtu_discover Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 23/28] ipv6: add ip6_sock_set_v6only Christoph Hellwig
` (6 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: linux-cifs, rds-devel, cluster-devel, Hideaki YOSHIFUJI, netdev,
linux-kernel, linux-nvme, Jon Maloy, Ying Xue, Eric Dumazet,
target-devel, tipc-discussion, linux-nfs, drbd-dev,
Alexey Kuznetsov, ceph-devel, linux-afs, ocfs2-devel, linux-rdma
Add a helper to directly set the IP_PKTINFO sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/ip.h | 1 +
net/ipv4/ip_sockglue.c | 8 ++++++++
net/sunrpc/svcsock.c | 5 ++---
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index d3649c49dd333..04ebe7bf54c6a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,6 +767,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
void ip_sock_set_freebind(struct sock *sk);
int ip_sock_set_mtu_discover(struct sock *sk, int val);
+void ip_sock_set_pktinfo(struct sock *sk);
void ip_sock_set_recverr(struct sock *sk);
void ip_sock_set_tos(struct sock *sk, int val);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index aa115be11dcfb..84ec3703c9091 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -608,6 +608,14 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val)
}
EXPORT_SYMBOL(ip_sock_set_mtu_discover);
+void ip_sock_set_pktinfo(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_pktinfo);
+
/*
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6773dacc64d8e..7a805d165689c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -616,9 +616,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
/* make sure we get destination address info */
switch (svsk->sk_sk->sk_family) {
case AF_INET:
- level = SOL_IP;
- optname = IP_PKTINFO;
- break;
+ ip_sock_set_pktinfo(svsk->sk_sock->sk);
+ return;
case AF_INET6:
level = SOL_IPV6;
optname = IPV6_RECVPKTINFO;
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 23/28] ipv6: add ip6_sock_set_v6only
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (21 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 22/28] ipv4: add ip_sock_set_pktinfo Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 24/28] ipv6: add ip6_sock_set_recverr Christoph Hellwig
` (5 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the IPV6_V6ONLY sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/ipv6.h | 11 +++++++++++
net/ipv6/ip6_udp_tunnel.c | 5 +----
net/sunrpc/svcsock.c | 6 +-----
3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 39a00d3ef5e22..9b91188c9a74c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1177,4 +1177,15 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
const struct in6_addr *addr, unsigned int mode);
int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
+
+static inline int ip6_sock_set_v6only(struct sock *sk)
+{
+ if (inet_sk(sk)->inet_num)
+ return -EINVAL;
+ lock_sock(sk);
+ sk->sk_ipv6only = true;
+ release_sock(sk);
+ return 0;
+}
+
#endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 6523609516d25..2e0ad1bc84a83 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -25,10 +25,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->ipv6_v6only) {
- int val = 1;
-
- err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
- (char *) &val, sizeof(val));
+ err = ip6_sock_set_v6only(sock->sk);
if (err < 0)
goto error;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7a805d165689c..a391892977cd2 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1328,7 +1328,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
int family;
- int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1364,11 +1363,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
* getting requests from IPv4 remotes. Those should
* be shunted to a PF_INET listener via rpcbind.
*/
- val = 1;
if (family == PF_INET6)
- kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
- (char *)&val, sizeof(val));
-
+ ip6_sock_set_v6only(sock->sk);
if (type == SOCK_STREAM)
sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
error = kernel_bind(sock, sin, len);
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 24/28] ipv6: add ip6_sock_set_recverr
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (22 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 23/28] ipv6: add ip6_sock_set_v6only Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 25/28] ipv6: add ip6_sock_set_addr_preferences Christoph Hellwig
` (4 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion,
David Howells
Add a helper to directly set the IPV6_RECVERR sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
---
include/net/ipv6.h | 7 +++++++
net/rxrpc/local_object.c | 10 ++--------
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9b91188c9a74c..49c4abf991489 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1188,4 +1188,11 @@ static inline int ip6_sock_set_v6only(struct sock *sk)
return 0;
}
+static inline void ip6_sock_set_recverr(struct sock *sk)
+{
+ lock_sock(sk);
+ inet6_sk(sk)->recverr = true;
+ release_sock(sk);
+}
+
#endif /* _NET_IPV6_H */
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 6f4e6b4817cf2..c8b2097f499c0 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
struct sock *usk;
- int ret, opt;
+ int ret;
_enter("%p{%d,%d}",
local, local->srx.transport_type, local->srx.transport.family);
@@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
switch (local->srx.transport.family) {
case AF_INET6:
/* we want to receive ICMPv6 errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip6_sock_set_recverr(local->socket->sk);
/* Fall through and set IPv4 options too otherwise we don't get
* errors from IPv4 packets sent through the IPv6 socket.
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 25/28] ipv6: add ip6_sock_set_addr_preferences
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (23 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 24/28] ipv6: add ip6_sock_set_recverr Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 26/28] ipv6: add ip6_sock_set_recvpktinfo Christoph Hellwig
` (3 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the IPV6_ADD_PREFERENCES sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/ipv6.h | 67 ++++++++++++++++++++++++++++++++++++++++
net/ipv6/ipv6_sockglue.c | 59 +----------------------------------
net/sunrpc/xprtsock.c | 7 +++--
3 files changed, 72 insertions(+), 61 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 49c4abf991489..9a90759830162 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1195,4 +1195,71 @@ static inline void ip6_sock_set_recverr(struct sock *sk)
release_sock(sk);
}
+static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+ unsigned int pref = 0;
+ unsigned int prefmask = ~0;
+
+ /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+ switch (val & (IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP |
+ IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+ case IPV6_PREFER_SRC_PUBLIC:
+ pref |= IPV6_PREFER_SRC_PUBLIC;
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case IPV6_PREFER_SRC_TMP:
+ pref |= IPV6_PREFER_SRC_TMP;
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check HOME/COA conflicts */
+ switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) {
+ case IPV6_PREFER_SRC_HOME:
+ prefmask &= ~IPV6_PREFER_SRC_COA;
+ break;
+ case IPV6_PREFER_SRC_COA:
+ pref |= IPV6_PREFER_SRC_COA;
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check CGA/NONCGA conflicts */
+ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+ case IPV6_PREFER_SRC_CGA:
+ case IPV6_PREFER_SRC_NONCGA:
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+ return 0;
+}
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = __ip6_sock_set_addr_preferences(sk, val);
+ release_sock(sk);
+ return ret;
+}
+
#endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e10258c2210e8..adbfed6adf11c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -845,67 +845,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_ADDR_PREFERENCES:
- {
- unsigned int pref = 0;
- unsigned int prefmask = ~0;
-
if (optlen < sizeof(int))
goto e_inval;
-
- retv = -EINVAL;
-
- /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
- switch (val & (IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP|
- IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
- case IPV6_PREFER_SRC_PUBLIC:
- pref |= IPV6_PREFER_SRC_PUBLIC;
- break;
- case IPV6_PREFER_SRC_TMP:
- pref |= IPV6_PREFER_SRC_TMP;
- break;
- case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
- break;
- case 0:
- goto pref_skip_pubtmp;
- default:
- goto e_inval;
- }
-
- prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP);
-pref_skip_pubtmp:
-
- /* check HOME/COA conflicts */
- switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
- case IPV6_PREFER_SRC_HOME:
- break;
- case IPV6_PREFER_SRC_COA:
- pref |= IPV6_PREFER_SRC_COA;
- case 0:
- goto pref_skip_coa;
- default:
- goto e_inval;
- }
-
- prefmask &= ~IPV6_PREFER_SRC_COA;
-pref_skip_coa:
-
- /* check CGA/NONCGA conflicts */
- switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
- case IPV6_PREFER_SRC_CGA:
- case IPV6_PREFER_SRC_NONCGA:
- case 0:
- break;
- default:
- goto e_inval;
- }
-
- np->srcprefs = (np->srcprefs & prefmask) | pref;
- retv = 0;
-
+ retv = __ip6_sock_set_addr_preferences(sk, val);
break;
- }
case IPV6_MINHOPCOUNT:
if (optlen < sizeof(int))
goto e_inval;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0d3ec055bc12f..3a143e250b9ac 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2150,7 +2150,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!transport->inet) {
struct sock *sk = sock->sk;
- unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
/* Avoid temporary address, they are bad for long-lived
* connections such as NFS mounts.
@@ -2159,8 +2158,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
* knowledge about the normal duration of connections,
* MAY override this as appropriate.
*/
- kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
- (char *)&addr_pref, sizeof(addr_pref));
+ if (xs_addr(xprt)->sa_family == PF_INET6) {
+ ip6_sock_set_addr_preferences(sk,
+ IPV6_PREFER_SRC_PUBLIC);
+ }
xs_tcp_set_socket_timeouts(xprt, sock);
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 26/28] ipv6: add ip6_sock_set_recvpktinfo
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (24 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 25/28] ipv6: add ip6_sock_set_addr_preferences Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 27/28] rxrpc: add rxrpc_sock_set_min_security_level Christoph Hellwig
` (2 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev, linux-kernel, linux-rdma, linux-nvme,
target-devel, linux-afs, linux-cifs, cluster-devel, ocfs2-devel,
netdev, ceph-devel, rds-devel, linux-nfs, tipc-discussion
Add a helper to directly set the IPV6_RECVPKTINFO sockopt from kernel
space without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/net/ipv6.h | 7 +++++++
net/sunrpc/svcsock.c | 10 ++--------
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9a90759830162..5e65bf2fd32d0 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1262,4 +1262,11 @@ static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
return ret;
}
+static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
+{
+ lock_sock(sk);
+ inet6_sk(sk)->rxopt.bits.rxinfo = true;
+ release_sock(sk);
+}
+
#endif /* _NET_IPV6_H */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a391892977cd2..e7a0037d9b56c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -595,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = {
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
- int err, level, optname, one = 1;
-
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -617,17 +615,13 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
switch (svsk->sk_sk->sk_family) {
case AF_INET:
ip_sock_set_pktinfo(svsk->sk_sock->sk);
- return;
+ break;
case AF_INET6:
- level = SOL_IPV6;
- optname = IPV6_RECVPKTINFO;
+ ip6_sock_set_recvpktinfo(svsk->sk_sock->sk);
break;
default:
BUG();
}
- err = kernel_setsockopt(svsk->sk_sock, level, optname,
- (char *)&one, sizeof(one));
- dprintk("svc: kernel_setsockopt returned %d\n", err);
}
/*
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 27/28] rxrpc: add rxrpc_sock_set_min_security_level
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (25 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 26/28] ipv6: add ip6_sock_set_recvpktinfo Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 28/28] tipc: call tsk_set_importance from tipc_topsrv_create_listener Christoph Hellwig
2020-05-28 18:12 ` [Ocfs2-devel] remove most callers of kernel_setsockopt v3 David Miller
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: linux-cifs, rds-devel, cluster-devel, Hideaki YOSHIFUJI, netdev,
linux-kernel, linux-nvme, Jon Maloy, Ying Xue, Eric Dumazet,
target-devel, tipc-discussion, linux-nfs, David Howells,
drbd-dev, Alexey Kuznetsov, ceph-devel, linux-afs, ocfs2-devel,
linux-rdma
Add a helper to directly set the RXRPC_MIN_SECURITY_LEVEL sockopt from
kernel space without going through a fake uaccess.
Thanks to David Howells for the documentation updates.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Howells <dhowells@redhat.com>
---
Documentation/networking/rxrpc.rst | 13 +++++++++++--
fs/afs/rxrpc.c | 6 ++----
include/net/af_rxrpc.h | 2 ++
net/rxrpc/af_rxrpc.c | 13 +++++++++++++
4 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index 5ad35113d0f46..68552b92dc442 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -477,7 +477,7 @@ AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
Encrypted checksum plus packet padded and first eight bytes of packet
encrypted - which includes the actual packet length.
- (c) RXRPC_SECURITY_ENCRYPTED
+ (c) RXRPC_SECURITY_ENCRYPT
Encrypted checksum plus entire packet padded and encrypted, including
actual packet length.
@@ -578,7 +578,7 @@ A client would issue an operation by:
This issues a request_key() to get the key representing the security
context. The minimum security level can be set::
- unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+ unsigned int sec = RXRPC_SECURITY_ENCRYPT;
setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
&sec, sizeof(sec));
@@ -1090,6 +1090,15 @@ The kernel interface functions are as follows:
jiffies). In the event of the timeout occurring, the call will be
aborted and -ETIME or -ETIMEDOUT will be returned.
+ (#) Apply the RXRPC_MIN_SECURITY_LEVEL sockopt to a socket from within in the
+ kernel::
+
+ int rxrpc_sock_set_min_security_level(struct sock *sk,
+ unsigned int val);
+
+ This specifies the minimum security level required for calls on this
+ socket.
+
Configurable Parameters
=======================
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 1ecc67da6c1a4..e313dae01674f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -37,7 +37,6 @@ int afs_open_socket(struct afs_net *net)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
- unsigned int min_level;
int ret;
_enter("");
@@ -57,9 +56,8 @@ int afs_open_socket(struct afs_net *net)
srx.transport.sin6.sin6_family = AF_INET6;
srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
- min_level = RXRPC_SECURITY_ENCRYPT;
- ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
- (void *)&min_level, sizeof(min_level));
+ ret = rxrpc_sock_set_min_security_level(socket->sk,
+ RXRPC_SECURITY_ENCRYPT);
if (ret < 0)
goto error_2;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ab988940bf045..91eacbdcf33d2 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -72,4 +72,6 @@ bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
unsigned long);
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+
#endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 15ee92d795815..394189b81849f 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -571,6 +571,19 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
return ret;
}
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val)
+{
+ if (sk->sk_state != RXRPC_UNBOUND)
+ return -EISCONN;
+ if (val > RXRPC_SECURITY_MAX)
+ return -EINVAL;
+ lock_sock(sk);
+ rxrpc_sk(sk)->min_sec_level = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
+
/*
* set RxRPC socket options
*/
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] [PATCH 28/28] tipc: call tsk_set_importance from tipc_topsrv_create_listener
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (26 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 27/28] rxrpc: add rxrpc_sock_set_min_security_level Christoph Hellwig
@ 2020-05-28 5:12 ` Christoph Hellwig
2020-05-28 18:12 ` [Ocfs2-devel] remove most callers of kernel_setsockopt v3 David Miller
28 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2020-05-28 5:12 UTC (permalink / raw)
To: David S. Miller, Jakub Kicinski
Cc: Eric Dumazet, Alexey Kuznetsov, Hideaki YOSHIFUJI, Jon Maloy,
Ying Xue, drbd-dev-cunTk1MwBs8qoQakbn7OcQ,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
target-devel-u79uwXL29TY76Z2rM5mHXA,
linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-cifs-u79uwXL29TY76Z2rM5mHXA,
cluster-devel-H+wXaHxf7aLQT0dZR+AlfA,
ocfs2-devel-N0ozoZBvEnrZJqsBc5GL+g,
netdev-u79uwXL29TY76Z2rM5mHXA, ceph-devel-u79uwXL29TY76Z2rM5mHXA,
rds-devel-N0ozoZBvEnrZJqsBc5GL+g,
linux-nfs-u79uwXL29TY76Z2rM5mHXA,
tipc-discussion-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Avoid using kernel_setsockopt for the TIPC_IMPORTANCE option when we can
just use the internal helper. The only change needed is to pass a struct
sock instead of tipc_sock, which is private to socket.c
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
net/tipc/socket.c | 18 +++++++++---------
net/tipc/socket.h | 2 ++
net/tipc/topsrv.c | 6 +++---
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d6b67d07d22ec..3734cdbedc9cc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -196,17 +196,17 @@ static int tsk_importance(struct tipc_sock *tsk)
return msg_importance(&tsk->phdr);
}
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+static struct tipc_sock *tipc_sk(const struct sock *sk)
{
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&tsk->phdr, (u32)imp);
- return 0;
+ return container_of(sk, struct tipc_sock, sk);
}
-static struct tipc_sock *tipc_sk(const struct sock *sk)
+int tsk_set_importance(struct sock *sk, int imp)
{
- return container_of(sk, struct tipc_sock, sk);
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+ return 0;
}
static bool tsk_conn_cong(struct tipc_sock *tsk)
@@ -2721,7 +2721,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- tsk_set_importance(new_tsock, msg_importance(msg));
+ tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
new_tsock->conn_type = msg_nametype(msg);
new_tsock->conn_instance = msg_nameinst(msg);
@@ -3139,7 +3139,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tsk_set_importance(tsk, value);
+ res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b9679acee4..b11575afc66fe 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk);
bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+int tsk_set_importance(struct sock *sk, int imp);
+
#endif
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 446af7bbd13e6..1489cfb941d8e 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -497,7 +497,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
{
- int imp = TIPC_CRITICAL_IMPORTANCE;
struct socket *lsock = NULL;
struct sockaddr_tipc saddr;
struct sock *sk;
@@ -514,8 +513,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
sk->sk_user_data = srv;
write_unlock_bh(&sk->sk_callback_lock);
- rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&imp, sizeof(imp));
+ lock_sock(sk);
+ rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+ release_sock(sk);
if (rc < 0)
goto err;
--
2.26.2
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [Ocfs2-devel] remove most callers of kernel_setsockopt v3
2020-05-28 5:12 [Ocfs2-devel] remove most callers of kernel_setsockopt v3 Christoph Hellwig
` (27 preceding siblings ...)
2020-05-28 5:12 ` [Ocfs2-devel] [PATCH 28/28] tipc: call tsk_set_importance from tipc_topsrv_create_listener Christoph Hellwig
@ 2020-05-28 18:12 ` David Miller
28 siblings, 0 replies; 30+ messages in thread
From: David Miller @ 2020-05-28 18:12 UTC (permalink / raw)
To: hch
Cc: kuba, edumazet, kuznet, yoshfuji, jmaloy, ying.xue, drbd-dev,
linux-kernel, linux-rdma, linux-nvme, target-devel, linux-afs,
linux-cifs, cluster-devel, ocfs2-devel, netdev, ceph-devel,
rds-devel, linux-nfs, tipc-discussion
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:08 +0200
> this series removes most callers of the kernel_setsockopt functions, and
> instead switches their users to small functions that implement setting a
> sockopt directly using a normal kernel function call with type safety and
> all the other benefits of not having a function call.
>
> In some cases these functions seem pretty heavy handed as they do
> a lock_sock even for just setting a single variable, but this mirrors
> the real setsockopt implementation unlike a few drivers that just set
> set the fields directly.
...
Series applied, thanks Christoph.
^ permalink raw reply [flat|nested] 30+ messages in thread