netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 net-next 0/4] af_unix: Random improvements for GC.
@ 2023-11-22  1:36 Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 1/4] af_unix: Do not use atomic ops for unix_sk(sk)->inflight Kuniyuki Iwashima
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2023-11-22  1:36 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Ivan Babrou, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev

If more than 16000 inflight AF_UNIX sockets exist on a host, each
sendmsg() will be forced to wait for unix_gc() even if a process
is not sending any FD.

This series tries not to impose such a penalty on sane users.


Kuniyuki Iwashima (4):
  af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
  af_unix: Return struct unix_sock from unix_get_socket().
  af_unix: Run GC on only one CPU.
  af_unix: Try to run GC async.

 include/linux/io_uring.h |  4 +-
 include/net/af_unix.h    |  6 +--
 include/net/scm.h        |  1 +
 io_uring/io_uring.c      |  5 ++-
 net/core/scm.c           |  3 ++
 net/unix/af_unix.c       | 10 +++--
 net/unix/garbage.c       | 84 ++++++++++++++++++----------------------
 net/unix/scm.c           | 34 ++++++++--------
 8 files changed, 72 insertions(+), 75 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v1 net-next 1/4] af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
  2023-11-22  1:36 [PATCH v1 net-next 0/4] af_unix: Random improvements for GC Kuniyuki Iwashima
@ 2023-11-22  1:36 ` Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 2/4] af_unix: Return struct unix_sock from unix_get_socket() Kuniyuki Iwashima
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2023-11-22  1:36 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Ivan Babrou, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev

When touching unix_sk(sk)->inflight, we are always under
spin_lock(&unix_gc_lock).

Let's convert unix_sk(sk)->inflight to the normal unsigned long.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/af_unix.h |  2 +-
 net/unix/af_unix.c    |  4 ++--
 net/unix/garbage.c    | 17 ++++++++---------
 net/unix/scm.c        |  8 +++++---
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 824c258143a3..5a8a670b1920 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -61,7 +61,7 @@ struct unix_sock {
 	struct mutex		iolock, bindlock;
 	struct sock		*peer;
 	struct list_head	link;
-	atomic_long_t		inflight;
+	unsigned long		inflight;
 	spinlock_t		lock;
 	unsigned long		gc_flags;
 #define UNIX_GC_CANDIDATE	0
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 45506a95b25f..0ba7fb09c1bd 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -995,11 +995,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	sk->sk_write_space	= unix_write_space;
 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
 	sk->sk_destruct		= unix_sock_destructor;
-	u	  = unix_sk(sk);
+	u = unix_sk(sk);
+	u->inflight = 0;
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
 	spin_lock_init(&u->lock);
-	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->iolock); /* single task reading lock */
 	mutex_init(&u->bindlock); /* single task binding lock */
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2405f0f9af31..db1bb99bb793 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
 
 static void dec_inflight(struct unix_sock *usk)
 {
-	atomic_long_dec(&usk->inflight);
+	usk->inflight--;
 }
 
 static void inc_inflight(struct unix_sock *usk)
 {
-	atomic_long_inc(&usk->inflight);
+	usk->inflight++;
 }
 
 static void inc_inflight_move_tail(struct unix_sock *u)
 {
-	atomic_long_inc(&u->inflight);
+	u->inflight++;
+
 	/* If this still might be part of a cycle, move it to the end
 	 * of the list, so that it's checked even if it was already
 	 * passed over
@@ -237,14 +238,12 @@ void unix_gc(void)
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
-		long inflight_refs;
 
 		total_refs = file_count(u->sk.sk_socket->file);
-		inflight_refs = atomic_long_read(&u->inflight);
 
-		BUG_ON(inflight_refs < 1);
-		BUG_ON(total_refs < inflight_refs);
-		if (total_refs == inflight_refs) {
+		BUG_ON(!u->inflight);
+		BUG_ON(total_refs < u->inflight);
+		if (total_refs == u->inflight) {
 			list_move_tail(&u->link, &gc_candidates);
 			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
 			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
@@ -271,7 +270,7 @@ void unix_gc(void)
 		/* Move cursor to after the current position. */
 		list_move(&cursor, &u->link);
 
-		if (atomic_long_read(&u->inflight) > 0) {
+		if (u->inflight) {
 			list_move_tail(&u->link, &not_cycle_list);
 			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 6ff628f2349f..4b3979272a81 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -55,12 +55,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		if (atomic_long_inc_return(&u->inflight) == 1) {
+		if (!u->inflight) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
 		} else {
 			BUG_ON(list_empty(&u->link));
 		}
+		u->inflight++;
 		/* Paired with READ_ONCE() in wait_for_unix_gc() */
 		WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
 	}
@@ -77,10 +78,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		BUG_ON(!atomic_long_read(&u->inflight));
+		BUG_ON(!u->inflight);
 		BUG_ON(list_empty(&u->link));
 
-		if (atomic_long_dec_and_test(&u->inflight))
+		u->inflight--;
+		if (!u->inflight)
 			list_del_init(&u->link);
 		/* Paired with READ_ONCE() in wait_for_unix_gc() */
 		WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v1 net-next 2/4] af_unix: Return struct unix_sock from unix_get_socket().
  2023-11-22  1:36 [PATCH v1 net-next 0/4] af_unix: Random improvements for GC Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 1/4] af_unix: Do not use atomic ops for unix_sk(sk)->inflight Kuniyuki Iwashima
@ 2023-11-22  1:36 ` Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 3/4] af_unix: Run GC on only one CPU Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 4/4] af_unix: Try to run GC async Kuniyuki Iwashima
  3 siblings, 0 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2023-11-22  1:36 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Ivan Babrou, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev,
	Jens Axboe, Pavel Begunkov

Currently, unix_get_socket() returns struct sock, but after calling
it, we always cast it to unix_sk().

Let's return struct unix_sock from unix_get_socket().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Pavel Begunkov <asml.silence@gmail.com>
---
 include/linux/io_uring.h |  4 ++--
 include/net/af_unix.h    |  2 +-
 io_uring/io_uring.c      |  5 +++--
 net/unix/garbage.c       | 19 +++++++------------
 net/unix/scm.c           | 26 +++++++++++---------------
 5 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 106cdc55ff3b..53170ecca396 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -46,7 +46,7 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 			      struct iov_iter *iter, void *ioucmd);
 void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
 			unsigned issue_flags);
-struct sock *io_uring_get_socket(struct file *file);
+struct unix_sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
 void io_uring_unreg_ringfd(void);
@@ -100,7 +100,7 @@ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
 			void (*task_work_cb)(struct io_uring_cmd *, unsigned))
 {
 }
-static inline struct sock *io_uring_get_socket(struct file *file)
+static inline struct unix_sock *io_uring_get_socket(struct file *file)
 {
 	return NULL;
 }
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 5a8a670b1920..c628d30ceb19 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -14,7 +14,7 @@ void unix_destruct_scm(struct sk_buff *skb);
 void io_uring_destruct_scm(struct sk_buff *skb);
 void unix_gc(void);
 void wait_for_unix_gc(void);
-struct sock *unix_get_socket(struct file *filp);
+struct unix_sock *unix_get_socket(struct file *filp);
 struct sock *unix_peer_get(struct sock *sk);
 
 #define UNIX_HASH_MOD	(256 - 1)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 8d1bc6cdfe71..e93431c0ec97 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -175,13 +175,14 @@ static struct ctl_table kernel_io_uring_disabled_table[] = {
 };
 #endif
 
-struct sock *io_uring_get_socket(struct file *file)
+struct unix_sock *io_uring_get_socket(struct file *file)
 {
 #if defined(CONFIG_UNIX)
 	if (io_is_uring_fops(file)) {
 		struct io_ring_ctx *ctx = file->private_data;
 
-		return ctx->ring_sock->sk;
+		if (ctx->ring_sock->sk)
+			return unix_sk(ctx->ring_sock->sk);
 	}
 #endif
 	return NULL;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index db1bb99bb793..4d634f5f6a55 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -105,20 +105,15 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 
 			while (nfd--) {
 				/* Get the socket the fd matches if it indeed does so */
-				struct sock *sk = unix_get_socket(*fp++);
+				struct unix_sock *u = unix_get_socket(*fp++);
 
-				if (sk) {
-					struct unix_sock *u = unix_sk(sk);
+				/* Ignore non-candidates, they could have been added
+				 * to the queues after starting the garbage collection
+				 */
+				if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
+					hit = true;
 
-					/* Ignore non-candidates, they could
-					 * have been added to the queues after
-					 * starting the garbage collection
-					 */
-					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
-						hit = true;
-
-						func(u);
-					}
+					func(u);
 				}
 			}
 			if (hit && hitlist != NULL) {
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 4b3979272a81..36ce8fed9acc 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -21,9 +21,8 @@ EXPORT_SYMBOL(gc_inflight_list);
 DEFINE_SPINLOCK(unix_gc_lock);
 EXPORT_SYMBOL(unix_gc_lock);
 
-struct sock *unix_get_socket(struct file *filp)
+struct unix_sock *unix_get_socket(struct file *filp)
 {
-	struct sock *u_sock = NULL;
 	struct inode *inode = file_inode(filp);
 
 	/* Socket ? */
@@ -34,12 +33,13 @@ struct sock *unix_get_socket(struct file *filp)
 
 		/* PF_UNIX ? */
 		if (s && ops && ops->family == PF_UNIX)
-			u_sock = s;
-	} else {
-		/* Could be an io_uring instance */
-		u_sock = io_uring_get_socket(filp);
+			return unix_sk(s);
+
+		return NULL;
 	}
-	return u_sock;
+
+	/* Could be an io_uring instance */
+	return io_uring_get_socket(filp);
 }
 EXPORT_SYMBOL(unix_get_socket);
 
@@ -48,13 +48,11 @@ EXPORT_SYMBOL(unix_get_socket);
  */
 void unix_inflight(struct user_struct *user, struct file *fp)
 {
-	struct sock *s = unix_get_socket(fp);
+	struct unix_sock *u = unix_get_socket(fp);
 
 	spin_lock(&unix_gc_lock);
 
-	if (s) {
-		struct unix_sock *u = unix_sk(s);
-
+	if (u) {
 		if (!u->inflight) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
@@ -71,13 +69,11 @@ void unix_inflight(struct user_struct *user, struct file *fp)
 
 void unix_notinflight(struct user_struct *user, struct file *fp)
 {
-	struct sock *s = unix_get_socket(fp);
+	struct unix_sock *u = unix_get_socket(fp);
 
 	spin_lock(&unix_gc_lock);
 
-	if (s) {
-		struct unix_sock *u = unix_sk(s);
-
+	if (u) {
 		BUG_ON(!u->inflight);
 		BUG_ON(list_empty(&u->link));
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v1 net-next 3/4] af_unix: Run GC on only one CPU.
  2023-11-22  1:36 [PATCH v1 net-next 0/4] af_unix: Random improvements for GC Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 1/4] af_unix: Do not use atomic ops for unix_sk(sk)->inflight Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 2/4] af_unix: Return struct unix_sock from unix_get_socket() Kuniyuki Iwashima
@ 2023-11-22  1:36 ` Kuniyuki Iwashima
  2023-11-22  1:36 ` [PATCH v1 net-next 4/4] af_unix: Try to run GC async Kuniyuki Iwashima
  3 siblings, 0 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2023-11-22  1:36 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Ivan Babrou, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev

If more than 16000 inflight AF_UNIX sockets exist and the garbage
collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc().
Also, they wait for unix_gc() to complete.

In unix_gc(), all inflight AF_UNIX sockets are traversed at least once,
and more if they are the GC candidate.  Thus, sendmsg() significantly
slows down with too many inflight AF_UNIX sockets.

There is a small window to invoke multiple unix_gc() instances, which
will then be blocked by the same spinlock except for one.

Let's convert unix_gc() to use struct work so that it will not consume
CPUs unnecessarily.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 net/unix/garbage.c | 40 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 4d634f5f6a55..8bc93a7e745f 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -86,7 +86,9 @@
 /* Internal data structures and random procedures: */
 
 static LIST_HEAD(gc_candidates);
-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+
+static void __unix_gc(struct work_struct *work);
+static DECLARE_WORK(unix_gc_work, __unix_gc);
 
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
@@ -181,24 +183,22 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
-static bool gc_in_progress;
 #define UNIX_INFLIGHT_TRIGGER_GC 16000
 
 void wait_for_unix_gc(void)
 {
-	/* If number of inflight sockets is insane,
-	 * force a garbage collect right now.
+	/* If number of inflight sockets is insane, kick a
+	 * garbage collect right now.
 	 * Paired with the WRITE_ONCE() in unix_inflight(),
-	 * unix_notinflight() and gc_in_progress().
+	 * unix_notinflight().
 	 */
-	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
-	    !READ_ONCE(gc_in_progress))
-		unix_gc();
-	wait_event(unix_gc_wait, gc_in_progress == false);
+	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC)
+		queue_work(system_unbound_wq, &unix_gc_work);
+
+	flush_work(&unix_gc_work);
 }
 
-/* The external entry point: unix_gc() */
-void unix_gc(void)
+static void __unix_gc(struct work_struct *work)
 {
 	struct sk_buff *next_skb, *skb;
 	struct unix_sock *u;
@@ -209,13 +209,6 @@ void unix_gc(void)
 
 	spin_lock(&unix_gc_lock);
 
-	/* Avoid a recursive GC. */
-	if (gc_in_progress)
-		goto out;
-
-	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
-	WRITE_ONCE(gc_in_progress, true);
-
 	/* First, select candidates for garbage collection.  Only
 	 * in-flight sockets are considered, and from those only ones
 	 * which don't have any external reference.
@@ -319,11 +312,10 @@ void unix_gc(void)
 	/* All candidates should have been detached by now. */
 	BUG_ON(!list_empty(&gc_candidates));
 
-	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
-	WRITE_ONCE(gc_in_progress, false);
-
-	wake_up(&unix_gc_wait);
-
- out:
 	spin_unlock(&unix_gc_lock);
 }
+
+void unix_gc(void)
+{
+	queue_work(system_unbound_wq, &unix_gc_work);
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v1 net-next 4/4] af_unix: Try to run GC async.
  2023-11-22  1:36 [PATCH v1 net-next 0/4] af_unix: Random improvements for GC Kuniyuki Iwashima
                   ` (2 preceding siblings ...)
  2023-11-22  1:36 ` [PATCH v1 net-next 3/4] af_unix: Run GC on only one CPU Kuniyuki Iwashima
@ 2023-11-22  1:36 ` Kuniyuki Iwashima
  2023-11-22 15:45   ` kernel test robot
  2023-11-22 20:23   ` kernel test robot
  3 siblings, 2 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2023-11-22  1:36 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Ivan Babrou, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev

If more than 16000 inflight AF_UNIX sockets exist and the garbage
collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc().
Also, they wait for unix_gc() to complete.

In unix_gc(), all inflight AF_UNIX sockets are traversed at least once,
and more if they are the GC candidate.  Thus, sendmsg() significantly
slows down with too many inflight AF_UNIX sockets.

However, if a process sends data with no AF_UNIX FD, the sendmsg() call
does not need to wait for GC.  After this change, only the process that
meets the condition below will be blocked under such a situation.

  1) cmsg contains AF_UNIX socket
  2) more than 32 AF_UNIX sent by the same user are still inflight

Note that even a sendmsg() call that only meets the condition 1) will be
blocked later in unix_scm_to_skb() by the spinlock, but we allow that as
a bonus for sane users.

The results below are the time spent in unix_dgram_sendmsg() sending 1
byte of data with no FD 4096 times on a host where 32K inflight AF_UNIX
sockets exist.

Without series: the sane sendmsg() needs to wait gc unreasonably.

  $ sudo /usr/share/bcc/tools/funclatency -p 11165 unix_dgram_sendmsg
  Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end.
  ^C
       nsecs               : count     distribution
  [...]
      524288 -> 1048575    : 0        |                                        |
     1048576 -> 2097151    : 3881     |****************************************|
     2097152 -> 4194303    : 214      |**                                      |
     4194304 -> 8388607    : 1        |                                        |

  avg = 1825567 nsecs, total: 7477526027 nsecs, count: 4096

With series: the sane sendmsg() can finish much faster.

  $ sudo /usr/share/bcc/tools/funclatency -p 8702  unix_dgram_sendmsg
  Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end.
  ^C
       nsecs               : count     distribution
  [...]
         128 -> 255        : 0        |                                        |
         256 -> 511        : 4092     |****************************************|
         512 -> 1023       : 2        |                                        |
        1024 -> 2047       : 0        |                                        |
        2048 -> 4095       : 0        |                                        |
        4096 -> 8191       : 1        |                                        |
        8192 -> 16383      : 1        |                                        |

  avg = 410 nsecs, total: 1680510 nsecs, count: 4096

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/af_unix.h |  2 +-
 include/net/scm.h     |  1 +
 net/core/scm.c        |  3 +++
 net/unix/af_unix.c    |  6 ++++--
 net/unix/garbage.c    | 10 ++++++++--
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index c628d30ceb19..f8e654d418e6 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_destruct_scm(struct sk_buff *skb);
 void io_uring_destruct_scm(struct sk_buff *skb);
 void unix_gc(void);
-void wait_for_unix_gc(void);
+void wait_for_unix_gc(struct scm_fp_list *fpl);
 struct unix_sock *unix_get_socket(struct file *filp);
 struct sock *unix_peer_get(struct sock *sk);
 
diff --git a/include/net/scm.h b/include/net/scm.h
index e8c76b4be2fe..1ff6a2855064 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -24,6 +24,7 @@ struct scm_creds {
 
 struct scm_fp_list {
 	short			count;
+	short			count_unix;
 	short			max;
 	struct user_struct	*user;
 	struct file		*fp[SCM_MAX_FD];
diff --git a/net/core/scm.c b/net/core/scm.c
index 880027ecf516..13d0e7e88be5 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -35,6 +35,7 @@
 #include <net/compat.h>
 #include <net/scm.h>
 #include <net/cls_cgroup.h>
+#include <net/af_unix.h>
 
 
 /*
@@ -105,6 +106,8 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 			return -EBADF;
 		*fpp++ = file;
 		fpl->count++;
+		if (unix_get_socket(file))
+			fpl->count_unix++;
 	}
 
 	if (!fpl->user)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0ba7fb09c1bd..d1a54e65f2cf 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1925,11 +1925,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	long timeo;
 	int err;
 
-	wait_for_unix_gc();
 	err = scm_send(sock, msg, &scm, false);
 	if (err < 0)
 		return err;
 
+	wait_for_unix_gc(scm.fp);
+
 	err = -EOPNOTSUPP;
 	if (msg->msg_flags&MSG_OOB)
 		goto out;
@@ -2201,11 +2202,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	bool fds_sent = false;
 	int data_len;
 
-	wait_for_unix_gc();
 	err = scm_send(sock, msg, &scm, false);
 	if (err < 0)
 		return err;
 
+	wait_for_unix_gc(scm.fp);
+
 	err = -EOPNOTSUPP;
 	if (msg->msg_flags & MSG_OOB) {
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8bc93a7e745f..73091d6b7fc4 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -184,8 +184,9 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 }
 
 #define UNIX_INFLIGHT_TRIGGER_GC 16000
+#define UNIX_INFLIGHT_SANE_USER 32
 
-void wait_for_unix_gc(void)
+void wait_for_unix_gc(struct scm_fp_list *fpl)
 {
 	/* If number of inflight sockets is insane, kick a
 	 * garbage collect right now.
@@ -195,7 +196,12 @@ void wait_for_unix_gc(void)
 	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC)
 		queue_work(system_unbound_wq, &unix_gc_work);
 
-	flush_work(&unix_gc_work);
+	/* Penalise users who want to send AF_UNIX sockets
+	 * but whose sockets have not been received yet.
+	 */
+	if (fpl && fpl->count_unix &&
+	    READ_ONCE(fpl->user->unix_inflight) > UNIX_INFLIGHT_SANE_USER)
+		flush_work(&unix_gc_work);
 }
 
 static void __unix_gc(struct work_struct *work)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 net-next 4/4] af_unix: Try to run GC async.
  2023-11-22  1:36 ` [PATCH v1 net-next 4/4] af_unix: Try to run GC async Kuniyuki Iwashima
@ 2023-11-22 15:45   ` kernel test robot
  2023-11-22 20:23   ` kernel test robot
  1 sibling, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-11-22 15:45 UTC (permalink / raw)
  To: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: oe-kbuild-all, netdev, Ivan Babrou, Kuniyuki Iwashima

Hi Kuniyuki,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Do-not-use-atomic-ops-for-unix_sk-sk-inflight/20231122-094214
base:   net-next/main
patch link:    https://lore.kernel.org/r/20231122013629.28554-5-kuniyu%40amazon.com
patch subject: [PATCH v1 net-next 4/4] af_unix: Try to run GC async.
config: sh-defconfig (https://download.01.org/0day-ci/archive/20231122/202311222204.jSne0FwB-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231122/202311222204.jSne0FwB-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311222204.jSne0FwB-lkp@intel.com/

All errors (new ones prefixed by >>):

   sh4-linux-ld: net/core/scm.o: in function `__scm_send':
>> scm.c:(.text+0x6e8): undefined reference to `unix_get_socket'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 net-next 4/4] af_unix: Try to run GC async.
  2023-11-22  1:36 ` [PATCH v1 net-next 4/4] af_unix: Try to run GC async Kuniyuki Iwashima
  2023-11-22 15:45   ` kernel test robot
@ 2023-11-22 20:23   ` kernel test robot
  1 sibling, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-11-22 20:23 UTC (permalink / raw)
  To: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: llvm, oe-kbuild-all, netdev, Ivan Babrou, Kuniyuki Iwashima

Hi Kuniyuki,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/af_unix-Do-not-use-atomic-ops-for-unix_sk-sk-inflight/20231122-094214
base:   net-next/main
patch link:    https://lore.kernel.org/r/20231122013629.28554-5-kuniyu%40amazon.com
patch subject: [PATCH v1 net-next 4/4] af_unix: Try to run GC async.
config: x86_64-buildonly-randconfig-006-20231122 (https://download.01.org/0day-ci/archive/20231123/202311230220.WfMchMxF-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231123/202311230220.WfMchMxF-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311230220.WfMchMxF-lkp@intel.com/

All errors (new ones prefixed by >>):

>> ld.lld: error: undefined symbol: unix_get_socket
   >>> referenced by usercopy_64.c
   >>>               vmlinux.o:(__scm_send)

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-11-22 20:23 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-22  1:36 [PATCH v1 net-next 0/4] af_unix: Random improvements for GC Kuniyuki Iwashima
2023-11-22  1:36 ` [PATCH v1 net-next 1/4] af_unix: Do not use atomic ops for unix_sk(sk)->inflight Kuniyuki Iwashima
2023-11-22  1:36 ` [PATCH v1 net-next 2/4] af_unix: Return struct unix_sock from unix_get_socket() Kuniyuki Iwashima
2023-11-22  1:36 ` [PATCH v1 net-next 3/4] af_unix: Run GC on only one CPU Kuniyuki Iwashima
2023-11-22  1:36 ` [PATCH v1 net-next 4/4] af_unix: Try to run GC async Kuniyuki Iwashima
2023-11-22 15:45   ` kernel test robot
2023-11-22 20:23   ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).