All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: ikent@redhat.com, onestero@redhat.com, willy@infradead.org,
	ebiederm@redhat.com
Subject: [PATCH v3 1/5] pid: replace pidmap_lock with xarray lock
Date: Fri,  2 Dec 2022 12:16:16 -0500	[thread overview]
Message-ID: <20221202171620.509140-2-bfoster@redhat.com> (raw)
In-Reply-To: <20221202171620.509140-1-bfoster@redhat.com>

As a first step to changing the struct pid tracking code from the
idr over to the xarray, replace the custom pidmap_lock spinlock with
the internal lock associated with the underlying xarray. This is
effectively equivalent to using idr_lock() and friends, but since
the goal is to disentangle from the idr, move directly to the
underlying xarray api.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 kernel/pid.c | 79 ++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 39 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index 3fbc5e46b721..3622f8b13143 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -86,22 +86,6 @@ struct pid_namespace init_pid_ns = {
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
-/*
- * Note: disable interrupts while the pidmap_lock is held as an
- * interrupt might come in and do read_lock(&tasklist_lock).
- *
- * If we don't disable interrupts there is a nasty deadlock between
- * detach_pid()->free_pid() and another cpu that does
- * spin_lock(&pidmap_lock) followed by an interrupt routine that does
- * read_lock(&tasklist_lock);
- *
- * After we clean up the tasklist_lock and know there are no
- * irq handlers that take it we can leave the interrupts enabled.
- * For now it is easier to be safe than to prove it can't happen.
- */
-
-static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
-
 void put_pid(struct pid *pid)
 {
 	struct pid_namespace *ns;
@@ -129,10 +113,11 @@ void free_pid(struct pid *pid)
 	int i;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pidmap_lock, flags);
 	for (i = 0; i <= pid->level; i++) {
 		struct upid *upid = pid->numbers + i;
 		struct pid_namespace *ns = upid->ns;
+
+		xa_lock_irqsave(&ns->idr.idr_rt, flags);
 		switch (--ns->pid_allocated) {
 		case 2:
 		case 1:
@@ -150,8 +135,8 @@ void free_pid(struct pid *pid)
 		}
 
 		idr_remove(&ns->idr, upid->nr);
+		xa_unlock_irqrestore(&ns->idr.idr_rt, flags);
 	}
-	spin_unlock_irqrestore(&pidmap_lock, flags);
 
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
@@ -206,7 +191,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 		}
 
 		idr_preload(GFP_KERNEL);
-		spin_lock_irq(&pidmap_lock);
+		xa_lock_irq(&tmp->idr.idr_rt);
 
 		if (tid) {
 			nr = idr_alloc(&tmp->idr, NULL, tid,
@@ -233,7 +218,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 			nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
 					      pid_max, GFP_ATOMIC);
 		}
-		spin_unlock_irq(&pidmap_lock);
+		xa_unlock_irq(&tmp->idr.idr_rt);
 		idr_preload_end();
 
 		if (nr < 0) {
@@ -266,34 +251,38 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 	INIT_HLIST_HEAD(&pid->inodes);
 
 	upid = pid->numbers + ns->level;
-	spin_lock_irq(&pidmap_lock);
-	if (!(ns->pid_allocated & PIDNS_ADDING))
-		goto out_unlock;
 	for ( ; upid >= pid->numbers; --upid) {
+		tmp = upid->ns;
+
+		xa_lock_irq(&tmp->idr.idr_rt);
+		if (tmp == ns && !(tmp->pid_allocated & PIDNS_ADDING)) {
+			xa_unlock_irq(&tmp->idr.idr_rt);
+			put_pid_ns(ns);
+			goto out_free;
+		}
+
 		/* Make the PID visible to find_pid_ns. */
-		idr_replace(&upid->ns->idr, pid, upid->nr);
-		upid->ns->pid_allocated++;
+		idr_replace(&tmp->idr, pid, upid->nr);
+		tmp->pid_allocated++;
+		xa_unlock_irq(&tmp->idr.idr_rt);
 	}
-	spin_unlock_irq(&pidmap_lock);
 
 	return pid;
 
-out_unlock:
-	spin_unlock_irq(&pidmap_lock);
-	put_pid_ns(ns);
-
 out_free:
-	spin_lock_irq(&pidmap_lock);
 	while (++i <= ns->level) {
 		upid = pid->numbers + i;
-		idr_remove(&upid->ns->idr, upid->nr);
-	}
+		tmp = upid->ns;
 
-	/* On failure to allocate the first pid, reset the state */
-	if (ns->pid_allocated == PIDNS_ADDING)
-		idr_set_cursor(&ns->idr, 0);
+		xa_lock_irq(&tmp->idr.idr_rt);
 
-	spin_unlock_irq(&pidmap_lock);
+		/* On failure to allocate the first pid, reset the state */
+		if (tmp == ns && tmp->pid_allocated == PIDNS_ADDING)
+			idr_set_cursor(&ns->idr, 0);
+
+		idr_remove(&tmp->idr, upid->nr);
+		xa_unlock_irq(&tmp->idr.idr_rt);
+	}
 
 	kmem_cache_free(ns->pid_cachep, pid);
 	return ERR_PTR(retval);
@@ -301,9 +290,9 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 
 void disable_pid_allocation(struct pid_namespace *ns)
 {
-	spin_lock_irq(&pidmap_lock);
+	xa_lock_irq(&ns->idr.idr_rt);
 	ns->pid_allocated &= ~PIDNS_ADDING;
-	spin_unlock_irq(&pidmap_lock);
+	xa_unlock_irq(&ns->idr.idr_rt);
 }
 
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
@@ -647,6 +636,18 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
 	return fd;
 }
 
+/*
+ * Note: disable interrupts while the xarray lock is held as an interrupt might
+ * come in and do read_lock(&tasklist_lock).
+ *
+ * If we don't disable interrupts there is a nasty deadlock between
+ * detach_pid()->free_pid() and another cpu that does xa_lock() followed by an
+ * interrupt routine that does read_lock(&tasklist_lock);
+ *
+ * After we clean up the tasklist_lock and know there are no irq handlers that
+ * take it we can leave the interrupts enabled.  For now it is easier to be safe
+ * than to prove it can't happen.
+ */
 void __init pid_idr_init(void)
 {
 	/* Verify no one has done anything silly: */
-- 
2.37.3


  reply	other threads:[~2022-12-02 17:17 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-02 17:16 [PATCH v3 0/5] proc: improve root readdir latency with many threads Brian Foster
2022-12-02 17:16 ` Brian Foster [this message]
2022-12-12  1:44   ` [PATCH v3 1/5] pid: replace pidmap_lock with xarray lock Ian Kent
2022-12-02 17:16 ` [PATCH v3 2/5] pid: split cyclic id allocation cursor from idr Brian Foster
2022-12-12  1:45   ` Ian Kent
2022-12-02 17:16 ` [PATCH v3 3/5] pid: switch pid_namespace from idr to xarray Brian Foster
2022-12-12  1:47   ` Ian Kent
2022-12-02 17:16 ` [PATCH v3 4/5] pid: mark pids associated with group leader tasks Brian Foster
2022-12-12  1:51   ` Ian Kent
2022-12-13  2:00   ` kernel test robot
2022-12-02 17:16 ` [PATCH v3 5/5] procfs: use efficient tgid pid search on root readdir Brian Foster
2022-12-12  1:58   ` Ian Kent
2022-12-12  2:05 ` [PATCH v3 0/5] proc: improve root readdir latency with many threads Ian Kent

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221202171620.509140-2-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=ebiederm@redhat.com \
    --cc=ikent@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=onestero@redhat.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.