All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org, linux-cachefs@redhat.com,
	dhowells@redhat.com, gfs2@lists.linux.dev,
	dm-devel@lists.linux.dev, linux-security-module@vger.kernel.org,
	selinux@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 05/11] selinux: use dlist for isec inode list
Date: Wed,  6 Dec 2023 17:05:34 +1100	[thread overview]
Message-ID: <20231206060629.2827226-6-david@fromorbit.com> (raw)
In-Reply-To: <20231206060629.2827226-1-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

Because it's a horrible point of lock contention under heavily
concurrent directory traversals...

  - 12.14% d_instantiate
     - 12.06% security_d_instantiate
	- 12.13% selinux_d_instantiate
	   - 12.16% inode_doinit_with_dentry
	      - 15.45% _raw_spin_lock
		 - do_raw_spin_lock
		      14.68% __pv_queued_spin_lock_slowpath


Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 include/linux/dlock-list.h        |  9 ++++
 security/selinux/hooks.c          | 72 +++++++++++++++----------------
 security/selinux/include/objsec.h |  6 +--
 3 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
index 327cb9edc7e3..7ad933b8875d 100644
--- a/include/linux/dlock-list.h
+++ b/include/linux/dlock-list.h
@@ -132,6 +132,15 @@ extern void dlock_lists_add(struct dlock_list_node *node,
 			    struct dlock_list_heads *dlist);
 extern void dlock_lists_del(struct dlock_list_node *node);
 
+static inline void
+dlock_list_del_iter(struct dlock_list_iter *iter,
+		struct dlock_list_node *node)
+{
+	WARN_ON_ONCE((iter->entry != READ_ONCE(node->head)));
+	list_del_init(&node->list);
+	WRITE_ONCE(node->head, NULL);
+}
+
 /*
  * Find the first entry of the next available list.
  */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index feda711c6b7b..0358d7c66949 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -340,26 +340,11 @@ static struct inode_security_struct *backing_inode_security(struct dentry *dentr
 static void inode_free_security(struct inode *inode)
 {
 	struct inode_security_struct *isec = selinux_inode(inode);
-	struct superblock_security_struct *sbsec;
 
 	if (!isec)
 		return;
-	sbsec = selinux_superblock(inode->i_sb);
-	/*
-	 * As not all inode security structures are in a list, we check for
-	 * empty list outside of the lock to make sure that we won't waste
-	 * time taking a lock doing nothing.
-	 *
-	 * The list_del_init() function can be safely called more than once.
-	 * It should not be possible for this function to be called with
-	 * concurrent list_add(), but for better safety against future changes
-	 * in the code, we use list_empty_careful() here.
-	 */
-	if (!list_empty_careful(&isec->list)) {
-		spin_lock(&sbsec->isec_lock);
-		list_del_init(&isec->list);
-		spin_unlock(&sbsec->isec_lock);
-	}
+	if (!list_empty(&isec->list.list))
+		dlock_lists_del(&isec->list);
 }
 
 struct selinux_mnt_opts {
@@ -547,6 +532,8 @@ static int sb_finish_set_opts(struct super_block *sb)
 	struct superblock_security_struct *sbsec = selinux_superblock(sb);
 	struct dentry *root = sb->s_root;
 	struct inode *root_inode = d_backing_inode(root);
+	struct dlock_list_iter iter;
+	struct inode_security_struct *isec, *n;
 	int rc = 0;
 
 	if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
@@ -570,27 +557,28 @@ static int sb_finish_set_opts(struct super_block *sb)
 	/* Initialize the root inode. */
 	rc = inode_doinit_with_dentry(root_inode, root);
 
-	/* Initialize any other inodes associated with the superblock, e.g.
-	   inodes created prior to initial policy load or inodes created
-	   during get_sb by a pseudo filesystem that directly
-	   populates itself. */
-	spin_lock(&sbsec->isec_lock);
-	while (!list_empty(&sbsec->isec_head)) {
-		struct inode_security_struct *isec =
-				list_first_entry(&sbsec->isec_head,
-					   struct inode_security_struct, list);
+	/*
+	 * Initialize any other inodes associated with the superblock, e.g.
+	 * inodes created prior to initial policy load or inodes created during
+	 * get_sb by a pseudo filesystem that directly populates itself.
+	 */
+	init_dlock_list_iter(&iter, &sbsec->isec_head);
+	dlist_for_each_entry_safe(isec, n, &iter, list) {
 		struct inode *inode = isec->inode;
-		list_del_init(&isec->list);
-		spin_unlock(&sbsec->isec_lock);
+
+		dlock_list_del_iter(&iter, &isec->list);
+		dlock_list_unlock(&iter);
+
 		inode = igrab(inode);
 		if (inode) {
 			if (!IS_PRIVATE(inode))
 				inode_doinit_with_dentry(inode, NULL);
 			iput(inode);
 		}
-		spin_lock(&sbsec->isec_lock);
+
+		dlock_list_relock(&iter);
 	}
-	spin_unlock(&sbsec->isec_lock);
+	WARN_ON_ONCE(!dlock_lists_empty(&sbsec->isec_head));
 	return rc;
 }
 
@@ -1428,10 +1416,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
 		/* Defer initialization until selinux_complete_init,
 		   after the initial policy is loaded and the security
 		   server is ready to handle calls. */
-		spin_lock(&sbsec->isec_lock);
-		if (list_empty(&isec->list))
-			list_add(&isec->list, &sbsec->isec_head);
-		spin_unlock(&sbsec->isec_lock);
+		if (list_empty(&isec->list.list))
+			dlock_lists_add(&isec->list, &sbsec->isec_head);
 		goto out_unlock;
 	}
 
@@ -2548,9 +2534,10 @@ static int selinux_sb_alloc_security(struct super_block *sb)
 {
 	struct superblock_security_struct *sbsec = selinux_superblock(sb);
 
+	if (alloc_dlock_list_heads(&sbsec->isec_head))
+		return -ENOMEM;
+
 	mutex_init(&sbsec->lock);
-	INIT_LIST_HEAD(&sbsec->isec_head);
-	spin_lock_init(&sbsec->isec_lock);
 	sbsec->sid = SECINITSID_UNLABELED;
 	sbsec->def_sid = SECINITSID_FILE;
 	sbsec->mntpoint_sid = SECINITSID_UNLABELED;
@@ -2558,6 +2545,15 @@ static int selinux_sb_alloc_security(struct super_block *sb)
 	return 0;
 }
 
+static void selinux_sb_free_security(struct super_block *sb)
+{
+	struct superblock_security_struct *sbsec = selinux_superblock(sb);
+
+	if (!sbsec)
+		return;
+	free_dlock_list_heads(&sbsec->isec_head);
+}
+
 static inline int opt_len(const char *s)
 {
 	bool open_quote = false;
@@ -2841,7 +2837,7 @@ static int selinux_inode_alloc_security(struct inode *inode)
 	u32 sid = current_sid();
 
 	spin_lock_init(&isec->lock);
-	INIT_LIST_HEAD(&isec->list);
+	init_dlock_list_node(&isec->list);
 	isec->inode = inode;
 	isec->sid = SECINITSID_UNLABELED;
 	isec->sclass = SECCLASS_FILE;
@@ -2920,6 +2916,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
 	if (rc)
 		return rc;
 
+
 	/* Possibly defer initialization to selinux_complete_init. */
 	if (sbsec->flags & SE_SBINITIALIZED) {
 		struct inode_security_struct *isec = selinux_inode(inode);
@@ -7215,6 +7212,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
 		      selinux_msg_queue_alloc_security),
 	LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security),
 	LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security),
+	LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security),
 	LSM_HOOK_INIT(inode_alloc_security, selinux_inode_alloc_security),
 	LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security),
 	LSM_HOOK_INIT(secid_to_secctx, selinux_secid_to_secctx),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 8159fd53c3de..e0709f429c56 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/lsm_hooks.h>
 #include <linux/msg.h>
+#include <linux/dlock-list.h>
 #include <net/net_namespace.h>
 #include "flask.h"
 #include "avc.h"
@@ -45,7 +46,7 @@ enum label_initialized {
 
 struct inode_security_struct {
 	struct inode *inode;	/* back pointer to inode object */
-	struct list_head list;	/* list of inode_security_struct */
+	struct dlock_list_node list;	/* list of inode_security_struct */
 	u32 task_sid;		/* SID of creating task */
 	u32 sid;		/* SID of this object */
 	u16 sclass;		/* security class of this object */
@@ -67,8 +68,7 @@ struct superblock_security_struct {
 	unsigned short behavior;	/* labeling behavior */
 	unsigned short flags;		/* which mount options were specified */
 	struct mutex lock;
-	struct list_head isec_head;
-	spinlock_t isec_lock;
+	struct dlock_list_heads isec_head;
 };
 
 struct msg_security_struct {
-- 
2.42.0


  parent reply	other threads:[~2023-12-06  6:06 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-06  6:05 [PATCH 0/11] vfs: inode cache scalability improvements Dave Chinner
2023-12-06  6:05 ` [PATCH 01/11] lib/dlock-list: Distributed and lock-protected lists Dave Chinner
2023-12-07  2:23   ` Al Viro
2023-12-06  6:05 ` [PATCH 02/11] vfs: Remove unnecessary list_for_each_entry_safe() variants Dave Chinner
2023-12-07  2:26   ` Al Viro
2023-12-07  4:18   ` Kent Overstreet
2023-12-06  6:05 ` [PATCH 03/11] vfs: Use dlock list for superblock's inode list Dave Chinner
2023-12-07  2:40   ` Al Viro
2023-12-07  4:59     ` Dave Chinner
2023-12-07  5:03       ` Kent Overstreet
2023-12-06  6:05 ` [PATCH 04/11] lib/dlock-list: Make sibling CPUs share the same linked list Dave Chinner
2023-12-07  4:31   ` Kent Overstreet
2023-12-07  5:42   ` Kent Overstreet
2023-12-07  6:25     ` Dave Chinner
2023-12-07  6:49   ` Al Viro
2023-12-06  6:05 ` Dave Chinner [this message]
2023-12-06 21:52   ` [PATCH 05/11] selinux: use dlist for isec inode list Paul Moore
2023-12-06 23:04     ` Dave Chinner
2023-12-07  0:36       ` Paul Moore
2023-12-06  6:05 ` [PATCH 06/11] vfs: factor out inode hash head calculation Dave Chinner
2023-12-07  3:02   ` Al Viro
2023-12-06  6:05 ` [PATCH 07/11] hlist-bl: add hlist_bl_fake() Dave Chinner
2023-12-07  3:05   ` Al Viro
2023-12-06  6:05 ` [PATCH 08/11] vfs: inode cache conversion to hash-bl Dave Chinner
2023-12-07  4:58   ` Kent Overstreet
2023-12-07  6:03     ` Dave Chinner
2023-12-07  6:42   ` Al Viro
2023-12-06  6:05 ` [PATCH 09/11] hash-bl: explicitly initialise hash-bl heads Dave Chinner
2023-12-07  3:15   ` Al Viro
2023-12-06  6:05 ` [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep Dave Chinner
2023-12-07  4:16   ` Kent Overstreet
2023-12-07  4:41     ` Dave Chinner
2023-12-06  6:05 ` [PATCH 11/11] hlist-bl: introduced nested locking for dm-snap Dave Chinner
2023-12-07 17:08 ` [PATCH 0/11] vfs: inode cache scalability improvements Kent Overstreet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231206060629.2827226-6-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=dhowells@redhat.com \
    --cc=dm-devel@lists.linux.dev \
    --cc=gfs2@lists.linux.dev \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=selinux@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.