Re: [x86/signal] 3aac3ebea0: will-it-scale.per_thread_ops -11.9% regression

From: Thomas Gleixner <tglx@linutronix.de>
To: kernel test robot <oliver.sang@intel.com>
Cc: Borislav Petkov <bp@suse.de>,
	"Chang S. Bae" <chang.seok.bae@intel.com>,
	LKML <linux-kernel@vger.kernel.org>,
	lkp@lists.01.org, lkp@intel.com, ying.huang@intel.com,
	feng.tang@intel.com, zhengjun.xing@linux.intel.com,
	fengwei.yin@intel.com
Subject: Re: [x86/signal]  3aac3ebea0:  will-it-scale.per_thread_ops -11.9% regression
Date: Tue, 07 Dec 2021 14:38:34 +0100	[thread overview]
Message-ID: <87bl1s357p.ffs@tglx> (raw)
In-Reply-To: <20211207012128.GA16074@xsang-OptiPlex-9020>

Hi!

On Tue, Dec 07 2021 at 09:21, kernel test robot wrote:

> (please be noted we made some further analysis before reporting out,
> and thought it's likely the regression is related with the extra spinlock
> introducded by enalbling DYNAMIC_SIGFRAME. below is the full report.)
>
> FYI, we noticed a -11.9% regression of will-it-scale.per_thread_ops due to commit:

Does that use sigaltstack() ?

> 1bdda24c4af64cd2 3aac3ebea08f2d342364f827c89 
> ---------------- --------------------------- 
>          %stddev     %change         %stddev
>              \          |                \  
>     754824 ±  2%     -11.9%     664668 ±  2%  will-it-scale.16.threads
>      47176 ±  2%     -11.9%      41541 ±  2%  will-it-scale.per_thread_ops
>     754824 ±  2%     -11.9%     664668 ±  2%  will-it-scale.workload
>    1422782 ±  8%  +3.3e+05     1751520 ± 12%  syscalls.sys_getpid.noise.5%

Somehow the printout got confused ...

>  1.583e+10            -2.1%   1.55e+10        perf-stat.i.instructions
>    6328594 ±  2%     +11.1%    7032338 ±  2%  perf-stat.overall.path-length
>  1.578e+10            -2.1%  1.545e+10        perf-stat.ps.instructions
>  4.774e+12            -2.2%  4.671e+12        perf-stat.total.instructions
>       0.00            +6.3        6.33 ±  8%  perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock_irq.do_sigaltstack.restore_altstack.__x64_sys_rt_sigreturn
>       0.00            +6.5        6.51 ±  8%  perf-profile.calltrace.cycles-pp._raw_spin_lock_irq.do_sigaltstack.restore_altstack.__x64_sys_rt_sigreturn.do_syscall_64
>       0.00            +6.6        6.58 ±  8%  perf-profile.calltrace.cycles-pp.do_sigaltstack.restore_altstack.__x64_sys_rt_sigreturn.do_syscall_64.entry_SYSCALL_64_after_hwframe
>       0.00            +6.6        6.62 ±  8%  perf-profile.calltrace.cycles-pp.restore_altstack.__x64_sys_rt_sigreturn.do_syscall_64.entry_SYSCALL_64_after_hwframe.raise
>       0.00            +6.9        6.88 ±  9%  perf-profile.calltrace.cycles-pp.__x64_sys_rt_sigreturn.do_syscall_64.entry_SYSCALL_64_after_hwframe.raise
>       7.99 ± 12%      +6.0       14.00 ±  9%  perf-profile.children.cycles-pp.__x64_sys_rt_sigreturn
>       0.05 ± 44%      +6.6        6.62 ±  8%  perf-profile.children.cycles-pp.restore_altstack
>       0.00            +6.6        6.58 ±  8%  perf-profile.children.cycles-pp.do_sigaltstack

It looks like it does. The problem is that sighand->lock is process
wide.

Can you test whether the below cures it?

Not pretty, but that's what I came up with for now.

Thanks,

        tglx
---

--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -457,10 +457,10 @@ static inline void fpu_inherit_perms(str
 	if (fpu_state_size_dynamic()) {
 		struct fpu *src_fpu = &current->group_leader->thread.fpu;
 
-		spin_lock_irq(&current->sighand->siglock);
+		read_lock(&current->sighand->sigaltstack_lock);
 		/* Fork also inherits the permissions of the parent */
 		dst_fpu->perm = src_fpu->perm;
-		spin_unlock_irq(&current->sighand->siglock);
+		read_unlock(&current->sighand->sigaltstack_lock);
 	}
 }
 
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1582,17 +1582,22 @@ static int validate_sigaltstack(unsigned
 {
 	struct task_struct *thread, *leader = current->group_leader;
 	unsigned long framesize = get_sigframe_size();
+	int ret = 0;
 
-	lockdep_assert_held(&current->sighand->siglock);
+	lockdep_assert_held_write(&current->sighand->sigaltstack_lock);
 
 	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
 	framesize -= fpu_user_cfg.max_size;
 	framesize += usize;
+	read_lock(&tasklist_lock);
 	for_each_thread(leader, thread) {
-		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
-			return -ENOSPC;
+		if (thread->sas_ss_size && thread->sas_ss_size < framesize) {
+			ret = -ENOSPC;
+			break;
+		}
 	}
-	return 0;
+	read_unlock(&tasklist_lock);
+	return ret;
 }
 
 static int __xstate_request_perm(u64 permitted, u64 requested)
@@ -1627,7 +1632,7 @@ static int __xstate_request_perm(u64 per
 
 	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
 	WRITE_ONCE(fpu->perm.__state_perm, requested);
-	/* Protected by sighand lock */
+	/* Protected by sighand::sigaltstack_lock */
 	fpu->perm.__state_size = ksize;
 	fpu->perm.__user_state_size = usize;
 	return ret;
@@ -1666,10 +1671,10 @@ static int xstate_request_perm(unsigned
 		return 0;
 
 	/* Protect against concurrent modifications */
-	spin_lock_irq(&current->sighand->siglock);
+	write_lock(&current->sighand->sigaltstack_lock);
 	permitted = xstate_get_host_group_perm();
 	ret = __xstate_request_perm(permitted, requested);
-	spin_unlock_irq(&current->sighand->siglock);
+	write_unlock(&current->sighand->sigaltstack_lock);
 	return ret;
 }
 
@@ -1685,11 +1690,11 @@ int xfd_enable_feature(u64 xfd_err)
 	}
 
 	/* Protect against concurrent modifications */
-	spin_lock_irq(&current->sighand->siglock);
+	read_lock(&current->sighand->sigaltstack_lock);
 
 	/* If not permitted let it die */
 	if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) {
-		spin_unlock_irq(&current->sighand->siglock);
+		read_unlock(&current->sighand->sigaltstack_lock);
 		return -EPERM;
 	}
 
@@ -1702,7 +1707,7 @@ int xfd_enable_feature(u64 xfd_err)
 	 * another task, the retrieved buffer sizes are valid for the
 	 * currently requested feature(s).
 	 */
-	spin_unlock_irq(&current->sighand->siglock);
+	read_unlock(&current->sighand->sigaltstack_lock);
 
 	/*
 	 * Try to allocate a new fpstate. If that fails there is no way
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -939,17 +939,19 @@ static int __init strict_sas_size(char *
  * the task has permissions to use dynamic features. Tasks which have no
  * permission are checked against the size of the non-dynamic feature set
  * if strict checking is enabled. This avoids forcing all tasks on the
- * system to allocate large sigaltstacks even if they are never going
- * to use a dynamic feature. As this is serialized via sighand::siglock
- * any permission request for a dynamic feature either happened already
- * or will see the newly install sigaltstack size in the permission checks.
+ * system to allocate large sigaltstacks even if they are never going to
+ * use a dynamic feature.
+ *
+ * As this is serialized via sighand::sigaltstack_lock any permission
+ * request for a dynamic feature either happened already or will see the
+ * newly install sigaltstack size in the permission checks.
  */
 bool sigaltstack_size_valid(size_t ss_size)
 {
 	unsigned long fsize = max_frame_size - fpu_default_state_size;
 	u64 mask;
 
-	lockdep_assert_held(&current->sighand->siglock);
+	lockdep_assert_held_read(&current->sighand->sigaltstack_lock);
 
 	if (!fpu_state_size_dynamic() && !strict_sigaltstack_size)
 		return true;
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -19,6 +19,9 @@
 
 struct sighand_struct {
 	spinlock_t		siglock;
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+	rwlock_t		sigaltstack_lock;
+#endif
 	refcount_t		count;
 	wait_queue_head_t	signalfd_wqh;
 	struct k_sigaction	action[_NSIG];
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -48,6 +48,9 @@ static struct sighand_struct init_sighan
 	.action		= { { { .sa_handler = SIG_DFL, } }, },
 	.siglock	= __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
 	.signalfd_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+	.sigaltstack_lock	= __RW_LOCK_UNLOCKED(init_sighand.sigaltstack_lock),
+#endif
 };
 
 #ifdef CONFIG_SHADOW_CALL_STACK
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2900,6 +2900,9 @@ static void sighand_ctor(void *data)
 
 	spin_lock_init(&sighand->siglock);
 	init_waitqueue_head(&sighand->signalfd_wqh);
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+	rwlock_init(&sighand->sigaltstack_lock);
+#endif
 }
 
 void __init proc_caches_init(void)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4141,15 +4141,15 @@ int do_sigaction(int sig, struct k_sigac
 
 #ifdef CONFIG_DYNAMIC_SIGFRAME
 static inline void sigaltstack_lock(void)
-	__acquires(&current->sighand->siglock)
+	__acquires(&current->sighand->sigaltstack_lock)
 {
-	spin_lock_irq(&current->sighand->siglock);
+	read_lock(&current->sighand->sigaltstack_lock);
 }
 
 static inline void sigaltstack_unlock(void)
-	__releases(&current->sighand->siglock)
+	__releases(&current->sighand->sigaltstack_lock)
 {
-	spin_unlock_irq(&current->sighand->siglock);
+	read_unlock(&current->sighand->sigaltstack_lock);
 }
 #else
 static inline void sigaltstack_lock(void) { }