linux-kernel.vger.kernel.org archive mirror
* [PATCH] x86/split_lock: Make life miserable for split lockers
@ 2022-02-17  1:27 Tony Luck
  2022-03-07 22:30 ` Thomas Gleixner
  2022-03-10 20:48 ` [PATCH v2 0/2] " Tony Luck
  0 siblings, 2 replies; 16+ messages in thread
From: Tony Luck @ 2022-02-17  1:27 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Fenghua Yu, x86, linux-kernel, patches, Tony Luck

In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas
said:

  It's simply wishful thinking that stuff gets fixed because of a
  WARN_ONCE(). This has never worked. The only thing which works is to
  make stuff fail hard or slow it down in a way which makes it annoying
  enough to users to complain.

He was talking about WBINVD. But it made me think about how we
use the split lock detection feature in Linux.

Existing code has three options for applications:
1) Don't enable split lock detection (allow arbitrary split locks)
2) Warn once when a process uses a split lock, but let the process
   keep running with split lock detection disabled
3) Kill processes that use split locks

Option 2 falls into the "wishful thinking" territory that Thomas
warns does nothing. But option 3 might not be viable in a situation
with legacy applications that need to run.
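
For reference, these map onto the existing kernel command line
choices (names as in the sld_options table in the patch below):

	split_lock_detect=off		# option 1: no detection
	split_lock_detect=warn		# option 2: warn once (today's default)
	split_lock_detect=fatal		# option 3: kill offending processes

(There is also "ratelimit:N", which bounds bus locks to N per second
system wide.)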

Hence add a new option to "slow it down in a way which makes it annoying".

The primary reason for this change is to provide better quality of service
to the rest of the applications running on the system. Internal
testing shows that even with many processes splitting locks, the rest
of the system remains much more responsive.

Add a new choice, "sequential", to the existing "split_lock_detect"
boot parameter. In this mode split lock detection is enabled. When an
application tries to execute a bus lock, the #AC handler:

1) Blocks (interruptibly) until it can get the semaphore
	If interrupted, just return. Assume the signal will either
	kill the task, or direct execution away from the instruction
	that is trying to get the bus lock.
2) Disables split lock detection for the current core
3) Schedules a work queue to re-enable split lock detect in 2 jiffies
4) Returns

The work queue that re-enables split lock detection also releases the
semaphore.

There is a corner case where a CPU may be taken offline while
split lock detection is disabled. A CPU hotplug handler handles
this case.

Questions for this RFC:

1) Does this need to be a new option? Maybe just update the
   existing "warn" mode to add this level of extra pain.
2) Under what circumstances will a work function scheduled with
   schedule_delayed_work() run on a different CPU? I've covered
   the obvious case of the CPU being taken offline before the
   work is run. But are there other cases?
3) Should I add even more pain with an msleep() before even trying
   to get the semaphore?

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/intel.c | 67 ++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8321c43554a1..a331c4a71847 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,10 +7,13 @@
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/semaphore.h>
 #include <linux/thread_info.h>
 #include <linux/init.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
 #include <linux/delay.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -43,6 +46,7 @@ enum split_lock_detect_state {
 	sld_warn,
 	sld_fatal,
 	sld_ratelimit,
+	sld_sequential,
 };
 
 /*
@@ -1002,10 +1006,13 @@ static const struct {
 	{ "warn",	sld_warn  },
 	{ "fatal",	sld_fatal },
 	{ "ratelimit:", sld_ratelimit },
+	{ "sequential", sld_sequential },
 };
 
 static struct ratelimit_state bld_ratelimit;
 
+static DEFINE_SEMAPHORE(buslock_sem);
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
 	int len = strlen(opt), ratelimit;
@@ -1045,7 +1052,7 @@ static bool split_lock_verify_msr(bool on)
 
 static void __init sld_state_setup(void)
 {
-	enum split_lock_detect_state state = sld_warn;
+	enum split_lock_detect_state state = sld_sequential;
 	char arg[20];
 	int i, ret;
 
@@ -1116,23 +1123,60 @@ static void split_lock_init(void)
 		split_lock_verify_msr(sld_state != sld_off);
 }
 
+static void __split_lock_reenable(struct work_struct *work)
+{
+	sld_update_msr(true);
+	up(&buslock_sem);
+}
+
+/*
+ * If a CPU goes offline with pending delayed work to
+ * re-enable split lock detection then the delayed work
+ * will be executed on some other CPU. That handles releasing
+ * the buslock_sem, but because it executes on a different
+ * CPU probably won't re-enable split lock detection. This
+ * is a problem on HT systems since the sibling CPU on the
+ * same core may then be left running with split lock
+ * detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+	sld_update_msr(true);
+
+	return 0;
+}
+
+static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
+
 static void split_lock_warn(unsigned long ip)
 {
 	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
 			    current->comm, current->pid, ip);
 
-	/*
-	 * Disable the split lock detection for this task so it can make
-	 * progress and set TIF_SLD so the detection is re-enabled via
-	 * switch_to_sld() when the task is scheduled out.
-	 */
+	switch (sld_state) {
+	case sld_warn:
+		/* This task will keep running with split lock disabled */
+		set_tsk_thread_flag(current, TIF_SLD);
+		break;
+	case sld_sequential:
+		/* Only allow one buslocked disabled core at a time */
+		if (down_interruptible(&buslock_sem) == -EINTR)
+			return;
+		schedule_delayed_work(&split_lock_reenable, 2);
+		break;
+	default:
+		break;
+	}
+
+	/* Disable split lock detection to make progress */
 	sld_update_msr(false);
-	set_tsk_thread_flag(current, TIF_SLD);
 }
 
 bool handle_guest_split_lock(unsigned long ip)
 {
-	if (sld_state == sld_warn) {
+	if (sld_state == sld_warn || sld_state == sld_sequential) {
 		split_lock_warn(ip);
 		return true;
 	}
@@ -1191,6 +1235,7 @@ void handle_bus_lock(struct pt_regs *regs)
 		/* Warn on the bus lock. */
 		fallthrough;
 	case sld_warn:
+	case sld_sequential:
 		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
 				    current->comm, current->pid, regs->ip);
 		break;
@@ -1299,6 +1344,12 @@ static void sld_state_show(void)
 		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
 			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
 		break;
+	case sld_sequential:
+		pr_info("#AC: crashing the kernel on kernel split_locks and forcing sequential access for user-space split locks\n");
+		if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+				      "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+			pr_warn("No splitlock CPU offline handler\n");
+		break;
 	}
 }
 
-- 
2.35.1



* Re: [PATCH] x86/split_lock: Make life miserable for split lockers
  2022-02-17  1:27 [PATCH] x86/split_lock: Make life miserable for split lockers Tony Luck
@ 2022-03-07 22:30 ` Thomas Gleixner
  2022-03-08  0:37   ` Luck, Tony
  2022-03-10 20:48 ` [PATCH v2 0/2] " Tony Luck
  1 sibling, 1 reply; 16+ messages in thread
From: Thomas Gleixner @ 2022-03-07 22:30 UTC (permalink / raw)
  To: Tony Luck; +Cc: Fenghua Yu, x86, linux-kernel, patches, Tony Luck

Tony,

On Wed, Feb 16 2022 at 17:27, Tony Luck wrote:
> Questions for this RFC:
>
> 1) Does this need to be a new option? Maybe just update the
>    existing "warn" mode to add this level of extra pain.

That's fine. Warn is the default today, right?

> 2) Under what circumstances will a work function scheduled with
>    schedule_delayed_work() run on a different CPU?

Under many...

>    I've covered the obvious case of the CPU being taken offline before
>    the work is run. But are there other cases?

schedule_delayed_work_on() is what you are looking for.

> 3) Should I add even more pain with an msleep() before even trying
>    to get the semaphore?

No objections from me.

> +static void __split_lock_reenable(struct work_struct *work)
> +{
> +	sld_update_msr(true);
> +	up(&buslock_sem);
> +}
> +
> +/*
> + * If a CPU goes offline with pending delayed work to
> + * re-enable split lock detection then the delayed work
> + * will be executed on some other CPU. That handles releasing
> + * the buslock_sem, but because it executes on a different
> + * CPU probably won't re-enable split lock detection. This
> + * is a problem on HT systems since the sibling CPU on the
> + * same core may then be left running with split lock
> + * detection disabled.
> + *
> + * Unconditionally re-enable detection here.

Had to think twice whether this works under all circumstances. It
actually works because of how CPU hotunplug works nowadays. It
guarantees that after the initial CPU down state sched_cpu_deactivate()
no task which is running or affine to the CPU can get back to user space
on that CPU. That was not always the case, that's why I had to think
twice :)

But I'm not yet convinced that this is required at all.

> + */
> +static int splitlock_cpu_offline(unsigned int cpu)
> +{
> +	sld_update_msr(true);
> +
> +	return 0;
> +}
> +
> +static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
> +
>  static void split_lock_warn(unsigned long ip)
>  {
>  	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
>  			    current->comm, current->pid, ip);
>  
> -	/*
> -	 * Disable the split lock detection for this task so it can make
> -	 * progress and set TIF_SLD so the detection is re-enabled via
> -	 * switch_to_sld() when the task is scheduled out.
> -	 */
> +	switch (sld_state) {
> +	case sld_warn:
> +		/* This task will keep running with split lock disabled */
> +		set_tsk_thread_flag(current, TIF_SLD);
> +		break;
> +	case sld_sequential:
> +		/* Only allow one buslocked disabled core at a time */
> +		if (down_interruptible(&buslock_sem) == -EINTR)
> +			return;
> +		schedule_delayed_work(&split_lock_reenable, 2);

Hmm. This does not set TIF_SLD. So:

 task hits splitlock
   #AC
     down(sema);
     schedule_work();
     disable_sld();

 task is preempted or schedules out voluntarily

   -> SLD stays disabled for the incoming task which is wrong and it
      stays disabled up to the point where the timer fires or a task
      switch with TIF_SLD mismatch happens. 

Not what we want, right?

So the right thing to do is to set TIF_SLD also for the sequential
case. Now how to do that delayed split lock reenable for the task in
question?

	case sld_sequential:
		if (down_interruptible(&buslock_sem) == -EINTR)
			return;
		set_tsk_thread_flag(current, TIF_SLD);
                buslock_sequential_task = current;
                get_task_struct(current);
		schedule_delayed_work_on(smp_processor_id(), &split_lock_reenable, 2);

and then the work function does:

    clear_tsk_thread_flag(buslock_sequential_task, TIF_SLD);
    put_task_struct(buslock_sequential_task);
    buslock_sequential_task = NULL;
    up(&buslock_sem);
    
With that you spare the cpu hotplug callback as well simply because it's
guaranteed that the SLD state is handled correctly when the task in
question schedules out. I.e. it does not matter at all on which CPU the
timer goes off if the CPU on which it was armed is offlined before it
fires.

But that's nasty too because if the task schedules away from the CPU on
which it hit the buslock in the first place and then stays on the other
CPU in user space forever (think NOHZ_FULL) then it can buslock forever
too.

The question is whether this is something to worry about. If so, then we
need to go back to the drawing board.

Thanks,

        tglx


* Re: [PATCH] x86/split_lock: Make life miserable for split lockers
  2022-03-07 22:30 ` Thomas Gleixner
@ 2022-03-08  0:37   ` Luck, Tony
  2022-03-08 14:59     ` Thomas Gleixner
  0 siblings, 1 reply; 16+ messages in thread
From: Luck, Tony @ 2022-03-08  0:37 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Fenghua Yu, x86, linux-kernel, patches

On Mon, Mar 07, 2022 at 11:30:35PM +0100, Thomas Gleixner wrote:
> Tony,
> 
> On Wed, Feb 16 2022 at 17:27, Tony Luck wrote:
> > Questions for this RFC:
> >
> > 1) Does this need to be a new option? Maybe just update the
> >    existing "warn" mode to add this level of extra pain.
> 
> That's fine. Warn is the default today, right?

Yes. Warn is the current default.
Does "That's fine" mean ok to change exiting warn code to add
this level of pain? Or OK to add a new option?

> > 2) Under what circumstances will a work function scheduled with
> >    schedule_delayed_work() run on a different CPU?
> 
> Under many...
> 
> >    I've covered the obvious case of the CPU being taken offline before
> >    the work is run. But are there other cases?
> 
> schedule_delayed_work_on() is what you are looking for.

That sounds like the right choice ... I just didn't dig deep
enough into the options available.

> > 3) Should I add even more pain with an msleep() before even trying
> >    to get the semaphore?
> 
> No objections from me.

Will do in next version.

> > +static void __split_lock_reenable(struct work_struct *work)
> > +{
> > +	sld_update_msr(true);
> > +	up(&buslock_sem);
> > +}
> > +
> > +/*
> > + * If a CPU goes offline with pending delayed work to
> > + * re-enable split lock detection then the delayed work
> > + * will be executed on some other CPU. That handles releasing
> > + * the buslock_sem, but because it executes on a different
> > + * CPU probably won't re-enable split lock detection. This
> > + * is a problem on HT systems since the sibling CPU on the
> > + * same core may then be left running with split lock
> > + * detection disabled.
> > + *
> > + * Unconditionally re-enable detection here.
> 
> Had to think twice whether this works under all circumstances. It
> actually works because of how CPU hotunplug works nowadays. It
> guarantees that after the initial CPU down state sched_cpu_deactivate()
> no task which is running or affine to the CPU can get back to user space
> on that CPU. That was not always the case, that's why I had to think
> twice :)
> 
> But I'm not yet convinced that this is required at all.
> 
> > + */
> > +static int splitlock_cpu_offline(unsigned int cpu)
> > +{
> > +	sld_update_msr(true);
> > +
> > +	return 0;
> > +}
> > +
> > +static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
> > +
> >  static void split_lock_warn(unsigned long ip)
> >  {
> >  	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
> >  			    current->comm, current->pid, ip);
> >  
> > -	/*
> > -	 * Disable the split lock detection for this task so it can make
> > -	 * progress and set TIF_SLD so the detection is re-enabled via
> > -	 * switch_to_sld() when the task is scheduled out.
> > -	 */
> > +	switch (sld_state) {
> > +	case sld_warn:
> > +		/* This task will keep running with split lock disabled */
> > +		set_tsk_thread_flag(current, TIF_SLD);
> > +		break;
> > +	case sld_sequential:
> > +		/* Only allow one buslocked disabled core at a time */
> > +		if (down_interruptible(&buslock_sem) == -EINTR)
> > +			return;
> > +		schedule_delayed_work(&split_lock_reenable, 2);
> 
> Hmm. This does not set TIF_SLD. So:
> 
>  task hits splitlock
>    #AC
>      down(sema);
>      schedule_work();
>      disable_sld();
> 
>  task is preempted or schedules out voluntarily
> 
>    -> SLD stays disabled for the incoming task which is wrong and it
>       stays disabled up to the point where the timer fires or a task
>       switch with TIF_SLD mismatch happens. 
> 
> Not what we want, right?

It isn't ideal. We just gave a free pass to some other tasks to
do split locks for up to two jiffies. But they would have been
given those two jiffies later had they taken an #AC trap ... so
they just bypassed the queue to get what we would have given them
later.

> So the right thing to do is to set TIF_SLD also for the sequential
> case. Now how to do that delayed split lock reenable for the task in
> question?
> 
> 	case sld_sequential:
> 		if (down_interruptible(&buslock_sem) == -EINTR)
> 			return;
> 		set_tsk_thread_flag(current, TIF_SLD);
>                 buslock_sequential_task = current;
>                 get_task_struct(current);
> 		schedule_delayed_work_on(smp_processor_id(), &split_lock_reenable, 2);
> 
> and then the work function does:
> 
>     clear_tsk_thread_flag(buslock_sequential_task, TIF_SLD);
>     put_task_struct(buslock_sequential_task);
>     buslock_sequential_task = NULL;
>     up(&buslock_sem);
>     
> With that you spare the cpu hotplug callback as well simply because it's
> guaranteed that the SLD state is handled correctly when the task in
> question schedules out. I.e. it does not matter at all on which CPU the
> timer goes off if the CPU on which it was armed is offlined before it
> fires.
> 
> But that's nasty too because if the task schedules away from the CPU on
> which it hit the buslock in the first place and then stays on the other
> CPU in user space forever (think NOHZ_FULL) then it can buslock forever
> too.

Agreed. Trying to get this "perfect" has many ugly corner cases.

> The question is whether this is something to worry about. If so, then we
> need to go back to the drawing board.

I don't think it is worth worrying about. The case you describe is
a process that is about to be preempted when the #AC trap happens.
In that case this CPU (in fact both HT threads on this core) gets
two jiffies of free split locks. Cases from here:

1) The original process gets to run on either of these threads
before the timeout. They get to execute their split lock and carry
on running.

2) The process is scheduled on a different core during the two-jiffy
window. They take an #AC trap and block on the semaphore until the
original core releases. Then they get their chance to run on this new
core.

3) The original process doesn't get rescheduled for two jiffies, then
runs somewhere. The original core has released the semaphore and re-enabled
split lock checking. So the process takes #AC, gets the semaphore, kernel
disables split lock checking ... and we try again.

Now it is possible that the process may repeatedly be preempted in between
getting the semaphore and actually getting all the way to user space
to split a lock ... but that can only happen if there are multiple processes
splitting locks. The goal of this patch is to be mean to all of them. If
we happen to be extra mean to some of them, well so be it.

-Tony


* Re: [PATCH] x86/split_lock: Make life miserable for split lockers
  2022-03-08  0:37   ` Luck, Tony
@ 2022-03-08 14:59     ` Thomas Gleixner
  2022-03-08 17:12       ` Luck, Tony
  0 siblings, 1 reply; 16+ messages in thread
From: Thomas Gleixner @ 2022-03-08 14:59 UTC (permalink / raw)
  To: Luck, Tony; +Cc: Fenghua Yu, x86, linux-kernel, patches

Tony,

On Mon, Mar 07 2022 at 16:37, Tony Luck wrote:
> On Mon, Mar 07, 2022 at 11:30:35PM +0100, Thomas Gleixner wrote:
>> On Wed, Feb 16 2022 at 17:27, Tony Luck wrote:
>> > Questions for this RFC:
>> >
>> > 1) Does this need to be a new option? Maybe just update the
>> >    existing "warn" mode to add this level of extra pain.
>> 
>> That's fine. Warn is the default today, right?
>
> Yes. Warn is the current default.
> Does "That's fine" mean ok to change exiting warn code to add
> this level of pain? Or OK to add a new option?

Add pain to the existing warn code.

>> The question is whether this is something to worry about. If so, then we
>> need to go back to the drawing board.
>
> I don't think it is worth worrying about. The case you describe is
> a process that is about to be preempted when the #AC trap happens.
> In that case this CPU (in fact both HT threads on this core) gets
> two jiffies of free split locks. Cases from here:
>
> 1) The original process gets to run on either of these threads
> before the timeout. They get to execute their split lock and carry
> on running.
>
> 2) The process is scheduled on a different core during the two-jiffy
> window. They take an #AC trap and block on the semaphore until the
> original core releases. Then they get their chance to run on this new
> core.
>
> 3) The original process doesn't get rescheduled for two jiffies, then
> runs somewhere. The original core has released the semaphore and re-enabled
> split lock checking. So the process takes #AC, gets the semaphore, kernel
> disables split lock checking ... and we try again.
>
> Now it is possible that the process may repeatedly be preempted in between
> getting the semaphore and actually getting all the way to user space
> to split a lock ... but that can only happen if there are multiple processes
> splitting locks. The goal of this patch is to be mean to all of them. If
> we happen to be extra mean to some of them, well so be it.

Fair enough.

I still do not like the inconsistent state between the TIF flag and the
SLD MSR.

Thanks,

        tglx


* RE: [PATCH] x86/split_lock: Make life miserable for split lockers
  2022-03-08 14:59     ` Thomas Gleixner
@ 2022-03-08 17:12       ` Luck, Tony
  0 siblings, 0 replies; 16+ messages in thread
From: Luck, Tony @ 2022-03-08 17:12 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Yu, Fenghua, x86, linux-kernel, patches

> I still do not like the inconsistent state between the TIF flag and the
> SLD MSR.

If I'm updating the "warn" option to work this way, I think I can make the TIF flag
and the code that updates the MSR in context switch go away. But I need to work
through the patch to make sure I'm not missing something.

-Tony



* [PATCH v2 0/2] Make life miserable for split lockers
  2022-02-17  1:27 [PATCH] x86/split_lock: Make life miserable for split lockers Tony Luck
  2022-03-07 22:30 ` Thomas Gleixner
@ 2022-03-10 20:48 ` Tony Luck
  2022-03-10 20:48   ` [PATCH v2 1/2] x86/split_lock: " Tony Luck
  2022-03-10 20:48   ` [PATCH v2 2/2] x86/split-lock: Remove unused TIF_SLD bit Tony Luck
  1 sibling, 2 replies; 16+ messages in thread
From: Tony Luck @ 2022-03-10 20:48 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Fenghua Yu, x86, linux-kernel, patches, Tony Luck

See part 0001 commit message for what & why.

Changes since v1(RFC) based on feedback from Thomas:

1) No longer implemented as a new sub-option of the split_lock_detect
   boot option. Just update the existing (default) "warn" mode.
2) Use the (more appropriate) schedule_delayed_work_on() instead of
   schedule_delayed_work() to re-enable split lock detection.
3) Add an msleep() for extra misery before disabling split lock
   detection on a core.

The next two aren't from Thomas.
4) Drop the TIF_SLD bit and associated code. No longer needed. This
   part is done as clean up in patch 0002.
5) Add a bit in task structure to make sure each split-locking task
   only tries to print a warn message to the console once. Need this
   to preserve existing behavior that was previously controlled by the
   TIF_SLD bit.

Testing notes. I checked this out by running 2,000 processes that
each looped executing 10,000 split locks. The system remains very
responsive while this test is running (compared with being unusable
without these patches). When each process gets the semaphore
and two jiffies of time to execute split locks it manages to
complete ~430 before the delayed work thread re-enables detection.

The semaphore gives nice round-robin access to the 2,000 tasks.
So each ends up blocking for ~24 seconds while the other 1,999
tasks take their turn.

Tony Luck (2):
  x86/split_lock: Make life miserable for split lockers
  x86/split-lock: Remove unused TIF_SLD bit

 arch/x86/include/asm/cpu.h         |  2 -
 arch/x86/include/asm/thread_info.h |  4 +-
 arch/x86/kernel/cpu/intel.c        | 77 +++++++++++++++++++++---------
 arch/x86/kernel/process.c          |  3 --
 include/linux/sched.h              |  3 ++
 kernel/fork.c                      |  5 ++
 6 files changed, 64 insertions(+), 30 deletions(-)

-- 
2.35.1



* [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-10 20:48 ` [PATCH v2 0/2] " Tony Luck
@ 2022-03-10 20:48   ` Tony Luck
  2022-03-17 11:13     ` Pavel Machek
  2022-04-27 13:49     ` [tip: x86/splitlock] " tip-bot2 for Tony Luck
  2022-03-10 20:48   ` [PATCH v2 2/2] x86/split-lock: Remove unused TIF_SLD bit Tony Luck
  1 sibling, 2 replies; 16+ messages in thread
From: Tony Luck @ 2022-03-10 20:48 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Fenghua Yu, x86, linux-kernel, patches, Tony Luck

In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas
said:

  It's simply wishful thinking that stuff gets fixed because of a
  WARN_ONCE(). This has never worked. The only thing which works is to
  make stuff fail hard or slow it down in a way which makes it annoying
  enough to users to complain.

He was talking about WBINVD. But it made me think about how we
use the split lock detection feature in Linux.

Existing code has three options for applications:
1) Don't enable split lock detection (allow arbitrary split locks)
2) Warn once when a process uses a split lock, but let the process
   keep running with split lock detection disabled
3) Kill processes that use split locks

Option 2 falls into the "wishful thinking" territory that Thomas
warns does nothing. But option 3 might not be viable in a situation
with legacy applications that need to run.

Hence make option 2 much stricter to "slow it down in a way which makes
it annoying".

The primary reason for this change is to provide better quality of service
to the rest of the applications running on the system. Internal
testing shows that even with many processes splitting locks, the rest
of the system remains much more responsive.

The new "warn" mode operates like this.  When an application tries to
execute a bus lock the #AC handler.

1) Delays (interruptibly) 10 ms before moving to the next step.
2) Blocks (interruptibly) until it can get the semaphore
	If interrupted, just return. Assume the signal will either
	kill the task, or direct execution away from the instruction
	that is trying to get the bus lock.
3) Disables split lock detection for the current core
4) Schedules a work queue to re-enable split lock detect in 2 jiffies
5) Returns

The work queue that re-enables split lock detection also releases the
semaphore.

There is a corner case where a CPU may be taken offline while
split lock detection is disabled. A CPU hotplug handler handles
this case.

The old behaviour was to only print the split lock warning on
the first occurrence of a split lock from a task. Preserve
that by adding a flag to the task structure that suppresses
subsequent split lock messages from that task.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/intel.c | 65 +++++++++++++++++++++++++++++++------
 include/linux/sched.h       |  3 ++
 kernel/fork.c               |  5 +++
 3 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8321c43554a1..2536784511e3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,10 +7,13 @@
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/semaphore.h>
 #include <linux/thread_info.h>
 #include <linux/init.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
 #include <linux/delay.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -1006,6 +1009,8 @@ static const struct {
 
 static struct ratelimit_state bld_ratelimit;
 
+static DEFINE_SEMAPHORE(buslock_sem);
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
 	int len = strlen(opt), ratelimit;
@@ -1116,18 +1121,54 @@ static void split_lock_init(void)
 		split_lock_verify_msr(sld_state != sld_off);
 }
 
+static void __split_lock_reenable(struct work_struct *work)
+{
+	sld_update_msr(true);
+	up(&buslock_sem);
+}
+
+/*
+ * If a CPU goes offline with pending delayed work to
+ * re-enable split lock detection then the delayed work
+ * will be executed on some other CPU. That handles releasing
+ * the buslock_sem, but because it executes on a different
+ * CPU probably won't re-enable split lock detection. This
+ * is a problem on HT systems since the sibling CPU on the
+ * same core may then be left running with split lock
+ * detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+	sld_update_msr(true);
+
+	return 0;
+}
+
+static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
+
 static void split_lock_warn(unsigned long ip)
 {
-	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
-			    current->comm, current->pid, ip);
+	int cpu;
 
-	/*
-	 * Disable the split lock detection for this task so it can make
-	 * progress and set TIF_SLD so the detection is re-enabled via
-	 * switch_to_sld() when the task is scheduled out.
-	 */
+	if (!current->reported_split_lock)
+		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+				    current->comm, current->pid, ip);
+	current->reported_split_lock = 1;
+
+	/* misery factor #1, sleep 10ms before trying to execute split lock */
+	if (msleep_interruptible(10) > 0)
+		return;
+	/* Misery factor #2, only allow one buslocked disabled core at a time */
+	if (down_interruptible(&buslock_sem) == -EINTR)
+		return;
+	cpu = get_cpu();
+	schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+
+	/* Disable split lock detection on this CPU to make progress */
 	sld_update_msr(false);
-	set_tsk_thread_flag(current, TIF_SLD);
+	put_cpu();
 }
 
 bool handle_guest_split_lock(unsigned long ip)
@@ -1281,10 +1322,14 @@ static void sld_state_show(void)
 		pr_info("disabled\n");
 		break;
 	case sld_warn:
-		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
 			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
-		else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					      "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+				pr_warn("No splitlock CPU offline handler\n");
+		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
 			pr_info("#DB: warning on user-space bus_locks\n");
+		}
 		break;
 	case sld_fatal:
 		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75ba8aa60248..ffa7166e23d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -938,6 +938,9 @@ struct task_struct {
 	/* Recursion prevention for eventfd_signal() */
 	unsigned			in_eventfd_signal:1;
 #endif
+#ifdef	CONFIG_CPU_SUP_INTEL
+	unsigned			reported_split_lock:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index f1e89007f228..085d68d143b2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -970,6 +970,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
+
+#ifdef CONFIG_CPU_SUP_INTEL
+	tsk->reported_split_lock = 0;
+#endif
+
 	return tsk;
 
 free_stack:
-- 
2.35.1



* [PATCH v2 2/2] x86/split-lock: Remove unused TIF_SLD bit
  2022-03-10 20:48 ` [PATCH v2 0/2] " Tony Luck
  2022-03-10 20:48   ` [PATCH v2 1/2] x86/split_lock: " Tony Luck
@ 2022-03-10 20:48   ` Tony Luck
  2022-04-27 13:49     ` [tip: x86/splitlock] " tip-bot2 for Tony Luck
  1 sibling, 1 reply; 16+ messages in thread
From: Tony Luck @ 2022-03-10 20:48 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Fenghua Yu, x86, linux-kernel, patches, Tony Luck

Changes to the "warn" mode of split lock handling mean that
TIF_SLD is never set.

Remove the bit, and the functions that use it.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/include/asm/cpu.h         |  2 --
 arch/x86/include/asm/thread_info.h |  4 +---
 arch/x86/kernel/cpu/intel.c        | 12 ------------
 arch/x86/kernel/process.c          |  3 ---
 4 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 33d41e350c79..e346f5c103b7 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -42,14 +42,12 @@ unsigned int x86_model(unsigned int sig);
 unsigned int x86_stepping(unsigned int sig);
 #ifdef CONFIG_CPU_SUP_INTEL
 extern void __init sld_setup(struct cpuinfo_x86 *c);
-extern void switch_to_sld(unsigned long tifn);
 extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
 extern bool handle_guest_split_lock(unsigned long ip);
 extern void handle_bus_lock(struct pt_regs *regs);
 u8 get_this_hybrid_cpu_type(void);
 #else
 static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
-static inline void switch_to_sld(unsigned long tifn) {}
 static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
 {
 	return false;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ebec69c35e95..f0cb881c1d69 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,7 +92,6 @@ struct thread_info {
 #define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_NOTIFY_SIGNAL	17	/* signal notifications exist */
-#define TIF_SLD			18	/* Restore split lock detection on context switch */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -116,7 +115,6 @@ struct thread_info {
 #define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_SLD		(1 << TIF_SLD)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
@@ -128,7 +126,7 @@ struct thread_info {
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
 	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\
-	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
+	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
 
 /*
  * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2536784511e3..7cba84616c20 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1241,18 +1241,6 @@ void handle_bus_lock(struct pt_regs *regs)
 	}
 }
 
-/*
- * This function is called only when switching between tasks with
- * different split-lock detection modes. It sets the MSR for the
- * mode of the new task. This is right most of the time, but since
- * the MSR is shared by hyperthreads on a physical core there can
- * be glitches when the two threads need different modes.
- */
-void switch_to_sld(unsigned long tifn)
-{
-	sld_update_msr(!(tifn & _TIF_SLD));
-}
-
 /*
  * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
  * only be trusted if it is confirmed that a CPU model implements a
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81d8ef036637..47db777da92a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -686,9 +686,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 		/* Enforce MSR update to ensure consistent state */
 		__speculation_ctrl_update(~tifn, tifn);
 	}
-
-	if ((tifp ^ tifn) & _TIF_SLD)
-		switch_to_sld(tifn);
 }
 
 /*
-- 
2.35.1



* Re: [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-10 20:48   ` [PATCH v2 1/2] x86/split_lock: " Tony Luck
@ 2022-03-17 11:13     ` Pavel Machek
  2022-03-17 16:27       ` Luck, Tony
  2022-03-17 18:06       ` Thomas Gleixner
  2022-04-27 13:49     ` [tip: x86/splitlock] " tip-bot2 for Tony Luck
  1 sibling, 2 replies; 16+ messages in thread
From: Pavel Machek @ 2022-03-17 11:13 UTC (permalink / raw)
  To: Tony Luck; +Cc: Thomas Gleixner, Fenghua Yu, x86, linux-kernel, patches


Hi!

> In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas
> said:
> 
>   It's simply wishful thinking that stuff gets fixed because of a
>   WARN_ONCE(). This has never worked. The only thing which works is to
>   make stuff fail hard or slow it down in a way which makes it annoying
>   enough to users to complain.
> 
> He was talking about WBINVD. But it made me think about how we
> use the split lock detection feature in Linux.
> 
> Existing code has three options for applications:
> 1) Don't enable split lock detection (allow arbitrary split locks)
> 2) Warn once when a process uses a split lock, but let the process
>    keep running with split lock detection disabled
> 3) Kill processes that use split locks

I'm not sure what split locks are, and if you want applications to
stop doing that maybe documentation would help.

Anyway, you can't really introduce regressions to userspace to "get
stuff fixed" in applications.
								Pavel
-- 
DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany



* RE: [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-17 11:13     ` Pavel Machek
@ 2022-03-17 16:27       ` Luck, Tony
  2022-03-17 18:06       ` Thomas Gleixner
  1 sibling, 0 replies; 16+ messages in thread
From: Luck, Tony @ 2022-03-17 16:27 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Thomas Gleixner, Yu, Fenghua, x86, linux-kernel, patches

Pavel,

> I'm not sure what split locks are, and if you want applications to
> stop doing that maybe documentation would help.

See existing Documentation/x86/buslock.rst

> Anyway, you can't really introduce regressions to userspace to "get
> stuff fixed" in applications.

Applications can inflict pain on the system with atomic operations
on unaligned operands that cross cache line boundaries. This
is just pushing some pain back to the applications.

An alternate title for this patch series could have been:

	"Split-locks: The OS strikes back"

Note that there are few applications that actually split locks.
Normal compiler alignment rules generally avoid them.
Applications that run on non-x86 architectures have to
avoid them because few other architectures allow atomic
operations on unaligned operands.

-Tony


* Re: [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-17 11:13     ` Pavel Machek
  2022-03-17 16:27       ` Luck, Tony
@ 2022-03-17 18:06       ` Thomas Gleixner
  2022-03-17 22:21         ` David Laight
  1 sibling, 1 reply; 16+ messages in thread
From: Thomas Gleixner @ 2022-03-17 18:06 UTC (permalink / raw)
  To: Pavel Machek, Tony Luck; +Cc: Fenghua Yu, x86, linux-kernel, patches

On Thu, Mar 17 2022 at 12:13, Pavel Machek wrote:
>> In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas
>> said:
>> 
>>   It's simply wishful thinking that stuff gets fixed because of a
>>   WARN_ONCE(). This has never worked. The only thing which works is to
>>   make stuff fail hard or slow it down in a way which makes it annoying
>>   enough to users to complain.
>> 
>> He was talking about WBINVD. But it made me think about how we
>> use the split lock detection feature in Linux.
>> 
>> Existing code has three options for applications:
>> 1) Don't enable split lock detection (allow arbitrary split locks)
>> 2) Warn once when a process uses a split lock, but let the process
>>    keep running with split lock detection disabled
>> 3) Kill processes that use split locks
>
> I'm not sure what split locks are, and if you want applications to
> stop doing that maybe documentation would help.

Split locks are lock-prefixed operations which cross a cache line
boundary. They are implemented by taking the bus lock, which is the
largest serialization hammer.

Bus locks are also triggered by lock prefixed operations on uncached
memory and on MMIO.
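
For illustration, a minimal user-space sketch that provokes one
(hypothetical test code: it assumes GCC/Clang __atomic builtins and
64-byte cache lines, and the misaligned cast is deliberate and
strictly undefined behaviour):

	#include <stdint.h>

	int main(void)
	{
		/* 64-byte aligned buffer: offset 62 straddles a line */
		_Alignas(64) static char buf[128];
		uint32_t *p = (uint32_t *)(buf + 62);

		/* lock-prefixed RMW crossing the cache line boundary */
		for (int i = 0; i < 10000; i++)
			__atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);

		return 0;
	}

With split_lock_detect=warn, the first add would be expected to take
the #AC path patched above.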

> Anyway, you can't really introduce regressions to userspace to "get
> stuff fixed" in applications.

Split locks can be triggered by unprivileged user space and can be used
to run a local DoS attack. A very effective DoS to be clear.

We have no indication whether a process is malicious or just doing
stupid things. The only reason to not kill the offending process
instantly is that there can be legacy binary-only executables which
trigger that due to stupidity.

But that opens up DoS at the same time. So the only way to "protect"
against that is to slow down the frequency of buslocks unconditionally.
If you don't like that after analysing the situation, you can turn split
lock detection off.
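
For reference, the existing ratelimit mode already provides such an
unconditional slow-down knob. An illustrative setting:

	split_lock_detect=ratelimit:10	# at most 10 bus locks/sec, system wide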

The only binary I've seen so far triggering this is some stone-age "value
add" blob from HP which is also doing totally nuts other things.

Thanks,

        tglx


* RE: [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-17 18:06       ` Thomas Gleixner
@ 2022-03-17 22:21         ` David Laight
  2022-03-17 22:40           ` Luck, Tony
  0 siblings, 1 reply; 16+ messages in thread
From: David Laight @ 2022-03-17 22:21 UTC (permalink / raw)
  To: 'Thomas Gleixner', Pavel Machek, Tony Luck
  Cc: Fenghua Yu, x86, linux-kernel, patches

From: Thomas Gleixner
> Sent: 17 March 2022 18:07
> 
> On Thu, Mar 17 2022 at 12:13, Pavel Machek wrote:
> >> In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas
> >> said:
> >>
> >>   It's simply wishful thinking that stuff gets fixed because of a
> >>   WARN_ONCE(). This has never worked. The only thing which works is to
> >>   make stuff fail hard or slow it down in a way which makes it annoying
> >>   enough to users to complain.
> >>
> >> He was talking about WBINVD. But it made me think about how we
> >> use the split lock detection feature in Linux.
> >>
> >> Existing code has three options for applications:
> >> 1) Don't enable split lock detection (allow arbitrary split locks)
> >> 2) Warn once when a process uses a split lock, but let the process
> >>    keep running with split lock detection disabled
> >> 3) Kill processes that use split locks
> >
> > I'm not sure what split locks are, and if you want applications to
> > stop doing that maybe documentation would help.
> 
> Split locks are lock-prefixed operations which cross a cache line
> boundary. They are implemented by taking the bus lock, which is the
> largest serialization hammer.
> 
> Bus locks are also triggered by lock prefixed operations on uncached
> memory and on MMIO.
> 
> > Anyway, you can't really introduce regressions to userspace to "get
> > stuff fixed" in applications.
> 
> Split locks can be triggered by unprivileged user space and can be used
> to run a local DoS attack. A very effective DoS to be clear.
> 
> We have no indication whether a process is malicious or just doing
> stupid things. The only reason to not kill the offending process
> instantly is that there can be legacy binary-only executables which
> trigger that due to stupidity.
> 
> But that opens up DoS at the same time. So the only way to "protect"
> against that is to slow down the frequency of buslocks unconditionally.
> If you don't like that after analysing the situation, you can turn split
> lock detection off.
> 
> The only binary I've seen so far triggering this is some stone-age "value
> add" blob from HP which is also doing totally nuts other things.

They are actually more likely to happen in the kernel
when code casts int[] to long[] and then uses the 'BIT' functions to
set/clear bits - which do locked operations.
Quite often they don't need the locks anyway.
And that cast is surprisingly common and completely broken on BE.
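
A hypothetical sketch of that pattern (illustrative kernel-context
code, not taken from any real driver):

	u32 flags[4];

	/*
	 * Broken: set_bit() operates on unsigned long (8 bytes on
	 * 64-bit). If flags[] is only 4-byte aligned, the locked RMW
	 * can be misaligned and may straddle a cache line boundary,
	 * and the bit numbering comes out differently on big-endian.
	 */
	set_bit(35, (unsigned long *)flags);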

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)



* RE: [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-17 22:21         ` David Laight
@ 2022-03-17 22:40           ` Luck, Tony
  2022-03-18 12:21             ` Fenghua Yu
  0 siblings, 1 reply; 16+ messages in thread
From: Luck, Tony @ 2022-03-17 22:40 UTC (permalink / raw)
  To: David Laight, 'Thomas Gleixner', Pavel Machek
  Cc: Yu, Fenghua, x86, linux-kernel, patches

> They are actually more likely to happen in the kernel
> when code casts int[] to long[] and then uses the 'BIT' functions to
> set/clear bits - which do locked operations.
> > Quite often they don't need the locks anyway.
> And that cast is surprisingly common and completely broken on BE.

The default split lock mode is "kernel dies". We had to fix up a dozen or
so places (mostly involving set_bit/clr_bit as you describe). But
all Ice Lake, Tiger Lake and Alder Lake systems are running in
that mode ... and we haven't heard of widespread death (or disabling
the boot option).

-Tony


* Re: [PATCH v2 1/2] x86/split_lock: Make life miserable for split lockers
  2022-03-17 22:40           ` Luck, Tony
@ 2022-03-18 12:21             ` Fenghua Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Fenghua Yu @ 2022-03-18 12:21 UTC (permalink / raw)
  To: Luck, Tony
  Cc: David Laight, 'Thomas Gleixner',
	Pavel Machek, x86, linux-kernel, patches

On Thu, Mar 17, 2022 at 03:40:16PM -0700, Luck, Tony wrote:
> > They are actually more likely to happen in the kernel
> > when code casts int[] to long[] and then uses the 'BIT' functions to
> > set/clear bits - which do locked operations.
> > Quite often then don't need the locks anyway.
> > And that cast is surprisingly common and completely broken on BE.
> 
> The default split lock mode is "kernel dies". We had to fix up a dozen or
> so places (mostly involving set_bit/clr_bit as you describe). But
> all Ice Lake, Tiger Lake and Alder Lake systems are running in
> that mode ... and we haven't heard of widespread death (or disabling
> the boot option).

Split lock detection is designed to report this kind of "hidden"
performance issue. Since the initial fix-up wave it has become rare
to find any legacy split lock issue in the kernel.

If a new split lock issue is introduced, the developer will hopefully
capture and fix it before it goes upstream, so others won't see it
any more.

Thanks.

-Fenghua


* [tip: x86/splitlock] x86/split-lock: Remove unused TIF_SLD bit
  2022-03-10 20:48   ` [PATCH v2 2/2] x86/split-lock: Remove unused TIF_SLD bit Tony Luck
@ 2022-04-27 13:49     ` tip-bot2 for Tony Luck
  0 siblings, 0 replies; 16+ messages in thread
From: tip-bot2 for Tony Luck @ 2022-04-27 13:49 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Tony Luck, Thomas Gleixner, x86, linux-kernel

The following commit has been merged into the x86/splitlock branch of tip:

Commit-ID:     ef79970d7ccdc4e8855aa6079fc2f4797a6807fb
Gitweb:        https://git.kernel.org/tip/ef79970d7ccdc4e8855aa6079fc2f4797a6807fb
Author:        Tony Luck <tony.luck@intel.com>
AuthorDate:    Thu, 10 Mar 2022 12:48:54 -08:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 27 Apr 2022 15:43:39 +02:00

x86/split-lock: Remove unused TIF_SLD bit

Changes to the "warn" mode of split lock handling mean that TIF_SLD is
never set.

Remove the bit, and the functions that use it.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220310204854.31752-3-tony.luck@intel.com

---
 arch/x86/include/asm/cpu.h         |  2 --
 arch/x86/include/asm/thread_info.h |  4 +---
 arch/x86/kernel/cpu/intel.c        | 12 ------------
 arch/x86/kernel/process.c          |  3 ---
 4 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 86e5e4e..d1c86b2 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -43,14 +43,12 @@ unsigned int x86_model(unsigned int sig);
 unsigned int x86_stepping(unsigned int sig);
 #ifdef CONFIG_CPU_SUP_INTEL
 extern void __init sld_setup(struct cpuinfo_x86 *c);
-extern void switch_to_sld(unsigned long tifn);
 extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
 extern bool handle_guest_split_lock(unsigned long ip);
 extern void handle_bus_lock(struct pt_regs *regs);
 u8 get_this_hybrid_cpu_type(void);
 #else
 static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
-static inline void switch_to_sld(unsigned long tifn) {}
 static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
 {
 	return false;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ebec69c..f0cb881 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,7 +92,6 @@ struct thread_info {
 #define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_NOTIFY_SIGNAL	17	/* signal notifications exist */
-#define TIF_SLD			18	/* Restore split lock detection on context switch */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -116,7 +115,6 @@ struct thread_info {
 #define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_SLD		(1 << TIF_SLD)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
@@ -128,7 +126,7 @@ struct thread_info {
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
 	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\
-	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
+	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
 
 /*
  * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index be2a0bd..672e253 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1233,18 +1233,6 @@ void handle_bus_lock(struct pt_regs *regs)
 }
 
 /*
- * This function is called only when switching between tasks with
- * different split-lock detection modes. It sets the MSR for the
- * mode of the new task. This is right most of the time, but since
- * the MSR is shared by hyperthreads on a physical core there can
- * be glitches when the two threads need different modes.
- */
-void switch_to_sld(unsigned long tifn)
-{
-	sld_update_msr(!(tifn & _TIF_SLD));
-}
-
-/*
  * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
  * only be trusted if it is confirmed that a CPU model implements a
  * specific feature at a particular bit position.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b370767..bcc76c1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -686,9 +686,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 		/* Enforce MSR update to ensure consistent state */
 		__speculation_ctrl_update(~tifn, tifn);
 	}
-
-	if ((tifp ^ tifn) & _TIF_SLD)
-		switch_to_sld(tifn);
 }
 
 /*


* [tip: x86/splitlock] x86/split_lock: Make life miserable for split lockers
  2022-03-10 20:48   ` [PATCH v2 1/2] x86/split_lock: " Tony Luck
  2022-03-17 11:13     ` Pavel Machek
@ 2022-04-27 13:49     ` tip-bot2 for Tony Luck
  1 sibling, 0 replies; 16+ messages in thread
From: tip-bot2 for Tony Luck @ 2022-04-27 13:49 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Tony Luck, Thomas Gleixner, x86, linux-kernel

The following commit has been merged into the x86/splitlock branch of tip:

Commit-ID:     b041b525dab95352fbd666b14dc73ab898df465f
Gitweb:        https://git.kernel.org/tip/b041b525dab95352fbd666b14dc73ab898df465f
Author:        Tony Luck <tony.luck@intel.com>
AuthorDate:    Thu, 10 Mar 2022 12:48:53 -08:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 27 Apr 2022 15:43:38 +02:00

x86/split_lock: Make life miserable for split lockers

In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas
said:

  It's simply wishful thinking that stuff gets fixed because of a
  WARN_ONCE(). This has never worked. The only thing which works is to
  make stuff fail hard or slow it down in a way which makes it annoying
  enough to users to complain.

He was talking about WBINVD. But it made me think about how we use the
split lock detection feature in Linux.

Existing code has three options for applications:

 1) Don't enable split lock detection (allow arbitrary split locks)
 2) Warn once when a process uses a split lock, but let the process
    keep running with split lock detection disabled
 3) Kill processes that use split locks

Option 2 falls into the "wishful thinking" territory that Thomas warns does
nothing. But option 3 might not be viable in a situation with legacy
applications that need to run.

Hence make option 2 much stricter to "slow it down in a way which makes
it annoying".

The primary reason for this change is to provide better quality of service
to the rest of the applications running on the system. Internal testing
shows that even with many processes splitting locks, the rest of the
system remains much more responsive.

The new "warn" mode operates like this.  When an application tries to
execute a bus lock the #AC handler.

 1) Delays (interruptibly) 10 ms before moving to the next step.

 2) Blocks (interruptibly) until it can get the semaphore
	If interrupted, just return. Assume the signal will either
	kill the task, or direct execution away from the instruction
	that is trying to get the bus lock.
 3) Disables split lock detection for the current core
 4) Schedules a work queue to re-enable split lock detect in 2 jiffies
 5) Returns

The work queue that re-enables split lock detection also releases the
semaphore.

There is a corner case where a CPU may be taken offline while split lock
detection is disabled. A CPU hotplug handler handles this case.

The old behaviour was to only print the split lock warning on the first
occurrence of a split lock from a task. Preserve that by adding a flag to
the task structure that suppresses subsequent split lock messages from that
task.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220310204854.31752-2-tony.luck@intel.com

---
 arch/x86/kernel/cpu/intel.c | 63 ++++++++++++++++++++++++++++++------
 include/linux/sched.h       |  3 ++-
 kernel/fork.c               |  5 +++-
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f7a5370..be2a0bd 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,10 +7,13 @@
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/semaphore.h>
 #include <linux/thread_info.h>
 #include <linux/init.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
 #include <linux/delay.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -999,6 +1002,8 @@ static const struct {
 
 static struct ratelimit_state bld_ratelimit;
 
+static DEFINE_SEMAPHORE(buslock_sem);
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
 	int len = strlen(opt), ratelimit;
@@ -1109,18 +1114,52 @@ static void split_lock_init(void)
 		split_lock_verify_msr(sld_state != sld_off);
 }
 
+static void __split_lock_reenable(struct work_struct *work)
+{
+	sld_update_msr(true);
+	up(&buslock_sem);
+}
+
+/*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+ * detection then the delayed work will be executed on some other CPU. That
+ * handles releasing the buslock_sem, but because it executes on a
+ * different CPU probably won't re-enable split lock detection. This is a
+ * problem on HT systems since the sibling CPU on the same core may then be
+ * left running with split lock detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+	sld_update_msr(true);
+
+	return 0;
+}
+
+static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
+
 static void split_lock_warn(unsigned long ip)
 {
-	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
-			    current->comm, current->pid, ip);
+	int cpu;
 
-	/*
-	 * Disable the split lock detection for this task so it can make
-	 * progress and set TIF_SLD so the detection is re-enabled via
-	 * switch_to_sld() when the task is scheduled out.
-	 */
+	if (!current->reported_split_lock)
+		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+				    current->comm, current->pid, ip);
+	current->reported_split_lock = 1;
+
+	/* misery factor #1, sleep 10ms before trying to execute split lock */
+	if (msleep_interruptible(10) > 0)
+		return;
+	/* Misery factor #2, only allow one buslocked disabled core at a time */
+	if (down_interruptible(&buslock_sem) == -EINTR)
+		return;
+	cpu = get_cpu();
+	schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+
+	/* Disable split lock detection on this CPU to make progress */
 	sld_update_msr(false);
-	set_tsk_thread_flag(current, TIF_SLD);
+	put_cpu();
 }
 
 bool handle_guest_split_lock(unsigned long ip)
@@ -1274,10 +1313,14 @@ static void sld_state_show(void)
 		pr_info("disabled\n");
 		break;
 	case sld_warn:
-		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
 			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
-		else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					      "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+				pr_warn("No splitlock CPU offline handler\n");
+		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
 			pr_info("#DB: warning on user-space bus_locks\n");
+		}
 		break;
 	case sld_fatal:
 		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a8911b1..23e03c7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -941,6 +941,9 @@ struct task_struct {
 #ifdef CONFIG_IOMMU_SVA
 	unsigned			pasid_activated:1;
 #endif
+#ifdef	CONFIG_CPU_SUP_INTEL
+	unsigned			reported_split_lock:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 9796897..f39795f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1045,6 +1045,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
+
+#ifdef CONFIG_CPU_SUP_INTEL
+	tsk->reported_split_lock = 0;
+#endif
+
 	return tsk;
 
 free_stack:

