linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] random: use raw spinlocks for use on RT
@ 2022-08-01 14:25 Jason A. Donenfeld
  2022-08-01 14:34 ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-01 14:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason A. Donenfeld, Sebastian Andrzej Siewior

After handling several bug reports using various creative solutions,
it's becoming clear that random bytes are actually a useful thing to
happen from any ordinary context, including when interrupts are off.
Actually, that's been long recognized, which is why the RNG uses
spinlocks rather than mutexes. But on RT, those spinlocks are getting
converted back into sleeping locks.

This clearly is causing more problems than it might hypothetically
solve. Additionally, the locks in random.c are generally for fixed
durations doing CPU-bound operations -- no waiting for hardware or I/O
or the like. So this shouldn't result in a real harm to latency.

Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
Sebastian - I won't move forward with this without your Ack, obviously.
What do you think of this general approach? -Jason

 drivers/char/random.c | 44 +++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d44832e9e709..67fdd71405b3 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -180,9 +180,9 @@ static struct {
 	u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
 	unsigned long birth;
 	unsigned long generation;
-	spinlock_t lock;
+	raw_spinlock_t lock;
 } base_crng = {
-	.lock = __SPIN_LOCK_UNLOCKED(base_crng.lock)
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(base_crng.lock)
 };
 
 struct crng {
@@ -214,7 +214,7 @@ static void crng_reseed(void)
 	 * because the per-cpu crngs are initialized to ULONG_MAX, so this
 	 * forces new CPUs that come online to always initialize.
 	 */
-	spin_lock_irqsave(&base_crng.lock, flags);
+	raw_spin_lock_irqsave(&base_crng.lock, flags);
 	memcpy(base_crng.key, key, sizeof(base_crng.key));
 	next_gen = base_crng.generation + 1;
 	if (next_gen == ULONG_MAX)
@@ -223,7 +223,7 @@ static void crng_reseed(void)
 	WRITE_ONCE(base_crng.birth, jiffies);
 	if (!static_branch_likely(&crng_is_ready))
 		crng_init = CRNG_READY;
-	spin_unlock_irqrestore(&base_crng.lock, flags);
+	raw_spin_unlock_irqrestore(&base_crng.lock, flags);
 	memzero_explicit(key, sizeof(key));
 }
 
@@ -303,7 +303,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	if (!crng_ready()) {
 		bool ready;
 
-		spin_lock_irqsave(&base_crng.lock, flags);
+		raw_spin_lock_irqsave(&base_crng.lock, flags);
 		ready = crng_ready();
 		if (!ready) {
 			if (crng_init == CRNG_EMPTY)
@@ -311,7 +311,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 			crng_fast_key_erasure(base_crng.key, chacha_state,
 					      random_data, random_data_len);
 		}
-		spin_unlock_irqrestore(&base_crng.lock, flags);
+		raw_spin_unlock_irqrestore(&base_crng.lock, flags);
 		if (!ready)
 			return;
 	}
@@ -333,11 +333,11 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	 * for our per-cpu crng. This brings us up to date with base_crng.
 	 */
 	if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) {
-		spin_lock(&base_crng.lock);
+		raw_spin_lock(&base_crng.lock);
 		crng_fast_key_erasure(base_crng.key, chacha_state,
 				      crng->key, sizeof(crng->key));
 		crng->generation = base_crng.generation;
-		spin_unlock(&base_crng.lock);
+		raw_spin_unlock(&base_crng.lock);
 	}
 
 	/*
@@ -555,14 +555,14 @@ enum {
 
 static struct {
 	struct blake2s_state hash;
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	unsigned int init_bits;
 } input_pool = {
 	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
 		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
 		    BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
 	.hash.outlen = BLAKE2S_HASH_SIZE,
-	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(input_pool.lock),
 };
 
 static void _mix_pool_bytes(const void *buf, size_t len)
@@ -579,9 +579,9 @@ static void mix_pool_bytes(const void *buf, size_t len)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&input_pool.lock, flags);
+	raw_spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(buf, len);
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	raw_spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 
 /*
@@ -612,7 +612,7 @@ static void extract_entropy(void *buf, size_t len)
 		block.rdseed[i++] = random_get_entropy();
 	}
 
-	spin_lock_irqsave(&input_pool.lock, flags);
+	raw_spin_lock_irqsave(&input_pool.lock, flags);
 
 	/* seed = HASHPRF(last_key, entropy_input) */
 	blake2s_final(&input_pool.hash, seed);
@@ -622,7 +622,7 @@ static void extract_entropy(void *buf, size_t len)
 	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
 	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
 
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	raw_spin_unlock_irqrestore(&input_pool.lock, flags);
 	memzero_explicit(next_key, sizeof(next_key));
 
 	while (len) {
@@ -667,13 +667,13 @@ static void __cold _credit_init_bits(size_t bits)
 			pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
 				  urandom_warning.missed);
 	} else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) {
-		spin_lock_irqsave(&base_crng.lock, flags);
+		raw_spin_lock_irqsave(&base_crng.lock, flags);
 		/* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */
 		if (crng_init == CRNG_EMPTY) {
 			extract_entropy(base_crng.key, sizeof(base_crng.key));
 			crng_init = CRNG_EARLY;
 		}
-		spin_unlock_irqrestore(&base_crng.lock, flags);
+		raw_spin_unlock_irqrestore(&base_crng.lock, flags);
 	}
 }
 
@@ -756,11 +756,11 @@ static int random_pm_notification(struct notifier_block *nb, unsigned long actio
 	 */
 	ktime_t stamps[] = { ktime_get(), ktime_get_boottime(), ktime_get_real() };
 
-	spin_lock_irqsave(&input_pool.lock, flags);
+	raw_spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(&action, sizeof(action));
 	_mix_pool_bytes(stamps, sizeof(stamps));
 	_mix_pool_bytes(&entropy, sizeof(entropy));
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	raw_spin_unlock_irqrestore(&input_pool.lock, flags);
 
 	if (crng_ready() && (action == PM_RESTORE_PREPARE ||
 	    (action == PM_POST_SUSPEND &&
@@ -848,10 +848,10 @@ void add_device_randomness(const void *buf, size_t len)
 	unsigned long entropy = random_get_entropy();
 	unsigned long flags;
 
-	spin_lock_irqsave(&input_pool.lock, flags);
+	raw_spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(&entropy, sizeof(entropy));
 	_mix_pool_bytes(buf, len);
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	raw_spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 EXPORT_SYMBOL(add_device_randomness);
 
@@ -1062,10 +1062,10 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned int nu
 	if (in_hardirq()) {
 		fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num);
 	} else {
-		spin_lock_irqsave(&input_pool.lock, flags);
+		raw_spin_lock_irqsave(&input_pool.lock, flags);
 		_mix_pool_bytes(&entropy, sizeof(entropy));
 		_mix_pool_bytes(&num, sizeof(num));
-		spin_unlock_irqrestore(&input_pool.lock, flags);
+		raw_spin_unlock_irqrestore(&input_pool.lock, flags);
 	}
 
 	if (crng_ready())
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-01 14:25 [PATCH] random: use raw spinlocks for use on RT Jason A. Donenfeld
@ 2022-08-01 14:34 ` Sebastian Andrzej Siewior
  2022-08-01 14:41   ` Jason A. Donenfeld
  2022-08-11  0:17   ` Jason A. Donenfeld
  0 siblings, 2 replies; 14+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-08-01 14:34 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: linux-kernel

On 2022-08-01 16:25:31 [+0200], Jason A. Donenfeld wrote:
> After handling several bug reports using various creative solutions,
> it's becoming clear that random bytes are actually a useful thing to
> happen from any ordinary context, including when interrupts are off.
> Actually, that's been long recognized, which is why the RNG uses
> spinlocks rather than mutexes. But on RT, those spinlocks are getting
> converted back into sleeping locks.
> 
> This clearly is causing more problems than it might hypothetically
> solve. Additionally, the locks in random.c are generally for fixed
> durations doing CPU-bound operations -- no waiting for hardware or I/O
> or the like. So this shouldn't result in a real harm to latency.
> 
> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
> Sebastian - I won't move forward with this without your Ack, obviously.
> What do you think of this general approach? -Jason

I would need to do worst-case measurements and I've been looking at this
just before writting the other email and there was a local_lock_t
somewhere which needs also change…

So I have everything ready for 5.20 (6.0) without the RT patch and
then this vsprintf issue comes along…
From that point of view I would prefer to either init it upfront in a
way that works for everyone/ lose the first %p since it is probably a
minor inconvenience if nobody complains - instead swapping all locks.
We managed without this for kasan and lockdep which are both not used in
a production environment.

Sebastian

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-01 14:34 ` Sebastian Andrzej Siewior
@ 2022-08-01 14:41   ` Jason A. Donenfeld
  2022-08-11  0:17   ` Jason A. Donenfeld
  1 sibling, 0 replies; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-01 14:41 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel

Hi Sebastian,

On Mon, Aug 01, 2022 at 04:34:13PM +0200, Sebastian Andrzej Siewior wrote:
> So I have everything ready for 5.20 (6.0) without the RT patch and
> then this vsprintf issue comes along…

I already sent my rc1 pull to Linus for 6.0, but I can do a second pull
with this in it for next week while the merge window is still open, so
that your RT-patch-len-zero objective is still met for 6.0.

> From that point of view I would prefer to either init it upfront in a
> way that works for everyone/ lose the first %p since it is probably a
> minor inconvenience if nobody complains - instead swapping all locks.
> We managed without this for kasan and lockdep which are both not used in
> a production environment.

The kfence change was a production change, actually. Lots of people turn
that on by default.

If you want to address this within printk itself, just do `if (rt || lockdep)`
as the condition, so we don't swallow the first one. When you have to
make code worse to satisfy a tool, the tool is the problem. We only
would need this first message dropping on rt, not on other kernels.
Don't knock other kernels.

However... I suspect these issues will continue to bite us in new subtle
ways for some time to come. Who is to say that you can't call
get_random_bytes() from a driver's hard IRQ? As RT gets integrated and
more widely deployed, I imagine these things will start coming up.
random.c was already designed to handle random bytes in irqoff; that's
why it uses irqsave/irqrestore all over its spinlock handling. This RT
thing is a snag in that original intention. But it's an intention trivial
to recover with this patch. So if you're okay with it, I think I'd
prefer to do this and have our problems go away once and for all.

> I would need to do worst-case measurements and I've been looking at this
> just before writing the other email and there was a local_lock_t
> somewhere which needs also change…

That would be very interesting to learn about. If your measurements say
yes, then maybe we can do this. If your measurements say "yikes", then I
guess we can't. Either way, I like having some metric to decide this by.

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-01 14:34 ` Sebastian Andrzej Siewior
  2022-08-01 14:41   ` Jason A. Donenfeld
@ 2022-08-11  0:17   ` Jason A. Donenfeld
  2022-08-11  7:15     ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-11  0:17 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel

Hey Sebastian,

On Mon, Aug 01, 2022 at 04:34:13PM +0200, Sebastian Andrzej Siewior wrote:
> On 2022-08-01 16:25:31 [+0200], Jason A. Donenfeld wrote:
> > After handling several bug reports using various creative solutions,
> > it's becoming clear that random bytes are actually a useful thing to
> > happen from any ordinary context, including when interrupts are off.
> > Actually, that's been long recognized, which is why the RNG uses
> > spinlocks rather than mutexes. But on RT, those spinlocks are getting
> > converted back into sleeping locks.
> > 
> > This clearly is causing more problems than it might hypothetically
> > solve. Additionally, the locks in random.c are generally for fixed
> > durations doing CPU-bound operations -- no waiting for hardware or I/O
> > or the like. So this shouldn't result in a real harm to latency.
> > 
> > Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > ---
> > Sebastian - I won't move forward with this without your Ack, obviously.
> > What do you think of this general approach? -Jason
> 
> I would need to do worst-case measurements and I've been looking at this
> just before writing the other email and there was a local_lock_t
> somewhere which needs also change…

Did you ever come up some measurements here? It sure would be nice if I
could apply this, but obviously that's contingent on you saying it's
okay latency-wise on RT.

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-11  0:17   ` Jason A. Donenfeld
@ 2022-08-11  7:15     ` Sebastian Andrzej Siewior
  2022-08-11 14:20       ` Jason A. Donenfeld
  0 siblings, 1 reply; 14+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-08-11  7:15 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: linux-kernel, tglx

On 2022-08-11 02:17:31 [+0200], Jason A. Donenfeld wrote:
> Hey Sebastian,
Hi Jason,

> > > Sebastian - I won't move forward with this without your Ack, obviously.
> > > What do you think of this general approach? -Jason
> > 
> > I would need to do worst-case measurements and I've been looking at this
> > just before writing the other email and there was a local_lock_t
> > somewhere which needs also change…
> 
> Did you ever come up some measurements here? It sure would be nice if I
> could apply this, but obviously that's contingent on you saying it's
> okay latency-wise on RT.

No, I did not. But I've been thinking a little about it. The worst case
latency is important now and later.
Looking at it, all we need is one init in vsprintf at boot time and we
are done. That is the third fallout that I am aware of since the rework
of get_random_*().
We managed to get rid of all memory allocations (including GFP_ATOMIC)
from preempt/IRQ-off section on PREEMPT_RT. Therefore I am not convinced
to make all locks in random core a raw_spinlock_t just to make things
work here as of now.

> Jason

Sebastian

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-11  7:15     ` Sebastian Andrzej Siewior
@ 2022-08-11 14:20       ` Jason A. Donenfeld
  2022-08-15 10:26         ` David Laight
                           ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-11 14:20 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel, tglx

Hi Sebastian,

On Thu, Aug 11, 2022 at 09:15:11AM +0200, Sebastian Andrzej Siewior wrote:
> On 2022-08-11 02:17:31 [+0200], Jason A. Donenfeld wrote:
> > Hey Sebastian,
> Hi Jason,
> 
> > > > Sebastian - I won't move forward with this without your Ack, obviously.
> > > > What do you think of this general approach? -Jason
> > > 
> > > I would need to do worst-case measurements and I've been looking at this
> > > just before writing the other email and there was a local_lock_t
> > > somewhere which needs also change…
> > 
> > Did you ever come up some measurements here? It sure would be nice if I
> > could apply this, but obviously that's contingent on you saying it's
> > okay latency-wise on RT.
> 
> No, I did not. But I've been thinking a little about it. The worst case
> latency is important now and later.
> Looking at it, all we need is one init in vsprintf at boot time and we
> are done. That is the third fallout that I am aware of since the rework
> of get_random_*().
> We managed to get rid of all memory allocations (including GFP_ATOMIC)
> from preempt/IRQ-off section on PREEMPT_RT. Therefore I am not convinced
> to make all locks in random core a raw_spinlock_t just to make things
> work here as of now.

By grouping everything into "the rework of get_random_*()", you miss
important subtleties, as I mentioned before. Importantly, in this case,
the issue we're facing has absolutely nothing at all to do with that,
but is rather entirely the result of removing the async notifier
mechanism in favor of doing things more directly, more straight
forwardly. So let's not muddle what we're discussing here.

But more generally, the RNG is supposed to be usable from any context.
And adding wild workarounds, or worse, adding back complex async
notifier stuff, seems bad. So far your proposals for the printk issue
haven't been acceptable at all.

So why don't we actually fix this, so we don't have to keep coming up
with hacks? The question is: does using raw spinlocks over this code
result in any real issue for RT latency? If so, I'd like to know where,
and maybe I can do something about that (or maybe I can't). If not, then
this is a non problem and I'll apply this patch with your blessing.

If you don't want to spend time doing latency measurements, could you
instead share a document or similar to the type of methodology you
usually use for that, so I can do the same? And at the very least, I am
simply curious and want to know more about the RT world.

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] random: use raw spinlocks for use on RT
  2022-08-11 14:20       ` Jason A. Donenfeld
@ 2022-08-15 10:26         ` David Laight
  2022-08-16 14:02         ` Jason A. Donenfeld
  2022-08-29 19:45         ` Sebastian Andrzej Siewior
  2 siblings, 0 replies; 14+ messages in thread
From: David Laight @ 2022-08-15 10:26 UTC (permalink / raw)
  To: 'Jason A. Donenfeld', Sebastian Andrzej Siewior
  Cc: linux-kernel, tglx

...
> So why don't we actually fix this, so we don't have to keep coming up
> with hacks? The question is: does using raw spinlocks over this code
> result in any real issue for RT latency? If so, I'd like to know where,
> and maybe I can do something about that (or maybe I can't). If not, then
> this is a non problem and I'll apply this patch with your blessing.
> 
> If you don't want to spend time doing latency measurements, could you
> instead share a document or similar to the type of methodology you
> usually use for that, so I can do the same? And at the very least, I am
> simply curious and want to know more about the RT world.

I'd have thought that the majority of kernel spinlocks are
held for much less than the time taken to do a context switch.
So converting them to sleep locks is always going to harm
lock acquisition time.

The other problem the is inherent with sleep locks is priority
inversion.
I've have terrible problems getting a multithreaded RT application
to run reliably.
The main fix required removing ALL the mutex from the hot paths.
Basically you can't ensure that mutexes are only held for short
periods; an ethernet hardware interrupt (and then the softint code)
will mean the mutex is held for far too long.
This means you can't use linked lists unless they can be managed
with cmpxchg (etc).

Now there may be some kernel spinlocks that are held for 'too long'
but they should probably be changed anyway.

Making the kernel 'mostly pre-emptable' (which isn't really that
much different from SMP) would fix a lot of RT issues (like RT
processes not being scheduled because a low priority process
in running in kernel on the same cpu) without the complete
scheduling train-wreck of making every spinlock a sleeplock.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-11 14:20       ` Jason A. Donenfeld
  2022-08-15 10:26         ` David Laight
@ 2022-08-16 14:02         ` Jason A. Donenfeld
  2022-08-29 19:45         ` Sebastian Andrzej Siewior
  2 siblings, 0 replies; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-16 14:02 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel, tglx

Hey Sebastian,

On Thu, Aug 11, 2022 at 04:20:21PM +0200, Jason A. Donenfeld wrote:
> Hi Sebastian,
> 
> On Thu, Aug 11, 2022 at 09:15:11AM +0200, Sebastian Andrzej Siewior wrote:
> > On 2022-08-11 02:17:31 [+0200], Jason A. Donenfeld wrote:
> > > Hey Sebastian,
> > Hi Jason,
> > 
> > > > > Sebastian - I won't move forward with this without your Ack, obviously.
> > > > > What do you think of this general approach? -Jason
> > > > 
> > > > I would need to do worst-case measurements and I've been looking at this
> > > > just before writing the other email and there was a local_lock_t
> > > > somewhere which needs also change…
> > > 
> > > Did you ever come up some measurements here? It sure would be nice if I
> > > could apply this, but obviously that's contingent on you saying it's
> > > okay latency-wise on RT.
> > 
> > No, I did not. But I've been thinking a little about it. The worst case
> > latency is important now and later.
> > Looking at it, all we need is one init in vsprintf at boot time and we
> > are done. That is the third fallout that I am aware of since the rework
> > of get_random_*().
> > We managed to get rid of all memory allocations (including GFP_ATOMIC)
> > from preempt/IRQ-off section on PREEMPT_RT. Therefore I am not convinced
> > to make all locks in random core a raw_spinlock_t just to make things
> > work here as of now.
> 
> By grouping everything into "the rework of get_random_*()", you miss
> important subtleties, as I mentioned before. Importantly, in this case,
> the issue we're facing has absolutely nothing at all to do with that,
> but is rather entirely the result of removing the async notifier
> mechanism in favor of doing things more directly, more straight
> forwardly. So let's not muddle what we're discussing here.
> 
> But more generally, the RNG is supposed to be usable from any context.
> And adding wild workarounds, or worse, adding back complex async
> notifier stuff, seems bad. So far your proposals for the printk issue
> haven't been acceptable at all.
> 
> So why don't we actually fix this, so we don't have to keep coming up
> with hacks? The question is: does using raw spinlocks over this code
> result in any real issue for RT latency? If so, I'd like to know where,
> and maybe I can do something about that (or maybe I can't). If not, then
> this is a non problem and I'll apply this patch with your blessing.
> 
> If you don't want to spend time doing latency measurements, could you
> instead share a document or similar to the type of methodology you
> usually use for that, so I can do the same? And at the very least, I am
> simply curious and want to know more about the RT world.

Thought I'd ping you about this again...

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-11 14:20       ` Jason A. Donenfeld
  2022-08-15 10:26         ` David Laight
  2022-08-16 14:02         ` Jason A. Donenfeld
@ 2022-08-29 19:45         ` Sebastian Andrzej Siewior
  2022-08-29 19:56           ` Jason A. Donenfeld
  2 siblings, 1 reply; 14+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-08-29 19:45 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: linux-kernel, tglx

On 2022-08-11 16:20:21 [+0200], Jason A. Donenfeld wrote:
> Hi Sebastian,
Hi Jason,

> By grouping everything into "the rework of get_random_*()", you miss
> important subtleties, as I mentioned before. Importantly, in this case,
> the issue we're facing has absolutely nothing at all to do with that,
> but is rather entirely the result of removing the async notifier
> mechanism in favor of doing things more directly, more straight
> forwardly. So let's not muddle what we're discussing here.

I meant the number of fallouts I saw on RT's side. The removal of the
async notifier is a different story and no complains on my side.

> But more generally, the RNG is supposed to be usable from any context.
> And adding wild workarounds, or worse, adding back complex async
> notifier stuff, seems bad. So far your proposals for the printk issue
> haven't been acceptable at all.

The printk issue is a two-way story:
- I tried to avoid the print out to the console because the code is not
  available yet and the recent reverts of already available code moved
  the missing bits further away. Linus wasn't happy with this, it wasn't
  merged so we have to wait for the complete code.

- The vsnprintf issue I try to solve. This needs to work in any context
  due printk. printk needs to work in any context (incl. preempt-off)
  and it evaluates its format string upon invocation.

The second point needs to solved even after the first one has been
solved.

> So why don't we actually fix this, so we don't have to keep coming up
> with hacks? The question is: does using raw spinlocks over this code
> result in any real issue for RT latency? If so, I'd like to know where,
> and maybe I can do something about that (or maybe I can't). If not, then
> this is a non problem and I'll apply this patch with your blessing.

It depends on what you do define as hacks. I suggested an explicit init
during boot for everyone. The only "hacky" thing might be the reschedule
of the worker every two secs in case random-core isn't ready yet.

RNG may be used from any context but I doubt if this also includes any
context on RT. (Side note: it does not include NMI but in general any
context, yes.)
The work in hardirq context is limited on RT and therefore I doubt that
there is any need to request random numbers (from hardirq, preempt or
IRQ disabled context) on PREEMPT RT on a regular basis. We had (have)
requests where this is needed but we managed to avoid it.
Another example: While kmalloc() can be invoked from any context, it
still must not be used on PREEMPT_RT from hardirq context and or
preempt-disabled regions.

> If you don't want to spend time doing latency measurements, could you
> instead share a document or similar to the type of methodology you
> usually use for that, so I can do the same? And at the very least, I am
> simply curious and want to know more about the RT world.

Latency. If you check "now" and "patched" and look at the difference in
max. latency then this is a good indication as a first step if you don't
see a difference. The next step is to hammer on the exposed API ensure
that it does not lead to any changes. If it does lead to changes then it
needs to be considered if the change is worth it or not.
A global raw_spinlock_t, which can be heavily contended, will be visible if
it is acquired from multiple CPUs at the same time (especially on a NUMA
box with 16+ CPUs).
A list which is iterated under a lock and can be filled with many
items will be visible. Therefore swake_up_all() must not be invoked with
disabled interrupts and the function itself drops all locks after one
loop, simply not to block for too long.
In general, if it can be avoided to use raw_spinlock_t and does not hurt
performance too much then why not.

In order to do this (create a test and hammer on the exposed API), I
applied your patch and looked at the result from get_random_bytes()
perspective as in "can we do this?":

get_random_bytes()
-> _get_random_bytes()
  -> crng_make_state()
    -> local_lock_irqsave(&crngs.lock, flags);

So that local_lock_t is still breaking things since it can not be
acquired from blocking context. So in order to continue this needs to be
replaced somehow and checked again…
Assuming this has been done, round #2:

get_random_bytes()
-> _get_random_bytes()
  -> crng_make_state()
    -> crng_reseed()
      -> extract_entropy()
        -> blake2s_final()
	  -> blake2s_compress()
	    -> kernel_fpu_begin()…

This blake2s_compress() can be called again within this callchain (via
blake2s()). The problem here is that kernel_fpu_begin() disables
preemption and the following SIMD operation can be expensive (not to
mention the onetime register store) and so it is attempted to have a
scheduling point on a regular basis.
Invoking this call chain from an already preempt-disabled section would
not allow any scheduling at this point (and so build up the max. latency
worst case).

After looking at this after a break, while writing this and paging
everything in, I still think that initialising the random number at boot
up for vsprintf's sake is the easiest thing. One init for RT and non-RT
from an initcall. No hack, just one plain and simple init with no need
to perform anything later on demand. 

> Jason

Sebastian

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-29 19:45         ` Sebastian Andrzej Siewior
@ 2022-08-29 19:56           ` Jason A. Donenfeld
  2022-08-30 10:13             ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-29 19:56 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel, tglx

Hi Sebastian,

On Mon, Aug 29, 2022 at 09:45:10PM +0200, Sebastian Andrzej Siewior wrote:
> > So why don't we actually fix this, so we don't have to keep coming up
> > with hacks? The question is: does using raw spinlocks over this code
> > result in any real issue for RT latency? If so, I'd like to know where,
> > and maybe I can do something about that (or maybe I can't). If not, then
> > this is a non problem and I'll apply this patch with your blessing.
> 
> It depends on what you do define as hacks. I suggested an explicit init
> during boot for everyone. The only "hacky" thing might be the reschedule
> of the worker every two secs in case random-core isn't ready yet.

The worker solution you proposed before was problematic in that it
changes RNG semantics by making jitter entropy run early on at boot
before even attempting to get entropy from later. Maybe that's an okay
change, or maybe it's not, but either way it isn't one that should be
forced by wacky vsprintf changes.

> RNG may be used from any context but I doubt if this also includes any
> context on RT. (Side note: it does not include NMI but in general any
> context, yes.)
> The work in hardirq context is limited on RT and therefore I doubt that
> there is any need to request random numbers (from hardirq, preempt or
> IRQ disabled context) on PREEMPT RT on a regular basis. We had (have)
> requests where this is needed but we managed to avoid it.
> Another example: While kmalloc() can be invoked from any context, it
> still must not be used on PREEMPT_RT from hardirq context and or
> preempt-disabled regions.

Okay but this on-demand aspect of vsprintf() is clearly a place where
it makes sense to do it from the occasional irq context.

> So that local_lock_t is still breaking things since it can not be
> acquired from blocking context. So in order to continue this needs to be
> replaced somehow and checked again…
> Assuming this has been done, round #2:
> 
> get_random_bytes()
> -> _get_random_bytes()
>   -> crng_make_state()
>     -> crng_reseed()
>       -> extract_entropy()
>         -> blake2s_final()
> 	  -> blake2s_compress()
> 	    -> kernel_fpu_begin()…

kernel_fpu_begin() is no longer used from IRQ context, since there's no
longer SIMD in IRQ context. So this callgraph isn't representative.

> This blake2s_compress() can be called again within this callchain (via
> blake2s()). The problem here is that kernel_fpu_begin() disables
> preemption and the following SIMD operation can be expensive (not to
> mention the onetime register store) and so it is attempted to have a
> scheduling point on a regular basis.
> Invoking this call chain from an already preempt-disabled section would
> not allow any scheduling at this point (and so build up the max. latency
> worst case).

Irrelevant, since kernel_fpu_begin() shouldn't be called in this context
any more, right?

> After looking at this after a break, while writing this and paging
> everything in, I still think that initialising the random number at boot
> up for vsprintf's sake is the easiest thing. One init for RT and non-RT
> from an initcall. No hack, just one plain and simple init with no need
> to perform anything later on demand. 

The "once at boot time" thing does not work here, as I've said over and
over, if what we're talking about is the workqueued get_random_bytes_wait()
call. The much smarter thing to do is let entropy be collected for as
long as possible, and when the RNG is initialized, initialize the
siphash secret, which is exactly what the current code does. So I think
the current vsprintf code can stay the same. What needs fixing, rather,
are the lack of raw spinlocks in random.c...

In light of my note on kernel_fpu_begin() not being used from IRQ
context, can you now consider this raw spinlock patch?

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-29 19:56           ` Jason A. Donenfeld
@ 2022-08-30 10:13             ` Sebastian Andrzej Siewior
  2022-08-30 15:24               ` Jason A. Donenfeld
  0 siblings, 1 reply; 14+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-08-30 10:13 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: linux-kernel, tglx

On 2022-08-29 15:56:06 [-0400], Jason A. Donenfeld wrote:
> Hi Sebastian,
Hi,

> On Mon, Aug 29, 2022 at 09:45:10PM +0200, Sebastian Andrzej Siewior wrote:
> > > So why don't we actually fix this, so we don't have to keep coming up
> > > with hacks? The question is: does using raw spinlocks over this code
> > > result in any real issue for RT latency? If so, I'd like to know where,
> > > and maybe I can do something about that (or maybe I can't). If not, then
> > > this is a non problem and I'll apply this patch with your blessing.
> > 
> > It depends on what you do define as hacks. I suggested an explicit init
> > during boot for everyone. The only "hacky" thing might be the reschedule
> > of the worker every two secs in case random-core isn't ready yet.
> 
> The worker solution you proposed before was problematic in that it
> changes RNG semantics by making jitter entropy run early on at boot
> before even attempting to get entropy from later. Maybe that's an okay
> change, or maybe it's not, but either way it isn't one that should be
> forced by wacky vnsprintf changes.

The first patch did so yes. The second simply retried in two secs and
this shouldn't be problematic.

> Okay but this on-demand aspect of vsprintf() is clearly a place where
> it makes sense to do it from the occasional irq context.
> 
> > So that local_lock_t is still breaking things since it can not be
> > acquired from blocking context. So in order to continue this needs to be
> > replaced somehow and checked again…
> > Assuming this has been done, round #2:
> > 
> > get_random_bytes()
> > -> _get_random_bytes()
> >   -> crng_make_state()
> >     -> crng_reseed()
> >       -> extract_entropy()
> >         -> blake2s_final()
> > 	  -> blake2s_compress()
> > 	    -> kernel_fpu_begin()…
> 
> kernel_fpu_begin() is no longer used from IRQ context, since there's no
> longer SIMD in IRQ context. So this callgraph isn't representative.

hard-IRQ context yes. But it is still used in preemptible context under
a raw_spinlock_t or with disabled interrupts/ preemption.
In the vsprintf/printk case it is invoked from preemptible context with
disabled interrupts.

> > This blake2s_compress() can be called again within this callchain (via
> > blake2s()). The problem here is that kernel_fpu_begin() disables
> > preemption and the following SIMD operation can be expensive (not to
> > mention the onetime register store) and so it is attempted to have a
> > scheduling point on a regular basis.
> > Invoking this call chain from an already preempt-disabled section would
> > not allow any scheduling at this point (and so build up the max. latency
> > worst case).
> 
> Irrelevant, since kernel_fpu_begin() shouldn't be called in this context
> any more, right?

wrong, see above. It only excludes the in-hardirq users. 

> > After looking at this after a break, while writing this and paging
> > everything in, I still think that initialising the random number at boot
> > up for vsprintf's sake is the easiest thing. One init for RT and non-RT
> > from an initcall. No hack, just one plain and simple init with no need
> > to perform anything later on demand. 
> 
> The "once at boot time" thing does not work here, as I've said over and
> over, if what we're talking about is the workqueued get_random_bytes_wait()
> call. The much smarter thing to do is let entropy be collected for as
> long as possible, and when the RNG is initialized, initialize the
> siphash secret, which is exactly what the current code does. So I think
> the current vsprintf code can stay the same. What needs fixing, rather,
> are the lack of raw spinlocks in random.c...

Not get_random_bytes_wait() but get_random_bytes() + reschedule, see
	https://lkml.kernel.org/r/YueeIgPGUJgsnsAh@linutronix.de

> In light of my note on kernel_fpu_begin() not being used from IRQ
> context, can you now consider this raw spinlock patch?

No because it gives the wrong motivation to use this without fear from
section with disabled interrupts/ preemption as it is the case in the
current printk example. So it extends the runtime without the need for
it since it could have been upfront at a lower price.

I intend to resend the previously mentioned patch where there is _no_
get_random_bytes_wait() so I don't see how this can be a problem. Do you
see still one?

> Jason

Sebastian

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-30 10:13             ` Sebastian Andrzej Siewior
@ 2022-08-30 15:24               ` Jason A. Donenfeld
  2022-08-30 18:57                 ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-30 15:24 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel, tglx

Hi Sebastian,

On Tue, Aug 30, 2022 at 12:13:44PM +0200, Sebastian Andrzej Siewior wrote:
> The first patch did so yes. The second simply retried in two secs and
> this shouldn't be problematic.

This seemed pretty bad too, because now you potentially miss up to 2
seconds of messages AND it adds more complexity.

I'm fine with changing things up to accommodate RT, but not when the
result is so obviously worse than before.

In my tests I can't see any latency difference with using raw spinlocks
in random.c. Maybe I'm doing things wrong? But I'm not seeing anything
change...

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-30 15:24               ` Jason A. Donenfeld
@ 2022-08-30 18:57                 ` Sebastian Andrzej Siewior
  2022-08-31 16:27                   ` Jason A. Donenfeld
  0 siblings, 1 reply; 14+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-08-30 18:57 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: linux-kernel, tglx

On 2022-08-30 11:24:33 [-0400], Jason A. Donenfeld wrote:
> Hi Sebastian,
Hi Jason,

> On Tue, Aug 30, 2022 at 12:13:44PM +0200, Sebastian Andrzej Siewior wrote:
> > The first patch did so yes. The second simply retried in two secs and
> > this shouldn't be problematic.
> 
> This seemed pretty bad too, because now you potentially miss up to 2
> seconds of messages AND it adds more complexity.

It is early at boot and it could be reduced to one second if it helps. I
remember you had a suggestion where we would always lose the first
printout on RT, and you said it is okay since you can't rely on that…

> I'm fine with changing things up to accommodate RT, but not when the
> result is so obviously worse than before.

I don't think it is worse. This is your opinion and I did not hear any
other feedback so far.

> In my tests I can't see any latency difference with using raw spinlocks
> in random.c. Maybe I'm doing things wrong? But I'm not seeing anything
> change...

You need to look at the maximum latency that may happen. Also the other
thing is that there is no need to add raw_spinlock_t locking if it can
be avoided.

> Jason

Sebastian

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] random: use raw spinlocks for use on RT
  2022-08-30 18:57                 ` Sebastian Andrzej Siewior
@ 2022-08-31 16:27                   ` Jason A. Donenfeld
  0 siblings, 0 replies; 14+ messages in thread
From: Jason A. Donenfeld @ 2022-08-31 16:27 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-kernel, tglx

On Tue, Aug 30, 2022 at 08:57:47PM +0200, Sebastian Andrzej Siewior wrote:
> On 2022-08-30 11:24:33 [-0400], Jason A. Donenfeld wrote:
> > Hi Sebastian,
> Hi Jason,
> 
> > On Tue, Aug 30, 2022 at 12:13:44PM +0200, Sebastian Andrzej Siewior wrote:
> > > The first patch did so yes. The second simply retried in two secs and
> > > this shouldn't be problematic.
> > 
> > This seemed pretty bad too, because now you potentially miss up to 2
> > seconds of messages AND it adds more complexity.
> 
> It is early at boot and it could be reduced to one if it helps. I
> remember you had a suggestion where we would lose always the first print
> out on RT you said it is okay since you can't rely on that…

I mean, the mechanism now is simple and doesn't fail. What you're
suggesting is more complex and fails sometimes. So,

> > I'm fine with changing things up to accommodate RT, but not when the
> > result is so obviously worse than before.
> 
> I don't think it is worse. This is your opinion and I did not hear any
> other feedback so far.

so, I think it's beyond a matter of opinion and is actually objectively
worse.

And it's not like I even care particularly much about vsprintf; as I
said before, none of this really matters _that_ much. But I *do* very
much object to dirtying up random bits of code and making things
actually worse in the name of RT, especially when there are other
solutions being considered. Namely:

> > In my tests I can't see any latency difference with using raw spinlocks
> > in random.c. Maybe I'm doing things wrong? But I'm not seeing anything
> > change...
> 
> You need to look at the maximum latency that may happen. Also the other
> thing is that there is no need to add raw_spinlock_t locking if it can
> be avoided.

I really am having trouble fashioning a test that shows a higher maximum
latency. All the RNG critical sections are really short in the end. So I
dunno... seems like not a big deal to me. If you're seeing different
numbers, can you post them and how you came up with them? If I can
reproduce it, maybe it's possible for me to do something about that
latency. But so far I'm not seeing any latency spike...

Jason

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2022-08-31 16:27 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-01 14:25 [PATCH] random: use raw spinlocks for use on RT Jason A. Donenfeld
2022-08-01 14:34 ` Sebastian Andrzej Siewior
2022-08-01 14:41   ` Jason A. Donenfeld
2022-08-11  0:17   ` Jason A. Donenfeld
2022-08-11  7:15     ` Sebastian Andrzej Siewior
2022-08-11 14:20       ` Jason A. Donenfeld
2022-08-15 10:26         ` David Laight
2022-08-16 14:02         ` Jason A. Donenfeld
2022-08-29 19:45         ` Sebastian Andrzej Siewior
2022-08-29 19:56           ` Jason A. Donenfeld
2022-08-30 10:13             ` Sebastian Andrzej Siewior
2022-08-30 15:24               ` Jason A. Donenfeld
2022-08-30 18:57                 ` Sebastian Andrzej Siewior
2022-08-31 16:27                   ` Jason A. Donenfeld

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).