All of lore.kernel.org
 help / color / mirror / Atom feed
* Revising prandom_32 generator
@ 2019-03-26 11:10 George Spelvin
  2019-03-26 11:17 ` George Spelvin
  2019-03-26 14:58 ` Stephen Hemminger
  0 siblings, 2 replies; 9+ messages in thread
From: George Spelvin @ 2019-03-26 11:10 UTC (permalink / raw)
  To: daniel, hannes, Borkmann, Daniel, Frederic, Hannes, Sowa
  Cc: lkml, George, netdev, Spelvin

I started on a project to correct all of the instances of
"prandom_u32() % FOO" in the kernel (there are lots)
to "prandom_u32_max(FOO)".

This has snowballed into an ongoing audit of random number use in the
kernel, including uses of get_random_bytes() when get_random_u32 or
prandom_u32() would suffice.

But one of the more potentially-controversial changes in the series,
which might benefit from more discussion time, is a change of the
actual prandom_u32() generator itself.

The current lfsr113 generator isn't bad, but like all LFSRs, it fails
linear complexity tests in suites like testU01 and PractRand.

And there are generators with the same 128-bit state which produce
better output in about half the time.  So I figured as long as I was
neck-deep in the code, I might as well tweak that, too.

The ones that seem interesting to me are:
- Chris Doty-Humphrey's sfc32.  This is a 96-bit chaotic generator
  (meaning period *probably* long but not well defined) fed with
  a 32-bit counter to ensure a minimum period.  It's extremely
  fast, and the author is also the author of PractRand, so it's
  well-tested.
- Vigna and Blackman's xoshiro128**.  This is a 128-bit LFSR with some
  output postprocessing.
- (on 64-bit machines) xoroshiro128**, by the same authors.
  This is only efficient on 64-bit machines, so it would need
  to have a 32-bit backup.
- Bob Jenkins' jsf (originally "flea").  128 bits, good mixing,
  fully chaotic.  I prefer the safety of a guaranteed minimum
  period, but this is well thought of.
- A lag-3 multiply-with-carry generator.  2^32 - 1736 is the largest
  "safe prime" multiplier.

I'm currently planning on using the first.  It also has the advantage
for lockless reseeding that there are no bad states to avoid.

Some discussion of most of these at
http://www.pcg-random.org/posts/some-prng-implementations.html

Here are some timing numbers.  Clock cycles per 1e7 iterations,
so 8-19 cycles per iteration.

Core 2:
         lcg32: cycles 89018580
         lcg64: cycles 130782048
           mwc: cycles 90385992
   prandom_u32: cycles 168780348
  xoshiro128ss: cycles 90331056
xoroshiro128ss: cycles 106012008
         sfc32: cycles 90318276
         jsf32: cycles 90364464
        jsf32a: cycles 100382724
        gjrand: cycles 131713680

Opteron:
         lcg32: cycles 103434699
         lcg64: cycles 80064205
           mwc: cycles 110313786
   prandom_u32: cycles 190311062
  xoshiro128ss: cycles 110115475
xoroshiro128ss: cycles 100114345
         sfc32: cycles 100163397
         jsf32: cycles 100104957
        jsf32a: cycles 110133030
        gjrand: cycles 110122007

(The LCGs are not 128-bit state; they're just there as a speed baseline.)

As you can see, the current lfsr113-based prandom_u32 takes almost
twice the time of the alternatives.

My quick & dirty test code is appended for anyone who wants to play.


#include <inttypes.h>
#include <stdint.h>

typedef uint32_t u32;
typedef uint64_t u64;
struct rnd_state {
	u32 s1, s2, s3, s4;
};

u32 lcg32(struct rnd_state *state)
{
	u32 x = state->s1 * 2891336453 + 1;

	return state->s1 = x;
}

u32 lcg64(struct rnd_state *state)
{
	u64 x = *(u64 *)&state->s1;
	//x = x * 0xf2fc5985 + 1;
	x = x * 6364136223846793005 + 1;
	*(u64 *)&state->s1 = x;
	return x >> 32;
}

u32 mwc(struct rnd_state *state)
{
	u64 x = state->s1 + (u64)state->s2 * -(u32)1736;
	state->s2 = state->s3;
	state->s3 = state->s4;
	state->s1 = x >> 32;
	return state->s4 = (u32)x;
}

u32 prandom_u32_state(struct rnd_state *state)
{
#define TAUSWORTHE(s, a, b, c, d) ((s & c) << d) ^ (((s << a) ^ s) >> b)
	u32 x = state->s1 = TAUSWORTHE(state->s1,  6, 13,   ~1u, 18);
	x    ^= state->s2 = TAUSWORTHE(state->s2,  2, 27,   ~7u,  2);
	//barrier();
	x    ^= state->s3 = TAUSWORTHE(state->s3, 13, 21,  ~15u,  7);
	x    ^= state->s4 = TAUSWORTHE(state->s4,  3, 12, ~127u, 13);

	return x;
}

static inline uint32_t rol32(uint32_t x, int k)
{
	return x << k | x >> (32 - k);
}

uint32_t xoshiro128ss(uint32_t s[4])
{
#if 0
	const uint32_t res = rol32(s[0] * 5, 7) * 9;
	const uint32_t t = s[1] << 9;

	s[2] ^= s[0];
	s[3] ^= s[1];
	s[1] ^= s[2];
	s[0] ^= s[3];
	s[2] ^= t;
	s[3] = rol32(s[3], 11);

	return res;
#else
	/* xoshiro128** */
	uint32_t a = s[0], b = s[1], c = s[2] ^ a, d = s[3] ^ b;

	s[0] = a ^ d;
	s[1] = b ^ c;
	s[2] = c ^ (b << 9);
	s[3] = rol32(d, 11);

	return rol32(a * 5, 7) * 9;	/* The "**" non-linear function */
#endif
}

static inline uint64_t rol64(uint64_t x, int k)
{
	return x << k | x >> (64 - k);
}

uint64_t xoroshiro128ss(uint64_t s[2])
{
	const uint64_t s0 = s[0];
	uint64_t s1 = s[1] ^ s0;
	const uint64_t res = rol64(s0 * 5, 7) * 9;

	s[1] = rol64(s1, 37); // c
	s[0] = rol64(s0, 24) ^ s1 ^ (s1 << 16); // a, b

	return res;
}

uint32_t sfc32(uint32_t s[4])
{
	uint32_t b = s[1], c = s[2];
	uint32_t const res = s[0] + b + s[3]++;

	// good sets include {21,9,3},{15,8,3};
	// older versions used {25,8,3} which wasn't as good
	s[0] = b ^ (b >> 9);
	s[1] = c + (c << 3);
	s[2] = rol32(c, 21) + res;
	return res;
}

/* http://burtleburtle.net/bob/rand/smallprng.html */
uint32_t jsf32(uint32_t s[4])
{
	uint32_t  e = s[0] - rol32(s[1], 27);
	       s[0] = s[1] ^ rol32(s[2], 17);
	       s[1] = s[2] + s[3];
	       s[2] = s[3] + e;
	return s[3] = e + s[0];
}

uint32_t jsf32a(uint32_t s[4])
{
	uint32_t  e = s[0] - rol32(s[1], 23);
	       s[0] = s[1] ^ rol32(s[2], 16);
	       s[1] = s[2] + rol32(s[3], 11);
	       s[2] = s[3] + e;
	return s[3] = e + s[0];
}

/* See https://gist.github.com/imneme/7a783e20f71259cc13e219829bcea4ac */
uint32_t gjrand(uint32_t s[4])
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3] += 0x96a5;

        b += c;            a  =  rol32(a, 16);
	c ^= b;            a += b;
	c  = rol32(c, 11); b ^= a;
        a += c;            b  = rol32(b, 19);
        c += a;            b += d;
	s[0] = a;
	s[1] = b;
	s[2] = c;
	return a;
}

#include <stdio.h>

static uint64_t rdtsc()
{
	uint32_t low, high;
	asm volatile("rdtsc" : "=a" (low), "=d" (high));
	return (uint64_t)high << 32 | low;
}

typedef uint32_t (*rng_func)(uint64_t s[2]);

#define FUNCS 10

int
main(void)
{
	static const rng_func funcs[FUNCS] = {
		(rng_func)lcg32,
		(rng_func)lcg64,
		(rng_func)mwc,
		(rng_func)prandom_u32_state,
		(rng_func)xoshiro128ss,
		(rng_func)xoroshiro128ss,
		(rng_func)sfc32,
		(rng_func)jsf32,
		(rng_func)jsf32a,
		(rng_func)gjrand
	};
	static const char rng_names[FUNCS][16] = {
		"         lcg32",
		"         lcg64",
		"           mwc",
		"   prandom_u32",
		"  xoshiro128ss",
		"xoroshiro128ss",
		"         sfc32",
		"         jsf32",
		"        jsf32a",
		"        gjrand"
	};
	uint32_t sums[FUNCS];
	int i, j, k;
	uint64_t timing[FUNCS], min_timing[FUNCS];

	for (k = 0; k < 10; k++) {
		for (i = 0; i < FUNCS; i++) {
			uint32_t sum = 0;
			uint64_t s[2] = { -1ul, -1ul };
			uint64_t start = rdtsc();
			asm("" : "+g" (start));

			for (j = 0; j < 10000000; j++)
				sum += funcs[i](s);
			asm("" : "+g" (start));
			timing[i] = rdtsc() - start;
			sums[i] = sum;
		}
		for (i = 0; i < FUNCS; i++) {
			printf("%s: sum %08x cycles %lu\n",
				rng_names[i], sums[i], timing[i]);
			if (!k || timing[i] < min_timing[i])
				min_timing[i] = timing[i];
		}
	}
	puts("Minimum timings:");
	for (i = 0; i < FUNCS; i++)
		printf("%s: cycles %lu\n", rng_names[i], min_timing[i]);
	return 0;
}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 11:10 Revising prandom_32 generator George Spelvin
@ 2019-03-26 11:17 ` George Spelvin
  2019-03-26 18:03   ` Hannes Frederic Sowa
  2019-03-26 14:58 ` Stephen Hemminger
  1 sibling, 1 reply; 9+ messages in thread
From: George Spelvin @ 2019-03-26 11:17 UTC (permalink / raw)
  To: daniel, hannes; +Cc: lkml, netdev

Apologies... I severely mangled the headers in the previous
message, feeding "Daniel Borkmann <daniel@iogearbox.net>" to a
parser which split on whitespace and thought that was three separate
recipients. :-(

To limit the damage, please either reply to *this* message or trim the
headers manually.  Sorry about that.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 11:10 Revising prandom_32 generator George Spelvin
  2019-03-26 11:17 ` George Spelvin
@ 2019-03-26 14:58 ` Stephen Hemminger
  2019-03-26 16:24   ` George Spelvin
  1 sibling, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2019-03-26 14:58 UTC (permalink / raw)
  To: George Spelvin
  Cc: daniel, hannes, Borkmann, Daniel, Frederic, Hannes, Sowa, George,
	netdev, Spelvin

On Tue, 26 Mar 2019 11:10:15 GMT
George Spelvin <lkml@sdf.org> wrote:

> I started on a project to correct all of the instances of
> "prandom_u32() % FOO" in the kernel (there are lots)
> to "prandom_u32_max(FOO)".
> 
> This has snowballed into an ongoing audit of random number use in the
> kernel, including uses of get_random_bytes() when get_random_u32 or
> prandom_u32() would suffice.
> 
> But one of the more potentially-controversial changes in the series,
> which might benefit from more discussion time, is a change of the
> actual prandom_u32() generator itself.
> 
> The current lfsr113 generator isn't bad, but like all LFSRs, it fails
> linear complexity tests in suites like testU01 and PractRand.
> 
> And there are generators with the same 128-bit state which produce
> better output in about half the time.  So I figured as long as I was
> neck-deep in the code, I might as well tweak that, too.
> 
> Thw ones that seem interesting to me are:
> - Chris Doty-Humphrey's sfc32.  This is a 96-bit chaotic generator
>   (meaning period *probably* long but not well defined) fed with
>   a 32-bit counter to ensure a minimum period.  It's extremely
>   fast, and the author is also the author of PractRand, so it's
>   well-tested.
> - Vigna and Bacman's xoshiro128**.  This is a 128-bit LFSR with some
>   output postprocessing.
> - (on 64-bit machines) xoroshiro128**, by the same authors.
>   This is only efficient on 64-bit machines, so it would need
>   a have a 32-bit backup.
> - Bob Jenkins' jsf (originally "flea").  128 bits, good mixing,
>   fully chaotic.  I prefer the safety of a guaranteed minimum
>   period, but this is well thought of.
> - A lag-3 mutiply-with-carry generator.  2^32 - 1736 is the largest
>   "safe prime" mutiplier.
> 
> I'm currently planning on using the first.  It also has the advantage
> for lockless reseeding that there are no bad states to avoid.
> 
> Some discussion of most of these at
> http://www.pcg-random.org/posts/some-prng-implementations.html
> 
> Here are some timing numbers.  Clock cycles per 1e7 iterations,
> sp 8-19 cycles per iteration.
> 
> Core 2:
>          lcg32: cycles 89018580
>          lcg64: cycles 130782048
>            mwc: cycles 90385992
>    prandom_u32: cycles 168780348
>   xoshiro128ss: cycles 90331056
> xoroshiro128ss: cycles 106012008
>          sfc32: cycles 90318276
>          jsf32: cycles 90364464
>         jsf32a: cycles 100382724
>         gjrand: cycles 131713680
> 
> Opteron:
>          lcg32: cycles 103434699
>          lcg64: cycles 80064205
>            mwc: cycles 110313786
>    prandom_u32: cycles 190311062
>   xoshiro128ss: cycles 110115475
> xoroshiro128ss: cycles 100114345
>          sfc32: cycles 100163397
>          jsf32: cycles 100104957
>         jsf32a: cycles 110133030
>         gjrand: cycles 110122007
> 
> (The LCGs are not 128-bit state; they're just there as a speed baseine.)
> 
> As you can see, the current lfsr113-based prandom_u32 takes almost
> twice the time of the aternatives.
> 
> My quick & dirty test code is appended for anyone who wants to play.
> 

A little backstory.  I started the prandom_32 stuff long ago when I wanted
a better (longer period) PRNG for use in netem. Took the existing code from
older version of GNU scientific library (pre GPLv3).  If there is something
faster with better properties go for it. But the whole point of prandom_32
is that it doesn't have to be crypto quality.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 14:58 ` Stephen Hemminger
@ 2019-03-26 16:24   ` George Spelvin
  0 siblings, 0 replies; 9+ messages in thread
From: George Spelvin @ 2019-03-26 16:24 UTC (permalink / raw)
  To: lkml, stephen; +Cc: daniel, hannes, netdev

> A little backstory.  I started the prandom_32 stuff long ago when I wanted
> a better (longer period) PRNG for use in netem. Took the existing code from
> older version of GNU scientific library (pre GPLv3).  If there is something
> faster with better properties go for it. But the whole point of prandom_32
> is that it doesn't have to be crypto quality.

Thank you for the encouragement!  The lfsr113 generator is really
a perfectly respectable generator.  There's nothing about it that
"needs fixing"; it's just possible to do slightly better.

When I can get better statistics, longer period (for the same state
size), faster *and* smaller code size, it seems worth looking into.

And yes, I understand "pseudorandom" very well.  From a documentation
patch for drivers/char/random.c I also posted recently:
+ *
+ * prandom_u32()
+ * -------------
+ *
+ * For even weaker applications, see the pseudorandom generator
+ * prandom_u32(), prandom_max(), and prandom_bytes().  If the random
+ * numbers aren't security-critical at all, these are *far* cheaper.
+ * Useful for self-tests, random error simulation, randomized backoffs,
+ * and any other application where you trust that nobody is trying to
+ * maliciously mess with you by guessing the "random" numbers.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 11:17 ` George Spelvin
@ 2019-03-26 18:03   ` Hannes Frederic Sowa
  2019-03-26 19:07     ` George Spelvin
  0 siblings, 1 reply; 9+ messages in thread
From: Hannes Frederic Sowa @ 2019-03-26 18:03 UTC (permalink / raw)
  To: George Spelvin, Daniel Borkmann; +Cc: netdev

Hi,

On Tue, Mar 26, 2019, at 12:10, George Spelvin wrote:
> I started on a project to correct all of the instances of
> "prandom_u32() % FOO" in the kernel (there are lots)
> to "prandom_u32_max(FOO)".

The conversation definitely makes sense.

Are you trying to fix the modulo biases? I think that prandom_u32_max also has bias, would that be worth fixing as well?

> [...]
>
> Thw ones that seem interesting to me are:
> - Chris Doty-Humphrey's sfc32.  This is a 96-bit chaotic generator
>   (meaning period *probably* long but not well defined) fed with
>   a 32-bit counter to ensure a minimum period.  It's extremely
>   fast, and the author is also the author of PractRand, so it's
>   well-tested.
> - Vigna and Bacman's xoshiro128**.  This is a 128-bit LFSR with some
>   output postprocessing.
> - (on 64-bit machines) xoroshiro128**, by the same authors.
>   This is only efficient on 64-bit machines, so it would need
>   a have a 32-bit backup.
> - Bob Jenkins' jsf (originally "flea").  128 bits, good mixing,
>   fully chaotic.  I prefer the safety of a guaranteed minimum
>   period, but this is well thought of.
> - A lag-3 mutiply-with-carry generator.  2^32 - 1736 is the largest
>   "safe prime" mutiplier.

I think tausworthe is not _trivial_ to predict; what about your proposed algorithms? I think it is a nice-to-have safety net in case too many random numbers accidentally leak (despite reseeding).

Thanks,
Hannes

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 18:03   ` Hannes Frederic Sowa
@ 2019-03-26 19:07     ` George Spelvin
  2019-03-26 19:23       ` Stephen Hemminger
  2019-03-27 18:32       ` Hannes Frederic Sowa
  0 siblings, 2 replies; 9+ messages in thread
From: George Spelvin @ 2019-03-26 19:07 UTC (permalink / raw)
  To: daniel, hannes, lkml; +Cc: netdev

On Tue, 26 Mar 2019 at 14:03:55 -0400, Hannes Frederic Sowa wrote:
> On Tue, Mar 26, 2019, at 12:10, George Spelvin wrote:
> The conversation definitely makes sense.

I also fixed the seeding of prandom; it now uses the
random_ready_callback system for seeding.

> Are you trying to fix the modulo biases? I think that prandom_u32_max
> also has bias, would that be worth fixing as well?

I could if you like.  I submitted a patch (appended) to do that
for /dev/urandom (using a very clever new algorithm that does it
all with a single multiply in the common case!), but didn't think
the prandom users were critical enough to care.

I documented the bias as follows:
/**
 * prandom_u32_max - returns a pseudo-random number in interval [0,
 * range)
 * @range: Size of random interval [0,range); the first value not
 * included.
 *
 * Returns a pseudo-random number that is in interval [0, range).
 * This is useful when requesting a random index of an array containing
 * range elements, for example.  This is similar to, but more efficient
 * than, "prandom_u32() % range".
 *
 * It's also a tiny bit more uniform than the modulo algorithm.  If
 * range
 * does not divide 1<<32, then some results will occur
 * ceil((1<<32)/range)
 * times, while others will occur floor((1<<32)/range) times.  The "%
 * range"
 * algorithm makes the lowest (1<<32) % range possible results occur
 * more often, and the higher values all occur slightly less often.
 *
 * A multiplicative algorithm spreads out the over- and underrepresented
 * outputs evenly across the range.  For example, if range == 7, then
 * 2, 4 and 6 occur slightly less often than 0, 1, 3 or 5.
 *
 * Note that the result depends on PRNG being well distributed in [0,
 * ~0U]
 * u32 space, a property that is provided by prandom_u32().
 *
 * It is possible to extend this to avoid all bias and return perfectly
 * uniform pseudorandom numbers by discarding and regenerating
 * sometimes,
 * but if your needs are that stringent, you should be using a stronger
 * generator.
 *
 * Ref:
 *      Fast Random Integer Generation in an Interval
 *      Daniel Lemire
 *      https://arxiv.org/abs/1805.10941
 *
 * Returns: pseudo-random number in interval [0, range)
 */

> I think tausworthe is not _trivially_ to predict, what about your
> proposed algorithms? I think it is a nice to have safety-net in
> case too much random numbers accidentally leaks (despite reseeding).

lfsr113 is indeed trivial to predict.  It's a 113-bit LFSR defined
by a degree-113 polynomial.  (The implementation as four separate
polynomials of degree 31, 29, 28 and 25 doesn't change this.)  Given
any 113 bits of its output (not necessarily consecutive), that's
113 boolean linear equations in 113 unknowns to find the internal
state.

I don't have PoC code, but Gaussian elimination is not exactly
rocket science.

All of the proposed replacements are considerably less linear and
harder to invert.  The xoshiro family are the most linear,
with an LFSR core and only a very simple non-linear state masking.


Here's the patch mentioned above for unbiased range reduction.

I just thought of a great improvement!  The slow computation
is "-range % range".  I could use __buitin_constant_p(range)
to decide between the following implementation and one that
has -range % range pre-computed and passed in.  I.e.

static inline u32 get_random_max32(u32 range)
{
	if (__builtin_constant_p(range))
		return get_random_max32_const(range, -range % range);
	else
		return get_random_max32_var(range);
}
u32 get_random_max32_const(u32 range, u32 lim)
{
	u64 prod;
	do
		prod = mul_u32_u32(get_random_u32(), range);
	while (unlikely((u32)prod < lim));
	return prod >> 32;
}
u32 get_random_max32_var(u32 range)
{
	u64 prod = mul_u32_u32(get_random_u32(), range);

	if ((u32)prod < range - 1) {
		u32 lim = -range % range;	/* = (1<<32) % range */

		while ((u32)prod < lim)
			prod = mul_u32_u32(get_random_u32(), range);
	}
	return prod >> 32;
}
 
From: George Spelvin <lkml@sdf.org>
Subject: [RFC PATCH] random: add get_random_max() function
To: linux-kernel@vger.kernel.org,
    Theodore Ts'o <tytso@mit.edu>
Cc: : Jason A. Donenfeld <Jason@zx2c4.com>,
    George Spelvin <lkml@sdf.org>

RFC because:
- Only lightly tested so far, and
- Expecting feedback on the function names

I got annoyed that randomize_page() wasn't exactly uniform, and used the
slow modulo-based range reduction rather than the fast multiplicative one,
so I fixed it.

I figure, if you're going to the trouble of using cryptographic random
numbers, you don't want biased outputs, even if it's slight.

Signed-off-by: George Spelvin <lkml@sdf.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c  | 106 +++++++++++++++++++++++++++++++++++++----
 include/linux/random.h |  32 +++++++++++++
 2 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/include/linux/random.h b/include/linux/random.h
index feef20fa7e87..cf00fc9cd706 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -60,6 +60,38 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+u32 get_random_max32(u32 range);
+#if CONFIG_64BIT
+u64 get_random_max64(u64 range);
+#endif
+
+/**
+ * get_random_max - Generate a uniform random number 0 <= x < range
+ * @range:	Modulus for random number; must not be zero!
+ */
+#if BITS_PER_LONG == 64
+/*
+ * We inline the check for 32-bit range on the grounds that it can
+ * often be optimized out because range is a compile-time constant
+ * or passed from a 32-bit variable.
+ *
+ * We could get fancier with __builtin_constant_p, but it's not used
+ * often enough to be worth the hassle.
+ */
+static inline unsigned long get_random_max(unsigned long range)
+{
+	if (range <= 0xffffffff)
+		return get_random_max32(range);
+	else
+		return get_random_max64(range);
+}
+#else
+static inline unsigned long get_random_max(unsigned long range)
+{
+	return get_random_max32(range);
+}
+#endif
+
 /*
  * On 64-bit architectures, protect against non-terminated C string overflows
  * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9f4a348c7eb5..a891886d6228 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2353,6 +2353,92 @@ static void invalidate_batched_entropy(void)
 	write_unlock_irqrestore(&batched_entropy_reset_lock, flags);
 }
 
+/**
+ * get_random_max32 - Generate a uniform random number 0 <= x < range < 2^32
+ * @range:	Modulus for random number; must not be zero!
+ *
+ * This generates an exactly uniform distribution over [0, range).
+ * If we're going to go to the expense of a cryptographic RNG, we
+ * may as well get the distribution right.
+ *
+ * This uses Lemire's multiplicative algorithm, from
+ *	Fast Random Integer Generation in an Interval
+ *	https://arxiv.org/abs/1805.10941
+ *
+ * The standard mutiplicative range technique is (rand32() * range) >> 32.
+ * However, there are either floor((1<<32) / range) or ceil((1<<32) / range)
+ * random values that will result in each output value.  More specifically,
+ * lim = (1 << 32) % range values will be generated one extra time.
+ *
+ * Lemire proves (Lemma 4.1) that those extra values can be identified
+ * by the lsbits of the product.  If you discard and retry whenever the
+ * lsbits are less than lim, you get the floor option always.
+ */
+u32 get_random_max32(u32 range)
+{
+	u64 prod = mul_u32_u32(get_random_u32(), range);
+
+	/*
+	 * Fast-patch check: lim < range, so lsbits < range-1
+	 * implies lsbits < lim.
+	 */
+	if ((u32)prod < range - 1) {
+		/* Slow path; we need to divide to check exactly */
+		u32 lim = -range % range;	/* = (1<<32) % range */
+
+		while ((u32)prod < lim)
+			prod = mul_u32_u32(get_random_u32(), range);
+	}
+	return prod >> 32;
+}
+EXPORT_SYMBOL(get_random_max32);
+
+#ifdef CONFIG_64BIT
+/**
+ * get_random_max64 - Generate a uniform random number 0 <= x < range < 2^64
+ * @range:	Modulus for random number; must not be zero!
+ *
+ * Like get_random_max32, but for larger ranges.  There are two
+ * implementations, depending on CONFIG_ARCH_SUPPORTS_INT128.
+ *
+ * This function does not special-case 32-bit ranges; use the generic
+ * get_random_max() for that.
+ */
+u64 get_random_max64(u64 range)
+{
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+	/* Same as get_random_max32(), with larger data types. */
+	unsigned __int128 prod = (unsigned __int128)get_random_u64() * range;
+
+	if ((u64)prod < range - 1) {
+		/* Slow path; we need to divide to check exactly */
+		u64 lim = -range % range;	/* = (1<<64) % range */
+
+		while ((u64)prod < lim)
+			prod = (unsigned __int128)get_random_u64() * range;
+	}
+	return prod >> 64;
+#else
+	/*
+	 * We don't have efficient 128-bit products (e.g. SPARCv9), so
+	 * break it into two halves: choose the high bits, then
+	 * choose the low bits depending on them.
+	 *
+	 * Note that if range >= 0xffffffff00000001, then range_high
+	 * will wrap to 0.  This is handled correctly.
+	 */
+	u32 range_high = (range + 0xffffffff) >> 32;
+	u32 msbits = likely(range_high) ? get_random_max32(range_high)
+                                        : get_random_u32();
+	u32 lsbits = likely(msbits != (u32)(range >> 32))
+			? get_random_u32()
+			: get_random_max32((u32)range);
+	return (u64)msbits << 32 | lsbits;
+#endif
+}
+EXPORT_SYMBOL(get_random_max64);
+#endif /* CONFIG_64BIT */
+
 /**
  * randomize_page - Generate a random, page aligned address
  * @start:	The smallest acceptable address the caller will take.
@@ -2370,20 +2456,24 @@ static void invalidate_batched_entropy(void)
 unsigned long
 randomize_page(unsigned long start, unsigned long range)
 {
-	if (!PAGE_ALIGNED(start)) {
-		range -= PAGE_ALIGN(start) - start;
-		start = PAGE_ALIGN(start);
-	}
+	/* How much to round up to make start page-aligned */
+	unsigned long align = -start & (PAGE_SIZE - 1);
 
-	if (start > ULONG_MAX - range)
-		range = ULONG_MAX - start;
+	if (unlikely(range < align))
+		return start;
+
+	start += align;
+	range -= align;
+
+	if (unlikely(range > -start))
+		range = -start;
 
 	range >>= PAGE_SHIFT;
 
-	if (range == 0)
+	if (unlikely(range == 0))
 		return start;
 
-	return start + (get_random_long() % range << PAGE_SHIFT);
+	return start + (get_random_max(range) << PAGE_SHIFT);
 }
 
 /* Interface for in-kernel drivers of true hardware RNGs.
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 19:07     ` George Spelvin
@ 2019-03-26 19:23       ` Stephen Hemminger
  2019-03-27 18:32       ` Hannes Frederic Sowa
  1 sibling, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2019-03-26 19:23 UTC (permalink / raw)
  To: George Spelvin; +Cc: daniel, hannes, netdev

On Tue, 26 Mar 2019 19:07:01 GMT
George Spelvin <lkml@sdf.org> wrote:

> lfsr113 is indeed trivial to predict.  It's a 113-bit LFSR defined
> by a degree-113 polynomial.  (The implementation as four separate
> polynomials of degree 31, 29, 28 and 25 doesn't change this.)  Given
> any 113 bits of its output (not necessarily consecutive), that's
> 113 boolean linear equations in 113 unknowns to find the internal
> state.
> 
> I don't have PoC code, but Gaussian elimination is not exactly
> rocket science.

If some code is using existing lfsr in a manner where prediction
would be a problem, then it is probably using the PRNG incorrectly
and should be using a cryptographic RNG. 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-26 19:07     ` George Spelvin
  2019-03-26 19:23       ` Stephen Hemminger
@ 2019-03-27 18:32       ` Hannes Frederic Sowa
  2019-03-27 21:43         ` George Spelvin
  1 sibling, 1 reply; 9+ messages in thread
From: Hannes Frederic Sowa @ 2019-03-27 18:32 UTC (permalink / raw)
  To: George Spelvin, Daniel Borkmann; +Cc: netdev

On Tue, Mar 26, 2019, at 20:07, George Spelvin wrote:
> On Tue, 26 Mar 2019 at 14:03:55 -0400, Hannes Frederic Sowa wrote:
> > On Tue, Mar 26, 2019, at 12:10, George Spelvin wrote:
> > The conversation definitely makes sense.
> 
> I also fixed the seeding of prandom; it now uses the
> random_ready_callback system for seeding.
> 
> > Are you trying to fix the modulo biases? I think that prandom_u32_max
> > also has bias, would that be worth fixing as well?
> 
> I could if you like.  I submitted a patch (appended) to do that
> for /dev/urandom (using a very clever new algorithm that does it
> all with a single multiply in the common case!), but didn't think
> the prandom users were critical enough to care.

I am not sure. A quick look shows that it might get used for selecting timeouts in the networking stack. Should not matter here at all. If used to select an index in pre-allocated arrays, then effects might be visible.
Additionally, by adding all the branches it might make sense to downgrade the function from 'static inline' to regular function.

All in all, I don't think it is required and I would just add it if you think it is worth it as well. The additional comments you proposed are definitely worth to add.

> > I think tausworthe is not _trivially_ to predict, what about your
> > proposed algorithms? I think it is a nice to have safety-net in
> > case too much random numbers accidentally leaks (despite reseeding).
> 
> lfsr113 is indeed trivial to predict.  It's a 113-bit LFSR defined
> by a degree-113 polynomial.  (The implementation as four separate
> polynomials of degree 31, 29, 28 and 25 doesn't change this.)  Given
> any 113 bits of its output (not necessarily consecutive), that's
> 113 boolean linear equations in 113 unknowns to find the internal
> state.
> 
> I don't have PoC code, but Gaussian elimination is not exactly
> rocket science.

Fair enough; the attack vector I had in mind was solely that some random output could be observed from the kernel via prandom_u32() and the security bar I wanted to beat was a PRNGs which outputs its entire state or a considerable amount of them. As you wrote, your proposed RNGs exhibit more non-linearity, so it is time to switch. :) I wasn't sure if the non-consecutive extraction of output would blow up the Gaussian Elimination at some point (as in taking a few hours to reduce), which I would not consider trivial anymore.

Anyway, using those functions in those situations would already be a bug, as Stephen correctly said, but some safety net would be useful (e.g. observing timeouts from a box right after boot up to determine the initial state of the fragmentation IDs would be bad - but early reseeding etc. makes that already difficult).

> All of the proposed replacements are considerably less linear and
> harder to invert.  The xoshiro family are the most linear,
> with an LFSR core and only a very simple non-linear state masking.

Then all of the proposed prngs are an improvement? Definitely go for it.

> Here's the patch mentioned above for unbiased range reduction.
> 
> I just thought of a great improvement!  The slow computation
> is "-range % range".  I could use __buitin_constant_p(range)
> to decide between the following implementation and one that
> has -range % range pre-computed and passed in.  I.e.

Heh, if you already made the work and maybe benchmarked it, it might make sense to kill the bias in prandom_u32_max anyway? :) The proposal below looks great!

> static inline u32 get_random_max32(u32 range)
> {
> 	if (__builtin_constant_p(range))
> 		return get_random_max32_const(range, -range % range);
> 	else
> 		return get_random_max32_var(range);
> }
>
> [...]
>
> u32 get_random_max32_var(u32 range)
> {
> 	u64 prod = mul_u32_u32(get_random_u32(), range);
> 
> 	if ((u32)prod < range - 1) {

Maybe 'unlikely' could help here?

> 		u32 lim = -range % range;	/* = (1<<32) % range */
> 
> 		while ((u32)prod < lim)
> 			prod = mul_u32_u32(get_random_u32(), range);
> 	}
> 	return prod >> 32;
> }

Bye,
Hannes

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Revising prandom_32 generator
  2019-03-27 18:32       ` Hannes Frederic Sowa
@ 2019-03-27 21:43         ` George Spelvin
  0 siblings, 0 replies; 9+ messages in thread
From: George Spelvin @ 2019-03-27 21:43 UTC (permalink / raw)
  To: daniel, hannes, lkml; +Cc: netdev

On Wed, 27 Mar 2019 at 14:32:55 -0400, Hannes Frederic Sowa wrote:
> On Tue, Mar 26, 2019, at 20:07, George Spelvin wrote:
>> On Tue, 26 Mar 2019 at 14:03:55 -0400, Hannes Frederic Sowa wrote:
>>> Are you trying to fix the modulo biases? I think that prandom_u32_max
>>> also has bias, would that be worth fixing as well?
>> 
>> I could if you like.  I submitted a patch (appended) to do that
>> for /dev/urandom (using a very clever new algorithm that does it
>> all with a single multiply in the common case!), but didn't think
>> the prandom users were critical enough to care.
> 
> I am not sure. A quick look shows that it might get used for
> selecting timeouts in the networking stack. Should not matter here
> at all. If used to select an index in pre-allocated arrays, then
> effects might be visible.

I looked around the code and couldn't find anything that looked
critical.

Remember that the bias is very small for small ranges.  You're
choosing between floor(2^32/range) and ceil(2^32/range), which is
a relative difference of 1/(2^32/range) = range/2^32.

If range = 100, that's 12.3 ppb.  It reaches 1 ppm at range=4295.

> Additionally, by adding all the branches it might make sense to
> downgrade the function from 'static inline' to regular function.

If the range is a compile-time constant, the code is

	u64 prod;

	do
		prod = (u64)range * prandom_u32();
	while (unlikely((u32)prod < lim));
	return prod >> 32;

This is almost the same as the biased code, except that one
compare and one branch is added.  (And a multiply-low on
processors with separate multiply-high instructions.)

But if the range is variable, it's worth taking out of line.

> All in all, I don't think it is required and I would just add it
> if you think it is worth it as well. The additional comments you
> proposed are definitely worth to add.

I also judged it unnecessary, so until someone speaks up I won't do it.

> I wasn't sure if the non-consecutive extraction of output
> would blow up the Gaussian Elimination at some point (as in taking
> a few hours to reduce), which I would not consider trivial anymore.

No, it doesn't matter where the bits are as long as you know the
spacing.  What might happen with non-consecutive bits is that a
few are linearly dependent and so don't count toward the required
113.  This is easily solved by getting a few more bits.

Also, linear equations are just as good as bits.  For example,
given the output of prandom_u32_max(3), we can always derive an
equation:

Output = 0 -> bit 31 = 0
Output = 1 -> bit 31 ^ bit30 == 1
Output = 2 -> bit 31 = 1

> Anyway, using those functions in those situations would already
> be a bug, as Stephen correctly said, but some safety net would be
> useful (e.g. observing timeouts from a box right after boot up to
> determine the initial state of the fragmentation IDs would be bad
> - but early reseeding etc. makes that already difficult).

If it's attackable, you're not supposed to use prandom().  Dithered
timeouts, for example, are a way for *cooperating* devices to avoid
fighting over a shared bus (or other resource).  A malicious device
can just monopolize the bus and fight all comers.

>> All of the proposed replacements are considerably less linear and
>> harder to invert.  The xoshiro family are the most linear,
>> with an LFSR core and only a very simple non-linear state masking.
> 
> Then all of the proposed prngs are an improvement? Definitely go for it.

>> Here's the patch mentioned above for unbiased range reduction.
>> 
>> I just thought of a great improvement!  The slow computation
>> is "-range % range".  I could use __builtin_constant_p(range)
>> to decide between the following implementation and one that
>> has -range % range pre-computed and passed in.  I.e.
> 
> Heh, if you already made the work and maybe benchmarked it, it
> might make sense to kill the bias in prandom_u32_max anyway? :)
> The proposal below looks great!

I already wrote and posted the code for get_random_u32().
The question is code size and time, not implementation.
(It's appended for reference.)

>> 	if ((u32)prod < range - 1) {
> 
> Maybe 'unlikely' could help here?

That's in the final version; see below.

In-Reply-To: <201903241244.x2OCiL8P011277@sdf.org>
References: <201903241244.x2OCiL8P011277@sdf.org>
From: George Spelvin <lkml@sdf.org>
Subject: [RFC PATCH v2] random: add get_random_max() function
To: linux-kernel@vger.kernel.org, Theodore Ts'o <tytso@mit.edu>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>, George Spelvin <lkml@sdf.org>

RFC because:
- Only lightly tested so far, and
- Expecting feedback on the function names

v1->v2: I realized that if the range is a compile-time constant, the
	code can be simplified considerably.  This version has two
	implementations depending on __builtin_constant_p(range).
	(Specifically, the expensive "lim = -range % range" is also a
	compile-time constant, and the "fast path" which tries to avoid
	computing it is unnecessary.)

I got annoyed that randomize_page() wasn't exactly uniform, and used the
slow modulo-based range reduction rather than the fast multiplicative one,
so I fixed it.

I figure, if you're going to the trouble of using cryptographic random
numbers, you don't want biased outputs, even if it's slight.

This includes optimized code for compile-time constant ranges, for the
benefit of future callers, even though that doesn't help randomize_page().

Signed-off-by: George Spelvin <lkml@sdf.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c  | 113 ++++++++++++++++++++++++++++++++++++++---
 include/linux/random.h | 109 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 214 insertions(+), 8 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9f4a348c7eb5..ffe6af0e165b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2353,6 +2353,99 @@ static void invalidate_batched_entropy(void)
 	write_unlock_irqrestore(&batched_entropy_reset_lock, flags);
 }
 
+/**
+ * __get_random_max32 - Generate a uniform random number 0 <= x < range < 2^32
+ * @range:	Modulus for random number; must not be zero!
+ *
+ * This generates an exactly uniform distribution over [0, range), where
+ * range is *not* a compile-time constant.
+ *
+ * This uses Lemire's multiplicative algorithm, from
+ *	Fast Random Integer Generation in an Interval
+ *	https://arxiv.org/abs/1805.10941
+ *
+ * The standard multiplicative range technique is (rand32() * range) >> 32.
+ * However, there are either floor((1<<32) / range) or ceil((1<<32) / range)
+ * random values that will result in each output value.  More specifically,
+ * lim = (1 << 32) % range values will be generated one extra time.
+ *
+ * Lemire proves (Lemma 4.1) that those extra values can be identified
+ * by the lsbits of the product.  If you discard and retry whenever the
+ * lsbits are less than lim, you get the floor option always.
+ */
+u32 __get_random_max32(u32 range)
+{
+	u64 prod = mul_u32_u32(get_random_u32(), range);
+
+	/*
+	 * Fast-path check: lim <= range - 1, so lsbits >= range - 1
+	 * implies lsbits >= lim (no retry needed).
+	 */
+	if (unlikely((u32)prod < range - 1)) {
+		/* Slow path; we need to divide to check exactly */
+		u32 lim = -range % range;	/* = (1<<32) % range */
+
+		while (unlikely((u32)prod < lim))
+			prod = mul_u32_u32(get_random_u32(), range);
+	}
+	return prod >> 32;
+}
+EXPORT_SYMBOL(__get_random_max32);
+
+#ifdef CONFIG_64BIT
+/**
+ * __get_random_max64 - Generate a uniform random number 0 <= x < range < 2^64
+ * @range:	Modulus for random number; must not be zero!
+ *
+ * Like get_random_max32, but for larger ranges.  There are two
+ * implementations, depending on CONFIG_ARCH_SUPPORTS_INT128.
+ */
+u64 __get_random_max64(u64 range)
+{
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+	/* Same as get_random_max32(), with larger data types. */
+	unsigned __int128 prod;
+
+	if (range >> 32 == 0)
+		return __get_random_max32(range);
+
+	prod = (unsigned __int128)get_random_u64() * range;
+
+	if (unlikely((u64)prod < range - 1)) {
+		/* Slow path; we need to divide to check exactly */
+		u64 lim = -range % range;	/* = (1<<64) % range */
+
+		while ((u64)prod < lim)
+			prod = (unsigned __int128)get_random_u64() * range;
+	}
+	return prod >> 64;
+#else
+	/*
+	 * We don't have efficient 128-bit products (e.g. SPARCv9), so
+	 * break it into two halves: choose the high bits, then
+	 * choose the low bits depending on them.
+	 */
+	u32 lsbits, msbits;
+
+	if (range >> 32 == 0)
+		return __get_random_max32(range);
+
+	msbits = (range + 0xffffffff) >> 32;
+	/* If range >= 0xffffffff00000001, msbits will wrap to 0 */
+	if (likely(msbits))
+		msbits = __get_random_max32(msbits);
+	else
+		msbits = get_random_u32();
+	if (unlikely(msbits == (u32)(range >> 32)))
+		lsbits = __get_random_max32((u32)range);
+	else
+		lsbits = get_random_u32();
+	return (u64)msbits << 32 | lsbits;
+#endif
+}
+EXPORT_SYMBOL(__get_random_max64);
+#endif /* CONFIG_64BIT */
+
 /**
  * randomize_page - Generate a random, page aligned address
  * @start:	The smallest acceptable address the caller will take.
@@ -2370,20 +2463,24 @@ static void invalidate_batched_entropy(void)
 unsigned long
 randomize_page(unsigned long start, unsigned long range)
 {
-	if (!PAGE_ALIGNED(start)) {
-		range -= PAGE_ALIGN(start) - start;
-		start = PAGE_ALIGN(start);
-	}
+	/* How much to round up to make start page-aligned */
+	unsigned long align = -start & (PAGE_SIZE - 1);
 
-	if (start > ULONG_MAX - range)
-		range = ULONG_MAX - start;
+	if (unlikely(range < align))
+		return start;
+
+	start += align;
+	range -= align;
+
+	if (unlikely(range > -start))
+		range = -start;
 
 	range >>= PAGE_SHIFT;
 
-	if (range == 0)
+	if (unlikely(range == 0))
 		return start;
 
-	return start + (get_random_long() % range << PAGE_SHIFT);
+	return start + (get_random_max(range) << PAGE_SHIFT);
 }
 
 /* Interface for in-kernel drivers of true hardware RNGs.
diff --git a/include/linux/random.h b/include/linux/random.h
index 25c0e3f0152c..fc929369471f 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -60,6 +60,115 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/**
+ * get_random_max32 - return a 32-bit random number in interval [0, range)
+ * @range: Size of random interval [0,range); the first value not included.
+ *
+ * This uses Lemire's algorithm; see drivers/char/random.c for details.
+ *
+ * The expensive part of this is the "(1ull<<32) % range" computation.
+ * If that's a compile-time constant, things get simple enough to inline.
+ */
+/* When the range is a compile-time constant, so is lim, and it's simple. */
+static __always_inline u32 _get_random_max32(u32 range)
+{
+	u32 lim = -range % range;
+	u64 prod;
+
+	do
+		prod = mul_u32_u32(get_random_u32(), range);
+	while (unlikely((u32)prod < lim));
+	return prod >> 32;
+}
+/* Non-constant ranges are done out of line. */
+u32 __get_random_max32(u32 range);
+
+/* Decide between the two.  (Case #3 is power of two.) */
+static __always_inline u32 get_random_max32(u32 range)
+{
+	if (!__builtin_constant_p(range))
+		return __get_random_max32(range);
+	else if (range & (range - 1))
+		return _get_random_max32(range);
+	else
+		return get_random_u32() / (u32)(0x100000000 / range);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * The 64-bit equivalent is trickier.  If we have 64x64->128-bit multiply,
+ * the same algorithm works.  If we don't (*cough* SPARCv9 *cough*),
+ * we can do it in two halves.
+ */
+static __always_inline u64 _get_random_max64(u64 range)
+{
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+	u64 lim = -range % range;
+	unsigned __int128 prod;
+
+	do
+		prod = (__int128)get_random_u64() * range;
+	while (unlikely((u64)prod < lim));
+	return prod >> 64;
+
+#else /* No 128-bit product - this is messy */
+	u32 lsbits, msbits = (range + 0xffffffff) >> 32;
+	/* If range >= 0xffffffff00000001, msbits will wrap to 0 */
+	if (msbits)
+		msbits = get_random_max32(msbits);
+	else
+		msbits = get_random_u32();
+	if ((u32)range && unlikely(msbits == (u32)(range >> 32)))
+		lsbits = get_random_max32((u32)range);
+	else
+		lsbits = get_random_u32();
+	return (u64)msbits << 32 | lsbits;
+#endif
+}
+
+u64 __get_random_max64(u64 range);
+
+static __always_inline u64 get_random_max64(u64 range)
+{
+	/*
+	 * We may know at compile time that range is 32 bits,
+	 * even if we don't know its exact value.
+	 */
+	if (__builtin_constant_p(range <= 0xffffffff) && range <= 0xffffffff)
+		return get_random_max32(range);
+	else if (!__builtin_constant_p(range))
+		return __get_random_max64(range);
+	else if (range & (range - 1))
+		return _get_random_max64(range);
+	else
+		return get_random_u64() / (0x100000000 / (range >> 32));
+}
+#endif
+
+/**
+ * get_random_max - Generate a uniform random number 0 <= x < range
+ * @range:	Modulus for random number; must not be zero!
+ *
+ * This uses a cryptographic random number generator, for safety
+ * against malicious attackers trying to guess the output.
+ *
+ * This generates exactly uniform random numbers in the range, retrying
+ * occasionally if range is not a power of two.  If we're going to
+ * go to the expense of a cryptographic RNG, we may as well get the
+ * distribution right.
+ *
+ * For less critical applications (self-tests, error injection, dithered
+ * timers), use prandom_u32_max().
+ */
+static __always_inline unsigned long get_random_max(unsigned long range)
+{
+#if BITS_PER_LONG == 64
+	return get_random_max64(range);
+#else
+	return get_random_max32(range);
+#endif
+}
+
 /*
  * On 64-bit architectures, protect against non-terminated C string overflows
  * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-03-27 21:43 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-26 11:10 Revising prandom_32 generator George Spelvin
2019-03-26 11:17 ` George Spelvin
2019-03-26 18:03   ` Hannes Frederic Sowa
2019-03-26 19:07     ` George Spelvin
2019-03-26 19:23       ` Stephen Hemminger
2019-03-27 18:32       ` Hannes Frederic Sowa
2019-03-27 21:43         ` George Spelvin
2019-03-26 14:58 ` Stephen Hemminger
2019-03-26 16:24   ` George Spelvin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.