All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Theodore Ts'o" <tytso@mit.edu>
To: George Spelvin <linux@horizon.com>
Cc: hpa@linux.intel.com, linux-kernel@vger.kernel.org,
	mingo@kernel.org, price@mit.edu
Subject: Re: drivers/char/random.c: More futzing about
Date: Wed, 11 Jun 2014 23:22:48 -0400	[thread overview]
Message-ID: <20140612032248.GA2437@thunk.org> (raw)
In-Reply-To: <20140612003249.13680.qmail@ns.horizon.com>

On Wed, Jun 11, 2014 at 08:32:49PM -0400, George Spelvin wrote:
> Comparable, but slightly slower.  Clearly, I need to do better.
> And you can see the first-iteration effects clearly.  Still,
> noting *remotely* like 7x!

I redid my numbers, and I can no longer reproduce the 7x slowdown.  I
do see that if you compile w/o -O2, fast_mix2 is twice as slow.  But
it's not 7x slower.

When compiling w/o -O2:

     	       fast_mix		fast_mix2
task-clock     221.3 ms 	460.7 ms

When compiling with -O2 -Os:

     	       fast_mix		fast_mix2
task-clock     115.4 ms		71.5 ms

And here's the numbers I got with a single iteration using rdtsc:

fast_mix: 164		fast_mix2: 237
fast_mix: 168		fast_mix2: 230
fast_mix: 166		fast_mix2: 228
fast_mix: 164		fast_mix2: 230
fast_mix: 166		fast_mix2: 230
fast_mix: 168		fast_mix2: 232
fast_mix: 166		fast_mix2: 228
fast_mix: 164		fast_mix2: 228
fast_mix: 166		fast_mix2: 234
fast_mix: 166		fast_mix2: 230

						- Ted


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

typedef unsigned int __u32;

struct fast_pool {
	__u32		pool[4];
	unsigned long	last;
	unsigned short	count;
	unsigned char	rotate;
	unsigned char	last_timer_intr;
};


/**
 * rol32 - rotate a 32-bit value left
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline __u32 rol32(__u32 word, unsigned int shift)
{
	return (word << shift) | (word >> (32 - shift));
}

static __u32 const twist_table[8] = {
	0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
	0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };

/*
 * This is a fast mixing routine used by the interrupt randomness
 * collector.  It's hardcoded for an 128 bit pool and assumes that any
 * locks that might be needed are taken by the caller.
 */
extern void fast_mix(struct fast_pool *f, __u32 input[4])
{
	__u32		w;
	unsigned	input_rotate = f->rotate;

	w = rol32(input[0], input_rotate) ^ f->pool[0] ^ f->pool[3];
	f->pool[0] = (w >> 3) ^ twist_table[w & 7];
	input_rotate = (input_rotate + 14) & 31;
	w = rol32(input[1], input_rotate) ^ f->pool[1] ^ f->pool[0];
	f->pool[1] = (w >> 3) ^ twist_table[w & 7];
	input_rotate = (input_rotate + 7) & 31;
	w = rol32(input[2], input_rotate) ^ f->pool[2] ^ f->pool[1];
	f->pool[2] = (w >> 3) ^ twist_table[w & 7];
	input_rotate = (input_rotate + 7) & 31;
	w = rol32(input[3], input_rotate) ^ f->pool[3] ^ f->pool[2];
	f->pool[3] = (w >> 3) ^ twist_table[w & 7];
	input_rotate = (input_rotate + 7) & 31;

	f->rotate = input_rotate;
	f->count++;
}

extern fast_mix2(struct fast_pool *f, __u32 const input[4])
{
	__u32 a = f->pool[0] ^ input[0],  b = f->pool[1] ^ input[1];
	__u32 c = f->pool[2] ^ input[2],  d = f->pool[3] ^ input[3];
	int i;


	for (i = 0; i < 3; i++) {
		/*
		 * Inspired by ChaCha's QuarterRound, but
		 * modified for much greater parallelism.
		 * Surprisingly, rotating a and c seems to work
		 * better than b and d.  And it runs faster.
		 */
		a += b;                 c += d;
		d ^= a;                 b ^= c;
		a = rol32(a, 15);       c = rol32(c, 21);

		a += b;                 c += d;
		d ^= a;                 b ^= c;
		a = rol32(a, 3);	c = rol32(c, 7);
	}
	f->pool[0] = a;  f->pool[1] = b;
	f->pool[2] = c;  f->pool[3] = d;
	f->count++;
}

static __inline__ volatile unsigned long long rdtsc(void)
{
  unsigned long long int x;
     __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
     return x;
}

int main(int argc, char **argv)
{
	struct fast_pool f;
	int i;
	__u32 input[4];
	unsigned volatile long long start_time, end_time;

	memset(&f, 0, sizeof(f));
	memset(&input, 0, sizeof(input));
	f.pool[0] = 1;

#if !defined(BENCH_FASTMIX) && !defined(BENCH_FASTMIX2)
	for (i=0; i < 10; i++) {
		usleep(50000);
		start_time = rdtsc();
		fast_mix(&f, input);
		end_time = rdtsc();
		printf("fast_mix: %llu\t", end_time - start_time);
		usleep(50000);
		start_time = rdtsc();
		fast_mix2(&f, input);
		end_time = rdtsc();
		printf("fast_mix2: %llu\n", end_time - start_time);
	}
	
#endif

#ifdef BENCH_FASTMIX
	for (i=0; i < 10240000; i++) {
		fast_mix(&f, input);
	}
#endif

#ifdef BENCH_FASTMIX2
	for (i=0; i < 10240000; i++) {
		fast_mix2(&f, input);
	}
#endif
}

  reply	other threads:[~2014-06-12  3:22 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-09  0:05 [RFC PATCH] drivers/char/random.c: Is reducing locking range like this safe? George Spelvin
2014-06-09  1:35 ` Theodore Ts'o
2014-06-09  2:10   ` George Spelvin
2014-06-09  2:18     ` George Spelvin
2014-06-09  4:03       ` George Spelvin
2014-06-09  9:23         ` George Spelvin
2014-06-09 13:34         ` Theodore Ts'o
2014-06-09 15:04           ` George Spelvin
2014-06-09 15:50             ` Theodore Ts'o
2014-06-09 16:11               ` George Spelvin
2014-06-10  0:20               ` drivers/char/random.c: more ruminations George Spelvin
2014-06-10  1:20                 ` Theodore Ts'o
2014-06-10  3:10                   ` George Spelvin
2014-06-10 15:25                     ` Theodore Ts'o
2014-06-10 20:40                       ` George Spelvin
2014-06-10 21:20                         ` Theodore Ts'o
2014-06-11  0:10                           ` George Spelvin
2014-06-11  2:08                             ` Theodore Ts'o
2014-06-11  3:58                               ` George Spelvin
2014-06-11 13:11                                 ` Theodore Ts'o
2014-06-12  0:42                                   ` George Spelvin
2014-06-12  1:03                                   ` H. Peter Anvin
2014-06-11  4:34                               ` George Spelvin
2014-06-11 13:09                                 ` Theodore Ts'o
2014-06-11  2:21                             ` Theodore Ts'o
2014-06-09 13:17   ` drivers/char/random.c: More futzing about George Spelvin
2014-06-11 16:38     ` Theodore Ts'o
2014-06-11 16:48       ` H. Peter Anvin
2014-06-11 19:25         ` Theodore Ts'o
2014-06-11 20:41           ` H. Peter Anvin
2014-06-12  0:44             ` H. Peter Anvin
2014-06-12  1:51               ` George Spelvin
2014-06-12  0:32       ` George Spelvin
2014-06-12  3:22         ` Theodore Ts'o [this message]
2014-06-12  4:13           ` random: Benchamrking fast_mix2 George Spelvin
2014-06-12 11:18             ` George Spelvin
2014-06-12 20:17               ` Theodore Ts'o
2014-06-12 20:46               ` Theodore Ts'o
2014-06-13  0:23                 ` George Spelvin
2014-06-13 15:52                   ` Theodore Ts'o
2014-06-14  2:10                     ` George Spelvin
2014-06-14  3:06                       ` Theodore Ts'o
2014-06-14  5:25                         ` George Spelvin
2014-06-14  6:24                           ` Theodore Ts'o
2014-06-14  8:03                             ` George Spelvin
2014-06-14 11:14                               ` George Spelvin
2014-06-14 15:13                                 ` George Spelvin
2014-06-14 16:33                                   ` Theodore Ts'o
2014-06-15  0:23                                     ` George Spelvin
2014-06-15  1:17                                       ` Theodore Ts'o
2014-06-15  6:58                                         ` George Spelvin
2014-06-15 13:01                                           ` Theodore Ts'o
2014-06-14  6:27                           ` Theodore Ts'o
2014-06-14  4:55                     ` [RFC] random: is the IRQF_TIMER test working as intended? George Spelvin
2014-06-14  6:43                       ` Theodore Ts'o
2014-06-14  7:23                         ` George Spelvin
2014-06-12  3:43       ` drivers/char/random.c: More futzing about George Spelvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140612032248.GA2437@thunk.org \
    --to=tytso@mit.edu \
    --cc=hpa@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@horizon.com \
    --cc=mingo@kernel.org \
    --cc=price@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.