* [PATCH] [1/2] random: SMP locking @ 2003-08-02 4:24 Oliver Xymoron 2003-08-02 11:00 ` Andrew Morton 0 siblings, 1 reply; 12+ messages in thread From: Oliver Xymoron @ 2003-08-02 4:24 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel This patch adds locking for SMP. Apparently Willy never managed to revive his laptop with his version so I revived mine. The batch pool is copied as a block to avoid long lock hold times while mixing it into the primary pool. diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c --- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500 +++ work/drivers/char/random.c 2003-08-01 22:15:42.000000000 -0500 @@ -484,6 +484,7 @@ int extract_count; struct poolinfo poolinfo; __u32 *pool; + spinlock_t lock; }; /* @@ -522,6 +523,7 @@ return -ENOMEM; } memset(r->pool, 0, POOLBYTES); + r->lock = SPIN_LOCK_UNLOCKED; *ret_bucket = r; return 0; } @@ -564,6 +566,8 @@ int wordmask = r->poolinfo.poolwords - 1; __u32 w; + spin_lock(&r->lock); + while (nwords--) { w = rotate_left(r->input_rotate, *in++); i = r->add_ptr = (r->add_ptr - 1) & wordmask; @@ -587,6 +591,8 @@ w ^= r->pool[i]; r->pool[i] = (w >> 3) ^ twist_table[w & 7]; } + + spin_unlock(&r->lock); } /* @@ -594,6 +600,8 @@ */ static void credit_entropy_store(struct entropy_store *r, int nbits) { + spin_lock(&r->lock); + if (r->entropy_count + nbits < 0) { DEBUG_ENT("negative entropy/overflow (%d+%d)\n", r->entropy_count, nbits); @@ -608,6 +616,8 @@ r == random_state ? 
"primary" : "unknown", nbits, r->entropy_count); } + + spin_unlock(&r->lock); } /********************************************************************** @@ -618,27 +628,33 @@ * **********************************************************************/ -static __u32 *batch_entropy_pool; -static int *batch_entropy_credit; -static int batch_max; +struct sample { + __u32 data[2]; + int credit; +}; + +static struct sample *batch_entropy_pool, *batch_entropy_copy; static int batch_head, batch_tail; +static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED; + +static int batch_max; static void batch_entropy_process(void *private_); static DECLARE_WORK(batch_work, batch_entropy_process, NULL); /* note: the size must be a power of 2 */ static int __init batch_entropy_init(int size, struct entropy_store *r) { - batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL); + batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL); if (!batch_entropy_pool) return -1; - batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL); - if (!batch_entropy_credit) { + batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL); + if (!batch_entropy_copy) { kfree(batch_entropy_pool); return -1; } batch_head = batch_tail = 0; - batch_max = size; batch_work.data = r; + batch_max = size; return 0; } @@ -650,27 +666,33 @@ */ void batch_entropy_store(u32 a, u32 b, int num) { - int new; + int new; + unsigned long flags; if (!batch_max) return; + + spin_lock_irqsave(&batch_lock, flags); - batch_entropy_pool[2*batch_head] = a; - batch_entropy_pool[(2*batch_head) + 1] = b; - batch_entropy_credit[batch_head] = num; + batch_entropy_pool[batch_head].data[0] = a; + batch_entropy_pool[batch_head].data[1] = b; + batch_entropy_pool[batch_head].credit = num; - new = (batch_head+1) & (batch_max-1); - if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) { + if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) { /* * Schedule it for the next timer tick: */ 
schedule_delayed_work(&batch_work, 1); - batch_head = new; - } else if (new == batch_tail) { + } + + new = (batch_head+1) & (batch_max-1); + if (new == batch_tail) { DEBUG_ENT("batch entropy buffer full\n"); } else { batch_head = new; } + + spin_unlock_irqrestore(&batch_lock, flags); } /* @@ -682,20 +704,34 @@ { struct entropy_store *r = (struct entropy_store *) private_, *p; int max_entropy = r->poolinfo.POOLBITS; + unsigned head, tail; - if (!batch_max) - return; + /* Mixing into the pool is expensive, so copy over the batch + * data and release the batch lock. The pool is at least half + * full, so don't worry too much about copying only the used + * part. + */ + spin_lock_irq(&batch_lock); + + memcpy(batch_entropy_copy, batch_entropy_pool, + batch_max*sizeof(struct sample)); + + head = batch_head; + tail = batch_tail; + batch_tail = batch_head; + + spin_unlock_irq(&batch_lock); p = r; - while (batch_head != batch_tail) { + while (head != tail) { if (r->entropy_count >= max_entropy) { r = (r == sec_random_state) ? random_state : sec_random_state; max_entropy = r->poolinfo.POOLBITS; } - add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2); - credit_entropy_store(r, batch_entropy_credit[batch_tail]); - batch_tail = (batch_tail+1) & (batch_max-1); + add_entropy_words(r, batch_entropy_copy[tail].data, 2); + credit_entropy_store(r, batch_entropy_copy[tail].credit); + tail = (tail+1) & (batch_max-1); } if (p->entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); @@ -1286,6 +1322,9 @@ DEBUG_ENT("%s has %d bits, want %d bits\n", r == sec_random_state ? "secondary" : + /* Hold lock while accounting */ + spin_lock(&r->lock); + r == random_state ? 
"primary" : "unknown", r->entropy_count, nbytes * 8); @@ -1298,6 +1337,8 @@ wake_up_interruptible(&random_write_wait); r->extract_count += nbytes; + + spin_unlock(&r->lock); ret = 0; while (nbytes) { @@ -1619,18 +1660,23 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; p = (int *) arg; + spin_lock(&random_state->lock); ent_count = random_state->entropy_count; if (put_user(ent_count, p++) || get_user(size, p) || put_user(random_state->poolinfo.poolwords, p++)) - return -EFAULT; + goto fail; if (size < 0) - return -EINVAL; + goto fail; if (size > random_state->poolinfo.poolwords) size = random_state->poolinfo.poolwords; if (copy_to_user(p, random_state->pool, size * sizeof(__u32))) - return -EFAULT; + goto fail; + spin_unlock(&random_state->lock); return 0; + fail: + spin_unlock(&random_state->lock); + return -EFAULT; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- "Love the dolphins," she advised him. "Write by W.A.S.T.E.." ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 4:24 [PATCH] [1/2] random: SMP locking Oliver Xymoron @ 2003-08-02 11:00 ` Andrew Morton 2003-08-02 12:35 ` Zwane Mwaikambo 2003-08-02 14:32 ` Matt Mackall 0 siblings, 2 replies; 12+ messages in thread From: Andrew Morton @ 2003-08-02 11:00 UTC (permalink / raw) To: Oliver Xymoron; +Cc: linux-kernel Oliver Xymoron <oxymoron@waste.org> wrote: > > This patch adds locking for SMP. Apparently Willy never managed to > revive his laptop with his version so I revived mine. hrm. I'm a random ignoramus. I'll look it over... Are you really sure that all the decisions about where to use spin_lock() vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are non-obvious. > @@ -1619,18 +1660,23 @@ > if (!capable(CAP_SYS_ADMIN)) > return -EPERM; > p = (int *) arg; > + spin_lock(&random_state->lock); > ent_count = random_state->entropy_count; > if (put_user(ent_count, p++) || > get_user(size, p) || > put_user(random_state->poolinfo.poolwords, p++)) Cannot perform userspace access while holding a lock - a pagefault could occur, perform IO, schedule away and the same CPU tries to take the same lock via a different process. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 11:00 ` Andrew Morton @ 2003-08-02 12:35 ` Zwane Mwaikambo 2003-08-02 14:43 ` Matt Mackall ` (2 more replies) 2003-08-02 14:32 ` Matt Mackall 1 sibling, 3 replies; 12+ messages in thread From: Zwane Mwaikambo @ 2003-08-02 12:35 UTC (permalink / raw) To: Andrew Morton; +Cc: Oliver Xymoron, linux-kernel On Sat, 2 Aug 2003, Andrew Morton wrote: > Oliver Xymoron <oxymoron@waste.org> wrote: > > > > This patch adds locking for SMP. Apparently Willy never managed to > > revive his laptop with his version so I revived mine. > > hrm. I'm a random ignoramus. I'll look it over... > > Are you really sure that all the decisions about where to use spin_lock() > vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are > non-obvious. > > > @@ -1619,18 +1660,23 @@ > > if (!capable(CAP_SYS_ADMIN)) > > return -EPERM; > > p = (int *) arg; > > + spin_lock(&random_state->lock); > > ent_count = random_state->entropy_count; > > if (put_user(ent_count, p++) || > > get_user(size, p) || > > put_user(random_state->poolinfo.poolwords, p++)) > > Cannot perform userspace access while holding a lock - a pagefault could > occur, perform IO, schedule away and the same CPU tries to take the same > lock via a different process. Perhaps might_sleep() in *_user, copy_* etc is in order? -- function.linuxpower.ca ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 12:35 ` Zwane Mwaikambo @ 2003-08-02 14:43 ` Matt Mackall 2003-08-02 15:02 ` Zwane Mwaikambo 2003-08-02 18:39 ` Andrew Morton 2003-08-02 18:42 ` Matt Mackall 2 siblings, 1 reply; 12+ messages in thread From: Matt Mackall @ 2003-08-02 14:43 UTC (permalink / raw) To: Zwane Mwaikambo; +Cc: Andrew Morton, linux-kernel On Sat, Aug 02, 2003 at 08:35:22AM -0400, Zwane Mwaikambo wrote: > On Sat, 2 Aug 2003, Andrew Morton wrote: > > Cannot perform userspace access while holding a lock - a pagefault could > > occur, perform IO, schedule away and the same CPU tries to take the same > > lock via a different process. > > Perhaps might_sleep() in *_user, copy_* etc is in order? Wouldn't have caught this case - this interface hasn't actually been used/useful for many years as it only gives access to one of the pools and has never been atomic either. -- Matt Mackall : http://www.selenic.com : of or relating to the moon ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 14:43 ` Matt Mackall @ 2003-08-02 15:02 ` Zwane Mwaikambo 0 siblings, 0 replies; 12+ messages in thread From: Zwane Mwaikambo @ 2003-08-02 15:02 UTC (permalink / raw) To: Matt Mackall; +Cc: Andrew Morton, linux-kernel On Sat, 2 Aug 2003, Matt Mackall wrote: > On Sat, Aug 02, 2003 at 08:35:22AM -0400, Zwane Mwaikambo wrote: > > On Sat, 2 Aug 2003, Andrew Morton wrote: > > > Cannot perform userspace access while holding a lock - a pagefault could > > > occur, perform IO, schedule away and the same CPU tries to take the same > > > lock via a different process. > > > > Perhaps might_sleep() in *_user, copy_* etc is in order? > > Wouldn't have caught this case - this interface hasn't actually been > used/useful for many years as it only gives access to one of the > pools and has never been atomic either. Aha, thanks for the explanation. Zwane -- function.linuxpower.ca ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 12:35 ` Zwane Mwaikambo 2003-08-02 14:43 ` Matt Mackall @ 2003-08-02 18:39 ` Andrew Morton 2003-08-02 18:42 ` Matt Mackall 2 siblings, 0 replies; 12+ messages in thread From: Andrew Morton @ 2003-08-02 18:39 UTC (permalink / raw) To: Zwane Mwaikambo; +Cc: oxymoron, linux-kernel Zwane Mwaikambo <zwane@arm.linux.org.uk> wrote: > > Perhaps might_sleep() in *_user, copy_* etc is in order? Probably, with a little care. A userspace copy while in an atomic region is actually legal, on behalf of the read() and write() pagecache copy functions: if we take a fault while holding an atomic kmap, the fault handler bales and the usercopy returns a short copy. In other words: if you stick a might_sleep() in __copy_from_user() and __copy_to_user() you get a false positive on every read() and write(). We could probably add it to copy_to_user() and copy_from_user() though. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 12:35 ` Zwane Mwaikambo 2003-08-02 14:43 ` Matt Mackall 2003-08-02 18:39 ` Andrew Morton @ 2003-08-02 18:42 ` Matt Mackall 2 siblings, 0 replies; 12+ messages in thread From: Matt Mackall @ 2003-08-02 18:42 UTC (permalink / raw) To: Zwane Mwaikambo; +Cc: Andrew Morton, linux-kernel On Sat, Aug 02, 2003 at 08:35:22AM -0400, Zwane Mwaikambo wrote: > On Sat, 2 Aug 2003, Andrew Morton wrote: > > > Cannot perform userspace access while holding a lock - a pagefault could > > occur, perform IO, schedule away and the same CPU tries to take the same > > lock via a different process. > > Perhaps might_sleep() in *_user, copy_* etc is in order? I think it's been suggested before, but I threw a patch together for i386 anyway. This only checks in the non-__ versions, as those are occasionally called inside things like kmap_atomic pairs which take a spinlock in with highmem. It's all conditional on CONFIG_DEBUG_SPINLOCK_SLEEP (which isn't quite the right name) so there's no overhead for normal builds. 
diff -urN -X dontdiff orig/arch/i386/lib/usercopy.c work/arch/i386/lib/usercopy.c --- orig/arch/i386/lib/usercopy.c 2003-07-13 22:28:54.000000000 -0500 +++ work/arch/i386/lib/usercopy.c 2003-08-02 13:33:43.000000000 -0500 @@ -149,6 +149,7 @@ unsigned long clear_user(void __user *to, unsigned long n) { + might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -188,6 +189,8 @@ unsigned long mask = -__addr_ok(s); unsigned long res, tmp; + might_sleep(); + __asm__ __volatile__( " testl %0, %0\n" " jz 3f\n" diff -urN -X dontdiff orig/include/asm-i386/uaccess.h work/include/asm-i386/uaccess.h --- orig/include/asm-i386/uaccess.h 2003-07-13 22:30:48.000000000 -0500 +++ work/include/asm-i386/uaccess.h 2003-08-02 13:15:42.000000000 -0500 @@ -260,6 +260,7 @@ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) *__pu_addr = (ptr); \ + might_sleep(); \ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \ __pu_err; \ @@ -469,6 +470,7 @@ static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { + might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; @@ -493,6 +495,7 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { + might_sleep(); if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); else -- Matt Mackall : http://www.selenic.com : of or relating to the moon ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 11:00 ` Andrew Morton 2003-08-02 12:35 ` Zwane Mwaikambo @ 2003-08-02 14:32 ` Matt Mackall 2003-08-02 19:00 ` Andrew Morton 1 sibling, 1 reply; 12+ messages in thread From: Matt Mackall @ 2003-08-02 14:32 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Sat, Aug 02, 2003 at 04:00:15AM -0700, Andrew Morton wrote: > Oliver Xymoron <oxymoron@waste.org> wrote: > > > > This patch adds locking for SMP. Apparently Willy never managed to > > revive his laptop with his version so I revived mine. > > hrm. I'm a random ignoramus. I'll look it over... > > Are you really sure that all the decisions about where to use spin_lock() > vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are > non-obvious. Aside from the put_user stuff below, yes. &batch_lock batch_entropy_store can be called from any context, and typically from interrupts -> spin_lock_irqsave batch_entropy_process is called via schedule_delayed_work and runs in process context -> spin_lock_irq &r->lock the mixing process is too expensive to be called from an interrupt context and the basic worker function extract_entropy can sleep, so all this stuff can be under a normal spin_lock > > @@ -1619,18 +1660,23 @@ > > if (!capable(CAP_SYS_ADMIN)) > > return -EPERM; > > p = (int *) arg; > > + spin_lock(&random_state->lock); > > ent_count = random_state->entropy_count; > > if (put_user(ent_count, p++) || > > get_user(size, p) || > > put_user(random_state->poolinfo.poolwords, p++)) > > Cannot perform userspace access while holding a lock - a pagefault could > occur, perform IO, schedule away and the same CPU tries to take the same > lock via a different process. Doh. Fixed. All the other userspace transfers are done without locks. 
diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c --- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500 +++ work/drivers/char/random.c 2003-08-02 09:28:04.000000000 -0500 @@ -484,6 +484,7 @@ int extract_count; struct poolinfo poolinfo; __u32 *pool; + spinlock_t lock; }; /* @@ -522,6 +523,7 @@ return -ENOMEM; } memset(r->pool, 0, POOLBYTES); + r->lock = SPIN_LOCK_UNLOCKED; *ret_bucket = r; return 0; } @@ -564,6 +566,8 @@ int wordmask = r->poolinfo.poolwords - 1; __u32 w; + spin_lock(&r->lock); + while (nwords--) { w = rotate_left(r->input_rotate, *in++); i = r->add_ptr = (r->add_ptr - 1) & wordmask; @@ -587,6 +591,8 @@ w ^= r->pool[i]; r->pool[i] = (w >> 3) ^ twist_table[w & 7]; } + + spin_unlock(&r->lock); } /* @@ -594,6 +600,8 @@ */ static void credit_entropy_store(struct entropy_store *r, int nbits) { + spin_lock(&r->lock); + if (r->entropy_count + nbits < 0) { DEBUG_ENT("negative entropy/overflow (%d+%d)\n", r->entropy_count, nbits); @@ -608,6 +616,8 @@ r == random_state ? 
"primary" : "unknown", nbits, r->entropy_count); } + + spin_unlock(&r->lock); } /********************************************************************** @@ -618,27 +628,33 @@ * **********************************************************************/ -static __u32 *batch_entropy_pool; -static int *batch_entropy_credit; -static int batch_max; +struct sample { + __u32 data[2]; + int credit; +}; + +static struct sample *batch_entropy_pool, *batch_entropy_copy; static int batch_head, batch_tail; +static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED; + +static int batch_max; static void batch_entropy_process(void *private_); static DECLARE_WORK(batch_work, batch_entropy_process, NULL); /* note: the size must be a power of 2 */ static int __init batch_entropy_init(int size, struct entropy_store *r) { - batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL); + batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL); if (!batch_entropy_pool) return -1; - batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL); - if (!batch_entropy_credit) { + batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL); + if (!batch_entropy_copy) { kfree(batch_entropy_pool); return -1; } batch_head = batch_tail = 0; - batch_max = size; batch_work.data = r; + batch_max = size; return 0; } @@ -650,27 +666,33 @@ */ void batch_entropy_store(u32 a, u32 b, int num) { - int new; + int new; + unsigned long flags; if (!batch_max) return; + + spin_lock_irqsave(&batch_lock, flags); - batch_entropy_pool[2*batch_head] = a; - batch_entropy_pool[(2*batch_head) + 1] = b; - batch_entropy_credit[batch_head] = num; + batch_entropy_pool[batch_head].data[0] = a; + batch_entropy_pool[batch_head].data[1] = b; + batch_entropy_pool[batch_head].credit = num; - new = (batch_head+1) & (batch_max-1); - if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) { + if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) { /* * Schedule it for the next timer tick: */ 
schedule_delayed_work(&batch_work, 1); - batch_head = new; - } else if (new == batch_tail) { + } + + new = (batch_head+1) & (batch_max-1); + if (new == batch_tail) { DEBUG_ENT("batch entropy buffer full\n"); } else { batch_head = new; } + + spin_unlock_irqrestore(&batch_lock, flags); } /* @@ -682,20 +704,34 @@ { struct entropy_store *r = (struct entropy_store *) private_, *p; int max_entropy = r->poolinfo.POOLBITS; + unsigned head, tail; - if (!batch_max) - return; + /* Mixing into the pool is expensive, so copy over the batch + * data and release the batch lock. The pool is at least half + * full, so don't worry too much about copying only the used + * part. + */ + spin_lock_irq(&batch_lock); + + memcpy(batch_entropy_copy, batch_entropy_pool, + batch_max*sizeof(struct sample)); + + head = batch_head; + tail = batch_tail; + batch_tail = batch_head; + + spin_unlock_irq(&batch_lock); p = r; - while (batch_head != batch_tail) { + while (head != tail) { if (r->entropy_count >= max_entropy) { r = (r == sec_random_state) ? random_state : sec_random_state; max_entropy = r->poolinfo.POOLBITS; } - add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2); - credit_entropy_store(r, batch_entropy_credit[batch_tail]); - batch_tail = (batch_tail+1) & (batch_max-1); + add_entropy_words(r, batch_entropy_copy[tail].data, 2); + credit_entropy_store(r, batch_entropy_copy[tail].credit); + tail = (tail+1) & (batch_max-1); } if (p->entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); @@ -1286,6 +1322,9 @@ DEBUG_ENT("%s has %d bits, want %d bits\n", r == sec_random_state ? "secondary" : + /* Hold lock while accounting */ + spin_lock(&r->lock); + r == random_state ? 
"primary" : "unknown", r->entropy_count, nbytes * 8); @@ -1298,6 +1337,8 @@ wake_up_interruptible(&random_write_wait); r->extract_count += nbytes; + + spin_unlock(&r->lock); ret = 0; while (nbytes) { @@ -1593,7 +1634,7 @@ random_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { - int *p, size, ent_count; + int *p, *tmp, size, ent_count; int retval; switch (cmd) { @@ -1619,18 +1660,40 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; p = (int *) arg; - ent_count = random_state->entropy_count; - if (put_user(ent_count, p++) || - get_user(size, p) || + if (get_user(size, p) || put_user(random_state->poolinfo.poolwords, p++)) - return -EFAULT; + goto fail; if (size < 0) - return -EINVAL; + goto fail; if (size > random_state->poolinfo.poolwords) size = random_state->poolinfo.poolwords; - if (copy_to_user(p, random_state->pool, size * sizeof(__u32))) - return -EFAULT; + + /* prepare to atomically snapshot pool */ + + tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL); + + if (!tmp) + goto fail; + + spin_lock(&random_state->lock); + ent_count = random_state->entropy_count; + memcpy(tmp, random_state->pool, size * sizeof(__u32)); + spin_unlock(&random_state->lock); + + if (!copy_to_user(p, tmp, size * sizeof(__u32))) { + kfree(tmp); + goto fail; + } + + kfree(tmp); + + if(put_user(ent_count, p++)) + goto fail; + return 0; + fail: + spin_unlock(&random_state->lock); + return -EFAULT; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- Matt Mackall : http://www.selenic.com : of or relating to the moon ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 14:32 ` Matt Mackall @ 2003-08-02 19:00 ` Andrew Morton 2003-08-02 19:33 ` Matt Mackall 0 siblings, 1 reply; 12+ messages in thread From: Andrew Morton @ 2003-08-02 19:00 UTC (permalink / raw) To: Matt Mackall; +Cc: linux-kernel Matt Mackall <mpm@selenic.com> wrote: > > > Are you really sure that all the decisions about where to use spin_lock() > > vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are > > non-obvious. > > Aside from the put_user stuff below, yes. Well I see in Linus's current tree: ndev->regen_timer.function = ipv6_regen_rndid; And ipv6_regen_rndid() ends up calling get_random_bytes() which calls extract_entropy() which now does a bare spin_lock(). So I think if the timer is run while some process-context code on the same CPU is running get_random_bytes() we deadlock don't we? Probably, we should make get_random_bytes() callable from any context. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 19:00 ` Andrew Morton @ 2003-08-02 19:33 ` Matt Mackall 2003-08-02 19:46 ` Andrew Morton 0 siblings, 1 reply; 12+ messages in thread From: Matt Mackall @ 2003-08-02 19:33 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Sat, Aug 02, 2003 at 12:00:23PM -0700, Andrew Morton wrote: > Matt Mackall <mpm@selenic.com> wrote: > > > > > Are you really sure that all the decisions about where to use spin_lock() > > > vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are > > > non-obvious. > > > > Aside from the put_user stuff below, yes. > > Well I see in Linus's current tree: > > ndev->regen_timer.function = ipv6_regen_rndid; > > And ipv6_regen_rndid() ends up calling get_random_bytes() which calls > extract_entropy() which now does a bare spin_lock(). > > So I think if the timer is run while some process-context code on the same > CPU is running get_random_bytes() we deadlock don't we? Sigh. I had hastily assumed the rescheduling in extract_entropy would break any such users in a non-process context, but that's conditional on the EXTRACT_ENTROPY_USER flag so doesn't apply in the get_random_bytes path. > Probably, we should make get_random_bytes() callable from any context. Ok, new patch follows. 
diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c --- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500 +++ work/drivers/char/random.c 2003-08-02 14:22:51.000000000 -0500 @@ -484,6 +484,7 @@ int extract_count; struct poolinfo poolinfo; __u32 *pool; + spinlock_t lock; }; /* @@ -522,6 +523,7 @@ return -ENOMEM; } memset(r->pool, 0, POOLBYTES); + r->lock = SPIN_LOCK_UNLOCKED; *ret_bucket = r; return 0; } @@ -563,6 +565,9 @@ int new_rotate; int wordmask = r->poolinfo.poolwords - 1; __u32 w; + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); while (nwords--) { w = rotate_left(r->input_rotate, *in++); @@ -587,6 +592,8 @@ w ^= r->pool[i]; r->pool[i] = (w >> 3) ^ twist_table[w & 7]; } + + spin_unlock_irqrestore(&r->lock, flags); } /* @@ -594,6 +601,10 @@ */ static void credit_entropy_store(struct entropy_store *r, int nbits) { + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + if (r->entropy_count + nbits < 0) { DEBUG_ENT("negative entropy/overflow (%d+%d)\n", r->entropy_count, nbits); @@ -608,6 +619,8 @@ r == random_state ? 
"primary" : "unknown", nbits, r->entropy_count); } + + spin_unlock_irqrestore(&r->lock, flags); } /********************************************************************** @@ -618,27 +631,33 @@ * **********************************************************************/ -static __u32 *batch_entropy_pool; -static int *batch_entropy_credit; -static int batch_max; +struct sample { + __u32 data[2]; + int credit; +}; + +static struct sample *batch_entropy_pool, *batch_entropy_copy; static int batch_head, batch_tail; +static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED; + +static int batch_max; static void batch_entropy_process(void *private_); static DECLARE_WORK(batch_work, batch_entropy_process, NULL); /* note: the size must be a power of 2 */ static int __init batch_entropy_init(int size, struct entropy_store *r) { - batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL); + batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL); if (!batch_entropy_pool) return -1; - batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL); - if (!batch_entropy_credit) { + batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL); + if (!batch_entropy_copy) { kfree(batch_entropy_pool); return -1; } batch_head = batch_tail = 0; - batch_max = size; batch_work.data = r; + batch_max = size; return 0; } @@ -650,27 +669,33 @@ */ void batch_entropy_store(u32 a, u32 b, int num) { - int new; + int new; + unsigned long flags; if (!batch_max) return; + + spin_lock_irqsave(&batch_lock, flags); - batch_entropy_pool[2*batch_head] = a; - batch_entropy_pool[(2*batch_head) + 1] = b; - batch_entropy_credit[batch_head] = num; + batch_entropy_pool[batch_head].data[0] = a; + batch_entropy_pool[batch_head].data[1] = b; + batch_entropy_pool[batch_head].credit = num; - new = (batch_head+1) & (batch_max-1); - if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) { + if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) { /* * Schedule it for the next timer 
tick: */ schedule_delayed_work(&batch_work, 1); - batch_head = new; - } else if (new == batch_tail) { + } + + new = (batch_head+1) & (batch_max-1); + if (new == batch_tail) { DEBUG_ENT("batch entropy buffer full\n"); } else { batch_head = new; } + + spin_unlock_irqrestore(&batch_lock, flags); } /* @@ -682,20 +707,34 @@ { struct entropy_store *r = (struct entropy_store *) private_, *p; int max_entropy = r->poolinfo.POOLBITS; + unsigned head, tail; - if (!batch_max) - return; + /* Mixing into the pool is expensive, so copy over the batch + * data and release the batch lock. The pool is at least half + * full, so don't worry too much about copying only the used + * part. + */ + spin_lock_irq(&batch_lock); + + memcpy(batch_entropy_copy, batch_entropy_pool, + batch_max*sizeof(struct sample)); + + head = batch_head; + tail = batch_tail; + batch_tail = batch_head; + + spin_unlock_irq(&batch_lock); p = r; - while (batch_head != batch_tail) { + while (head != tail) { if (r->entropy_count >= max_entropy) { r = (r == sec_random_state) ? random_state : sec_random_state; max_entropy = r->poolinfo.POOLBITS; } - add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2); - credit_entropy_store(r, batch_entropy_credit[batch_tail]); - batch_tail = (batch_tail+1) & (batch_max-1); + add_entropy_words(r, batch_entropy_copy[tail].data, 2); + credit_entropy_store(r, batch_entropy_copy[tail].credit); + tail = (tail+1) & (batch_max-1); } if (p->entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); @@ -1274,6 +1313,7 @@ ssize_t ret, i; __u32 tmp[TMP_BUF_SIZE]; __u32 x; + unsigned long cpuflags; add_timer_randomness(&extract_timer_state, nbytes); @@ -1284,6 +1324,9 @@ if (flags & EXTRACT_ENTROPY_SECONDARY) xfer_secondary_pool(r, nbytes, tmp); + /* Hold lock while accounting */ + spin_lock_irqsave(&r->lock, cpuflags); + DEBUG_ENT("%s has %d bits, want %d bits\n", r == sec_random_state ? "secondary" : r == random_state ? 
"primary" : "unknown", @@ -1298,6 +1341,8 @@ wake_up_interruptible(&random_write_wait); r->extract_count += nbytes; + + spin_unlock_irqrestore(&r->lock, cpuflags); ret = 0; while (nbytes) { @@ -1593,7 +1638,7 @@ random_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { - int *p, size, ent_count; + int *p, *tmp, size, ent_count; int retval; switch (cmd) { @@ -1619,18 +1664,40 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; p = (int *) arg; - ent_count = random_state->entropy_count; - if (put_user(ent_count, p++) || - get_user(size, p) || + if (get_user(size, p) || put_user(random_state->poolinfo.poolwords, p++)) - return -EFAULT; + goto fail; if (size < 0) - return -EINVAL; + goto fail; if (size > random_state->poolinfo.poolwords) size = random_state->poolinfo.poolwords; - if (copy_to_user(p, random_state->pool, size * sizeof(__u32))) - return -EFAULT; + + /* prepare to atomically snapshot pool */ + + tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL); + + if (!tmp) + goto fail; + + spin_lock(&random_state->lock); + ent_count = random_state->entropy_count; + memcpy(tmp, random_state->pool, size * sizeof(__u32)); + spin_unlock(&random_state->lock); + + if (!copy_to_user(p, tmp, size * sizeof(__u32))) { + kfree(tmp); + goto fail; + } + + kfree(tmp); + + if(put_user(ent_count, p++)) + goto fail; + return 0; + fail: + spin_unlock(&random_state->lock); + return -EFAULT; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- Matt Mackall : http://www.selenic.com : of or relating to the moon ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 19:33 ` Matt Mackall @ 2003-08-02 19:46 ` Andrew Morton 2003-08-02 20:14 ` Matt Mackall 0 siblings, 1 reply; 12+ messages in thread From: Andrew Morton @ 2003-08-02 19:46 UTC (permalink / raw) To: Matt Mackall; +Cc: linux-kernel Matt Mackall <mpm@selenic.com> wrote: > > + spin_lock(&random_state->lock); > + ent_count = random_state->entropy_count; > + memcpy(tmp, random_state->pool, size * sizeof(__u32)); > + spin_unlock(&random_state->lock); > + This needs to be spin_lock_irqsave(). > + if (!copy_to_user(p, tmp, size * sizeof(__u32))) { > + kfree(tmp); > + goto fail; > + } > + > + kfree(tmp); > + > + if(put_user(ent_count, p++)) > + goto fail; > + > return 0; > + fail: > + spin_unlock(&random_state->lock); Double unlock ;) ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] [1/2] random: SMP locking 2003-08-02 19:46 ` Andrew Morton @ 2003-08-02 20:14 ` Matt Mackall 0 siblings, 0 replies; 12+ messages in thread From: Matt Mackall @ 2003-08-02 20:14 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Sat, Aug 02, 2003 at 12:46:47PM -0700, Andrew Morton wrote: > Matt Mackall <mpm@selenic.com> wrote: > > > > + spin_lock(&random_state->lock); > > This needs to be spin_lock_irqsave(). > > > + fail: > > + spin_unlock(&random_state->lock); > > Double unlock ;) Damnit. Will you take a patch that just returns -ENOTTY here? Direct access to the pool is only useful for debugging the driver (or after you've broken root and want to do backtracking attacks) and hasn't been useful for that since the second pool was added (sometime in 1.3 IIRC) because you need to be able to snapshot both pools atomically. Anyway, fixed version. diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c --- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500 +++ work/drivers/char/random.c 2003-08-02 15:05:18.000000000 -0500 @@ -484,6 +484,7 @@ int extract_count; struct poolinfo poolinfo; __u32 *pool; + spinlock_t lock; }; /* @@ -522,6 +523,7 @@ return -ENOMEM; } memset(r->pool, 0, POOLBYTES); + r->lock = SPIN_LOCK_UNLOCKED; *ret_bucket = r; return 0; } @@ -563,6 +565,9 @@ int new_rotate; int wordmask = r->poolinfo.poolwords - 1; __u32 w; + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); while (nwords--) { w = rotate_left(r->input_rotate, *in++); @@ -587,6 +592,8 @@ w ^= r->pool[i]; r->pool[i] = (w >> 3) ^ twist_table[w & 7]; } + + spin_unlock_irqrestore(&r->lock, flags); } /* @@ -594,6 +601,10 @@ */ static void credit_entropy_store(struct entropy_store *r, int nbits) { + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + if (r->entropy_count + nbits < 0) { DEBUG_ENT("negative entropy/overflow (%d+%d)\n", r->entropy_count, nbits); @@ -608,6 +619,8 @@ r == random_state ? 
"primary" : "unknown", nbits, r->entropy_count); } + + spin_unlock_irqrestore(&r->lock, flags); } /********************************************************************** @@ -618,27 +631,33 @@ * **********************************************************************/ -static __u32 *batch_entropy_pool; -static int *batch_entropy_credit; -static int batch_max; +struct sample { + __u32 data[2]; + int credit; +}; + +static struct sample *batch_entropy_pool, *batch_entropy_copy; static int batch_head, batch_tail; +static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED; + +static int batch_max; static void batch_entropy_process(void *private_); static DECLARE_WORK(batch_work, batch_entropy_process, NULL); /* note: the size must be a power of 2 */ static int __init batch_entropy_init(int size, struct entropy_store *r) { - batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL); + batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL); if (!batch_entropy_pool) return -1; - batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL); - if (!batch_entropy_credit) { + batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL); + if (!batch_entropy_copy) { kfree(batch_entropy_pool); return -1; } batch_head = batch_tail = 0; - batch_max = size; batch_work.data = r; + batch_max = size; return 0; } @@ -650,27 +669,33 @@ */ void batch_entropy_store(u32 a, u32 b, int num) { - int new; + int new; + unsigned long flags; if (!batch_max) return; + + spin_lock_irqsave(&batch_lock, flags); - batch_entropy_pool[2*batch_head] = a; - batch_entropy_pool[(2*batch_head) + 1] = b; - batch_entropy_credit[batch_head] = num; + batch_entropy_pool[batch_head].data[0] = a; + batch_entropy_pool[batch_head].data[1] = b; + batch_entropy_pool[batch_head].credit = num; - new = (batch_head+1) & (batch_max-1); - if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) { + if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) { /* * Schedule it for the next timer 
tick: */ schedule_delayed_work(&batch_work, 1); - batch_head = new; - } else if (new == batch_tail) { + } + + new = (batch_head+1) & (batch_max-1); + if (new == batch_tail) { DEBUG_ENT("batch entropy buffer full\n"); } else { batch_head = new; } + + spin_unlock_irqrestore(&batch_lock, flags); } /* @@ -682,20 +707,34 @@ { struct entropy_store *r = (struct entropy_store *) private_, *p; int max_entropy = r->poolinfo.POOLBITS; + unsigned head, tail; - if (!batch_max) - return; + /* Mixing into the pool is expensive, so copy over the batch + * data and release the batch lock. The pool is at least half + * full, so don't worry too much about copying only the used + * part. + */ + spin_lock_irq(&batch_lock); + + memcpy(batch_entropy_copy, batch_entropy_pool, + batch_max*sizeof(struct sample)); + + head = batch_head; + tail = batch_tail; + batch_tail = batch_head; + + spin_unlock_irq(&batch_lock); p = r; - while (batch_head != batch_tail) { + while (head != tail) { if (r->entropy_count >= max_entropy) { r = (r == sec_random_state) ? random_state : sec_random_state; max_entropy = r->poolinfo.POOLBITS; } - add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2); - credit_entropy_store(r, batch_entropy_credit[batch_tail]); - batch_tail = (batch_tail+1) & (batch_max-1); + add_entropy_words(r, batch_entropy_copy[tail].data, 2); + credit_entropy_store(r, batch_entropy_copy[tail].credit); + tail = (tail+1) & (batch_max-1); } if (p->entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); @@ -1274,6 +1313,7 @@ ssize_t ret, i; __u32 tmp[TMP_BUF_SIZE]; __u32 x; + unsigned long cpuflags; add_timer_randomness(&extract_timer_state, nbytes); @@ -1284,6 +1324,9 @@ if (flags & EXTRACT_ENTROPY_SECONDARY) xfer_secondary_pool(r, nbytes, tmp); + /* Hold lock while accounting */ + spin_lock_irqsave(&r->lock, cpuflags); + DEBUG_ENT("%s has %d bits, want %d bits\n", r == sec_random_state ? "secondary" : r == random_state ? 
"primary" : "unknown", @@ -1298,6 +1341,8 @@ wake_up_interruptible(&random_write_wait); r->extract_count += nbytes; + + spin_unlock_irqrestore(&r->lock, cpuflags); ret = 0; while (nbytes) { @@ -1593,8 +1638,9 @@ random_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { - int *p, size, ent_count; + int *p, *tmp, size, ent_count; int retval; + unsigned long flags; switch (cmd) { case RNDGETENTCNT: @@ -1619,17 +1665,36 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; p = (int *) arg; - ent_count = random_state->entropy_count; - if (put_user(ent_count, p++) || - get_user(size, p) || + if (get_user(size, p) || put_user(random_state->poolinfo.poolwords, p++)) return -EFAULT; if (size < 0) - return -EINVAL; + return -EFAULT; if (size > random_state->poolinfo.poolwords) size = random_state->poolinfo.poolwords; - if (copy_to_user(p, random_state->pool, size * sizeof(__u32))) + + /* prepare to atomically snapshot pool */ + + tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL); + + if (!tmp) + return -EFAULT; + + spin_lock_irqsave(&random_state->lock, flags); + ent_count = random_state->entropy_count; + memcpy(tmp, random_state->pool, size * sizeof(__u32)); + spin_unlock_irqrestore(&random_state->lock, flags); + + if (!copy_to_user(p, tmp, size * sizeof(__u32))) { + kfree(tmp); return -EFAULT; + } + + kfree(tmp); + + if(put_user(ent_count, p++)) + return -EFAULT; + return 0; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) -- Matt Mackall : http://www.selenic.com : of or relating to the moon ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2003-08-02 20:15 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2003-08-02 4:24 [PATCH] [1/2] random: SMP locking Oliver Xymoron 2003-08-02 11:00 ` Andrew Morton 2003-08-02 12:35 ` Zwane Mwaikambo 2003-08-02 14:43 ` Matt Mackall 2003-08-02 15:02 ` Zwane Mwaikambo 2003-08-02 18:39 ` Andrew Morton 2003-08-02 18:42 ` Matt Mackall 2003-08-02 14:32 ` Matt Mackall 2003-08-02 19:00 ` Andrew Morton 2003-08-02 19:33 ` Matt Mackall 2003-08-02 19:46 ` Andrew Morton 2003-08-02 20:14 ` Matt Mackall
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).