From mboxrd@z Thu Jan 1 00:00:00 1970 From: Davidlohr Bueso Subject: [PATCH -next] ipc: make refcounter atomic (was Re: linux-next: Tree for Apr 23 [ Call-Traces: lib/debugobjects.c | kernel/rcupdate.c | kernel/rcutree.c ]) Date: Wed, 24 Apr 2013 15:16:13 -0700 Message-ID: <1366841773.1790.7.camel@buesod1.americas.hpqcorp.net> References: <1366741069.1802.12.camel@buesod1.americas.hpqcorp.net> <1366741394.1802.13.camel@buesod1.americas.hpqcorp.net> Mime-Version: 1.0 Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 7bit Return-path: Received: from g1t0027.austin.hp.com ([15.216.28.34]:19905 "EHLO g1t0027.austin.hp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758571Ab3DXWQP (ORCPT ); Wed, 24 Apr 2013 18:16:15 -0400 In-Reply-To: Sender: linux-next-owner@vger.kernel.org List-ID: To: Linus Torvalds Cc: Sedat Dilek , Rik van Riel , Andrew Morton , Paul McKenney , Paul McKenney , Emmanuel Benisty , linux-next , Linux Kernel Mailing List From: Davidlohr Bueso Sedat reported an issue leading to a NULL dereference in update_queue(): [ 178.490583] BUG: spinlock bad magic on CPU#1, sh/8066 [ 178.490595] lock: 0xffff88008b53ea18, .magic: 6b6b6b6b, .owner: make/8068, .owner_cpu: 3 [ 178.490599] BUG: unable to handle kernel NULL pointer dereference at (null) [ 178.490608] IP: [] update_queue+0x70/0x210 [ 178.490610] PGD 0 [ 178.490612] Oops: 0000 [#1] SMP ... [ 178.490704] Call Trace: [ 178.490710] [] do_smart_update+0xe1/0x140 [ 178.490713] [] exit_sem+0x2b1/0x350 [ 178.490718] [] do_exit+0x290/0xa70 [ 178.490721] [] do_group_exit+0x44/0xa0 [ 178.490724] [] SyS_exit_group+0x17/0x20 [ 178.490728] [] system_call_fastpath+0x1a/0x1f Linus pin-pointed the problem to a race in the reference counter. To quote: "That dmesg spew very much implies that the same RCU head got added twice to the RCU freeing list, and the only way that happens is if the refcount goes to zero twice. Which implies that either we increment a zero, or we lack locking and the coherency of the non-atomic access goes away." This patch converts the IPC RCU header's reference counter to atomic_t. The return of ipc_rcu_getref() is modified to inform the callers if it actually succeeded. Now all callers return -EIDRM upon failure and abort the current operation. Two exceptions are in semaphore code where sem_getref_and_unlock() and sem_getref() trigger a warning but proceed to freeing up any held locks. Signed-off-by: Davidlohr Bueso Suggested-by: Linus Torvalds CC: Rik van Riel CC: Paul McKenney CC: Sedat Dilek CC: Emmanuel Benisty CC: Andrew Morton --- ipc/msg.c | 7 ++++++- ipc/sem.c | 18 ++++++++++++------ ipc/util.c | 46 ++++++++++++++++++++++++---------------------- ipc/util.h | 2 +- 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 9d11955..2978721 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -668,7 +668,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, goto out_unlock_free; } ss_add(msq, &s); - ipc_rcu_getref(msq); + + if (!ipc_rcu_getref(msq)) { + err = -EIDRM; + goto out_unlock_free; + } + msg_unlock(msq); schedule(); diff --git a/ipc/sem.c b/ipc/sem.c index 92f4d0e..9a71b5a 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -460,7 +460,7 @@ static inline void sem_lock_and_putref(struct sem_array *sma) static inline void sem_getref_and_unlock(struct sem_array *sma) { - ipc_rcu_getref(sma); + WARN_ON_ONCE(!ipc_rcu_getref(sma)); sem_unlock(sma, -1); } @@ -476,7 +476,7 @@ static inline void sem_putref(struct sem_array *sma) static inline void sem_getref(struct sem_array *sma) { sem_lock(sma, NULL, -1); - ipc_rcu_getref(sma); + WARN_ON_ONCE(!ipc_rcu_getref(sma)); sem_unlock(sma, -1); } @@ -1275,7 +1275,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; struct sem_undo *un; - ipc_rcu_getref(sma); + if (!ipc_rcu_getref(sma)) { + rcu_read_unlock(); + return -EIDRM; + } rcu_read_unlock(); if(nsems > SEMMSL_FAST) { @@ -1544,8 +1547,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) struct sem_array *sma; struct sem_undo_list *ulp; struct sem_undo *un, *new; - int nsems; - int error; + int nsems, error; error = get_undo_list(&ulp); if (error) @@ -1567,7 +1569,11 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) } nsems = sma->sem_nsems; - ipc_rcu_getref(sma); + if (!ipc_rcu_getref(sma)) { + rcu_read_unlock(); + un = ERR_PTR(-EIDRM); + goto out; + } rcu_read_unlock(); /* step 2: allocate new undo structure */ diff --git a/ipc/util.c b/ipc/util.c index 18135bc..5e60ebd 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -439,9 +439,9 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) * NULL is returned if the allocation fails */ -void* ipc_alloc(int size) +void *ipc_alloc(int size) { - void* out; + void *out; if(size > PAGE_SIZE) out = vmalloc(size); else @@ -478,7 +478,7 @@ void ipc_free(void* ptr, int size) */ struct ipc_rcu_hdr { - int refcount; + atomic_t refcount; int is_vmalloc; void *data[0]; }; @@ -516,39 +516,41 @@ static inline int rcu_use_vmalloc(int size) * @size: size desired * * Allocate memory for the rcu header structure + the object. - * Returns the pointer to the object. - * NULL is returned if the allocation fails. + * Returns the pointer to the object or NULL upon failure. */ - -void* ipc_rcu_alloc(int size) +void *ipc_rcu_alloc(int size) { void* out; - /* + + /* * We prepend the allocation with the rcu struct, and - * workqueue if necessary (for vmalloc). + * workqueue if necessary (for vmalloc). */ if (rcu_use_vmalloc(size)) { out = vmalloc(HDRLEN_VMALLOC + size); - if (out) { - out += HDRLEN_VMALLOC; - container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; - container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; - } + if (!out) + goto done; + + out += HDRLEN_VMALLOC; + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; } else { out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); - if (out) { - out += HDRLEN_KMALLOC; - container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; - container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; - } + if (!out) + goto done; + + out += HDRLEN_KMALLOC; + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; } + /* set reference counter no matter what kind of allocation was done */ + atomic_set(&container_of(out, struct ipc_rcu_hdr, data)->refcount, 1); +done: return out; } -void ipc_rcu_getref(void *ptr) +int ipc_rcu_getref(void *ptr) { - container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; + return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount); } static void ipc_do_vfree(struct work_struct *work) @@ -578,7 +580,7 @@ static void ipc_schedule_free(struct rcu_head *head) void ipc_rcu_putref(void *ptr) { - if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) + if (!atomic_dec_and_test(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount)) return; if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { diff --git a/ipc/util.h b/ipc/util.h index c36b997..2b0bdd5 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -119,7 +119,7 @@ void ipc_free(void* ptr, int size); * to 0 schedules the rcu destruction. Caller must guarantee locking. */ void* ipc_rcu_alloc(int size); -void ipc_rcu_getref(void *ptr); +int ipc_rcu_getref(void *ptr); void ipc_rcu_putref(void *ptr); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); -- 1.7.11.7