linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fix for futex_wait signal stack corruption
@ 2007-12-04 20:57 Steven Rostedt
  2007-12-04 21:09 ` Linus Torvalds
  0 siblings, 1 reply; 15+ messages in thread
From: Steven Rostedt @ 2007-12-04 20:57 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner
  Cc: LKML, David Holmes - Sun Microsystems, Andrew Morton, Linus Torvalds


David Holmes found a bug in the RT patch with respect to
pthread_cond_timedwait. After trying his test program on the latest git
from mainline, I found the bug was there too.  The bug he was seeing
that his test program showed, was that if one were to do a "Ctrl-Z" on a
process that was in the pthread_cond_timedwait, and then did a "bg" on
that process, it would return with a "-ETIMEDOUT" but early. That is,
the timer would go off early.

Looking into this, I found the source of the problem. And it is a rather
nasty bug at that.

Here's the relevant code from kernel/futex.c: (not in order in the file)

[...]
smlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
                          struct timespec __user *utime, u32 __user *uaddr2,
                          u32 val3)
{
        struct timespec ts;
        ktime_t t, *tp = NULL;
        u32 val2 = 0;
        int cmd = op & FUTEX_CMD_MASK;

        if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
                if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
                        return -EFAULT;
                if (!timespec_valid(&ts))
                        return -EINVAL;

                t = timespec_to_ktime(ts);
                if (cmd == FUTEX_WAIT)
                        t = ktime_add(ktime_get(), t);
                tp = &t;
        }
[...]
        return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

[...]

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
{
        int ret;
        int cmd = op & FUTEX_CMD_MASK;
        struct rw_semaphore *fshared = NULL;

        if (!(op & FUTEX_PRIVATE_FLAG))
                fshared = &current->mm->mmap_sem;

        switch (cmd) {
        case FUTEX_WAIT:
                ret = futex_wait(uaddr, fshared, val, timeout);

[...]

static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                      u32 val, ktime_t *abs_time)
{
[...]
               struct restart_block *restart;
                restart = &current_thread_info()->restart_block;
                restart->fn = futex_wait_restart;
                restart->arg0 = (unsigned long)uaddr;
                restart->arg1 = (unsigned long)val;
                restart->arg2 = (unsigned long)abs_time;
                restart->arg3 = 0;
                if (fshared)
                        restart->arg3 |= ARG3_SHARED;
                return -ERESTART_RESTARTBLOCK;
[...]

static long futex_wait_restart(struct restart_block *restart)
{
        u32 __user *uaddr = (u32 __user *)restart->arg0;
        u32 val = (u32)restart->arg1;
        ktime_t *abs_time = (ktime_t *)restart->arg2;
        struct rw_semaphore *fshared = NULL;

        restart->fn = do_no_restart_syscall;
        if (restart->arg3 & ARG3_SHARED)
                fshared = &current->mm->mmap_sem;
        return (long)futex_wait(uaddr, fshared, val, abs_time);
}


So when the futex_wait is interrupt by a signal we break out of the
hrtimer code and set up or return from signal. This code does not return
back to userspace, so we set up a RESTARTBLOCK.  The bug here is that we
save the "abs_time" which is a pointer to the stack variable "ktime_t t"
from sys_futex.

This returns and unwinds the stack before we get to call our signal. On
return from the signal we go to futex_wait_restart, where we update all
the parameters for futex_wait and call it. But here we have a problem
where abs_time is no longer valid.

I verified this with print statements, and sure enough, what abs_time
was set to ends up being garbage when we get to futex_wait_restart.

The solution I did to solve this is to allocate a temporary buffer when
setting up the block and free it in futex_wait_restart. This patch
allows David's test program to actually pass.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/futex.c b/kernel/futex.c
index 9dc591a..74be1cb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1288,11 +1288,27 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 		return -ERESTARTSYS;
 	else {
 		struct restart_block *restart;
+		ktime_t *tmp_time = NULL;
+
+		/*
+		 * abs_time is on the stack and is not a parameter.
+		 * If we save it, it will be overridden on return and
+		 * what is sent to futex_wait_restart will be corrupted.
+		 */
+		if (abs_time) {
+			tmp_time = kmalloc(sizeof(*tmp_time), GFP_KERNEL);
+			if (unlikely(!tmp_time))
+				/* Ahh, what else can we do?? */
+				printk(KERN_WARNING
+				       "Can't allocate temp timer storage\n");
+			else
+				*tmp_time = *abs_time;
+		}
 		restart = &current_thread_info()->restart_block;
 		restart->fn = futex_wait_restart;
 		restart->arg0 = (unsigned long)uaddr;
 		restart->arg1 = (unsigned long)val;
-		restart->arg2 = (unsigned long)abs_time;
+		restart->arg2 = (unsigned long)tmp_time;
 		restart->arg3 = 0;
 		if (fshared)
 			restart->arg3 |= ARG3_SHARED;
@@ -1312,13 +1328,19 @@ static long futex_wait_restart(struct restart_block *restart)
 {
 	u32 __user *uaddr = (u32 __user *)restart->arg0;
 	u32 val = (u32)restart->arg1;
-	ktime_t *abs_time = (ktime_t *)restart->arg2;
+	ktime_t *tmp_time = (ktime_t *)restart->arg2;
+	ktime_t t;
 	struct rw_semaphore *fshared = NULL;
 
+	if (tmp_time) {
+		t = *tmp_time;
+		kfree(tmp_time);
+		tmp_time = &t;
+	}
 	restart->fn = do_no_restart_syscall;
 	if (restart->arg3 & ARG3_SHARED)
 		fshared = &current->mm->mmap_sem;
-	return (long)futex_wait(uaddr, fshared, val, abs_time);
+	return (long)futex_wait(uaddr, fshared, val, tmp_time);
 }
 
 



^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2007-12-05 10:31 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-04 20:57 [PATCH] fix for futex_wait signal stack corruption Steven Rostedt
2007-12-04 21:09 ` Linus Torvalds
2007-12-04 21:39   ` Steven Rostedt
2007-12-04 22:43     ` Linus Torvalds
2007-12-05  1:23       ` Steven Rostedt
2007-12-05  1:46         ` Linus Torvalds
2007-12-05  3:17           ` [PATCH -v2] " Steven Rostedt
2007-12-05  3:41             ` Linus Torvalds
2007-12-05  3:47               ` Randy Dunlap
2007-12-05  3:53                 ` Steven Rostedt
2007-12-05  5:33               ` David Holmes - Sun Microsystems
2007-12-05  6:06                 ` Linus Torvalds
2007-12-05  6:14                   ` David Holmes - Sun Microsystems
2007-12-05  5:54               ` Thomas Gleixner
2007-12-05 10:31                 ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).