linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
@ 2015-02-01 14:40 Benjamin LaHaise
  2015-02-01 21:01 ` Linus Torvalds
  0 siblings, 1 reply; 24+ messages in thread
From: Benjamin LaHaise @ 2015-02-01 14:40 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-aio, Linux Kernel

Hello Linus,

The following changes since commit 5f785de588735306ec4d7c875caf9d28481c8b21:

  aio: Skip timer for io_getevents if timeout=0 (2014-12-13 17:50:20 -0500)

are available in the git repository at:

  git://git.kvack.org/~bcrl/aio-fixes.git master

for you to fetch changes up to f84249f4cfef5ffa8a3e6d588a1d185f3f1fef45:

  fs/aio: fix sleeping while TASK_INTERRUPTIBLE (2015-01-30 11:18:36 -0500)

----------------------------------------------------------------
Chris Mason (1):
      fs/aio: fix sleeping while TASK_INTERRUPTIBLE

 fs/aio.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 93 insertions(+), 24 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 1b7893e..71b192a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1131,6 +1131,8 @@ EXPORT_SYMBOL(aio_complete);
 /* aio_read_events_ring
  *	Pull an event off of the ioctx's event ring.  Returns the number of
  *	events fetched
+ *
+ *	must be called with ctx->ring locked
  */
 static long aio_read_events_ring(struct kioctx *ctx,
 				 struct io_event __user *event, long nr)
@@ -1139,8 +1141,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	unsigned head, tail, pos;
 	long ret = 0;
 	int copy_ret;
-
-	mutex_lock(&ctx->ring_lock);
+	int running = current->state == TASK_RUNNING;
 
 	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
 	ring = kmap_atomic(ctx->ring_pages[0]);
@@ -1179,6 +1180,17 @@ static long aio_read_events_ring(struct kioctx *ctx,
 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
 		pos %= AIO_EVENTS_PER_PAGE;
 
+		/* we're called after calling prepare_to_wait, which means
+		 * our current state might not be TASK_RUNNING.  Set it
+		 * to running here to sleeps in kmap or copy_to_user don't
+		 * trigger warnings.  If we don't copy enough events out, we'll
+		 * loop through schedule() one time before sleeping.
+		 */
+		if (!running) {
+			__set_current_state(TASK_RUNNING);
+			running = 1;
+		}
+
 		ev = kmap(page);
 		copy_ret = copy_to_user(event + ret, ev + pos,
 					sizeof(*ev) * avail);
@@ -1201,11 +1213,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
 	pr_debug("%li  h%u t%u\n", ret, head, tail);
 out:
-	mutex_unlock(&ctx->ring_lock);
-
 	return ret;
 }
 
+/* must be called with ctx->ring locked */
 static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
 			    struct io_event __user *event, long *i)
 {
@@ -1223,6 +1234,73 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
 	return ret < 0 || *i >= min_nr;
 }
 
+/*
+ * called without ctx->ring_lock held
+ */
+static long aio_wait_events(struct kioctx *ctx, long min_nr, long nr,
+			    struct io_event __user *event,
+			    long *i_ret, ktime_t until)
+{
+	struct hrtimer_sleeper t;
+	long ret;
+	DEFINE_WAIT(wait);
+
+	mutex_lock(&ctx->ring_lock);
+
+	/*
+	 * this is an open coding of wait_event_interruptible_hrtimeout
+	 * so we can deal with the ctx->mutex nicely.  It starts with the
+	 * fast path for existing events:
+	 */
+	ret = aio_read_events(ctx, min_nr, nr, event, i_ret);
+	if (ret) {
+		mutex_unlock(&ctx->ring_lock);
+		return ret;
+	}
+
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init_sleeper(&t, current);
+	if (until.tv64 != KTIME_MAX) {
+		hrtimer_start_range_ns(&t.timer, until, current->timer_slack_ns,
+				      HRTIMER_MODE_REL);
+	}
+
+	while (1) {
+		ret = prepare_to_wait_event(&ctx->wait, &wait,
+					    TASK_INTERRUPTIBLE);
+		if (ret)
+			break;
+
+		ret = aio_read_events(ctx, min_nr, nr, event, i_ret);
+		if (ret)
+			break;
+
+		/* make sure we didn't timeout taking the mutex */
+		if (!t.task) {
+			ret = -ETIME;
+			break;
+		}
+
+		mutex_unlock(&ctx->ring_lock);
+		schedule();
+
+		if (!t.task) {
+			ret = -ETIME;
+			goto out;
+		}
+		mutex_lock(&ctx->ring_lock);
+
+	}
+	mutex_unlock(&ctx->ring_lock);
+
+out:
+	finish_wait(&ctx->wait, &wait);
+	hrtimer_cancel(&t.timer);
+	destroy_hrtimer_on_stack(&t.timer);
+	return ret;
+
+}
+
 static long read_events(struct kioctx *ctx, long min_nr, long nr,
 			struct io_event __user *event,
 			struct timespec __user *timeout)
@@ -1239,27 +1317,18 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 		until = timespec_to_ktime(ts);
 	}
 
-	/*
-	 * Note that aio_read_events() is being called as the conditional - i.e.
-	 * we're calling it after prepare_to_wait() has set task state to
-	 * TASK_INTERRUPTIBLE.
-	 *
-	 * But aio_read_events() can block, and if it blocks it's going to flip
-	 * the task state back to TASK_RUNNING.
-	 *
-	 * This should be ok, provided it doesn't flip the state back to
-	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
-	 * will only happen if the mutex_lock() call blocks, and we then find
-	 * the ringbuffer empty. So in practice we should be ok, but it's
-	 * something to be aware of when touching this code.
-	 */
-	if (until.tv64 == 0)
-		aio_read_events(ctx, min_nr, nr, event, &ret);
-	else
-		wait_event_interruptible_hrtimeout(ctx->wait,
-				aio_read_events(ctx, min_nr, nr, event, &ret),
-				until);
 
+	if (until.tv64 == 0) {
+		mutex_lock(&ctx->ring_lock);
+		aio_read_events(ctx, min_nr, nr, event, &ret);
+		mutex_unlock(&ctx->ring_lock);
+	} else {
+		/*
+		 * we're pitching the -ETIME and -ERESTARTSYS return values
+		 * here.  It'll turn into -EINTR? ick
+		 */
+		aio_wait_events(ctx, min_nr, nr, event, &ret, until);
+	}
 	if (!ret && signal_pending(current))
 		ret = -EINTR;
 
-- 
"Thought is the essence of where you are now."

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-01 14:40 [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE Benjamin LaHaise
@ 2015-02-01 21:01 ` Linus Torvalds
  2015-02-01 22:14   ` Benjamin LaHaise
  0 siblings, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2015-02-01 21:01 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-aio, Linux Kernel

On Sun, Feb 1, 2015 at 6:40 AM, Benjamin LaHaise <bcrl@kvack.org> wrote:
>
> Chris Mason (1):
>       fs/aio: fix sleeping while TASK_INTERRUPTIBLE

Ugh.

This patch is too ugly to live. As far as I can tell, this is another
case of people just mindlessly trying to make the warning go away,
rather than fixing anything in teh code itself. In fact, the code gets
less readable, and more hacky, with that insane "running" variable
that doesn't actually add anything to the logic, just makes the code
harder to read, and it's *very* non-obvious why this is done in the
first place.

If you want to shut up the warning without actually changing the code,
use sched_annotate_sleep(). The comment about why the nested sleep
isn't a problem ("sleeps in kmap or copy_to_user don't trigger
warnings: If we don't copy enough events out, we'll loop through
schedule() one time before sleeping").

I'm just about to push out a commit that makes
"sched_annotate_sleep()" do the right thing, and *not* set
TASK_RUNNING, but instead just disable the warning for that case.
Which makes all these games unnecessary. I'm just waiting for my
'allmodconfig' build to finish before I push it out. Just another
minute or two, I think.

I really detest debug code (or compiler warnings) that encourage
people to write code that is *worse* than the code that causes the
debug code or warning to trigger. It's fundamentally wrong when those
"fixes" actually  make the code less readable and maintainable in the
long run.

                         Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-01 21:01 ` Linus Torvalds
@ 2015-02-01 22:14   ` Benjamin LaHaise
  2015-02-01 23:09     ` Linus Torvalds
  2015-02-01 23:33     ` Linus Torvalds
  0 siblings, 2 replies; 24+ messages in thread
From: Benjamin LaHaise @ 2015-02-01 22:14 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-aio, Linux Kernel

On Sun, Feb 01, 2015 at 01:01:06PM -0800, Linus Torvalds wrote:
> On Sun, Feb 1, 2015 at 6:40 AM, Benjamin LaHaise <bcrl@kvack.org> wrote:
> >
> > Chris Mason (1):
> >       fs/aio: fix sleeping while TASK_INTERRUPTIBLE
> 
> Ugh.
> 
> This patch is too ugly to live. As far as I can tell, this is another
> case of people just mindlessly trying to make the warning go away,
> rather than fixing anything in teh code itself. In fact, the code gets
> less readable, and more hacky, with that insane "running" variable
> that doesn't actually add anything to the logic, just makes the code
> harder to read, and it's *very* non-obvious why this is done in the
> first place.
> 
> If you want to shut up the warning without actually changing the code,
> use sched_annotate_sleep(). The comment about why the nested sleep
> isn't a problem ("sleeps in kmap or copy_to_user don't trigger
> warnings: If we don't copy enough events out, we'll loop through
> schedule() one time before sleeping").

It's ugly, but it actually is revealing a bug.  Spurious wake ups caused 
by the task already being added to ctx->wait when calling into mutex_lock() 
could inadvertently cause things to go wrong.  I can envision there being  
code invoked that possibly expects a 1-1 relationship between sleeps and 
wake ups, which being on the additional wait queue might break.

> I'm just about to push out a commit that makes
> "sched_annotate_sleep()" do the right thing, and *not* set
> TASK_RUNNING, but instead just disable the warning for that case.
> Which makes all these games unnecessary. I'm just waiting for my
> 'allmodconfig' build to finish before I push it out. Just another
> minute or two, I think.
> 
> I really detest debug code (or compiler warnings) that encourage
> people to write code that is *worse* than the code that causes the
> debug code or warning to trigger. It's fundamentally wrong when those
> "fixes" actually  make the code less readable and maintainable in the
> long run.

I think in this case the debug code reveals an actual bug.  I looked at 
other ways to fix it, and after a few attempts, Chris Mason's solution was 
the least-bad.  An alternative approach would be to go back to making 
ctx->ring_lock back into a spinlock, but it ends up being just as much 
(or even more) code churn.

>                          Linus

		-ben
-- 
"Thought is the essence of where you are now."

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-01 22:14   ` Benjamin LaHaise
@ 2015-02-01 23:09     ` Linus Torvalds
  2015-02-01 23:33     ` Linus Torvalds
  1 sibling, 0 replies; 24+ messages in thread
From: Linus Torvalds @ 2015-02-01 23:09 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-aio, Linux Kernel

On Sun, Feb 1, 2015 at 2:14 PM, Benjamin LaHaise <bcrl@kvack.org> wrote:
>
> It's ugly, but it actually is revealing a bug.

If so, that patch is *still* not the right thing to do. Leave the
warning. It's better than horrible crap code that doesn't warn, but
also doesn't make any *sense*.

Because code like this is just crap:

+       int running = current->state == TASK_RUNNING;

really. It's just crazy.

It makes no sense. It's all just avoiding the warning, it's not making
the code any better. In fact, it's actively making it worse, because
now the code is literally designed to give random different behavior
depending on timing.


For all I know, there might be multiple concurrent waiters etc that
got woken up on the same wait-queue, afaik, so the "am I runnable"
question is fundamentally nonsensical, since it isn't necessarily the
same as testing the state of the ring that we're waiting for anyway.
And if it *is* the same thing (because maybe the locking does end up
guaranteeing that "running" here ends up always matching some
particular state of the aio ring), then it should still be rewritten
in terms of that ring state, not some random "am I runnable" question.

So *clearly* the code has explicitly been written for the debug test,
not to make sense. Seriously. It's the only possible reason for why it
would test "current->state".

If the code relies on some 1:1 relationship with the wakeups
(*despite* having comments currently explicitly saying that it
doesn't), then use the new "wait_woken()" interface instead, which
actually does exactly that. That one doesn't test some random state,
it tests "was I explicitly woken". Then the whole "oh, but the mutex
changed the task state" doesn't even *matter*.

Which may in the end be the same thing in practice, but at least the
code makes *sense*. In ways that that "current->state == TASK_RUNNING"
test does not.

                         Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-01 22:14   ` Benjamin LaHaise
  2015-02-01 23:09     ` Linus Torvalds
@ 2015-02-01 23:33     ` Linus Torvalds
  2015-02-02  0:16       ` Benjamin LaHaise
  1 sibling, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2015-02-01 23:33 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-aio, Linux Kernel

On Sun, Feb 1, 2015 at 2:14 PM, Benjamin LaHaise <bcrl@kvack.org> wrote:
>
> It's ugly, but it actually is revealing a bug.  Spurious wake ups caused
> by the task already being added to ctx->wait when calling into mutex_lock()
> could inadvertently cause things to go wrong.  I can envision there being
> code invoked that possibly expects a 1-1 relationship between sleeps and
> wake ups, which being on the additional wait queue might break.

So I'm looking at it, and I don't see it.

One side uses wait_event_interruptible_hrtimeout(), which waits for
the return value (or the timeout), and it doesn't matter how many
times it gets woken up, regardless of what it's waiting for. If it
gets extra wakeups, it will just go through the loop again.

The other side is just a plain aio_read_events() ->
aio_read_events_ring(), and that one just reads as many events as it
can, unless some error happens.

In other words, it really looks like the warning is spurious, and the
comments about how the semaphore could cause it to loop around but it
all works look entirely correct.

So no, I don't see it revealing a bug at all. All I see is a spurious warning.

What's the bug you think could happen?

                         Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-01 23:33     ` Linus Torvalds
@ 2015-02-02  0:16       ` Benjamin LaHaise
  2015-02-02  1:18         ` Linus Torvalds
  0 siblings, 1 reply; 24+ messages in thread
From: Benjamin LaHaise @ 2015-02-02  0:16 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-aio, Linux Kernel

On Sun, Feb 01, 2015 at 03:33:25PM -0800, Linus Torvalds wrote:
> On Sun, Feb 1, 2015 at 2:14 PM, Benjamin LaHaise <bcrl@kvack.org> wrote:
> >
> > It's ugly, but it actually is revealing a bug.  Spurious wake ups caused
> > by the task already being added to ctx->wait when calling into mutex_lock()
> > could inadvertently cause things to go wrong.  I can envision there being
> > code invoked that possibly expects a 1-1 relationship between sleeps and
> > wake ups, which being on the additional wait queue might break.
> 
> So I'm looking at it, and I don't see it.
> 
> One side uses wait_event_interruptible_hrtimeout(), which waits for
> the return value (or the timeout), and it doesn't matter how many
> times it gets woken up, regardless of what it's waiting for. If it
> gets extra wakeups, it will just go through the loop again.
> 
> The other side is just a plain aio_read_events() ->
> aio_read_events_ring(), and that one just reads as many events as it
> can, unless some error happens.
> 
> In other words, it really looks like the warning is spurious, and the
> comments about how the semaphore could cause it to loop around but it
> all works look entirely correct.
> 
> So no, I don't see it revealing a bug at all. All I see is a spurious warning.
> 
> What's the bug you think could happen?

The bug would be in code that gets run via mutex_lock(), kmap(), or (more 
likely) in the random mm or filesystem code that could be invoked via 
copy_to_user().

If someone has code that works like the following inside of, say, a filesystem 
that's doing i/o somewhere:

static void foo_done(struct foo *foo)
{
	/* stuff err somewhere */
	wake_up(foo->task);
}

void some_random_code_in_some_fs()
{
	struct foo *foo;

	/* setup the foo */
	foo->task = current;
	set_current_state(TASK_UNINTERRUPTIBLE);
	/* send the foo on to do some other work */
	schedule();
	/* foo_done should have been called at this point. */
}

When the task in question can receive spurious wake ups, there is the 
possibility that schedule() ends up returning without foo_done() having 
been called, which is not the case normally (that is, there should be a 
one to one relationship between the wake_up and schedule returning in 
this case).  While I don't immediately see any code that relies on this, 
I'm not convinced that every possible code path that can be invoked 
(especially via copy_to_user()) does not rely on these semantics.  Maybe 
I'm just being paranoid, but this is one of the concerns I raised when 
this issue came forth.  Nobody has addressed it yet, though.

		-ben

>                          Linus

-- 
"Thought is the essence of where you are now."

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-02  0:16       ` Benjamin LaHaise
@ 2015-02-02  1:18         ` Linus Torvalds
  2015-02-02  5:29           ` Dave Chinner
  2015-02-03 11:27           ` Peter Zijlstra
  0 siblings, 2 replies; 24+ messages in thread
From: Linus Torvalds @ 2015-02-02  1:18 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-aio, Linux Kernel

On Sun, Feb 1, 2015 at 4:16 PM, Benjamin LaHaise <bcrl@kvack.org> wrote:
>>
>> What's the bug you think could happen?
>
> The bug would be in code that gets run via mutex_lock(), kmap(), or (more
> likely) in the random mm or filesystem code that could be invoked via
> copy_to_user().

Ahh. That would be a bug, yes, but it wouldn't be one in the aio code.

If somebody just does a "schedule()" and thinks that their own private
events are the only thing that can wake it up, and doesn't use one of
the millions of "wait_event_xyz()" variations to actually wait for the
real completion, that is just buggy. Always. Always has been.

So I wouldn't worry too much about it. It has never been correct to do
that, and it's not one of the standard patterns for sleeping anyway.
Which is not to say that it might not exist in some dank corner of the
kernel, of course, but you shouldn't write code trying to make buggy
code like that happy. If we ever find code like that, let's just fix
it where the bug exists, not try to write odd code in places where it
isn't.

And I'd actually be a bit surprised to see that kind of really broken
code. You really almost have to work at it. All our normal "sleep
until X" patterns are much more obvious, and it's just _simpler_ to do
the right thing with "wait_event()" than to mis-code an explicit "set
task state and then just schedule without actually checking the thing
you are waiting for".

                         Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-02  1:18         ` Linus Torvalds
@ 2015-02-02  5:29           ` Dave Chinner
       [not found]             ` <CA+55aFwvEcq-rAbqF2qTut=kJgFZZnhHptoPi6FSVrF4+1tBNA@mail.gmail.com>
  2015-02-03 11:27           ` Peter Zijlstra
  1 sibling, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2015-02-02  5:29 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Benjamin LaHaise, linux-aio, Linux Kernel

On Sun, Feb 01, 2015 at 05:18:17PM -0800, Linus Torvalds wrote:
> On Sun, Feb 1, 2015 at 4:16 PM, Benjamin LaHaise <bcrl@kvack.org> wrote:
> >>
> >> What's the bug you think could happen?
> >
> > The bug would be in code that gets run via mutex_lock(), kmap(), or (more
> > likely) in the random mm or filesystem code that could be invoked via
> > copy_to_user().
> 
> Ahh. That would be a bug, yes, but it wouldn't be one in the aio code.
> 
> If somebody just does a "schedule()" and thinks that their own private
> events are the only thing that can wake it up, and doesn't use one of
> the millions of "wait_event_xyz()" variations to actually wait for the
> real completion, that is just buggy. Always. Always has been.
> 
> So I wouldn't worry too much about it. It has never been correct to do
> that, and it's not one of the standard patterns for sleeping anyway.
> Which is not to say that it might not exist in some dank corner of the
> kernel, of course, but you shouldn't write code trying to make buggy
> code like that happy. If we ever find code like that, let's just fix
> it where the bug exists, not try to write odd code in places where it
> isn't.
> 
> And I'd actually be a bit surprised to see that kind of really broken
> code. You really almost have to work at it. All our normal "sleep
> until X" patterns are much more obvious, and it's just _simpler_ to do
> the right thing with "wait_event()" than to mis-code an explicit "set
> task state and then just schedule without actually checking the thing
> you are waiting for".

So what's the outcome here? I'm running v3.19-rc7 kernel and
xfstests::generic/036 is still tripping this warning from the aio
code:

[   23.920785] run fstests generic/036 at 2015-02-02 16:13:01
[   24.168001] xfs_io (4814) used greatest stack depth: 11640 bytes left
[   24.187061] ------------[ cut here ]------------
[   24.187071] WARNING: CPU: 1 PID: 4820 at kernel/sched/core.c:7300 __might_sleep+0x7f/0x90()
[   24.187076] do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810d85a3>] prepare_to_wait_event+0x63/0x110
[   24.187078] Modules linked in:
[   24.187080] CPU: 1 PID: 4820 Comm: aio-dio-fcntl-r Not tainted 3.19.0-rc7-dgc+ #706
[   24.187081] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[   24.187084]  ffffffff821c0372 ffff8800b84d3cd8 ffffffff81daf2bd 0000000000008c8c
[   24.187085]  ffff8800b84d3d28 ffff8800b84d3d18 ffffffff8109beda ffff8800b84d3cf8
[   24.187087]  ffffffff821c115e 0000000000000061 0000000000000000 00007fff38e95180
[   24.187087] Call Trace:
[   24.187093]  [<ffffffff81daf2bd>] dump_stack+0x4c/0x65
[   24.187096]  [<ffffffff8109beda>] warn_slowpath_common+0x8a/0xc0
[   24.187099]  [<ffffffff8109bf56>] warn_slowpath_fmt+0x46/0x50
[   24.187101]  [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
[   24.187103]  [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
[   24.187104]  [<ffffffff810bdfcf>] __might_sleep+0x7f/0x90
[   24.187107]  [<ffffffff81db8344>] mutex_lock+0x24/0x45
[   24.187111]  [<ffffffff81216b7c>] aio_read_events+0x4c/0x290
[   24.187113]  [<ffffffff81216fac>] read_events+0x1ec/0x220
[   24.187115]  [<ffffffff810d8650>] ? prepare_to_wait_event+0x110/0x110
[   24.187119]  [<ffffffff810fdb10>] ? hrtimer_get_res+0x50/0x50
[   24.187121]  [<ffffffff8121899d>] SyS_io_getevents+0x4d/0xb0
[   24.187124]  [<ffffffff81130b16>] ? __audit_syscall_exit+0x236/0x2e0
[   24.187127]  [<ffffffff81dba5a9>] system_call_fastpath+0x12/0x17
[   24.187128] ---[ end trace 9f078e8e19f765dd ]---

This test does a aio write loop with a single outstanding
event. i.e:

	while (1) {
		io_prep_pwrite(&iocb, fd, buf, BUF_SIZE, BUF_SIZE);
		err = io_submit(ctx, 1, iocbs);
		if (err)
			.....
		err = io_getevents(ctx, 1, 1, &ev, NULL);
		.....
	}

The kernel should not be throwing warnings on basic regression
tests, so something needs to be fixed...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
       [not found]             ` <CA+55aFwvEcq-rAbqF2qTut=kJgFZZnhHptoPi6FSVrF4+1tBNA@mail.gmail.com>
@ 2015-02-02  5:54               ` Dave Chinner
  2015-02-02 18:45                 ` Linus Torvalds
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2015-02-02  5:54 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-aio, Kernel Mailing List, Benjamin LaHaise

On Sun, Feb 01, 2015 at 09:36:52PM -0800, Linus Torvalds wrote:
> On Feb 1, 2015 9:29 PM, "Dave Chinner" <david@fromorbit.com> wrote:
> >
> > So what's the outcome here? I'm running v3.19-rc7 kernel and
> > xfstests::generic/036 is still tripping this warning from the aio
> > code:
> 
> So for the aio case, I suspect just adding a sched_annotate_sleep() is the
> solution. But considering that there are apparently some other cases of
> this warning that are nobody seems to bother fixing, it may just be that
> I'll have to disable the warning entirely for 3.19 (and then maybe
> re-enable it during the merge window and see if that makes anybody care
> again)

Simple enough - the patch below removes the warning from generic/036
for me.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


aio: annotate aio_read_event_ring for sleep patterns

From: Dave Chinner <dchinner@redhat.com>

Under CONFIG_DEBUG_ATOMIC_SLEEP=y, aio_read_event_ring() will throw
warnings like the following due to being called from wait_event
context:

 WARNING: CPU: 0 PID: 16006 at kernel/sched/core.c:7300 __might_sleep+0x7f/0x90()
 do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810d85a3>] prepare_to_wait_event+0x63/0x110
 Modules linked in:
 CPU: 0 PID: 16006 Comm: aio-dio-fcntl-r Not tainted 3.19.0-rc6-dgc+ #705
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  ffffffff821c0372 ffff88003c117cd8 ffffffff81daf2bd 000000000000d8d8
  ffff88003c117d28 ffff88003c117d18 ffffffff8109beda ffff88003c117cf8
  ffffffff821c115e 0000000000000061 0000000000000000 00007ffffe4aa300
 Call Trace:
  [<ffffffff81daf2bd>] dump_stack+0x4c/0x65
  [<ffffffff8109beda>] warn_slowpath_common+0x8a/0xc0
  [<ffffffff8109bf56>] warn_slowpath_fmt+0x46/0x50
  [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
  [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
  [<ffffffff810bdfcf>] __might_sleep+0x7f/0x90
  [<ffffffff81db8344>] mutex_lock+0x24/0x45
  [<ffffffff81216b7c>] aio_read_events+0x4c/0x290
  [<ffffffff81216fac>] read_events+0x1ec/0x220
  [<ffffffff810d8650>] ? prepare_to_wait_event+0x110/0x110
  [<ffffffff810fdb10>] ? hrtimer_get_res+0x50/0x50
  [<ffffffff8121899d>] SyS_io_getevents+0x4d/0xb0
  [<ffffffff81dba5a9>] system_call_fastpath+0x12/0x17
 ---[ end trace bde69eaf655a4fea ]---

There is not actually a bug here, so annotate the code to tell the
debug logic that everything is just fine and not to fire a false
positive.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/aio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/aio.c b/fs/aio.c
index 1b7893e..3176d6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1140,6 +1140,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	long ret = 0;
 	int copy_ret;
 
+	sched_annotate_sleep();
 	mutex_lock(&ctx->ring_lock);
 
 	/* Access to ->ring_pages here is protected by ctx->ring_lock. */

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-02  5:54               ` Dave Chinner
@ 2015-02-02 18:45                 ` Linus Torvalds
  2015-02-03 22:23                   ` Dave Chinner
  0 siblings, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2015-02-02 18:45 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-aio, Kernel Mailing List, Benjamin LaHaise

On Sun, Feb 1, 2015 at 9:54 PM, Dave Chinner <david@fromorbit.com> wrote:
>
> Simple enough - the patch below removes the warning from generic/036
> for me.

So because this is a debugging thing, I'd actually prefer these
"sched_annotate_sleep()" calls to always come with short  comments in
code why they exist and why they are fine.

In this case, it might be as simple as

 "If the mutex blocks and wakes us up, the loop in
wait_event_interruptible_hrtimeout() will just schedule without
sleeping and repeat. The ting-lock doesn't block often enough for this
to be a performance issue".

or perhaps just point to the comment in read_events().

                          Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-02  1:18         ` Linus Torvalds
  2015-02-02  5:29           ` Dave Chinner
@ 2015-02-03 11:27           ` Peter Zijlstra
  2015-02-03 11:33             ` Peter Zijlstra
  2015-02-03 12:25             ` [PATCH] iommu/amd: Fix amd_iommu_free_device() Peter Zijlstra
  1 sibling, 2 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-02-03 11:27 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Benjamin LaHaise, linux-aio, Linux Kernel

On Sun, Feb 01, 2015 at 05:18:17PM -0800, Linus Torvalds wrote:
> Ahh. That would be a bug, yes, but it wouldn't be one in the aio code.
> 
> If somebody just does a "schedule()" and thinks that their own private
> events are the only thing that can wake it up, and doesn't use one of
> the millions of "wait_event_xyz()" variations to actually wait for the
> real completion, that is just buggy. Always. Always has been.
> 
> So I wouldn't worry too much about it. It has never been correct to do
> that, and it's not one of the standard patterns for sleeping anyway.
> Which is not to say that it might not exist in some dank corner of the
> kernel, of course, but you shouldn't write code trying to make buggy
> code like that happy. If we ever find code like that, let's just fix
> it where the bug exists, not try to write odd code in places where it
> isn't.
> 
> And I'd actually be a bit surprised to see that kind of really broken
> code. You really almost have to work at it. All our normal "sleep
> until X" patterns are much more obvious, and it's just _simpler_ to do
> the right thing with "wait_event()" than to mis-code an explicit "set
> task state and then just schedule without actually checking the thing
> you are waiting for".

block/bsg.c-    prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
block/bsg.c-    spin_unlock_irq(&bd->lock);
block/bsg.c:    io_schedule();
block/bsg.c-    finish_wait(&bd->wq_done, &wait);

Which is double buggy because:
 1) it doesn't loop
 2) it sets TASK_UNINTERRUPTIBLE _after_ testing for the sleep event.


drivers/iommu/amd_iommu_v2.c-static void put_device_state_wait(struct device_state *dev_state)
drivers/iommu/amd_iommu_v2.c-{
drivers/iommu/amd_iommu_v2.c-   DEFINE_WAIT(wait);
drivers/iommu/amd_iommu_v2.c-
drivers/iommu/amd_iommu_v2.c-   prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
drivers/iommu/amd_iommu_v2.c-   if (!atomic_dec_and_test(&dev_state->count))
drivers/iommu/amd_iommu_v2.c:           schedule();
drivers/iommu/amd_iommu_v2.c-   finish_wait(&dev_state->wq, &wait);
drivers/iommu/amd_iommu_v2.c-
drivers/iommu/amd_iommu_v2.c-   free_device_state(dev_state);
drivers/iommu/amd_iommu_v2.c-}

No loop...

drivers/md/dm-bufio.c-static void __wait_for_free_buffer(struct dm_bufio_client *c)
drivers/md/dm-bufio.c-{
drivers/md/dm-bufio.c-  DECLARE_WAITQUEUE(wait, current);
drivers/md/dm-bufio.c-
drivers/md/dm-bufio.c-  add_wait_queue(&c->free_buffer_wait, &wait);
drivers/md/dm-bufio.c-  set_task_state(current, TASK_UNINTERRUPTIBLE);
drivers/md/dm-bufio.c-  dm_bufio_unlock(c);
drivers/md/dm-bufio.c-
drivers/md/dm-bufio.c:  io_schedule();
drivers/md/dm-bufio.c-
drivers/md/dm-bufio.c-  remove_wait_queue(&c->free_buffer_wait, &wait);
drivers/md/dm-bufio.c-
drivers/md/dm-bufio.c-  dm_bufio_lock(c);
drivers/md/dm-bufio.c-}


No loop..

And I'm sure there's a _waaay_ more. And the above examples are all in
relatively well maintained code afaik.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-03 11:27           ` Peter Zijlstra
@ 2015-02-03 11:33             ` Peter Zijlstra
  2015-02-03 11:55               ` Peter Zijlstra
  2015-02-03 12:25             ` [PATCH] iommu/amd: Fix amd_iommu_free_device() Peter Zijlstra
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-02-03 11:33 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Benjamin LaHaise, linux-aio, Linux Kernel

On Tue, Feb 03, 2015 at 12:27:33PM +0100, Peter Zijlstra wrote:
> On Sun, Feb 01, 2015 at 05:18:17PM -0800, Linus Torvalds wrote:
> > Ahh. That would be a bug, yes, but it wouldn't be one in the aio code.
> > 
> > If somebody just does a "schedule()" and thinks that their own private
> > events are the only thing that can wake it up, and doesn't use one of
> > the millions of "wait_event_xyz()" variations to actually wait for the
> > real completion, that is just buggy. Always. Always has been.
> > 
> > So I wouldn't worry too much about it. It has never been correct to do
> > that, and it's not one of the standard patterns for sleeping anyway.
> > Which is not to say that it might not exist in some dank corner of the
> > kernel, of course, but you shouldn't write code trying to make buggy
> > code like that happy. If we ever find code like that, let's just fix
> > it where the bug exists, not try to write odd code in places where it
> > isn't.
> > 
> > And I'd actually be a bit surprised to see that kind of really broken
> > code. You really almost have to work at it. All our normal "sleep
> > until X" patterns are much more obvious, and it's just _simpler_ to do
> > the right thing with "wait_event()" than to mis-code an explicit "set
> > task state and then just schedule without actually checking the thing
> > you are waiting for".
> 
> block/bsg.c-    prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
> block/bsg.c-    spin_unlock_irq(&bd->lock);
> block/bsg.c:    io_schedule();
> block/bsg.c-    finish_wait(&bd->wq_done, &wait);
> 
> Which is double buggy because:
>  1) it doesn't loop
>  2) it sets TASK_UNINTERRUPTIBLE _after_ testing for the sleep event.

OK, actually had a look at this one; it might be ok.

The spinlock might fully serialize the state so no fails, and the entire
function is called in a loop. Still seriously obtuse code.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-03 11:33             ` Peter Zijlstra
@ 2015-02-03 11:55               ` Peter Zijlstra
  2015-02-03 23:24                 ` Jens Axboe
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-02-03 11:55 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Benjamin LaHaise, linux-aio, Linux Kernel, Jens Axboe

On Tue, Feb 03, 2015 at 12:33:48PM +0100, Peter Zijlstra wrote:
> > block/bsg.c-    prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
> > block/bsg.c-    spin_unlock_irq(&bd->lock);
> > block/bsg.c:    io_schedule();
> > block/bsg.c-    finish_wait(&bd->wq_done, &wait);
> > 
> > Which is double buggy because:
> >  1) it doesn't loop
> >  2) it sets TASK_UNINTERRUPTIBLE _after_ testing for the sleep event.
> 
> OK, actually had a look at this one; it might be ok.
> 
> The spinlock might fully serialize the state so no fails, and the entire
> function is called in a loop. Still seriously obtuse code.

Jens, would something like the below work for you?

---
 block/bsg.c          | 72 ++++++++++++++++++----------------------------------
 include/linux/wait.h | 15 +++++++++++
 2 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index 276e869e686c..d214e929ce18 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -136,42 +136,6 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
 	return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
 }
 
-static int bsg_io_schedule(struct bsg_device *bd)
-{
-	DEFINE_WAIT(wait);
-	int ret = 0;
-
-	spin_lock_irq(&bd->lock);
-
-	BUG_ON(bd->done_cmds > bd->queued_cmds);
-
-	/*
-	 * -ENOSPC or -ENODATA?  I'm going for -ENODATA, meaning "I have no
-	 * work to do", even though we return -ENOSPC after this same test
-	 * during bsg_write() -- there, it means our buffer can't have more
-	 * bsg_commands added to it, thus has no space left.
-	 */
-	if (bd->done_cmds == bd->queued_cmds) {
-		ret = -ENODATA;
-		goto unlock;
-	}
-
-	if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
-		ret = -EAGAIN;
-		goto unlock;
-	}
-
-	prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
-	spin_unlock_irq(&bd->lock);
-	io_schedule();
-	finish_wait(&bd->wq_done, &wait);
-
-	return ret;
-unlock:
-	spin_unlock_irq(&bd->lock);
-	return ret;
-}
-
 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 				struct sg_io_v4 *hdr, struct bsg_device *bd,
 				fmode_t has_write_perm)
@@ -482,6 +446,30 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	return ret;
 }
 
+static bool bsg_complete(struct bsg_device *bd)
+{
+	bool ret = false;
+	bool spin;
+
+	do {
+		spin_lock_irq(&bd->lock);
+
+		BUG_ON(bd->done_cmds > bd->queued_cmds);
+
+		/*
+		 * All commands consumed.
+		 */
+		if (bd->done_cmds == bd->queued_cmds)
+			ret = true;
+
+		spin = !test_bit(BSG_F_BLOCK, &bd->flags);
+
+		spin_unlock_irq(&bd->lock);
+	} while (!ret && spin);
+
+	return ret;
+}
+
 static int bsg_complete_all_commands(struct bsg_device *bd)
 {
 	struct bsg_command *bc;
@@ -492,17 +480,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd)
 	/*
 	 * wait for all commands to complete
 	 */
-	ret = 0;
-	do {
-		ret = bsg_io_schedule(bd);
-		/*
-		 * look for -ENODATA specifically -- we'll sometimes get
-		 * -ERESTARTSYS when we've taken a signal, but we can't
-		 * return until we're done freeing the queue, so ignore
-		 * it.  The signal will get handled when we're done freeing
-		 * the bsg_device.
-		 */
-	} while (ret != -ENODATA);
+	io_wait_event(bd->wq_done, bsg_complete(bd));
 
 	/*
 	 * discard done commands
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2232ed16635a..71fc1d31e48d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -267,6 +267,21 @@ do {									\
 	__wait_event(wq, condition);					\
 } while (0)
 
+#define __io_wait_event(wq, condition)					\
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    io_schedule())
+
+/*
+ * io_wait_event() -- like wait_event() but with io_schedule()
+ */
+#define io_wait_event(wq, condition)					\
+do {									\
+	might_sleep();							\
+	if (condition)							\
+		break;							\
+	__io_wait_event(wq, condition);					\
+} while (0)
+
 #define __wait_event_freezable(wq, condition)				\
 	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			    schedule(); try_to_freeze())

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH] iommu/amd: Fix amd_iommu_free_device()
  2015-02-03 11:27           ` Peter Zijlstra
  2015-02-03 11:33             ` Peter Zijlstra
@ 2015-02-03 12:25             ` Peter Zijlstra
  2015-02-03 17:04               ` Jesse Barnes
                                 ` (2 more replies)
  1 sibling, 3 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-02-03 12:25 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Benjamin LaHaise, linux-aio, Linux Kernel, Joerg Roedel, Jesse Barnes

On Tue, Feb 03, 2015 at 12:27:33PM +0100, Peter Zijlstra wrote:
> drivers/iommu/amd_iommu_v2.c-static void put_device_state_wait(struct device_state *dev_state)
> drivers/iommu/amd_iommu_v2.c-{
> drivers/iommu/amd_iommu_v2.c-   DEFINE_WAIT(wait);
> drivers/iommu/amd_iommu_v2.c-
> drivers/iommu/amd_iommu_v2.c-   prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
> drivers/iommu/amd_iommu_v2.c-   if (!atomic_dec_and_test(&dev_state->count))
> drivers/iommu/amd_iommu_v2.c:           schedule();
> drivers/iommu/amd_iommu_v2.c-   finish_wait(&dev_state->wq, &wait);
> drivers/iommu/amd_iommu_v2.c-
> drivers/iommu/amd_iommu_v2.c-   free_device_state(dev_state);
> drivers/iommu/amd_iommu_v2.c-}
> 
> No loop...

Jesse, any objections to this?

---
Subject: iommu/amd: Fix amd_iommu_free_device()

put_device_state_wait() doesn't loop on the condition and a spurious
wakeup will have it free the device state even though there might still
be references out to it.

Fix this by using 'normal' wait primitives.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/iommu/amd_iommu_v2.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 90f70d0e1141..b6398d7285f7 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -151,18 +151,6 @@ static void put_device_state(struct device_state *dev_state)
 		wake_up(&dev_state->wq);
 }
 
-static void put_device_state_wait(struct device_state *dev_state)
-{
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
-	if (!atomic_dec_and_test(&dev_state->count))
-		schedule();
-	finish_wait(&dev_state->wq, &wait);
-
-	free_device_state(dev_state);
-}
-
 /* Must be called under dev_state->lock */
 static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
 						  int pasid, bool alloc)
@@ -851,7 +839,13 @@ void amd_iommu_free_device(struct pci_dev *pdev)
 	/* Get rid of any remaining pasid states */
 	free_pasid_states(dev_state);
 
-	put_device_state_wait(dev_state);
+	put_device_state(dev_state);
+	/*
+	 * Wait until the last reference is dropped before freeing
+	 * the device state.
+	 */
+	wait_event(dev_state->wq, !atomic_read(&dev_state->count));
+	free_device_state(dev_state);
 }
 EXPORT_SYMBOL(amd_iommu_free_device);
 

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] iommu/amd: Fix amd_iommu_free_device()
  2015-02-03 12:25             ` [PATCH] iommu/amd: Fix amd_iommu_free_device() Peter Zijlstra
@ 2015-02-03 17:04               ` Jesse Barnes
  2015-02-03 17:34               ` Joerg Roedel
  2015-02-04 14:35               ` Joerg Roedel
  2 siblings, 0 replies; 24+ messages in thread
From: Jesse Barnes @ 2015-02-03 17:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Benjamin LaHaise, linux-aio, Linux Kernel, Joerg Roedel

On Tue, 3 Feb 2015 13:25:51 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Tue, Feb 03, 2015 at 12:27:33PM +0100, Peter Zijlstra wrote:
> > drivers/iommu/amd_iommu_v2.c-static void
> > put_device_state_wait(struct device_state *dev_state)
> > drivers/iommu/amd_iommu_v2.c-{ drivers/iommu/amd_iommu_v2.c-
> > DEFINE_WAIT(wait); drivers/iommu/amd_iommu_v2.c-
> > drivers/iommu/amd_iommu_v2.c-   prepare_to_wait(&dev_state->wq,
> > &wait, TASK_UNINTERRUPTIBLE); drivers/iommu/amd_iommu_v2.c-   if
> > (!atomic_dec_and_test(&dev_state->count))
> > drivers/iommu/amd_iommu_v2.c:           schedule();
> > drivers/iommu/amd_iommu_v2.c-   finish_wait(&dev_state->wq, &wait);
> > drivers/iommu/amd_iommu_v2.c- drivers/iommu/amd_iommu_v2.c-
> > free_device_state(dev_state); drivers/iommu/amd_iommu_v2.c-}
> > 
> > No loop...
> 
> Jesse, any objections to this?

None from me, seems reasonable.  But this is Joerg's code I think, so
he should ack.

Jesse

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] iommu/amd: Fix amd_iommu_free_device()
  2015-02-03 12:25             ` [PATCH] iommu/amd: Fix amd_iommu_free_device() Peter Zijlstra
  2015-02-03 17:04               ` Jesse Barnes
@ 2015-02-03 17:34               ` Joerg Roedel
  2015-02-03 19:23                 ` Linus Torvalds
  2015-02-04 14:35               ` Joerg Roedel
  2 siblings, 1 reply; 24+ messages in thread
From: Joerg Roedel @ 2015-02-03 17:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Benjamin LaHaise, linux-aio, Linux Kernel, Jesse Barnes

Hi Peter,

On Tue, Feb 03, 2015 at 01:25:51PM +0100, Peter Zijlstra wrote:
> Subject: iommu/amd: Fix amd_iommu_free_device()
> 
> put_device_state_wait() doesn't loop on the condition and a spurious
> wakeup will have it free the device state even though there might still
> be references out to it.

Hmm, have you seen spurious wakeups happening? The wakeup only comes
from put_device_state() and only when the reference count goes to zero.
>From my understanding this should be correct, but maybe I got the API
wrong.


	Joerg


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] iommu/amd: Fix amd_iommu_free_device()
  2015-02-03 17:34               ` Joerg Roedel
@ 2015-02-03 19:23                 ` Linus Torvalds
  2015-02-03 22:56                   ` Joerg Roedel
  0 siblings, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2015-02-03 19:23 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: Peter Zijlstra, Benjamin LaHaise, linux-aio, Linux Kernel, Jesse Barnes

On Tue, Feb 3, 2015 at 9:34 AM, Joerg Roedel <joro@8bytes.org> wrote:
>
> Hmm, have you seen spurious wakeups happening? The wakeup only comes
> from put_device_state() and only when the reference count goes to zero.

Even if that were true - and it isn't - the patch in question making
it simpler and more robust, removing more lines than it adds.

Maybe the callers have other events pending that might wake that
process up? Are you going to guarantee that no caller ever might be
doing their own wakeup thing?

In fact, even if you do *(not* have anything in your call chain that
has other wait queues, active, it's still wrong to do the
single-wakeup thing, because we've also traditionally had cases where
we do directed wakeups of threads

Basically, you should always assume you can get spurious wakeups due
to multiple independent wait-queues, the same way you should always
assume you can get spurious hardware interrupts due to shared
interrupt lines among multiple idnependent devices.

Because there are also non-wait-queue wakeup events, although they are
relatively rare. The obvious one that people are aware of is signals,
which only affects TASK_INTERRUPTIBLE - or TASK_KILLABLE - waits, but
there can actually be spurious wakeups even for TASK_UNINTERRUPTIBLE
cases.

In particular, there are things like "wake_up_process()" which
generally doesn't wake up random tasks, but we do have cases where we
use it locklessly (taking a reference to a task). See for example
"kernel/locking/rwsem-add.c", which uses this model for waking up a
process *without* necessarily holding a lock, which can result in the
process actually being woken up *after* it already started running and
doing something else.

The __rwsem_do_wake() thing does

                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);

to basically say "I can wake up the process even after it has released
the "waiter" structure, using the "waiter->task" thing as a release.
The waiter, in turn, waits for waiter.task to become NULL, but what
this all means is that you can actually get a "delayed wakeup" from
having done a successful down_read() some time ago.

All of these are really really unlikely to hit you, but basically you
should *never* assume anything about "single wakeup".

The linux wakeup model has always been that there can be multiple
sources of wakeup events, and the proper way to wait for something is
generally a variation of

   prepare_to_wait(..);
   for (;;) {
       set_task_state(..);
       .. test for condition and break out ..
       schedule()
   }
   finish_wait()

although obviously these days we *heavily* favor the "wait_event()"
interfaces that encapsulate something that looks like that loop and
makes for a much simpler programming model. We should basically never
favor the open-coded version, because it's so easy to get it wrong.

                            Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-02 18:45                 ` Linus Torvalds
@ 2015-02-03 22:23                   ` Dave Chinner
  2015-02-03 23:34                     ` Benjamin LaHaise
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2015-02-03 22:23 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-aio, Kernel Mailing List, Benjamin LaHaise

On Mon, Feb 02, 2015 at 10:45:42AM -0800, Linus Torvalds wrote:
> On Sun, Feb 1, 2015 at 9:54 PM, Dave Chinner <david@fromorbit.com> wrote:
> >
> > Simple enough - the patch below removes the warning from generic/036
> > for me.
> 
> So because this is a debugging thing, I'd actually prefer these
> "sched_annotate_sleep()" calls to always come with short  comments in
> code why they exist and why they are fine.

Ok, I just copied the existing users which don't have any comments.

> In this case, it might be as simple as
> 
>  "If the mutex blocks and wakes us up, the loop in
> wait_event_interruptible_hrtimeout() will just schedule without
> sleeping and repeat. The ting-lock doesn't block often enough for this
> to be a performance issue".
> 
> or perhaps just point to the comment in read_events().

Both. New patch below.

-Dave.
-- 
Dave Chinner
david@fromorbit.com

aio: annotate aio_read_event_ring for sleep patterns

From: Dave Chinner <dchinner@redhat.com>

Under CONFIG_DEBUG_ATOMIC_SLEEP=y, aio_read_event_ring() will throw
warnings like the following due to being called from wait_event
context:

 WARNING: CPU: 0 PID: 16006 at kernel/sched/core.c:7300 __might_sleep+0x7f/0x90()
 do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810d85a3>] prepare_to_wait_event+0x63/0x110
 Modules linked in:
 CPU: 0 PID: 16006 Comm: aio-dio-fcntl-r Not tainted 3.19.0-rc6-dgc+ #705
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  ffffffff821c0372 ffff88003c117cd8 ffffffff81daf2bd 000000000000d8d8
  ffff88003c117d28 ffff88003c117d18 ffffffff8109beda ffff88003c117cf8
  ffffffff821c115e 0000000000000061 0000000000000000 00007ffffe4aa300
 Call Trace:
  [<ffffffff81daf2bd>] dump_stack+0x4c/0x65
  [<ffffffff8109beda>] warn_slowpath_common+0x8a/0xc0
  [<ffffffff8109bf56>] warn_slowpath_fmt+0x46/0x50
  [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
  [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
  [<ffffffff810bdfcf>] __might_sleep+0x7f/0x90
  [<ffffffff81db8344>] mutex_lock+0x24/0x45
  [<ffffffff81216b7c>] aio_read_events+0x4c/0x290
  [<ffffffff81216fac>] read_events+0x1ec/0x220
  [<ffffffff810d8650>] ? prepare_to_wait_event+0x110/0x110
  [<ffffffff810fdb10>] ? hrtimer_get_res+0x50/0x50
  [<ffffffff8121899d>] SyS_io_getevents+0x4d/0xb0
  [<ffffffff81dba5a9>] system_call_fastpath+0x12/0x17
 ---[ end trace bde69eaf655a4fea ]---

There is not actually a bug here, so annotate the code to tell the
debug logic that everything is just fine and not to fire a false
positive.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
V2: added comment to explain the annotation.

 fs/aio.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index 1b7893e..327ef6d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1140,6 +1140,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	long ret = 0;
 	int copy_ret;
 
+	/*
+	 * The mutex can block and wake us up and that will cause
+	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
+	 * and repeat. This should be rare enough that it doesn't cause
+	 * peformance issues. See the comment in read_events() for more detail.
+	 */
+	sched_annotate_sleep();
 	mutex_lock(&ctx->ring_lock);
 
 	/* Access to ->ring_pages here is protected by ctx->ring_lock. */

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] iommu/amd: Fix amd_iommu_free_device()
  2015-02-03 19:23                 ` Linus Torvalds
@ 2015-02-03 22:56                   ` Joerg Roedel
  0 siblings, 0 replies; 24+ messages in thread
From: Joerg Roedel @ 2015-02-03 22:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, Benjamin LaHaise, linux-aio, Linux Kernel, Jesse Barnes

Hi Linus,

On Tue, Feb 03, 2015 at 11:23:44AM -0800, Linus Torvalds wrote:
> The linux wakeup model has always been that there can be multiple
> sources of wakeup events, and the proper way to wait for something is
> generally a variation of
> 
>    prepare_to_wait(..);
>    for (;;) {
>        set_task_state(..);
>        .. test for condition and break out ..
>        schedule()
>    }
>    finish_wait()
> 
> although obviously these days we *heavily* favor the "wait_event()"
> interfaces that encapsulate something that looks like that loop and
> makes for a much simpler programming model. We should basically never
> favor the open-coded version, because it's so easy to get it wrong.

Thanks for your explanations. I was aware that there could be spurious
wakeups for TASK_INTERRUPTIBLE sleeps, but thought that this couldn't
happen for TASK_UNINTERRUPTIBLE. I didn't know about the lockless
wakeups and their implications, but now the Peters patch makes total
sense.

That these spurious wakeup situations are so unlikely also explains why
they were not yet reported to me :)


	Joerg


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-03 11:55               ` Peter Zijlstra
@ 2015-02-03 23:24                 ` Jens Axboe
  2015-02-04 10:18                   ` [PATCH] block: Simplify bsg complete all Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Jens Axboe @ 2015-02-03 23:24 UTC (permalink / raw)
  To: Peter Zijlstra, Linus Torvalds; +Cc: Benjamin LaHaise, linux-aio, Linux Kernel

On 02/03/2015 04:55 AM, Peter Zijlstra wrote:
> On Tue, Feb 03, 2015 at 12:33:48PM +0100, Peter Zijlstra wrote:
>>> block/bsg.c-    prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
>>> block/bsg.c-    spin_unlock_irq(&bd->lock);
>>> block/bsg.c:    io_schedule();
>>> block/bsg.c-    finish_wait(&bd->wq_done, &wait);
>>>
>>> Which is double buggy because:
>>>   1) it doesn't loop
>>>   2) it sets TASK_UNINTERRUPTIBLE _after_ testing for the sleep event.
>>
>> OK, actually had a look at this one; it might be ok.
>>
>> The spinlock might fully serialize the state so no fails, and the entire
>> function is called in a loop. Still seriously obtuse code.
>
> Jens, would something like the below work for you?

Yes, from a cursory look, that seems fine to me. Though I will hold the 
fact that you label my code as 'seriously obtuse' against you. Some day.

I can pull this in for testing for 3.20. Mind sending a properly 
formatted patch (signed off, commit message, all that stuff)?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE
  2015-02-03 22:23                   ` Dave Chinner
@ 2015-02-03 23:34                     ` Benjamin LaHaise
  0 siblings, 0 replies; 24+ messages in thread
From: Benjamin LaHaise @ 2015-02-03 23:34 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Linus Torvalds, linux-aio, Kernel Mailing List

On Wed, Feb 04, 2015 at 09:23:12AM +1100, Dave Chinner wrote:
> On Mon, Feb 02, 2015 at 10:45:42AM -0800, Linus Torvalds wrote:
> > On Sun, Feb 1, 2015 at 9:54 PM, Dave Chinner <david@fromorbit.com> wrote:
> > >
> > > Simple enough - the patch below removes the warning from generic/036
> > > for me.
> > 
> > So because this is a debugging thing, I'd actually prefer these
> > "sched_annotate_sleep()" calls to always come with short  comments in
> > code why they exist and why they are fine.
> 
> Ok, I just copied the existing users which don't have any comments.
> 
> > In this case, it might be as simple as
> > 
> >  "If the mutex blocks and wakes us up, the loop in
> > wait_event_interruptible_hrtimeout() will just schedule without
> > sleeping and repeat. The ting-lock doesn't block often enough for this
> > to be a performance issue".
> > 
> > or perhaps just point to the comment in read_events().
> 
> Both. New patch below.

I've added my Signed-off-by and applied it to my aio-fixes tree.  I'll send 
a pull for this later this evening once I commit a fix for an mremap case 
that just got pointed out earlier today as well.

		-ben

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> 
> aio: annotate aio_read_event_ring for sleep patterns
> 
> From: Dave Chinner <dchinner@redhat.com>
> 
> Under CONFIG_DEBUG_ATOMIC_SLEEP=y, aio_read_event_ring() will throw
> warnings like the following due to being called from wait_event
> context:
> 
>  WARNING: CPU: 0 PID: 16006 at kernel/sched/core.c:7300 __might_sleep+0x7f/0x90()
>  do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810d85a3>] prepare_to_wait_event+0x63/0x110
>  Modules linked in:
>  CPU: 0 PID: 16006 Comm: aio-dio-fcntl-r Not tainted 3.19.0-rc6-dgc+ #705
>  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>   ffffffff821c0372 ffff88003c117cd8 ffffffff81daf2bd 000000000000d8d8
>   ffff88003c117d28 ffff88003c117d18 ffffffff8109beda ffff88003c117cf8
>   ffffffff821c115e 0000000000000061 0000000000000000 00007ffffe4aa300
>  Call Trace:
>   [<ffffffff81daf2bd>] dump_stack+0x4c/0x65
>   [<ffffffff8109beda>] warn_slowpath_common+0x8a/0xc0
>   [<ffffffff8109bf56>] warn_slowpath_fmt+0x46/0x50
>   [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
>   [<ffffffff810d85a3>] ? prepare_to_wait_event+0x63/0x110
>   [<ffffffff810bdfcf>] __might_sleep+0x7f/0x90
>   [<ffffffff81db8344>] mutex_lock+0x24/0x45
>   [<ffffffff81216b7c>] aio_read_events+0x4c/0x290
>   [<ffffffff81216fac>] read_events+0x1ec/0x220
>   [<ffffffff810d8650>] ? prepare_to_wait_event+0x110/0x110
>   [<ffffffff810fdb10>] ? hrtimer_get_res+0x50/0x50
>   [<ffffffff8121899d>] SyS_io_getevents+0x4d/0xb0
>   [<ffffffff81dba5a9>] system_call_fastpath+0x12/0x17
>  ---[ end trace bde69eaf655a4fea ]---
> 
> There is not actually a bug here, so annotate the code to tell the
> debug logic that everything is just fine and not to fire a false
> positive.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> V2: added comment to explain the annotation.
> 
>  fs/aio.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 1b7893e..327ef6d 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1140,6 +1140,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
>  	long ret = 0;
>  	int copy_ret;
>  
> +	/*
> +	 * The mutex can block and wake us up and that will cause
> +	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
> +	 * and repeat. This should be rare enough that it doesn't cause
> +	 * peformance issues. See the comment in read_events() for more detail.
> +	 */
> +	sched_annotate_sleep();
>  	mutex_lock(&ctx->ring_lock);
>  
>  	/* Access to ->ring_pages here is protected by ctx->ring_lock. */

-- 
"Thought is the essence of where you are now."

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] block: Simplify bsg complete all
  2015-02-03 23:24                 ` Jens Axboe
@ 2015-02-04 10:18                   ` Peter Zijlstra
  2015-02-04 17:06                     ` Jens Axboe
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-02-04 10:18 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linus Torvalds, Benjamin LaHaise, linux-aio, Linux Kernel

On Tue, Feb 03, 2015 at 04:24:55PM -0700, Jens Axboe wrote:
> 
> Yes, from a cursory look, that seems fine to me. Though I will hold the fact
> that you label my code as 'seriously obtuse' against you. Some day.

Hehe, fair enough. I'm sure I've written my fair share of it too, we've
all got our 'bad' days, I'll get you a beer in BOS to make up if you
like :-)

> I can pull this in for testing for 3.20. Mind sending a properly formatted
> patch (signed off, commit message, all that stuff)?

Sure, here goes.


---
Subject: block: Simplify bsg complete all
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 3 Feb 2015 12:55:31 +0100

It took me a few tries to figure out what this code did; lets rewrite
it into a more regular form.

The thing that makes this one 'special' is the BSG_F_BLOCK flag, if
that is not set we're not supposed/allowed to block and should spin
wait for completion.

The (new) io_wait_event() will never see a false condition in case of
the spinning and we will therefore not block.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 block/bsg.c          |   72 +++++++++++++++++----------------------------------
 include/linux/wait.h |   15 ++++++++++
 2 files changed, 40 insertions(+), 47 deletions(-)

--- a/block/bsg.c
+++ b/block/bsg.c
@@ -136,42 +136,6 @@ static inline struct hlist_head *bsg_dev
 	return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
 }
 
-static int bsg_io_schedule(struct bsg_device *bd)
-{
-	DEFINE_WAIT(wait);
-	int ret = 0;
-
-	spin_lock_irq(&bd->lock);
-
-	BUG_ON(bd->done_cmds > bd->queued_cmds);
-
-	/*
-	 * -ENOSPC or -ENODATA?  I'm going for -ENODATA, meaning "I have no
-	 * work to do", even though we return -ENOSPC after this same test
-	 * during bsg_write() -- there, it means our buffer can't have more
-	 * bsg_commands added to it, thus has no space left.
-	 */
-	if (bd->done_cmds == bd->queued_cmds) {
-		ret = -ENODATA;
-		goto unlock;
-	}
-
-	if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
-		ret = -EAGAIN;
-		goto unlock;
-	}
-
-	prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
-	spin_unlock_irq(&bd->lock);
-	io_schedule();
-	finish_wait(&bd->wq_done, &wait);
-
-	return ret;
-unlock:
-	spin_unlock_irq(&bd->lock);
-	return ret;
-}
-
 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 				struct sg_io_v4 *hdr, struct bsg_device *bd,
 				fmode_t has_write_perm)
@@ -482,6 +446,30 @@ static int blk_complete_sgv4_hdr_rq(stru
 	return ret;
 }
 
+static bool bsg_complete(struct bsg_device *bd)
+{
+	bool ret = false;
+	bool spin;
+
+	do {
+		spin_lock_irq(&bd->lock);
+
+		BUG_ON(bd->done_cmds > bd->queued_cmds);
+
+		/*
+		 * All commands consumed.
+		 */
+		if (bd->done_cmds == bd->queued_cmds)
+			ret = true;
+
+		spin = !test_bit(BSG_F_BLOCK, &bd->flags);
+
+		spin_unlock_irq(&bd->lock);
+	} while (!ret && spin);
+
+	return ret;
+}
+
 static int bsg_complete_all_commands(struct bsg_device *bd)
 {
 	struct bsg_command *bc;
@@ -492,17 +480,7 @@ static int bsg_complete_all_commands(str
 	/*
 	 * wait for all commands to complete
 	 */
-	ret = 0;
-	do {
-		ret = bsg_io_schedule(bd);
-		/*
-		 * look for -ENODATA specifically -- we'll sometimes get
-		 * -ERESTARTSYS when we've taken a signal, but we can't
-		 * return until we're done freeing the queue, so ignore
-		 * it.  The signal will get handled when we're done freeing
-		 * the bsg_device.
-		 */
-	} while (ret != -ENODATA);
+	io_wait_event(bd->wq_done, bsg_complete(bd));
 
 	/*
 	 * discard done commands
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -267,6 +267,21 @@ do {									\
 	__wait_event(wq, condition);					\
 } while (0)
 
+#define __io_wait_event(wq, condition)					\
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    io_schedule())
+
+/*
+ * io_wait_event() -- like wait_event() but with io_schedule()
+ */
+#define io_wait_event(wq, condition)					\
+do {									\
+	might_sleep();							\
+	if (condition)							\
+		break;							\
+	__io_wait_event(wq, condition);					\
+} while (0)
+
 #define __wait_event_freezable(wq, condition)				\
 	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			    schedule(); try_to_freeze())

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] iommu/amd: Fix amd_iommu_free_device()
  2015-02-03 12:25             ` [PATCH] iommu/amd: Fix amd_iommu_free_device() Peter Zijlstra
  2015-02-03 17:04               ` Jesse Barnes
  2015-02-03 17:34               ` Joerg Roedel
@ 2015-02-04 14:35               ` Joerg Roedel
  2 siblings, 0 replies; 24+ messages in thread
From: Joerg Roedel @ 2015-02-04 14:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Benjamin LaHaise, linux-aio, Linux Kernel, Jesse Barnes

On Tue, Feb 03, 2015 at 01:25:51PM +0100, Peter Zijlstra wrote:
> Subject: iommu/amd: Fix amd_iommu_free_device()
> 
> put_device_state_wait() doesn't loop on the condition and a spurious
> wakeup will have it free the device state even though there might still
> be references out to it.
> 
> Fix this by using 'normal' wait primitives.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  drivers/iommu/amd_iommu_v2.c | 20 +++++++-------------
>  1 file changed, 7 insertions(+), 13 deletions(-)

Applied this, thanks Peter.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] block: Simplify bsg complete all
  2015-02-04 10:18                   ` [PATCH] block: Simplify bsg complete all Peter Zijlstra
@ 2015-02-04 17:06                     ` Jens Axboe
  0 siblings, 0 replies; 24+ messages in thread
From: Jens Axboe @ 2015-02-04 17:06 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Linus Torvalds, Benjamin LaHaise, linux-aio, Linux Kernel

On 02/04/2015 03:18 AM, Peter Zijlstra wrote:
> On Tue, Feb 03, 2015 at 04:24:55PM -0700, Jens Axboe wrote:
>>
>> Yes, from a cursory look, that seems fine to me. Though I will hold the fact
>> that you label my code as 'seriously obtuse' against you. Some day.
>
> Hehe, fair enough. I'm sure I've written my fair share of it too, we've
> all got our 'bad' days, I'll get you a beer in BOS to make up if you
> like :-)

I wont pass on that :-)

>> I can pull this in for testing for 3.20. Mind sending a properly formatted
>> patch (signed off, commit message, all that stuff)?
>
> Sure, here goes.

Great thanks, applied.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2015-02-04 17:06 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-01 14:40 [GIT PULL] aio: fix sleeping while TASK_INTERRUPTIBLE Benjamin LaHaise
2015-02-01 21:01 ` Linus Torvalds
2015-02-01 22:14   ` Benjamin LaHaise
2015-02-01 23:09     ` Linus Torvalds
2015-02-01 23:33     ` Linus Torvalds
2015-02-02  0:16       ` Benjamin LaHaise
2015-02-02  1:18         ` Linus Torvalds
2015-02-02  5:29           ` Dave Chinner
     [not found]             ` <CA+55aFwvEcq-rAbqF2qTut=kJgFZZnhHptoPi6FSVrF4+1tBNA@mail.gmail.com>
2015-02-02  5:54               ` Dave Chinner
2015-02-02 18:45                 ` Linus Torvalds
2015-02-03 22:23                   ` Dave Chinner
2015-02-03 23:34                     ` Benjamin LaHaise
2015-02-03 11:27           ` Peter Zijlstra
2015-02-03 11:33             ` Peter Zijlstra
2015-02-03 11:55               ` Peter Zijlstra
2015-02-03 23:24                 ` Jens Axboe
2015-02-04 10:18                   ` [PATCH] block: Simplify bsg complete all Peter Zijlstra
2015-02-04 17:06                     ` Jens Axboe
2015-02-03 12:25             ` [PATCH] iommu/amd: Fix amd_iommu_free_device() Peter Zijlstra
2015-02-03 17:04               ` Jesse Barnes
2015-02-03 17:34               ` Joerg Roedel
2015-02-03 19:23                 ` Linus Torvalds
2015-02-03 22:56                   ` Joerg Roedel
2015-02-04 14:35               ` Joerg Roedel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).